For this homework, use knicks.csv. Try your best not to “hard code” anything. For example, there are 23 Knicks players, but try not to type the number 23 in your code. This is to keep the script as flexible as possible, in case you have to repeat this code for a completely different roster list. Use View(knicks) to look at the dataset.
sd()
or var()
.knicks2
# Load the roster; text columns stay plain character vectors (no factors).
knicks <- read.csv(
  file = "C:/Users/alexc/Desktop/Empirical Workshop/data/knicks.csv",
  stringsAsFactors = FALSE
)
# Average of the Wt column.
mean(knicks[["Wt"]])
## [1] 218.3913
# Range of Wt: the gap between the largest and smallest value.
diff(range(knicks[["Wt"]]))
## [1] 90
# Sample standard deviation of Wt.
sd(knicks[["Wt"]])
## [1] 23.54056
Compute the standard deviation of weight without using sd() or var().
# Sample standard deviation of Wt computed by hand, i.e.
#   sqrt( sum((x - mean(x))^2) / (n - 1) )
# The result should match sd(knicks$Wt).
deviations <- knicks$Wt - mean(knicks$Wt)
# Sum of squared deviations from the mean.
numerator <- sum(deviations^2)
# n - 1, with n taken from the data so the roster size is never hard coded.
denominator <- nrow(knicks) - 1
sqrt(numerator / denominator) # could also do (numerator / denominator)^(1/2)
## [1] 23.54056
# Two-sample t-test on Wt (Welch, per the output header below): players whose
# Pos is "PG" or "SG" versus all remaining players.
# `!` has lower precedence than `%in%`, so the second argument negates the
# whole membership test and is the exact complement of the first group.
t.test(knicks$Wt[knicks$Pos %in% c("PG", "SG")],
knicks$Wt[!knicks$Pos %in% c("PG", "SG")])
##
## Welch Two Sample t-test
##
## data: knicks$Wt[knicks$Pos %in% c("PG", "SG")] and knicks$Wt[!knicks$Pos %in% c("PG", "SG")]
## t = -4.4806, df = 14.245, p-value = 0.0004972
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -49.44191 -17.46578
## sample estimates:
## mean of x mean of y
## 203.8462 237.3000
# Two-sample t-test on No. (Welch, per the output header below): players whose
# Pos is "PG" or "SG" versus players whose Pos is none of "PG", "SG" or "C".
# NOTE(review): the second group also excludes "C", so it is not the complement
# of the first group -- presumably intentional; confirm against the assignment
# question.
t.test(knicks$No.[knicks$Pos %in% c("PG", "SG")],
knicks$No.[!knicks$Pos %in% c("PG", "SG", "C")])
##
## Welch Two Sample t-test
##
## data: knicks$No.[knicks$Pos %in% c("PG", "SG")] and knicks$No.[!knicks$Pos %in% c("PG", "SG", "C")]
## t = -0.65027, df = 9.75, p-value = 0.5305
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -18.68041 10.26283
## sample estimates:
## mean of x mean of y
## 13.07692 17.28571
# Mean of Exp computed two ways (two statements on one line, separated by `;`):
#   1. as.numeric() turns non-numeric entries into NA (hence the coercion
#      warning) and na.rm = TRUE drops them from the average.
#   2. entries equal to "R" are counted as 0 instead of being dropped;
#      as.numeric() is still evaluated on the whole column inside ifelse(),
#      which is why the same warning appears again.
# NOTE(review): "R" presumably marks rookies -- confirm against the data source.
mean(as.numeric(knicks$Exp), na.rm = TRUE); mean(ifelse(knicks$Exp == "R", 0, as.numeric(knicks$Exp)))
## Warning in mean(as.numeric(knicks$Exp), na.rm = TRUE): NAs introduced by
## coercion
## [1] 4.105263
## Warning in ifelse(knicks$Exp == "R", 0, as.numeric(knicks$Exp)): NAs
## introduced by coercion
## [1] 3.391304
# Five lowest-Wt players: order() gives row positions sorted by ascending Wt;
# keep the first five positions and look up the matching Player entries.
knicks$Player[order(knicks$Wt)[seq_len(5)]]
## [1] "Trey Burke\\burketr01" "Frank Ntilikina\\ntilila01"
## [3] "Dennis Smith\\smithde03" "Kadeem Allen\\allenka01"
## [5] "Emmanuel Mudiay\\mudiaem01"
# Five highest-Wt players: negating Wt makes order() sort in descending order.
knicks$Player[order(-knicks$Wt)[seq_len(5)]]
## [1] "DeAndre Jordan\\jordade01" "Enes Kanter\\kanteen01"
## [3] "Noah Vonleh\\vonleno01" "Henry Ellenson\\ellenhe01"
## [5] "Luke Kornet\\kornelu01"
knicks2
# Build knicks2: rows with a non-empty College and with X equal to "us".
# The two assignments are equivalent filters (De Morgan's law); the second
# simply overwrites the first with the same rows.
knicks2 <- knicks[!(knicks$College == "") & knicks$X == "us", ]
knicks2 <- knicks[!(knicks$X != "us" | knicks$College == ""), ]
# Drop column X from the full roster by name.
knicks[["X"]] <- NULL
# Alternatives, kept for reference:
# knicks <- knicks[, -7]  # hard coded: another roster with extra columns would lose the wrong column
# knicks <- knicks[, !colnames(knicks) == "X"]
# knicks <- knicks[, -which(colnames(knicks) == "X")]  # careful: drops every column if "X" is absent
# knicks <- knicks[, is.na(match(colnames(knicks), "X"))]
# Pearson correlation test between No. and Exp. Exp is converted on the fly
# with as.numeric(); entries that are not numbers become NA, which triggers the
# coercion warning below. Exp itself is left unchanged by this call.
cor.test(knicks$No., as.numeric(knicks$Exp))
## Warning in cor.test.default(knicks$No., as.numeric(knicks$Exp)): NAs
## introduced by coercion
##
## Pearson's product-moment correlation
##
## data: knicks$No. and as.numeric(knicks$Exp)
## t = 0.6127, df = 17, p-value = 0.5482
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3291994 0.5635717
## sample estimates:
## cor
## 0.1469884
# Make Exp numeric in place; entries that are not numbers become NA.
knicks[["Exp"]] <- as.numeric(knicks[["Exp"]])
## Warning: NAs introduced by coercion
# Correlation matrix of Wt, No. and Exp using only rows where Exp is not NA.
cor(knicks[!is.na(knicks[["Exp"]]), c("Wt", "No.", "Exp")])
## Wt No. Exp
## Wt 1.00000000 0.08057782 0.3759798
## No. 0.08057782 1.00000000 0.1469884
## Exp 0.37597976 0.14698842 1.0000000
# Replace the missing Exp values with 0, then recompute on every row.
knicks[["Exp"]][is.na(knicks[["Exp"]])] <- 0
cor(knicks[c("Wt", "No.", "Exp")])
## Wt No. Exp
## Wt 1.00000000 0.08911475 0.37758812
## No. 0.08911475 1.00000000 0.09131957
## Exp 0.37758812 0.09131957 1.00000000