Use the sharks dataset for this HW. This should be slightly more difficult than the first homework. You will likely have to do some googling, but I have hinted at some functions to look into. After each line, try to view the data to see if the results of your code match what you wanted it to do.
# Hints: the scales package; help pages to consult:
# ?ifelse
# ?ifelse
# ?grepl
library("stargazer")
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library("scales")
# Load the shark-incident data, keeping text columns as character vectors.
# The workspace is intentionally NOT cleared here: `rm(list = ls())` deletes
# the caller's objects but does not detach packages or reset options, so it
# never gives a truly clean session. Restart R for a fresh run instead.
# NOTE(review): the path below is absolute and machine-specific; consider a
# project-relative path so the script runs on other machines.
sharks <- read.csv(
  "C:/Users/alexc/Desktop/Empirical Workshop/data/sharks.csv",
  stringsAsFactors = FALSE
)
# View(sharks)
# Number of recorded incidents per year, drawn as a line chart.
timeSeries <- table(sharks$Year)
plot(timeSeries, type = "l", ylab = "Frequency", xlab = "Year")
# Age distribution. Age is stored as text, so entries that are not plain
# numbers (e.g. "teen", "20s") become NA under as.numeric() -- with a coercion
# warning -- and table() leaves them out of the counts.
plot(table(as.numeric(sharks$Age)), ylab = "Frequency", xlab = "Age")
## Warning in table(as.numeric(sharks$Age)): NAs introduced by coercion
# Convert the fatal flag into numeric level codes (1, 2, ...) so it can be
# handed to plot() as a colour index.
sharks[["fatalColor"]] <- as.numeric(as.factor(sharks[["Fatal..Y.N."]]))
# Opaque variant kept for reference:
# plot(sharks$Longitude, sharks$Latitude, pch = 19, col = c(1,2))
# Scatter of incident locations; alpha() makes the points 90% transparent so
# dense clusters remain readable.
plot(
  sharks$Longitude, sharks$Latitude,
  pch = 19,
  col = alpha(sharks$fatalColor, 0.1)
)
# Indicator: 1 when the incident lies west of longitude -100, 0 otherwise
# (NA where Longitude is missing).
sharks$westCoast <- as.numeric(sharks$Longitude < -100)
# Check the split between the two groups.
table(sharks[["westCoast"]])
##
## 0 1
## 761 237
# Tabulate the incident-type categories before building the unprovoked flag.
table(sharks[["Type"]])
##
## Invalid Provoked Questionable
## 61 57 4
## Sea Disaster Under investigation Unprovoked
## 2 1 837
## Watercraft
## 36
# Indicator: 1 for incidents typed exactly "Unprovoked", 0 for every other type.
sharks$unprovoked <- as.numeric(sharks$Type == "Unprovoked")
table(sharks[["unprovoked"]])
##
## 0 1
## 161 837
# Indicator: 0 when the injury text contains "No injury" (case-sensitive) or
# "minor" (any case); 1 for everything else.
sharks$severe_injury <- as.numeric(
  !grepl("No injury", sharks$Injury) &
    !grepl("minor", tolower(sharks$Injury))
)
table(sharks[["severe_injury"]])
##
## 0 1
## 229 769
# Where the Name field holds only "female" or "male", use it to fill in Sex;
# otherwise keep the recorded Sex value.
sharks$Sex <- ifelse(
  sharks$Name == "female",
  "F",
  ifelse(sharks$Name == "male", "M", sharks$Sex)
)
# Treat blank Sex entries as missing.
sharks$Sex <- ifelse(sharks$Sex != "", sharks$Sex, NA)
# Indicator: 1 when Sex is "M", 0 for any other recorded value, NA if missing.
sharks$male <- as.numeric(sharks$Sex == "M")
table(sharks[["male"]])
##
## 0 1
## 224 755
# Plain-text summary statistics for the numeric columns of the data frame.
stargazer(
  sharks,
  type = "text"
)
##
## =============================================================================
## Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
## -----------------------------------------------------------------------------
## Year 998 2,009.572 5.588 2,000 2,005 2,014 2,019
## original.order 998 5,297.683 640.417 4,239 4,739.2 5,842 6,427
## Latitude 998 29.611 5.116 5.889 27.355 32.819 60.691
## Longitude 998 -94.624 26.284 -169.534 -96.548 -80.327 -64.621
## Maximum.Temperature 980 82.559 7.805 42.200 79.100 87.900 98.500
## Minimum.Temperature 980 68.324 9.093 23.100 63.750 74.600 84.800
## Temperature 980 75.079 7.986 35.900 71.900 80.500 91.400
## Precipitation 980 0.154 0.653 0.000 0.000 0.000 11.300
## Precipitation.Cover 980 7.362 16.883 0.000 0.000 8.300 100.000
## Wind.Speed 979 14.200 4.556 0.000 11.200 16.400 44.700
## Wind.Gust 126 32.548 7.686 4.500 30.000 36.425 57.500
## Visibility 975 9.233 1.147 1.500 8.800 10.000 12.800
## fatalColor 998 1.964 0.277 1 2 2 3
## westCoast 998 0.237 0.426 0 0 0 1
## unprovoked 998 0.839 0.368 0 1 1 1
## severe_injury 998 0.771 0.421 0 1 1 1
## male 979 0.771 0.420 0.000 1.000 1.000 1.000
## -----------------------------------------------------------------------------
# Weather regressions: OLS of the 0/1 severe_injury indicator on coast, then
# adding weather covariates, then letting the temperature slope differ by coast.
reg1 <- lm(formula = severe_injury ~ westCoast, data = sharks)
reg2 <- lm(
  formula = severe_injury ~ westCoast + Temperature + Precipitation +
    Wind.Speed + Visibility,
  data = sharks
)
reg3 <- lm(
  formula = severe_injury ~ westCoast * Temperature + Precipitation +
    Wind.Speed + Visibility,
  data = sharks
)
# Side-by-side text table; residual standard error and F statistic are hidden.
stargazer(
  reg1, reg2, reg3,
  omit.stat = c("ser", "f"),
  type = "text"
)
##
## ===================================================
## Dependent variable:
## -----------------------------
## severe_injury
## (1) (2) (3)
## ---------------------------------------------------
## westCoast -0.297*** -0.256*** -0.914***
## (0.030) (0.035) (0.271)
##
## Temperature 0.006*** 0.003
## (0.002) (0.002)
##
## Precipitation -0.002 -0.001
## (0.020) (0.019)
##
## Wind.Speed 0.003 0.002
## (0.003) (0.003)
##
## Visibility 0.002 -0.002
## (0.011) (0.011)
##
## westCoast:Temperature 0.009**
## (0.004)
##
## Constant 0.841*** 0.288* 0.627***
## (0.015) (0.164) (0.214)
##
## ---------------------------------------------------
## Observations 998 975 975
## R2 0.090 0.111 0.116
## Adjusted R2 0.089 0.106 0.111
## ===================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Exploring the person/interaction: add incident and victim characteristics to
# the coast-by-temperature specification, one step at a time.
reg1 <- lm(severe_injury ~ westCoast * Temperature + unprovoked, data = sharks)
reg2 <- lm(severe_injury ~ westCoast * Temperature + male, data = sharks)
reg3 <- lm(
  severe_injury ~ westCoast * Temperature + unprovoked + male,
  data = sharks
)
# Age is stored as text; as.numeric() turns non-numeric entries into NA (hence
# the warning), and lm() then drops those rows from the estimation sample.
reg4 <- lm(
  severe_injury ~ westCoast * Temperature + unprovoked + male +
    as.numeric(Age),
  data = sharks
)
## Warning in eval(predvars, data, env): NAs introduced by coercion
# Year enters as factor(Year) so that column (5) really contains year fixed
# effects (one dummy per year), matching its "Year FE" label. A bare `Year`
# term only fits a single linear time trend.
reg5 <- lm(
  severe_injury ~ westCoast * Temperature + unprovoked + male +
    as.numeric(Age) + factor(Year),
  data = sharks
)
## Warning in eval(predvars, data, env): NAs introduced by coercion
# omit = "Year" is a regular expression, so it hides every factor(Year) dummy.
# NOTE: any results table recorded before this change was estimated with a
# linear Year term; re-run to refresh column (5).
stargazer(reg1, reg2, reg3, reg4, reg5,
  add.lines = list(c("Year FE", rep("N", 4), "Y")),
  type = "text", omit.stat = c("ser", "f"), omit = "Year"
)
##
## =======================================================================
## Dependent variable:
## -------------------------------------------------
## severe_injury
## (1) (2) (3) (4) (5)
## -----------------------------------------------------------------------
## westCoast -0.972*** -0.998*** -1.021*** -1.050*** -1.052***
## (0.264) (0.263) (0.263) (0.297) (0.294)
##
## Temperature 0.002 0.002 0.002 0.002 0.002
## (0.002) (0.002) (0.002) (0.003) (0.003)
##
## unprovoked 0.078** 0.066* -0.018 -0.028
## (0.035) (0.035) (0.044) (0.043)
##
## male -0.048 -0.046 -0.057* -0.068**
## (0.030) (0.030) (0.033) (0.033)
##
## as.numeric(Age) 0.001 0.001
## (0.001) (0.001)
##
## westCoast:Temperature 0.010*** 0.011*** 0.011*** 0.012*** 0.012***
## (0.004) (0.004) (0.004) (0.004) (0.004)
##
## Constant 0.618*** 0.710*** 0.682*** 0.746*** 20.865***
## (0.185) (0.189) (0.189) (0.211) (4.907)
##
## -----------------------------------------------------------------------
## Year FE N N N N Y
## Observations 980 963 963 761 761
## R2 0.120 0.116 0.119 0.086 0.106
## Adjusted R2 0.116 0.112 0.114 0.078 0.097
## =======================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
#Including age decreases the sample size by about 200.
# List the raw Age values that fail numeric conversion, with their counts.
table(
  sharks$Age[is.na(as.numeric(sharks$Age))]
)
## Warning in table(sharks$Age[is.na(as.numeric(sharks$Age))]): NAs introduced
## by coercion
##
## 12 or 13 20s 30s 50s 60s 6½ a minor
## 198 1 3 3 1 2 1 1
## mid-30s teen Teen Teens
## 1 4 4 1
#Maybe we can start by replacing teens with ~16 and removing the "s" after some of these. Still, 198 are missing altogether.
# Flag rows whose Age cannot be read as a number (blank or free-text ages).
lim <- is.na(as.numeric(sharks[["Age"]]))
## Warning: NAs introduced by coercion
# Compare the severe-injury rate between rows with and without a usable age.
t.test(
  x = sharks$severe_injury[lim],
  y = sharks$severe_injury[!lim]
)
##
## Welch Two Sample t-test
##
## data: sharks$severe_injury[lim] and sharks$severe_injury[!lim]
## t = -5.0123, df = 305.38, p-value = 9.129e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.247812 -0.108089
## sample estimates:
## mean of x mean of y
## 0.6318182 0.8097686
# Compare the west-coast share between rows with and without a usable age.
t.test(
  x = sharks$westCoast[lim],
  y = sharks$westCoast[!lim]
)
##
## Welch Two Sample t-test
##
## data: sharks$westCoast[lim] and sharks$westCoast[!lim]
## t = 3.2974, df = 318.61, p-value = 0.001086
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.04646003 0.18392090
## sample estimates:
## mean of x mean of y
## 0.3272727 0.2120823
#So it appears that incidents with missing ages are less likely to involve a severe injury (0.63 vs. 0.81) and more likely to be on the west coast (0.33 vs. 0.21). Perhaps this is spurious, but it might be a signal of something non-random.