library("fixest")
library("modelsummary")
library("kableExtra")
# Please note that the tables produced by modelsummary require the following LaTeX packages to compile:
# caption, longtable, booktabs.
# You will need to include those in your header or preamble if you want documents to compile properly.
# Generate data ----
# Simulated sample: three rnorm() regressors, one fixed effect drawn from the
# lower-case letters (FE1) and one built by pasting three LETTERS draws (FE2).
N <- 1000000
df <- data.frame(
  y = NA, # placeholder; the outcome is filled in below
  var1 = rnorm(N),
  var2 = rnorm(N),
  var3 = rnorm(N),
  FE1 = letters[sample(1:26, size = N, replace = TRUE)],
  # NOTE(review): LETTERS has 26 elements, so indices 27-67 return NA and
  # paste0() writes them out as the text "NA" -- confirm that 1:67 is intended.
  FE2 = paste0(
    LETTERS[sample(1:67, size = N, replace = TRUE)],
    LETTERS[sample(1:26, size = N, replace = TRUE)],
    LETTERS[sample(1:26, size = N, replace = TRUE)]
  )
)

# Outcome: linear in var1, an interaction of var2 and var3, the position of
# each fixed-effect level (alphabet position for FE1, order of first
# appearance for FE2), plus normal noise with standard deviation 3.
df$y <- with(
  df,
  -2 * var1 + 4 * var2 * var3 + match(FE1, letters) +
    match(FE2, unique(FE2)) + rnorm(N, mean = 0, sd = 3)
)
# Columns to summarise and the (backtick-quoted where needed) row labels that
# replace them in the table.
theColz <- c("y", "var1", "var2", "var3")
theLabz <- c("Outcome", "`Variable 1`", "`Variable 2`", "`Variable 3`")

# Two-sided formula for datasummary():
#   left  -- one "(label = column)" term per variable, joined with " + "
#   right -- the statistics that become the table columns
frmla <- as.formula(paste(
  paste0("(", theLabz, " = ", theColz, ")", collapse = " + "),
  "Mean + (`St. Dev.` = SD) + Min + Max",
  sep = " ~ "
))
# Summary-statistics table ----
# Builds the LaTeX table with datasummary(), attaches a threeparttable
# footnote, tidies the LaTeX source with a fixed sequence of regex
# substitutions and prints the result. Wrapped in local() so the intermediate
# objects do not end up in the workspace.
local({
  note_text <- paste0("This table contains summary statistics.",
                      " There are ", nrow(df), " observations used.")

  tex <- datasummary(
    frmla,
    output = "latex",
    data = df,
    title = "Summary Statistics\\label{tab:summary_stats}",
    fmt = "%.3f",
    # One alignment character per table column: the row-label column plus
    # Mean, St. Dev., Min and Max.
    align = "lcccc"
  )
  # tex <- kableExtra::kable_styling(tex, latex_options = c("scale_down"))
  tex <- kableExtra::footnote(tex, general = note_text,
                              threeparttable = TRUE, escape = FALSE)

  # Drop a single trailing zero that follows a non-zero digit right before a
  # column separator (" &"). Lookbehind/lookahead require perl = TRUE.
  tex <- gsub("(?<=[1-9])0?(?= &)", "", tex, perl = TRUE)

  # Remaining clean-ups as (pattern, replacement) pairs. They are applied
  # strictly top to bottom; later rules rely on what earlier ones produced.
  cleanup_rules <- list(
    # decimal part made only of zeros, followed by a space
    c("\\.0+ ", " "),
    # decimal part made only of zeros, followed by a backslash
    c("\\.0+\\\\", "\\\\"),
    # empty \label{tab:}
    c("\\\\label\\{tab:\\}", ""),
    # empty \textsuperscript{} becomes \textbf{Note:}
    c("\\\\textsuperscript\\{\\}", "\\\\textbf\\{Note:\\}"),
    # prefix every \item with an italic "Note:"
    c("\\\\item", "\\\\item \\\\textit\\{Note:\\}"),
    # remove a doubled note prefix followed by a blank and a control character
    c("\\\\item \\\\textit\\{Note:\\} \\\\textit\\{Note: \\}[[:blank:]][[:cntrl:]]", ""),
    # set the note in \footnotesize
    c("\\\\item ", "\\\\item \\\\footnotesize "),
    # NOTE(review): in the recorded output this replacement lands on
    # \begin{tabular}, not on the table float -- confirm that is intended.
    c("\\[t\\]", "[!htbp]"),
    # left-align the table notes
    c("\\\\begin\\{tablenotes\\}", "\\\\begin\\{tablenotes\\}\\[flushleft\\]")
  )
  for (rule in cleanup_rules) {
    tex <- gsub(rule[1], rule[2], tex)
  }

  print(tex)
})
## \begin{table}
##
## \caption{\label{tab:unnamed-chunk-2}Summary Statistics\label{tab:summary_stats}}
## \centering
## \begin{threeparttable}
## \begin{tabular}[!htbp]{lcccc}
## \toprule
## & Mean & St. Dev. & Min & Max\\
## \midrule
## Outcome & 4073.371 & 5223.76 & -14.009 & 18284.912\\
## Variable 1 & -0 & 1 & -4.788 & 4.762\\
## Variable 2 & -0 & 1.001 & -4.962 & 4.679\\
## Variable 3 & 0 & 0.999 & -4.736 & 5.104\\
## \bottomrule
## \end{tabular}
## \begin{tablenotes}[flushleft]
## \item \footnotesize \textit{Note:} This table contains summary statistics. There are 1000000 observations used.
## \end{tablenotes}
## \end{threeparttable}
## \end{table}
# Timing comparison: feols() vs. lm() ----
# Each fit is bracketed by Sys.time() and the elapsed time is printed. The
# "##" lines are the recorded timings (presumably from the author's machine).

# No fixed effects -- feols()
timeNow <- Sys.time()
reg1 <- feols(y ~ var1 + var2:var3, data = df)
print(difftime(Sys.time(), timeNow))
## Time difference of 0.131731 secs

# No fixed effects -- lm()
timeNow <- Sys.time()
reg_lm <- lm(y ~ var1 + var2:var3, data = df)
print(difftime(Sys.time(), timeNow))
## Time difference of 0.6450691 secs

# FE1 as fixed effect -- feols()
timeNow <- Sys.time()
reg2 <- feols(y ~ var1 + var2:var3 | FE1, data = df)
print(difftime(Sys.time(), timeNow))
## Time difference of 0.3968339 secs

# FE1 as dummy variables -- lm(); overwrites the previous reg_lm
timeNow <- Sys.time()
reg_lm <- lm(y ~ var1 + var2:var3 + as.factor(FE1), data = df)
print(difftime(Sys.time(), timeNow))
## Time difference of 3.809715 secs

# FE1 and FE2 as fixed effects -- feols()
timeNow <- Sys.time()
reg3 <- feols(y ~ var1 + var2:var3 | FE1 + FE2, data = df)
print(difftime(Sys.time(), timeNow))
## Time difference of 0.468055 secs
# The lm() version of this last model failed on the author's machine; even
# with the sample size cut down to 10,000 it took more than a few minutes.
# Supposing you have run the regressions:
# Sample coef_map: names are the coefficient names in the models, values are
# the labels shown in the table.
cm <- setNames(
  c("Variable 1", "Variable 2 x Variable 3"),
  c("var1", "var2:var3")
)

# Extra rows appended to the table, one per rbind() argument: a label followed
# by one entry per model. See
# https://vincentarelbundock.github.io/modelsummary/articles/modelsummary.html#add_rows
xtraRows <- data.frame(rbind(
  c("FE1", "No", "Yes", "Yes"),
  c("FE2", "No", "No", "Yes"),
  c("Extra Info", "1", "2", "3")
))
# attr(xtraRows, 'position') <- c(6:8) # use this to specify the row location
# Models shown in the regression table, one column each.
THE_REGZ <- list(reg1, reg2, reg3)

# Regression table ----
# Renders the models as LaTeX, adds a spanning header and a threeparttable
# footnote, then rewrites the LaTeX source with an ordered list of regex
# substitutions before printing. local() keeps the intermediates out of the
# workspace.
local({
  tex <- modelsummary(
    THE_REGZ,
    output = "latex", # include latex option to print, otherwise it appears in viewer
    stars = TRUE,
    coef_map = cm,
    # coef_omit = c("log|asinh"),
    title = "Effect of X on Y\\label{tab:reg_1}",
    add_rows = xtraRows,
    gof_omit = "IC|Log|Within|Pseudo|Errors|Adj|^FE" # remove certain rows
  )
  tex <- kableExtra::add_header_above(
    tex,
    c(" " = 1, "outcome" = length(THE_REGZ))
  )
  # If you have a small table, leave this out or it will look huge!
  # tex <- kableExtra::kable_styling(tex, latex_options = c("scale_down"))
  tex <- kableExtra::footnote(
    tex,
    general = list(paste0("Note Note Note.",
                          " Other note saying other things.",
                          "")),
    threeparttable = TRUE
  )

  # (pattern, replacement) pairs, applied strictly top to bottom; later rules
  # rely on the text produced by earlier ones.
  cleanup_rules <- list(
    # empty \label{tab:}
    c("\\\\label\\{tab:\\}", ""),
    # empty \textsuperscript{} becomes \textbf{Note:}
    c("\\\\textsuperscript\\{\\}", "\\\\textbf\\{Note:\\}"),
    # prefix every \item with an italic "Note:"
    c("\\\\item", "\\\\item \\\\textit\\{Note:\\}"),
    # remove a doubled note prefix followed by a blank and a control character
    c("\\\\item \\\\textit\\{Note:\\} \\\\textit\\{Note: \\}[[:blank:]][[:cntrl:]]", ""),
    # set the note in \footnotesize
    c("\\\\item ", "\\\\item \\\\footnotesize "),
    # drop the bold "Note:" introduced two rules earlier
    c("\\\\textbf\\{Note:\\} ", ""),
    # NOTE(review): in the recorded output this replacement lands on
    # \begin{tabular}, not on the table float -- confirm that is intended.
    c("\\[t\\]", "[!htbp]"),
    # friendlier label for the observation count
    c("Num\\.Obs\\.", "Observations"),
    # left-align the table notes
    c("\\\\begin\\{tablenotes\\}", "\\\\begin\\{tablenotes\\}\\[flushleft\\]"),
    # delete everything from \cmidrule up to the next "\\" (shortest match)
    c("\\\\cmidrule.*?\\\\\\\\", ""),
    # make the significance legend span all columns, right-aligned
    c("\\\\multicolumn\\{[[:digit:]]\\}\\{l\\}\\{\\* p",
      paste0("\\\\multicolumn\\{", length(THE_REGZ)+1, "}\\{r\\}\\{\\* p"))
  )
  for (rule in cleanup_rules) {
    tex <- gsub(rule[1], rule[2], tex)
  }

  print(tex)
})
## \begin{table}
##
## \caption{\label{tab:unnamed-chunk-4}Effect of X on Y\label{tab:reg_1}}
## \centering
## \begin{threeparttable}
## \begin{tabular}[!htbp]{lccc}
## \toprule
## \multicolumn{1}{c}{ } & \multicolumn{3}{c}{outcome} \\
##
## \midrule
## Variable 1 & -9.395* & -9.429* & -2.001***\\
## & (5.224) & (4.790) & (0.003)\\
## Variable 2 x Variable 3 & 11.020** & 11.016** & 3.998***\\
## & (5.214) & (4.362) & (0.003)\\
## \midrule
## Observations & 1e+06 & 1e+06 & 1e+06\\
## R2 & 0.000 & 0.000 & 1.000\\
## FE1 & No & Yes & Yes\\
## FE2 & No & No & Yes\\
## Extra Info & 1 & 2 & 3\\
## \bottomrule
## \multicolumn{4}{r}{* p $<$ 0.1, ** p $<$ 0.05, *** p $<$ 0.01}\\
## \end{tabular}
## \begin{tablenotes}[flushleft]
## \item \footnotesize \textit{Note:} Note Note Note. Other note saying other things.
## \end{tablenotes}
## \end{threeparttable}
## \end{table}