Automate within function

52 views Asked by At

I made a function to automate the production of .tex files, it was working so far until I decided to put on the stargazer the result from a dplyr aggregation.

mean_outcome is not working if I put within it i, i, outcome or eval(i). I always get this error:

Warning message:
There was 1 warning in `summarise()`.
ℹ In argument: `mean = mean(i, na.rm = T)`.
Caused by warning in `mean.default()`:
! argument is not numeric or logical: returning NA 

I want to show in stargazer in the part of add.lines.

reg_coef_tables <- function(df, y) {
  
  ### Function: Converts panel data to data usable for .tex tables
  # - df     = panel data for the country
  # - y      = outcome (fcs, fies, etc.) <
  # - Full example: reg_pooled_lasso(df, fcs)
  
  df            <- df %>%
    
    filter(round_pooled != 0) 
  
  # First, we need to get the list of lasso controls to be used for the regression
  
  outcome = eval(`i`)  
  
  mean_outcome  <- df %>% 
    
    filter(round_pooled == 11 & treatment_arm == 0) %>% 
    
    summarise(mean = mean(i, na.rm = T)) %>% 
    
    pull(mean)
    
  
  # define control variable (i.e. outcome at baseline), which has the suffix _imp
  control       <- paste0(outcome, "_imp") 
  
  # remove rows with NAs in the outcome variable, otherwise we can't run glmnet
  df            <- df[complete.cases(df[, c(outcome, control)]),] 
  
  # vars
  
  vars          <- df[complete.cases(df[, c(outcome, control, 
                                            'hhh_age_imp',              'sex_hhh_imp',
                                            'hh_total_imp',             'hasplots_imp',
                                            'hh_own_bus_imp',           'own_livestock_imp', 
                                            'tot_lstock_count_tlu_imp', 'wage_job_any_imp', 
                                            'hh_assets_tot_imp',        'farm_assets_tot_imp', 
                                            'round_cont',               'level1')]),] 
  
  # define response variable
  depvar        <- data.matrix(vars[, outcome])  

  # define matrix of possible predictor variables
  indepvars     <- data.matrix(vars[, c('hhh_age_imp',              'sex_hhh_imp',
                                      'hh_total_imp',             'hasplots_imp',
                                      'hh_own_bus_imp',           'own_livestock_imp', 
                                      'tot_lstock_count_tlu_imp', 'wage_job_any_imp', 
                                      'hh_assets_tot_imp',        'farm_assets_tot_imp', 
                                      'round_cont',               'level1')])
  
  # run a lasso to get a value for lambda
  model         <- cv.glmnet(indepvars,
                             depvar,
                             alpha = 1,
                             type.measure = "mse")
  
  # get the coefficients with the best lambda
  c             <- coef(model, s = "lambda.1se", exact = TRUE)
  
  # get the list of selected indicators (where coefficient is not 0, i.e. not dropped through the lasso regression)
  inds          <- which(c!=0)
  
  # store the indicators in a vector
  variables     <- row.names(c)[inds]
  
  # remove unnecessary variables from the lasso vector, these are the fixed effects and intercept using the function we defined above
  variables     <- variables[!(variables %in% c('(Intercept)', 'round_cont', 'level1'))]
  
  # convert the character vector into one string with + in between for the regression below
  controls      <- paste0(variables, collapse = " + ")
  controls      <- paste0("+ ", controls)
  control       <- paste0("+ ", control)
  
  # for some outcomes, no lasso controls will be selected, for this reason we need to adjust the vector of controls so that the regression below still runs
  variables_all <- toString(variables)
  controls      <- ifelse(variables_all == "", "", controls)
  
  # Now, we can run the regression with the selected lasso controls
  
  reg1 <- felm(as.formula(paste(outcome, "~ treatment", " | level1 + round_cont | 0 | level4 + hhid")), data = df)
  
  reg2 <- felm(as.formula(paste(outcome, "~ treatment", control, " | level1 + round_cont | 0 | level4 + hhid")), data = df)
  
  reg3 <- felm(as.formula(paste(outcome, "~ treatment", controls, control, " | level1 + round_cont | 0 | level4 + hhid")), data = df) 

  reg4 <- felm(as.formula(paste(outcome, "~ treat_uct + treat_ffa", " | level1 + round_cont | 0 | level4 + hhid")), data = df)
  
  reg5 <- felm(as.formula(paste(outcome, "~ treat_uct + treat_ffa", control, " | level1 + round_cont | 0 | level4 + hhid")), data = df)
  
  reg6 <- felm(as.formula(paste(outcome, "~ treat_uct + treat_ffa", controls, control, " | level1 + round_cont | 0 | level4 + hhid")), data = df)
 
  stargazer(reg1, reg2,
            reg3, reg4,
            reg5, reg6,
            align = TRUE, 
            dep.var.labels = c("(1)", "(2)","(3)", "(4)", "(5)", "(6)"),
            omit.stat = c("f", "adj.rsq", "ser"),
            no.space = TRUE,
            digits = 3, # number of decimals
            add.lines = list(c('Mean outcome', rep(`mean_outcome`, 6)),
                             c('Lasso controls', "No", "No", "Yes", "No", "No", "Yes"),
                             c('County FE', rep("Yes", 6)),
                             c('Month FE',  rep("Yes", 6))),
            dep.var.caption = "", # remove dep var header
            out = paste0(high_frequency_output_filepath, outcome, ".tex"))

  
   }

liv_vars  = c("hasplots")
liv_out_list = vector("list", 1)
for(i in liv_vars) {
  liv_out_list[[i]] <- reg_coef_tables(df, i)
}
0

There are 0 answers