Create a function that converts linear model into a dataframe

# Turns a Regression into a data frame
library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(sandwich)
library(ggplot2)

#Turns a Regression into a data frame
Model.DF <- function(Model, Robust.SE = NULL) {
  
  #Extract Coefficients
  Model.Output <- as.data.frame(coef(summary(Model)))
  Model.Output$Label <- rownames(Model.Output)
  rownames(Model.Output) <- NULL
  
  #Generate Confidence Intervals
  CI <- as.data.frame(confint(Model, variable.names(Model), level=0.95))
  CI$Label <- rownames(CI)
  rownames(CI) <- NULL
  
  #Merge Model and CIs together 
  Model.Output.Final <- merge(x = Model.Output, y = CI, by =c("Label"))
  
  #Name the columns numeric
  colnames(Model.Output.Final) <- c("Label", "Coeff", "SE", "t.value", "P.Value", "lower", "upper")
  
  Model.Output.Final$Sig.05 <- ifelse(Model.Output.Final$P.Value <= .05, 1,0)
  Model.Output.Final$Sig.10 <- ifelse(Model.Output.Final$P.Value <= .10, 1,0)
  
  #Adjusted R Squared
  Model.Output.Final$AdJ.R2 <- summary(Model)$adj.r.squared
  
  #Dependent Variable
  Model.Output.Final$DV <- all.vars(formula(Model))[1]
  
  #Check for NA's in Model
  for(n in names(coef(Model))){
    if(is.na(Model$coefficients[[n]]) == T){
      newRow <- data.frame(Label=n, 
                           Coeff = NA, 
                           SE = NA, 
                           t.value = NA,
                           P.Value = NA,
                           lower = NA,
                           upper = NA,
                           AdJ.R2 = NA, 
                           Sig.05 = NA,
                           Sig.10 = NA,
                           DV=all.vars(formula(Model))[1])
      
      Model.Output.Final <- rbind(Model.Output.Final, newRow)
      
    }
  }
  
  #Option for Robust Standard Errors
  if(is.null(Robust.SE) == F){
    library(sandwich)
    x<- coeftest(Model, vcov = sandwich::vcovHC(Model, type=Robust.SE))
    xr<- setNames(data.frame(x[1:dim(x)[1], 2]), c("Robust Standard Errors"))
    xr$Label<- rownames(xr); rownames(xr) <- NULL
    
    Model.Output.Final <- merge(Model.Output.Final, xr, by = "Label")
    
  }
  
  return(Model.Output.Final)
  
}

Example without Robost Standard Errors

# Create a fake data set
library(randomNames)

## Warning: package 'randomNames' was built under R version 4.0.5

## Error in get(genname, envir = envir) : object 'testthat_print' not found

set.seed(1992)
age <- sample(18:30, 50, replace=TRUE)
score <- sample(1:100, 50, replace=TRUE)
year <- sample(c("first", "second", "third", "fourth"), 50, replace=TRUE)
major <- sample(c("ps", "cs", "ir", "stats"), 50, replace =TRUE)
attend <- sample(c("yes", "no"), 50, replace=TRUE)
office <- sample(c("always", "sometimes", "never", "not applicable", "don't know"), 50, replace=TRUE)
gender <- sample(c("male", "female"), 50, replace=TRUE)
student <- randomNames(50)
class <- data.frame(age, score, year, major, attend, office, gender, student)


# Example
Result <- lm(score ~ age + year + attend + gender, data=class); summary(Result)

## 
## Call:
## lm(formula = score ~ age + year + attend + gender, data = class)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -48.298 -17.421  -0.337  17.597  56.207 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  67.6690    24.7704   2.732   0.0091 **
## age          -0.7615     0.9557  -0.797   0.4299   
## yearfourth   -3.2172     9.4413  -0.341   0.7349   
## yearsecond   -2.8109    10.6896  -0.263   0.7938   
## yearthird     3.3660    10.4669   0.322   0.7493   
## attendyes    -8.3145     7.4686  -1.113   0.2718   
## gendermale    3.9053     7.4178   0.526   0.6013   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.31 on 43 degrees of freedom
## Multiple R-squared:  0.06542,    Adjusted R-squared:  -0.06499 
## F-statistic: 0.5016 on 6 and 43 DF,  p-value: 0.8036

Result.df <-Model.DF(Result)

# ggplot 
ggplot(Result.df, aes(x = Label, y = Coeff, ymin = lower, ymax = upper)) +
  geom_pointrange() +
  coord_flip ()+
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
  guides(color=FALSE) +
  theme_bw() +
  labs(title = "Test Scores Predictors",
       subtitle = "Model 1a",
       y = "Test Scores",
       caption = "add a caption") +
  theme(plot.title = element_text(hjust = 0.5, size = 14),
        plot.subtitle = element_text(hjust = 0.5),
        axis.title.y=element_blank(), 
        axis.title = element_text(size = 14),
        axis.title.x = element_text(size = 14),
        axis.text.y = element_text(size = 14),
        axis.text.x = element_text(size = 14))

Example with Robust Standard Errors

library(lmtest)
Model.DF(Result, Robust.SE = "HC3")

##         Label      Coeff        SE    t.value    P.Value      lower      upper
## 1 (Intercept) 67.6689917 24.770443  2.7318443 0.00909933  17.714633 117.623350
## 2         age -0.7615199  0.955706 -0.7968139 0.42993687  -2.688885   1.165845
## 3   attendyes -8.3144711  7.468565 -1.1132622 0.27178056 -23.376268   6.747326
## 4  gendermale  3.9052879  7.417794  0.5264757 0.60126308 -11.054119  18.864695
## 5  yearfourth -3.2171750  9.441348 -0.3407538 0.73494827 -22.257469  15.823118
## 6  yearsecond -2.8109023 10.689635 -0.2629559 0.79384047 -24.368605  18.746801
## 7   yearthird  3.3660103 10.466899  0.3215862 0.74932428 -17.742504  24.474525
##   Sig.05 Sig.10      AdJ.R2    DV Robust Standard Errors
## 1      1      1 -0.06498932 score             21.6077770
## 2      0      0 -0.06498932 score              0.8578995
## 3      0      0 -0.06498932 score              7.6964403
## 4      0      0 -0.06498932 score              8.2511786
## 5      0      0 -0.06498932 score              9.9457700
## 6      0      0 -0.06498932 score             10.4186155
## 7      0      0 -0.06498932 score             10.6621436

Example with Marginal Effects of Logistic Regression

# Run a glm model
class$office <- ifelse(class$office  %in% c("not applicable", "don't know"), NA, class$office)
Results2 <- glm(factor(office) ~ age + score + major, data=class, family=binomial(link="logit"))

# Calculate the margins
library(margins)
Results2.m<- summary(margins(Results2, variables = c("major"), type="response", change="sd"))
Results2.m

##      factor     AME     SE       z      p   lower  upper
##     majorir  0.2605 0.2058  1.2662 0.2054 -0.1428 0.6638
##     majorps -0.0924 0.2636 -0.3503 0.7261 -0.6091 0.4244
##  majorstats  0.0729 0.2499  0.2917 0.7705 -0.4170 0.5628

# Use multiwaycov to cluster
# vcov=cluster.vcov(Results2, cluster=class$level)))

# Plot it with ggplot

ggplot(Results2.m, aes(x = factor, y = AME, ymin = lower, ymax = upper)) +
  geom_pointrange() +
  coord_flip ()+
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
  guides(color=FALSE) +
  theme_bw() +
  labs(title = "Attending Office Hours Predictors",
       subtitle = "Model 1a",
       y = "",
       caption = "add a caption") +
  theme(plot.title = element_text(hjust = 0.5, size = 14),
        plot.subtitle = element_text(hjust = 0.5),
        axis.title.y=element_blank(), 
        axis.title = element_text(size = 14),
        axis.title.x = element_text(size = 14),
        axis.text.y = element_text(size = 14),
        axis.text.x = element_text(size = 14))

Coeff Plots

Sara Kazemian

6/2/2021

Create a function that converts linear model into a dataframe

Example without Robost Standard Errors

Example with Robust Standard Errors

Example with Marginal Effects of Logistic Regression