# ---- Prepare data ----
step_data <- imputed_model_data %>%
filter(!is.na(ATT8SCR), ATT8SCR > 0)
# ---- Fit 5 nested models ----
m1 <- lm(log(ATT8SCR) ~ log(PTFSM6CLA1A),
data = step_data)
m2 <- lm(log(ATT8SCR) ~ log(PTFSM6CLA1A) + log(PERCTOT),
data = step_data)
m3 <- lm(log(ATT8SCR) ~ log(PTFSM6CLA1A) + log(PERCTOT) + PTPRIORLO,
data = step_data)
m4 <- lm(log(ATT8SCR) ~ log(PTFSM6CLA1A) + log(PERCTOT) + log(PNUMEAL) +
PTPRIORLO + ADMPOL_PT + gorard_segregation +
remained_in_the_same_school +
teachers_on_leadership_pay_range_percent +
log(average_number_of_days_taken),
data = step_data)
m5 <- lmer(log(ATT8SCR) ~ log(PTFSM6CLA1A) + log(PERCTOT) + log(PNUMEAL) +
PTPRIORLO + ADMPOL_PT + gorard_segregation +
remained_in_the_same_school +
teachers_on_leadership_pay_range_percent +
log(average_number_of_days_taken) +
(1 | year_label) + (1 | OFSTEDRATING_1) + (1 | gor_name/LANAME),
data = step_data, REML = TRUE, na.action = na.exclude,
control = lmerControl(optimizer = "bobyqa",
optCtrl = list(maxfun = 20000)))
# ---- Helper: extract coefficients with stars ----
extract_coefs <- function(model, is_lmer = FALSE) {
s <- summary(model)$coefficients
df <- as.data.frame(s)
df$term <- rownames(df)
p_col <- if ("Pr(>|t|)" %in% names(df)) "Pr(>|t|)" else "Pr(>|z|)"
t_col <- if ("t value" %in% names(df)) "t value" else "z value"
df %>%
transmute(
term = term,
estimate = Estimate,
t_value = .data[[t_col]],
p_value = .data[[p_col]],
stars = case_when(
p_value < 0.001 ~ "***",
p_value < 0.01 ~ "**",
p_value < 0.05 ~ "*",
TRUE ~ ""
),
est_display = paste0(format(round(estimate, 3), nsmall = 3), stars),
t_display = format(round(t_value, 2), nsmall = 2)
)
}
c1 <- extract_coefs(m1)
c2 <- extract_coefs(m2)
c3 <- extract_coefs(m3)
c4 <- extract_coefs(m4)
c5 <- extract_coefs(m5, is_lmer = TRUE)
# ---- Build unified term list (in model order) ----
# Use Model 5 (full) order, then add any terms not in it
term_order <- c5$term
all_terms <- c(term_order, setdiff(unique(c(c1$term, c2$term, c3$term, c4$term)), term_order))
# ---- Assemble fixed effects table ----
step_tbl <- tibble(term = all_terms) %>%
left_join(c1 %>% select(term, est1 = est_display, t1 = t_display), by = "term") %>%
left_join(c2 %>% select(term, est2 = est_display, t2 = t_display), by = "term") %>%
left_join(c3 %>% select(term, est3 = est_display, t3 = t_display), by = "term") %>%
left_join(c4 %>% select(term, est4 = est_display, t4 = t_display), by = "term") %>%
left_join(c5 %>% select(term, est5 = est_display, t5 = t_display), by = "term") %>%
mutate(across(everything(), \(x) replace_na(x, ""))) %>%
mutate(term = relabel_terms(term))
n_fixed <- nrow(step_tbl)
# ---- Random effects (Model 5 only) ----
vc <- as.data.frame(VarCorr(m5))
# vc has columns: grp, var1, var2, vcov, sdcor
re_rows <- tibble(
term = c("", paste0(" ", vc$grp)),
est1 = "", t1 = "",
est2 = "", t2 = "",
est3 = "", t3 = "",
est4 = "", t4 = "",
est5 = c("", format(round(vc$sdcor, 3), nsmall = 3)),
t5 = c("", rep("(SD)", nrow(vc)))
)
# Label the header row
re_rows$term[1] <- "Random effects"
re_rows$est5[1] <- ""
re_rows$t5[1] <- ""
# Clean up group names for display
re_rows <- re_rows %>%
mutate(term = case_when(
term == "Random effects" ~ "Random effects",
grepl("LANAME:gor_name", term) ~ " LA (nested in region)",
grepl("gor_name", term) & !grepl("LANAME", term) ~ " Region",
grepl("OFSTEDRATING_1", term) ~ " Ofsted rating",
grepl("year_label", term) ~ " Year",
grepl("Residual", term) ~ " Residual",
TRUE ~ term
))
# ---- R² and N rows ----
r2_m1 <- round(summary(m1)$r.squared, 4)
r2_m2 <- round(summary(m2)$r.squared, 4)
r2_m3 <- round(summary(m3)$r.squared, 4)
r2_m4 <- round(summary(m4)$r.squared, 4)
r2_m5 <- performance::r2(m5)
r2m_m5 <- round(r2_m5$R2_marginal, 4)
r2c_m5 <- round(r2_m5$R2_conditional, 4)
n_obs <- nrow(step_data)
footer_rows <- tibble(
term = c("", "R²", "R² (conditional)", "N"),
est1 = c("", as.character(r2_m1), "", format(n_obs, big.mark = ",")),
t1 = c("", "", "", ""),
est2 = c("", as.character(r2_m2), "", format(n_obs, big.mark = ",")),
t2 = c("", "", "", ""),
est3 = c("", as.character(r2_m3), "", format(n_obs, big.mark = ",")),
t3 = c("", "", "", ""),
est4 = c("", as.character(r2_m4), "", format(n_obs, big.mark = ",")),
t4 = c("", "", "", ""),
est5 = c("", as.character(r2m_m5), as.character(r2c_m5), format(n_obs, big.mark = ",")),
t5 = c("", "(marginal)", "(fixed + random)", "")
)
full_tbl <- bind_rows(step_tbl, re_rows, footer_rows)
# ---- Render with kableExtra ----
names(full_tbl) <- c("Variable",
"Est.", "t",
"Est. ", "t ",
"Est. ", "t ",
"Est. ", "t ",
"Est. ", "t ")
kbl(full_tbl, format = "html", align = "lrlrlrlrlrl", escape = FALSE) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE, font_size = 10) %>%
add_header_above(c(" " = 1,
"Model 1:\nFSM only" = 2,
"Model 2:\n+ Absence" = 2,
"Model 3:\n+ Prior att." = 2,
"Model 4:\nAll fixed" = 2,
"Model 5:\nFull multilevel" = 2)) %>%
row_spec(n_fixed + 1, bold = TRUE, extra_css = "border-top: 2px solid #666;") %>%
row_spec(n_fixed + nrow(re_rows) + 1, extra_css = "border-top: 2px solid #666;") %>%
row_spec(n_fixed + nrow(re_rows) + 2, bold = TRUE) %>%
row_spec(n_fixed + nrow(re_rows) + 3, bold = TRUE) %>%
row_spec(n_fixed + nrow(re_rows) + 4, bold = TRUE) %>%
footnote(general = "Significance: *** p < 0.001, ** p < 0.01, * p < 0.05. Models 1–4 are OLS; Model 5 is a multilevel model (lmer) with random intercepts. Random effects shown as standard deviations.",
general_title = "")