Financial Model Experiments

Do school financial variables improve the attainment models?

Published

March 19, 2026

Show code

library(tidyverse)
library(lme4)
library(lmerTest)
library(performance)
library(knitr)
library(scales)
library(patchwork)
library(here)

source(here::here("R", "helpers.R"))

1 Overview

This document takes the baseline multilevel models from model_experiments.qmd and systematically tests whether adding school-level financial variables (from CFR/AAR returns) improves model fit.

The baseline model predicts log(ATT8SCR) using 9 fixed effects with nested random intercepts for year, Ofsted rating, and region/LA:

\[\log(\text{ATT8}) \sim \text{9 predictors} + (1|\text{year}) + (1|\text{Ofsted}) + (1|\text{region/LA})\]

We test financial variables one at a time, then in combination, comparing marginal and conditional \(R^2\).

2 Data Preparation

Show code

# Load the main panel (has gorard_segregation, OFSTEDRATING_1, fin_* cols, etc.)
panel <- readRDS(here::here("data", "panel_data.rds"))

# Supplement with per-pupil columns not carried by 03a_add_finance.R
finance <- readRDS(here::here("data", "financial_returns.rds"))
finance_extra <- finance %>%
  transmute(URN, academic_year,
            fin_premises_costs_pp = premises_costs_pp)
panel <- panel %>%
  left_join(finance_extra, by = c("URN", "academic_year"))

cat("Panel:", nrow(panel), "rows\n")

Panel: 16174 rows

Show code

cat("Financial match:", sum(!is.na(panel$fin_total_income_pp)), "/", nrow(panel), "\n")

Financial match: 10204 / 16174

2.1 Model dataset

The full model dataset requires workforce variables (so drops 2024-25) and financial data (which further reduces sample size).

Show code

model_data <- panel %>%
  filter(MINORGROUP %in% c("Academy", "Maintained school")) %>%
  filter(
    !is.na(ATT8SCR), ATT8SCR > 0,
    PTFSM6CLA1A > 0, PERCTOT > 0, PNUMEAL > 0,
    !is.na(OFSTEDRATING_1), !is.na(gor_name), !is.na(LANAME),
    !is.na(remained_in_the_same_school),
    !is.na(teachers_on_leadership_pay_range_percent),
    average_number_of_days_taken > 0,
    !is.na(gorard_segregation)
  ) %>%
  mutate(
    OFSTEDRATING_1 = factor(OFSTEDRATING_1,
                            levels = c("Outstanding", "Good",
                                       "Requires Improvement", "Inadequate"),
                            ordered = TRUE),
    gor_name   = factor(gor_name),
    LANAME     = factor(LANAME),
    year_label = factor(year_label)
  ) %>%
  droplevels()

contrasts(model_data$OFSTEDRATING_1) <-
  contr.treatment(levels(model_data$OFSTEDRATING_1))

cat("Full model dataset:", nrow(model_data), "rows\n")

Full model dataset: 12210 rows

Show code

cat("Years:", paste(levels(model_data$year_label), collapse = ", "), "\n")

Years: 2021-22, 2022-23, 2023-24, 2024-25

Show code

cat("With financial data:", sum(!is.na(model_data$fin_total_income_pp)), "\n")

With financial data: 9554

To ensure fair comparison, all models (baseline and extended) are fitted on the same rows — only those with complete financial data.

Show code

# Dataset with complete financial data for fair comparison
d_fin <- model_data %>%
  filter(
    !is.na(fin_total_income_pp), fin_total_income_pp > 0,
    !is.na(fin_staff_income_ratio),
    !is.na(fin_balance_income_ratio)
  ) %>%
  droplevels()

contrasts(d_fin$OFSTEDRATING_1) <-
  contr.treatment(levels(d_fin$OFSTEDRATING_1))

cat("Model dataset with financial data:", nrow(d_fin), "rows\n")

Model dataset with financial data: 9548 rows

Show code

cat("Years:", paste(sort(unique(as.character(d_fin$year_label))), collapse = ", "), "\n")

Years: 2021-22, 2022-23, 2023-24, 2024-25

Show code

cat("LAs:", n_distinct(d_fin$LANAME), "\n")

LAs: 152

2.2 Candidate financial variables

Show code

d_fin %>%
  summarise(
    across(
      starts_with("fin_") & where(is.numeric),
      list(
        n = ~sum(!is.na(.x)),
        Median = ~median(.x, na.rm = TRUE),
        SD = ~sd(.x, na.rm = TRUE)
      ),
      .names = "{.col}|{.fn}"
    )
  ) %>%
  pivot_longer(everything(), names_to = c("Variable", "Stat"), names_sep = "\\|") %>%
  pivot_wider(names_from = Stat, values_from = value) %>%
  mutate(
    Label = var_label(Variable),
    Median = round(Median, 2),
    SD = round(SD, 2),
    n = comma(n)
  ) %>%
  select(Variable, Label, n, Median, SD) %>%
  kable()

Table 1: Financial variables available for model experiments

Variable	Label	n	Median	SD
fin_total_income	Total income (£)	9,548	7780000.00	2925063.78
fin_total_expenditure	Total expenditure (£)	9,548	7450867.50	2873014.47
fin_in_year_balance	In-year balance (£)	9,548	228000.00	653071.57
fin_revenue_reserve	Revenue reserve (£)	9,548	505000.04	1131603.35
fin_grant_funding	Grant funding (£)	9,548	7562000.00	2815213.81
fin_self_generated	Self-generated income (£)	9,548	229509.50	424411.14
fin_staff_costs	Total staff costs (£)	9,548	5609000.00	2171065.82
fin_teaching_staff	Teaching staff costs (£)	8,926	4102546.00	1648305.11
fin_premises_costs	Premises costs (£)	9,548	416514.00	394619.97
fin_energy	Energy costs (£)	8,926	159000.00	108866.84
fin_pupil_premium	Pupil premium income (£)	1,685	275598.00	146758.69
fin_total_income_pp	Total income per pupil (£)	9,548	7208.88	1311.94
fin_total_expenditure_pp	Total expenditure per pupil (£)	9,548	6922.25	1379.05
fin_staff_costs_pp	Staff costs per pupil (£)	9,548	5210.32	979.37
fin_teaching_staff_pp	Teaching staff costs per pupil (£)	8,926	3806.37	736.47
fin_grant_funding_pp	Grant funding per pupil (£)	9,548	6985.92	1239.12
fin_staff_income_ratio	Staff costs as % of income	9,548	0.73	0.15
fin_teaching_expenditure_ratio	Teaching staff as % of expenditure	8,916	0.55	0.07
fin_balance_income_ratio	In-year balance as % of income	9,548	0.03	0.20
fin_premises_costs_pp	fin_premises_costs_pp	9,548	394.93	320.94

3 Baseline Model (no financial variables)

This exactly replicates Analysis A “All Pupils” from model_experiments.qmd, but fitted on the financial-data-complete subset for fair comparison.

Show code

mod_baseline <- lmer(
  log(ATT8SCR) ~
    log(PTFSM6CLA1A) + log(PERCTOT) + log(PNUMEAL) +
    PTPRIORLO + ADMPOL_PT + gorard_segregation +
    remained_in_the_same_school +
    teachers_on_leadership_pay_range_percent +
    log(average_number_of_days_taken) +
    (1 | year_label) + (1 | OFSTEDRATING_1) + (1 | gor_name/LANAME),
  data = d_fin,
  REML = TRUE,
  na.action = na.exclude,
  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 20000))
)

r2_baseline <- r2(mod_baseline)
cat("Baseline R² (marginal):", round(r2_baseline$R2_marginal, 4), "\n")

Baseline R² (marginal): 0.6454

Show code

cat("Baseline R² (conditional):", round(r2_baseline$R2_conditional, 4), "\n")

Baseline R² (conditional): 0.7904

Show code

cat("Singular?", isSingular(mod_baseline), "\n")

Singular? FALSE

4 Single-Variable Additions

Each financial variable is added one at a time to the baseline model. We compare the change in marginal and conditional \(R^2\).

Show code

# Define candidate additions
# Per-pupil vars are log-transformed; ratios are left linear
candidates <- list(
  "log(fin_total_income_pp)"          = "Total income pp (log)",
  "log(fin_total_expenditure_pp)"     = "Total expenditure pp (log)",
  "log(fin_staff_costs_pp)"           = "Staff costs pp (log)",
  "log(fin_teaching_staff_pp)"        = "Teaching staff pp (log)",
  "log(fin_grant_funding_pp)"         = "Grant funding pp (log)",
  "log(fin_premises_costs_pp)"        = "Premises costs pp (log)",
  "fin_staff_income_ratio"            = "Staff/income ratio",
  "fin_teaching_expenditure_ratio"    = "Teaching/expenditure ratio",
  "fin_balance_income_ratio"          = "Balance/income ratio"
)

base_formula <- log(ATT8SCR) ~
  log(PTFSM6CLA1A) + log(PERCTOT) + log(PNUMEAL) +
  PTPRIORLO + ADMPOL_PT + gorard_segregation +
  remained_in_the_same_school +
  teachers_on_leadership_pay_range_percent +
  log(average_number_of_days_taken) +
  (1 | year_label) + (1 | OFSTEDRATING_1) + (1 | gor_name/LANAME)

results <- tibble(
  term = character(),
  label = character(),
  r2_marginal = numeric(),
  r2_conditional = numeric(),
  delta_marginal = numeric(),
  delta_conditional = numeric(),
  coef = numeric(),
  se = numeric(),
  p_value = numeric(),
  singular = logical()
)

# Fit each candidate
for (term in names(candidates)) {

  # Build extended formula
  ext_formula <- update(base_formula, paste("~ . +", term))

  # Filter for valid values of the candidate
  # (log-transformed vars need > 0)
  d_sub <- d_fin
  if (grepl("^log\\(", term)) {
    inner_var <- gsub("^log\\((.+)\\)$", "\\1", term)
    d_sub <- d_sub %>% filter(!is.na(.data[[inner_var]]), .data[[inner_var]] > 0)
  } else {
    d_sub <- d_sub %>% filter(!is.na(.data[[term]]), is.finite(.data[[term]]))
  }
  d_sub <- droplevels(d_sub)
  contrasts(d_sub$OFSTEDRATING_1) <- contr.treatment(levels(d_sub$OFSTEDRATING_1))

  tryCatch({
    mod <- lmer(
      ext_formula, data = d_sub, REML = TRUE, na.action = na.exclude,
      control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 20000))
    )

    r2_ext <- r2(mod)
    coefs <- summary(mod)$coefficients

    # Find the row for our added term
    term_clean <- term
    row_match <- grep(gsub("[()]", "", term), rownames(coefs), fixed = FALSE)
    if (length(row_match) == 0) row_match <- nrow(coefs)

    results <- bind_rows(results, tibble(
      term = term,
      label = candidates[[term]],
      r2_marginal = r2_ext$R2_marginal,
      r2_conditional = r2_ext$R2_conditional,
      delta_marginal = r2_ext$R2_marginal - r2_baseline$R2_marginal,
      delta_conditional = r2_ext$R2_conditional - r2_baseline$R2_conditional,
      coef = coefs[row_match[1], "Estimate"],
      se = coefs[row_match[1], "Std. Error"],
      p_value = coefs[row_match[1], "Pr(>|t|)"],
      singular = isSingular(mod)
    ))

  }, error = function(e) {
    message("  Failed: ", term, " — ", e$message)
  })
}

4.1 Results

Show code

results %>%
  mutate(
    `R² (m)` = sprintf("%.4f", r2_marginal),
    `R² (c)` = sprintf("%.4f", r2_conditional),
    `ΔR² (m)` = sprintf("%+.4f", delta_marginal),
    `ΔR² (c)` = sprintf("%+.4f", delta_conditional),
    Coef = sprintf("%.4f", coef),
    SE = sprintf("%.4f", se),
    p = case_when(
      p_value < 0.001 ~ "< 0.001",
      p_value < 0.01  ~ sprintf("%.3f", p_value),
      p_value < 0.05  ~ sprintf("%.3f", p_value),
      TRUE            ~ sprintf("%.3f", p_value)
    ),
    Sig = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01  ~ "**",
      p_value < 0.05  ~ "*",
      p_value < 0.1   ~ ".",
      TRUE            ~ ""
    )
  ) %>%
  select(Variable = label, `R² (m)`, `R² (c)`, `ΔR² (m)`, `ΔR² (c)`,
         Coef, SE, p, Sig) %>%
  kable()

Table 2: Effect of adding each financial variable to the baseline model

Variable	R² (m)	R² (c)	ΔR² (m)	ΔR² (c)	Coef	SE	p	Sig
Total income pp (log)	0.6465	0.7896	+0.0011	-0.0008	-0.0199	0.0058	< 0.001	***
Total expenditure pp (log)	0.6469	0.7895	+0.0014	-0.0008	-0.0163	0.0051	0.001	**
Staff costs pp (log)	0.6462	0.7899	+0.0008	-0.0005	-0.0093	0.0038	0.016	*
Teaching staff pp (log)	0.6514	0.7976	+0.0060	+0.0072	0.0048	0.0050	0.330
Grant funding pp (log)	0.6467	0.7894	+0.0013	-0.0010	-0.0245	0.0063	< 0.001	***
Premises costs pp (log)	0.6449	0.7895	-0.0006	-0.0008	-0.0011	0.0015	0.462
Staff/income ratio	0.6454	0.7904	-0.0001	-0.0000	0.0031	0.0061	0.611
Teaching/expenditure ratio	0.6539	0.7976	+0.0084	+0.0072	0.0689	0.0149	< 0.001	***
Balance/income ratio	0.6454	0.7904	-0.0000	-0.0000	-0.0008	0.0045	0.854

Show code

results %>%
  mutate(label = fct_reorder(label, delta_marginal)) %>%
  ggplot(aes(delta_marginal, label)) +
  geom_col(aes(fill = delta_marginal > 0), show.legend = FALSE) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_fill_manual(values = c(`TRUE` = "#2e6260", `FALSE` = "#7b132b")) +
  scale_x_continuous(labels = label_number(accuracy = 0.0001)) +
  labs(x = expression(Delta * R^2 ~ "(marginal)"),
       y = NULL,
       title = "Change in marginal R² from adding each financial variable")

Figure 1: Change in marginal R² from adding each financial variable

Show code

results %>%
  filter(p_value < 0.1) %>%
  mutate(
    label = fct_reorder(label, coef),
    lower = coef - 1.96 * se,
    upper = coef + 1.96 * se
  ) %>%
  ggplot(aes(coef, label)) +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "grey50") +
  geom_pointrange(aes(xmin = lower, xmax = upper), colour = "#2e6260",
                  linewidth = 0.8, size = 0.8) +
  labs(x = "Coefficient estimate (95% CI)", y = NULL,
       title = "Significant or marginally significant financial predictors")

Figure 2: Coefficient estimates and 95% CIs for financial variable additions

5 Multi-Variable Models

Based on the single-variable results, we test combinations of the most promising financial variables.

Show code

# Select variables that improved marginal R² or were significant
sig_vars <- results %>%
  filter(p_value < 0.05, delta_marginal > 0) %>%
  arrange(desc(delta_marginal))

cat("Significant improvers (p < 0.05, positive ΔR²):\n")

Significant improvers (p < 0.05, positive ΔR²):

Show code

for (i in seq_len(nrow(sig_vars))) {
  cat(sprintf("  %s: ΔR²(m) = %+.4f, coef = %.4f, p = %.4f\n",
              sig_vars$label[i], sig_vars$delta_marginal[i],
              sig_vars$coef[i], sig_vars$p_value[i]))
}

  Teaching/expenditure ratio: ΔR²(m) = +0.0084, coef = 0.0689, p = 0.0000
  Total expenditure pp (log): ΔR²(m) = +0.0014, coef = -0.0163, p = 0.0015
  Grant funding pp (log): ΔR²(m) = +0.0013, coef = -0.0245, p = 0.0001
  Total income pp (log): ΔR²(m) = +0.0011, coef = -0.0199, p = 0.0006
  Staff costs pp (log): ΔR²(m) = +0.0008, coef = -0.0093, p = 0.0157

5.1 Model with all significant financial variables

Show code

if (nrow(sig_vars) > 0) {
  # Build formula with all significant vars
  fin_terms <- paste(sig_vars$term, collapse = " + ")
  multi_formula <- update(base_formula, paste("~ . +", fin_terms))

  # Filter data
  d_multi <- d_fin
  for (term in sig_vars$term) {
    if (grepl("^log\\(", term)) {
      inner_var <- gsub("^log\\((.+)\\)$", "\\1", term)
      d_multi <- d_multi %>% filter(!is.na(.data[[inner_var]]), .data[[inner_var]] > 0)
    } else {
      d_multi <- d_multi %>% filter(!is.na(.data[[term]]), is.finite(.data[[term]]))
    }
  }
  d_multi <- droplevels(d_multi)
  contrasts(d_multi$OFSTEDRATING_1) <- contr.treatment(levels(d_multi$OFSTEDRATING_1))

  mod_multi <- lmer(
    multi_formula, data = d_multi, REML = TRUE, na.action = na.exclude,
    control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 20000))
  )

  r2_multi <- r2(mod_multi)
  cat("Combined model R² (marginal):", round(r2_multi$R2_marginal, 4), "\n")
  cat("Combined model R² (conditional):", round(r2_multi$R2_conditional, 4), "\n")
  cat("Baseline R² (marginal):", round(r2_baseline$R2_marginal, 4), "\n")
  cat("Improvement:", sprintf("%+.4f", r2_multi$R2_marginal - r2_baseline$R2_marginal), "\n")
  cat("Singular?", isSingular(mod_multi), "\n")
  cat("Observations:", nrow(d_multi), "\n\n")

  summary(mod_multi)
} else {
  cat("No financial variables significantly improved the baseline model.\n")
}

Combined model R² (marginal): 0.6552 
Combined model R² (conditional): 0.7972 
Baseline R² (marginal): 0.6454 
Improvement: +0.0097 
Singular? FALSE 
Observations: 8916

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: multi_formula
   Data: d_multi
Control: lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 20000))

REML criterion at convergence: -17708.4

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-48.736  -0.483   0.024   0.542   5.276 

Random effects:
 Groups          Name        Variance  Std.Dev.
 LANAME:gor_name (Intercept) 0.0008404 0.02899 
 gor_name        (Intercept) 0.0002928 0.01711 
 OFSTEDRATING_1  (Intercept) 0.0022490 0.04742 
 year_label      (Intercept) 0.0019534 0.04420 
 Residual                    0.0076204 0.08729 
Number of obs: 8916, groups:  
LANAME:gor_name, 152; gor_name, 9; OFSTEDRATING_1, 4; year_label, 4

Fixed effects:
                                           Estimate Std. Error         df
(Intercept)                               4.730e+00  6.926e-02  1.215e+02
log(PTFSM6CLA1A)                         -6.569e-02  2.979e-03  8.024e+03
log(PERCTOT)                             -2.098e-01  5.148e-03  8.888e+03
log(PNUMEAL)                              7.277e-03  1.283e-03  6.910e+03
PTPRIORLO                                -5.879e-03  1.511e-04  8.662e+03
ADMPOL_PTOTHER NON SEL                   -1.520e-03  7.727e-03  1.430e+03
ADMPOL_PTSEL                              1.037e-01  7.215e-03  8.376e+03
gorard_segregation                       -7.739e-02  4.947e-02  4.592e+02
remained_in_the_same_school               3.892e-04  5.349e-05  8.844e+03
teachers_on_leadership_pay_range_percent -1.314e-03  2.213e-04  8.872e+03
log(average_number_of_days_taken)        -1.486e-02  2.571e-03  8.838e+03
fin_teaching_expenditure_ratio            1.014e-01  1.884e-02  8.872e+03
log(fin_total_expenditure_pp)             3.992e-02  1.712e-02  8.855e+03
log(fin_grant_funding_pp)                -3.488e-02  1.690e-02  8.869e+03
log(fin_total_income_pp)                  6.436e-03  1.654e-02  8.838e+03
log(fin_staff_costs_pp)                  -2.904e-02  9.929e-03  8.835e+03
                                         t value Pr(>|t|)    
(Intercept)                               68.294  < 2e-16 ***
log(PTFSM6CLA1A)                         -22.055  < 2e-16 ***
log(PERCTOT)                             -40.746  < 2e-16 ***
log(PNUMEAL)                               5.673 1.46e-08 ***
PTPRIORLO                                -38.899  < 2e-16 ***
ADMPOL_PTOTHER NON SEL                    -0.197  0.84410    
ADMPOL_PTSEL                              14.377  < 2e-16 ***
gorard_segregation                        -1.564  0.11845    
remained_in_the_same_school                7.277 3.69e-13 ***
teachers_on_leadership_pay_range_percent  -5.939 2.98e-09 ***
log(average_number_of_days_taken)         -5.781 7.67e-09 ***
fin_teaching_expenditure_ratio             5.384 7.49e-08 ***
log(fin_total_expenditure_pp)              2.332  0.01970 *  
log(fin_grant_funding_pp)                 -2.064  0.03901 *  
log(fin_total_income_pp)                   0.389  0.69711    
log(fin_staff_costs_pp)                   -2.925  0.00346 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

6 Disadvantaged Pupils

Do financial variables help predict outcomes for disadvantaged pupils specifically?

Show code

d_disadv <- d_fin %>%
  filter(!is.na(ATT8SCR_FSM6CLA1A), ATT8SCR_FSM6CLA1A > 0) %>%
  droplevels()
contrasts(d_disadv$OFSTEDRATING_1) <- contr.treatment(levels(d_disadv$OFSTEDRATING_1))

mod_disadv_base <- lmer(
  log(ATT8SCR_FSM6CLA1A) ~
    log(PTFSM6CLA1A) + log(PERCTOT) + log(PNUMEAL) +
    PTPRIORLO + ADMPOL_PT + gorard_segregation +
    remained_in_the_same_school +
    teachers_on_leadership_pay_range_percent +
    log(average_number_of_days_taken) +
    (1 | year_label) + (1 | OFSTEDRATING_1) + (1 | gor_name/LANAME),
  data = d_disadv, REML = TRUE, na.action = na.exclude,
  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 20000))
)

r2_disadv_base <- r2(mod_disadv_base)
cat("Disadvantaged baseline R² (m):", round(r2_disadv_base$R2_marginal, 4), "\n")

Disadvantaged baseline R² (m): 0.5461

Show code

cat("Disadvantaged baseline R² (c):", round(r2_disadv_base$R2_conditional, 4), "\n")

Disadvantaged baseline R² (c): 0.7206

Show code

results_disadv <- tibble(
  term = character(), label = character(),
  r2_marginal = numeric(), r2_conditional = numeric(),
  delta_marginal = numeric(), delta_conditional = numeric(),
  coef = numeric(), se = numeric(), p_value = numeric()
)

disadv_base_formula <- log(ATT8SCR_FSM6CLA1A) ~
  log(PTFSM6CLA1A) + log(PERCTOT) + log(PNUMEAL) +
  PTPRIORLO + ADMPOL_PT + gorard_segregation +
  remained_in_the_same_school +
  teachers_on_leadership_pay_range_percent +
  log(average_number_of_days_taken) +
  (1 | year_label) + (1 | OFSTEDRATING_1) + (1 | gor_name/LANAME)

for (term in names(candidates)) {
  ext_formula <- update(disadv_base_formula, paste("~ . +", term))

  d_sub <- d_disadv
  if (grepl("^log\\(", term)) {
    inner_var <- gsub("^log\\((.+)\\)$", "\\1", term)
    d_sub <- d_sub %>% filter(!is.na(.data[[inner_var]]), .data[[inner_var]] > 0)
  } else {
    d_sub <- d_sub %>% filter(!is.na(.data[[term]]), is.finite(.data[[term]]))
  }
  d_sub <- droplevels(d_sub)
  contrasts(d_sub$OFSTEDRATING_1) <- contr.treatment(levels(d_sub$OFSTEDRATING_1))

  tryCatch({
    mod <- lmer(
      ext_formula, data = d_sub, REML = TRUE, na.action = na.exclude,
      control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 20000))
    )
    r2_ext <- r2(mod)
    coefs <- summary(mod)$coefficients
    row_match <- grep(gsub("[()]", "", term), rownames(coefs), fixed = FALSE)
    if (length(row_match) == 0) row_match <- nrow(coefs)

    results_disadv <- bind_rows(results_disadv, tibble(
      term = term, label = candidates[[term]],
      r2_marginal = r2_ext$R2_marginal, r2_conditional = r2_ext$R2_conditional,
      delta_marginal = r2_ext$R2_marginal - r2_disadv_base$R2_marginal,
      delta_conditional = r2_ext$R2_conditional - r2_disadv_base$R2_conditional,
      coef = coefs[row_match[1], "Estimate"],
      se = coefs[row_match[1], "Std. Error"],
      p_value = coefs[row_match[1], "Pr(>|t|)"]
    ))
  }, error = function(e) message("  Failed: ", term))
}

Show code

results_disadv %>%
  mutate(
    `R² (m)` = sprintf("%.4f", r2_marginal),
    `ΔR² (m)` = sprintf("%+.4f", delta_marginal),
    Coef = sprintf("%.4f", coef),
    p = case_when(
      p_value < 0.001 ~ "< 0.001",
      TRUE ~ sprintf("%.3f", p_value)
    ),
    Sig = case_when(
      p_value < 0.001 ~ "***", p_value < 0.01 ~ "**",
      p_value < 0.05 ~ "*", p_value < 0.1 ~ ".", TRUE ~ ""
    )
  ) %>%
  select(Variable = label, `R² (m)`, `ΔR² (m)`, Coef, p, Sig) %>%
  kable()

Table 3: Financial variable effects on disadvantaged pupil attainment

Variable	R² (m)	ΔR² (m)	Coef	p	Sig
Total income pp (log)	0.5461	-0.0000	0.0008	0.913
Total expenditure pp (log)	0.5463	+0.0001	-0.0010	0.888
Staff costs pp (log)	0.5463	+0.0001	-0.0030	0.574
Teaching staff pp (log)	0.5495	+0.0033	0.0167	0.012	*
Grant funding pp (log)	0.5461	-0.0000	-0.0010	0.904
Premises costs pp (log)	0.5457	-0.0005	-0.0015	0.432
Staff/income ratio	0.5462	+0.0000	-0.0016	0.839
Teaching/expenditure ratio	0.5509	+0.0047	0.0732	< 0.001	***
Balance/income ratio	0.5461	-0.0001	-0.0012	0.840

7 Non-Disadvantaged Pupils

Show code

d_nondisadv <- d_fin %>%
  filter(!is.na(ATT8SCR_NFSM6CLA1A), ATT8SCR_NFSM6CLA1A > 0) %>%
  droplevels()
contrasts(d_nondisadv$OFSTEDRATING_1) <- contr.treatment(levels(d_nondisadv$OFSTEDRATING_1))

mod_nondisadv_base <- lmer(
  log(ATT8SCR_NFSM6CLA1A) ~
    log(PTFSM6CLA1A) + log(PERCTOT) + log(PNUMEAL) +
    PTPRIORLO + ADMPOL_PT + gorard_segregation +
    remained_in_the_same_school +
    teachers_on_leadership_pay_range_percent +
    log(average_number_of_days_taken) +
    (1 | year_label) + (1 | OFSTEDRATING_1) + (1 | gor_name/LANAME),
  data = d_nondisadv, REML = TRUE, na.action = na.exclude,
  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 20000))
)

r2_nondisadv_base <- r2(mod_nondisadv_base)
cat("Non-disadvantaged baseline R² (m):", round(r2_nondisadv_base$R2_marginal, 4), "\n")

Non-disadvantaged baseline R² (m): 0.6131

Show code

cat("Non-disadvantaged baseline R² (c):", round(r2_nondisadv_base$R2_conditional, 4), "\n")

Non-disadvantaged baseline R² (c): 0.7881

Show code

results_nondisadv <- tibble(
  term = character(), label = character(),
  r2_marginal = numeric(), r2_conditional = numeric(),
  delta_marginal = numeric(), delta_conditional = numeric(),
  coef = numeric(), se = numeric(), p_value = numeric()
)

nondisadv_base_formula <- log(ATT8SCR_NFSM6CLA1A) ~
  log(PTFSM6CLA1A) + log(PERCTOT) + log(PNUMEAL) +
  PTPRIORLO + ADMPOL_PT + gorard_segregation +
  remained_in_the_same_school +
  teachers_on_leadership_pay_range_percent +
  log(average_number_of_days_taken) +
  (1 | year_label) + (1 | OFSTEDRATING_1) + (1 | gor_name/LANAME)

for (term in names(candidates)) {
  ext_formula <- update(nondisadv_base_formula, paste("~ . +", term))

  d_sub <- d_nondisadv
  if (grepl("^log\\(", term)) {
    inner_var <- gsub("^log\\((.+)\\)$", "\\1", term)
    d_sub <- d_sub %>% filter(!is.na(.data[[inner_var]]), .data[[inner_var]] > 0)
  } else {
    d_sub <- d_sub %>% filter(!is.na(.data[[term]]), is.finite(.data[[term]]))
  }
  d_sub <- droplevels(d_sub)
  contrasts(d_sub$OFSTEDRATING_1) <- contr.treatment(levels(d_sub$OFSTEDRATING_1))

  tryCatch({
    mod <- lmer(
      ext_formula, data = d_sub, REML = TRUE, na.action = na.exclude,
      control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 20000))
    )
    r2_ext <- r2(mod)
    coefs <- summary(mod)$coefficients
    row_match <- grep(gsub("[()]", "", term), rownames(coefs), fixed = FALSE)
    if (length(row_match) == 0) row_match <- nrow(coefs)

    results_nondisadv <- bind_rows(results_nondisadv, tibble(
      term = term, label = candidates[[term]],
      r2_marginal = r2_ext$R2_marginal, r2_conditional = r2_ext$R2_conditional,
      delta_marginal = r2_ext$R2_marginal - r2_nondisadv_base$R2_marginal,
      delta_conditional = r2_ext$R2_conditional - r2_nondisadv_base$R2_conditional,
      coef = coefs[row_match[1], "Estimate"],
      se = coefs[row_match[1], "Std. Error"],
      p_value = coefs[row_match[1], "Pr(>|t|)"]
    ))
  }, error = function(e) message("  Failed: ", term))
}

Show code

results_nondisadv %>%
  mutate(
    `R² (m)` = sprintf("%.4f", r2_marginal),
    `ΔR² (m)` = sprintf("%+.4f", delta_marginal),
    Coef = sprintf("%.4f", coef),
    p = case_when(
      p_value < 0.001 ~ "< 0.001",
      TRUE ~ sprintf("%.3f", p_value)
    ),
    Sig = case_when(
      p_value < 0.001 ~ "***", p_value < 0.01 ~ "**",
      p_value < 0.05 ~ "*", p_value < 0.1 ~ ".", TRUE ~ ""
    )
  ) %>%
  select(Variable = label, `R² (m)`, `ΔR² (m)`, Coef, p, Sig) %>%
  kable()

Table 4: Financial variable effects on non-disadvantaged pupil attainment

Variable	R² (m)	ΔR² (m)	Coef	p	Sig
Total income pp (log)	0.6137	+0.0005	-0.0131	0.007	**
Total expenditure pp (log)	0.6142	+0.0011	-0.0121	0.005	**
Staff costs pp (log)	0.6138	+0.0007	-0.0074	0.028	*
Teaching staff pp (log)	0.6171	+0.0039	0.0025	0.554
Grant funding pp (log)	0.6139	+0.0008	-0.0177	< 0.001	***
Premises costs pp (log)	0.6133	+0.0001	0.0008	0.517
Staff/income ratio	0.6131	-0.0000	0.0007	0.895
Teaching/expenditure ratio	0.6181	+0.0050	0.0369	0.004	**
Balance/income ratio	0.6131	+0.0000	0.0008	0.830

8 Comparison Across Outcomes

Show code

all_results <- bind_rows(
  results %>% mutate(outcome = "All Pupils"),
  results_disadv %>% mutate(outcome = "Disadvantaged"),
  results_nondisadv %>% mutate(outcome = "Non-Disadvantaged")
)

all_results %>%
  mutate(label = fct_reorder(label, delta_marginal, .fun = mean)) %>%
  ggplot(aes(delta_marginal, label, colour = outcome)) +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "grey50") +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  scale_colour_manual(values = c("#2e6260", "#4e3c56", "#abc766", "#e16fca", "#f9dd73"), name = "Outcome") +
  scale_x_continuous(labels = label_number(accuracy = 0.0001)) +
  labs(x = expression(Delta * R^2 ~ "(marginal)"), y = NULL,
       title = "Financial variable impact varies by pupil group") +
  theme(legend.position = "bottom")

Figure 3: Change in marginal R² by outcome group

Show code

all_results %>%
  mutate(
    sig = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01  ~ "**",
      p_value < 0.05  ~ "*",
      p_value < 0.1   ~ ".",
      TRUE            ~ ""
    ),
    cell = paste0(sprintf("%+.4f", delta_marginal), " ", sig)
  ) %>%
  select(Variable = label, outcome, cell) %>%
  pivot_wider(names_from = outcome, values_from = cell) %>%
  kable()

Table 5: Significance of financial variables across all three outcomes

Variable	All Pupils	Disadvantaged	Non-Disadvantaged
Total income pp (log)	+0.0011 ***	-0.0000	+0.0005 **
Total expenditure pp (log)	+0.0014 **	+0.0001	+0.0011 **
Staff costs pp (log)	+0.0008 *	+0.0001	+0.0007 *
Teaching staff pp (log)	+0.0060	+0.0033 *	+0.0039
Grant funding pp (log)	+0.0013 ***	-0.0000	+0.0008 ***
Premises costs pp (log)	-0.0006	-0.0005	+0.0001
Staff/income ratio	-0.0001	+0.0000	-0.0000
Teaching/expenditure ratio	+0.0084 ***	+0.0047 ***	+0.0050 **
Balance/income ratio	-0.0000	-0.0001	+0.0000

9 Best Combined Model (if improvements found)

Show code

# Collect all significant improvers across all outcomes
all_sig <- all_results %>%
  filter(p_value < 0.05, delta_marginal > 0)

if (nrow(all_sig) > 0) {
  best_terms <- all_sig %>%
    group_by(term, label) %>%
    summarise(
      mean_delta = mean(delta_marginal),
      n_outcomes = n(),
      .groups = "drop"
    ) %>%
    arrange(desc(n_outcomes), desc(mean_delta))

  cat("Financial variables that significantly improved at least one outcome:\n\n")
  for (i in seq_len(nrow(best_terms))) {
    cat(sprintf("  %s: improved %d/3 outcomes, mean ΔR²(m) = %+.4f\n",
                best_terms$label[i], best_terms$n_outcomes[i],
                best_terms$mean_delta[i]))
  }

  # Fit the best combined model for All Pupils
  cat("\n\n--- Best combined model for All Pupils ---\n\n")

  best_fin_terms <- paste(best_terms$term, collapse = " + ")
  best_formula <- update(base_formula, paste("~ . +", best_fin_terms))

  d_best <- d_fin
  for (term in best_terms$term) {
    if (grepl("^log\\(", term)) {
      inner_var <- gsub("^log\\((.+)\\)$", "\\1", term)
      d_best <- d_best %>% filter(!is.na(.data[[inner_var]]), .data[[inner_var]] > 0)
    } else {
      d_best <- d_best %>% filter(!is.na(.data[[term]]), is.finite(.data[[term]]))
    }
  }
  d_best <- droplevels(d_best)
  contrasts(d_best$OFSTEDRATING_1) <- contr.treatment(levels(d_best$OFSTEDRATING_1))

  mod_best <- lmer(
    best_formula, data = d_best, REML = TRUE, na.action = na.exclude,
    control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 20000))
  )

  r2_best <- r2(mod_best)
  cat("Best model R² (marginal):", round(r2_best$R2_marginal, 4), "\n")
  cat("Best model R² (conditional):", round(r2_best$R2_conditional, 4), "\n")
  cat("Baseline R² (marginal):", round(r2_baseline$R2_marginal, 4), "\n")
  cat("Total improvement:", sprintf("%+.4f", r2_best$R2_marginal - r2_baseline$R2_marginal), "\n")
  cat("Observations:", nrow(d_best), "\n")
  cat("Singular?", isSingular(mod_best), "\n\n")

  summary(mod_best)
} else {
  cat("No financial variables significantly improved any of the three baseline models.\n")
  cat("This suggests that school-level financial characteristics are largely\n")
  cat("orthogonal to attainment outcomes once the existing predictors\n")
  cat("(deprivation, absence, prior attainment, workforce, segregation)\n")
  cat("are accounted for.\n")
}

Financial variables that significantly improved at least one outcome:

  Teaching/expenditure ratio: improved 3/3 outcomes, mean ΔR²(m) = +0.0061
  Total expenditure pp (log): improved 2/3 outcomes, mean ΔR²(m) = +0.0013
  Grant funding pp (log): improved 2/3 outcomes, mean ΔR²(m) = +0.0010
  Total income pp (log): improved 2/3 outcomes, mean ΔR²(m) = +0.0008
  Staff costs pp (log): improved 2/3 outcomes, mean ΔR²(m) = +0.0008
  Teaching staff pp (log): improved 1/3 outcomes, mean ΔR²(m) = +0.0033


--- Best combined model for All Pupils ---

Best model R² (marginal): 0.655 
Best model R² (conditional): 0.797 
Baseline R² (marginal): 0.6454 
Total improvement: +0.0095 
Observations: 8904 
Singular? FALSE

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: best_formula
   Data: d_best
Control: lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 20000))

REML criterion at convergence: -17674.5

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-48.712  -0.481   0.024   0.541   5.306 

Random effects:
 Groups          Name        Variance  Std.Dev.
 LANAME:gor_name (Intercept) 0.0008395 0.02897 
 gor_name        (Intercept) 0.0002931 0.01712 
 OFSTEDRATING_1  (Intercept) 0.0022485 0.04742 
 year_label      (Intercept) 0.0019563 0.04423 
 Residual                    0.0076262 0.08733 
Number of obs: 8904, groups:  
LANAME:gor_name, 152; gor_name, 9; OFSTEDRATING_1, 4; year_label, 4

Fixed effects:
                                           Estimate Std. Error         df
(Intercept)                               4.752e+00  8.106e-02  2.255e+02
log(PTFSM6CLA1A)                         -6.559e-02  2.990e-03  8.032e+03
log(PERCTOT)                             -2.097e-01  5.152e-03  8.875e+03
log(PNUMEAL)                              7.271e-03  1.285e-03  6.881e+03
PTPRIORLO                                -5.883e-03  1.514e-04  8.643e+03
ADMPOL_PTOTHER NON SEL                   -1.131e-03  7.734e-03  1.426e+03
ADMPOL_PTSEL                              1.037e-01  7.229e-03  8.368e+03
gorard_segregation                       -7.492e-02  4.950e-02  4.594e+02
remained_in_the_same_school               3.908e-04  5.378e-05  8.826e+03
teachers_on_leadership_pay_range_percent -1.315e-03  2.214e-04  8.859e+03
log(average_number_of_days_taken)        -1.485e-02  2.576e-03  8.826e+03
fin_teaching_expenditure_ratio            7.539e-02  4.917e-02  8.826e+03
log(fin_total_expenditure_pp)             3.409e-02  2.848e-02  8.813e+03
log(fin_grant_funding_pp)                -3.464e-02  1.741e-02  8.862e+03
log(fin_total_income_pp)                  5.112e-03  1.676e-02  8.824e+03
log(fin_staff_costs_pp)                  -3.909e-02  1.695e-02  8.845e+03
log(fin_teaching_staff_pp)                1.676e-02  2.375e-02  8.793e+03
                                         t value Pr(>|t|)    
(Intercept)                               58.628  < 2e-16 ***
log(PTFSM6CLA1A)                         -21.938  < 2e-16 ***
log(PERCTOT)                             -40.699  < 2e-16 ***
log(PNUMEAL)                               5.657 1.60e-08 ***
PTPRIORLO                                -38.848  < 2e-16 ***
ADMPOL_PTOTHER NON SEL                    -0.146   0.8837    
ADMPOL_PTSEL                              14.351  < 2e-16 ***
gorard_segregation                        -1.513   0.1308    
remained_in_the_same_school                7.268 3.97e-13 ***
teachers_on_leadership_pay_range_percent  -5.937 3.01e-09 ***
log(average_number_of_days_taken)         -5.766 8.37e-09 ***
fin_teaching_expenditure_ratio             1.533   0.1252    
log(fin_total_expenditure_pp)              1.197   0.2314    
log(fin_grant_funding_pp)                 -1.989   0.0467 *  
log(fin_total_income_pp)                   0.305   0.7604    
log(fin_staff_costs_pp)                   -2.306   0.0211 *  
log(fin_teaching_staff_pp)                 0.705   0.4805    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

10 Summary

Show code

summary_rows <- tibble(
  Model = c("Baseline (no finance)", "Best financial model"),
  `R² marginal` = c(
    round(r2_baseline$R2_marginal, 4),
    if (exists("mod_best")) round(r2_best$R2_marginal, 4) else NA
  ),
  `R² conditional` = c(
    round(r2_baseline$R2_conditional, 4),
    if (exists("mod_best")) round(r2_best$R2_conditional, 4) else NA
  )
)

if (exists("mod_multi") && nrow(sig_vars) > 0) {
  summary_rows <- bind_rows(
    summary_rows[1, ],
    tibble(
      Model = "All significant financial vars",
      `R² marginal` = round(r2_multi$R2_marginal, 4),
      `R² conditional` = round(r2_multi$R2_conditional, 4)
    ),
    summary_rows[2, ]
  )
}

summary_rows %>%
  filter(!is.na(`R² marginal`)) %>%
  kable()

Table 6: R² comparison: baseline vs best financial model

Model	R² marginal	R² conditional
Baseline (no finance)	0.6454	0.7904
All significant financial vars	0.6552	0.7972
Best financial model	0.6550	0.7970

Interpretation notes

Marginal R² captures variance explained by fixed effects only. This is the main comparison metric since financial variables enter as fixed effects.
Conditional R² includes random effects (Ofsted, region/LA, year). Changes here indicate whether financial data absorbs variance that was previously captured by the random structure.
Even small R² improvements can be substantively interesting if the coefficient is interpretable and policy-relevant.
Financial variables may be collinear with existing predictors (e.g. deprivation drives both FSM% and per-pupil funding via the National Funding Formula), so null results don’t mean finance is unimportant — just that its signal is already captured.

11 SEN / EHCP Experiments

Do special educational needs (SEN) and Education, Health and Care Plan (EHCP) variables add predictive power beyond the 9-variable baseline?

The panel data includes four school-level SEN variables from the school census:

Variable	Description
`PSENELSE`	% pupils with an EHC plan
`TSENELSE`	Number of pupils with an EHC plan
`PSENELK`	% pupils with SEN support (but no EHC plan)
`TSENELK`	Number of pupils with SEN support

We test each variable as a single addition to the baseline model, following the same methodology as the financial experiments above. The baseline here uses average_number_of_days_taken (raw, not log-transformed) to align with the main model specification.

11.1 SEN data availability

Show code

sen_vars <- c("PSENELSE", "TSENELSE", "PSENELK", "TSENELK")

model_data %>%
  summarise(
    across(
      all_of(sen_vars),
      list(
        n_valid = ~sum(!is.na(.x)),
        pct_valid = ~round(mean(!is.na(.x)) * 100, 1),
        Median = ~median(.x, na.rm = TRUE),
        SD = ~sd(.x, na.rm = TRUE)
      ),
      .names = "{.col}|{.fn}"
    )
  ) %>%
  pivot_longer(everything(), names_to = c("Variable", "Stat"), names_sep = "\\|") %>%
  pivot_wider(names_from = Stat, values_from = value) %>%
  mutate(
    Label = var_label(Variable),
    Median = round(Median, 2),
    SD = round(SD, 2),
    n_valid = comma(n_valid)
  ) %>%
  select(Variable, Label, n_valid, pct_valid, Median, SD) %>%
  kable()

Table 7: SEN variable availability in the model dataset

Variable	Label	n_valid	pct_valid	Median	SD
PSENELSE	% pupils with EHC plan	12,210	100	2.3	1.71
TSENELSE	No. pupils with EHC plan	12,210	100	24.0	18.27
PSENELK	% pupils with SEN support	12,210	100	12.8	5.61
TSENELK	No. pupils with SEN support	12,210	100	131.0	66.27

11.2 SEN dataset

We create a dataset with complete SEN data for fair comparison. The baseline is re-fitted on this same subset using the raw (not log-transformed) average_number_of_days_taken.

Show code

d_sen <- model_data %>%
  filter(
    !is.na(PSENELSE), !is.na(PSENELK),
    !is.na(TSENELSE), !is.na(TSENELK)
  ) %>%
  droplevels()

contrasts(d_sen$OFSTEDRATING_1) <-
  contr.treatment(levels(d_sen$OFSTEDRATING_1))

cat("SEN-complete dataset:", nrow(d_sen), "rows\n")

SEN-complete dataset: 12210 rows

Show code

cat("Years:", paste(sort(unique(as.character(d_sen$year_label))), collapse = ", "), "\n")

Years: 2021-22, 2022-23, 2023-24, 2024-25

Show code

cat("LAs:", n_distinct(d_sen$LANAME), "\n")

LAs: 152

Show code

cat("Schools:", n_distinct(d_sen$URN), "\n")

Schools: 3311

11.3 Baseline model (SEN subset)

Show code

mod_sen_baseline <- lmer(
  log(ATT8SCR) ~
    log(PTFSM6CLA1A) + log(PERCTOT) + log(PNUMEAL) +
    PTPRIORLO + ADMPOL_PT + gorard_segregation +
    remained_in_the_same_school +
    teachers_on_leadership_pay_range_percent +
    average_number_of_days_taken +
    (1 | year_label) + (1 | OFSTEDRATING_1) + (1 | gor_name/LANAME),
  data = d_sen,
  REML = TRUE,
  na.action = na.exclude,
  control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 20000))
)

r2_sen_baseline <- r2(mod_sen_baseline)
cat("SEN baseline R² (marginal):", round(r2_sen_baseline$R2_marginal, 4), "\n")

SEN baseline R² (marginal): 0.6317

Show code

cat("SEN baseline R² (conditional):", round(r2_sen_baseline$R2_conditional, 4), "\n")

SEN baseline R² (conditional): 0.7712

Show code

cat("Singular?", isSingular(mod_sen_baseline), "\n")

Singular? FALSE

11.4 Single-variable SEN additions

Show code

sen_candidates <- c("PSENELSE", "TSENELSE", "PSENELK", "TSENELK")

base_formula_sen <- log(ATT8SCR) ~
  log(PTFSM6CLA1A) + log(PERCTOT) + log(PNUMEAL) +
  PTPRIORLO + ADMPOL_PT + gorard_segregation +
  remained_in_the_same_school +
  teachers_on_leadership_pay_range_percent +
  average_number_of_days_taken +
  (1 | year_label) + (1 | OFSTEDRATING_1) + (1 | gor_name/LANAME)

sen_results <- map_dfr(sen_candidates, function(term) {
  fml <- update(base_formula_sen, paste("~ . +", term))

  mod <- tryCatch(
    lmer(fml, data = d_sen, REML = TRUE, na.action = na.exclude,
         control = lmerControl(optimizer = "bobyqa",
                               optCtrl = list(maxfun = 20000))),
    error = function(e) NULL
  )

  if (is.null(mod)) {
    return(tibble(term = term, label = var_label(term),
                  coef = NA, se = NA, t_value = NA, p_value = NA,
                  r2_marginal = NA, r2_conditional = NA,
                  delta_marginal = NA, delta_conditional = NA,
                  singular = NA))
  }

  r2_ext <- r2(mod)
  coefs <- summary(mod)$coefficients
  row <- which(rownames(coefs) == term)

  tibble(
    term           = term,
    label          = var_label(term),
    coef           = coefs[row, "Estimate"],
    se             = coefs[row, "Std. Error"],
    t_value        = coefs[row, "t value"],
    p_value        = coefs[row, "Pr(>|t|)"],
    r2_marginal    = r2_ext$R2_marginal,
    r2_conditional = r2_ext$R2_conditional,
    delta_marginal    = r2_ext$R2_marginal - r2_sen_baseline$R2_marginal,
    delta_conditional = r2_ext$R2_conditional - r2_sen_baseline$R2_conditional,
    singular       = isSingular(mod)
  )
})

11.4.1 Results table

Show code

sen_results %>%
  mutate(
    coef  = round(coef, 5),
    se    = round(se, 5),
    t_value = round(t_value, 2),
    p_value = round(p_value, 4),
    r2_marginal    = round(r2_marginal, 4),
    r2_conditional = round(r2_conditional, 4),
    delta_marginal    = round(delta_marginal, 5),
    delta_conditional = round(delta_conditional, 5),
    sig = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01  ~ "**",
      p_value < 0.05  ~ "*",
      p_value < 0.1   ~ ".",
      TRUE            ~ ""
    )
  ) %>%
  select(Variable = term, Label = label, Coef = coef, SE = se,
         `t` = t_value, `p` = p_value, Sig = sig,
         `R² marg.` = r2_marginal, `ΔR² marg.` = delta_marginal) %>%
  kable()

Table 8: Effect of adding SEN/EHCP variables to the baseline model

Variable	Label	Coef	SE	t	p	Sig	R² marg.	ΔR² marg.
PSENELSE	% pupils with EHC plan	-0.00249	0.00060	-4.12	0.0000	***	0.6307	-0.00101
TSENELSE	No. pupils with EHC plan	-0.00012	0.00006	-1.93	0.0538	.	0.6315	-0.00023
PSENELK	% pupils with SEN support	-0.00196	0.00019	-10.05	0.0000	***	0.6343	0.00259
TSENELK	No. pupils with SEN support	-0.00009	0.00002	-5.16	0.0000	***	0.6339	0.00224

11.4.2 R² improvement plot

Show code

sen_results %>%
  filter(!is.na(delta_marginal)) %>%
  mutate(label = fct_reorder(label, delta_marginal)) %>%
  ggplot(aes(delta_marginal, label)) +
  geom_col(aes(fill = delta_marginal > 0), show.legend = FALSE) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_fill_manual(values = c(`TRUE` = "#2e6260", `FALSE` = "#7b132b")) +
  scale_x_continuous(labels = label_number(accuracy = 0.0001)) +
  labs(x = expression(Delta * R^2 ~ "(marginal)"),
       y = NULL,
       title = "Change in marginal R² from adding each SEN/EHCP variable")

Figure 4: Change in marginal R² from adding each SEN/EHCP variable

11.4.3 Coefficient estimates

Show code

sen_results %>%
  filter(!is.na(coef)) %>%
  mutate(
    label = fct_reorder(label, coef),
    lower = coef - 1.96 * se,
    upper = coef + 1.96 * se
  ) %>%
  ggplot(aes(coef, label)) +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "grey50") +
  geom_pointrange(aes(xmin = lower, xmax = upper), colour = "#2e6260",
                  linewidth = 0.8, size = 0.8) +
  labs(x = "Coefficient estimate (95% CI)", y = NULL,
       title = "SEN/EHCP variable coefficients")

Figure 5: Coefficient estimates and 95% CIs for SEN/EHCP variable additions

11.5 Multi-variable SEN model

If any SEN variables are significant (p < 0.05) and improve marginal R², we fit them together.

Show code

sen_sig <- sen_results %>%
  filter(p_value < 0.05, delta_marginal > 0)

cat("Significant SEN improvers:", nrow(sen_sig), "\n")

Significant SEN improvers: 2

Show code

if (nrow(sen_sig) > 0) {
  cat("Variables:", paste(sen_sig$term, collapse = ", "), "\n")
}

Variables: PSENELK, TSENELK

Show code

if (nrow(sen_sig) > 1) {
  multi_terms <- paste(sen_sig$term, collapse = " + ")
  fml_multi <- update(base_formula_sen, paste("~ . +", multi_terms))

  mod_sen_multi <- lmer(
    fml_multi, data = d_sen, REML = TRUE, na.action = na.exclude,
    control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 20000))
  )

  r2_sen_multi <- r2(mod_sen_multi)
  cat("Multi-SEN model R² (marginal):",
      round(r2_sen_multi$R2_marginal, 4), "\n")
  cat("Multi-SEN model R² (conditional):",
      round(r2_sen_multi$R2_conditional, 4), "\n")
  cat("ΔR² marginal:",
      round(r2_sen_multi$R2_marginal - r2_sen_baseline$R2_marginal, 5), "\n")

  cat("\nCoefficients:\n")
  print(summary(mod_sen_multi)$coefficients[sen_sig$term, , drop = FALSE])
} else if (nrow(sen_sig) == 1) {
  cat("\nOnly one significant SEN variable — no multi-variable model needed.\n")
  cat("The single-variable result above is the best SEN model.\n")
} else {
  cat("\nNo SEN variables significantly improve the baseline model.\n")
}

Multi-SEN model R² (marginal): 0.6324 
Multi-SEN model R² (conditional): 0.7731 
ΔR² marginal: 0.00068 

Coefficients:
             Estimate   Std. Error       df   t value     Pr(>|t|)
PSENELK -0.0032831261 3.297113e-04 12159.61 -9.957578 2.868785e-23
TSENELK  0.0001482161 2.979569e-05 12174.08  4.974415 6.633826e-07

11.6 SEN summary

Show code

sen_summary <- tibble(
  Model = "Baseline (no SEN)",
  `R² marginal` = round(r2_sen_baseline$R2_marginal, 4),
  `R² conditional` = round(r2_sen_baseline$R2_conditional, 4)
)

# Add best single-variable result
best_single <- sen_results %>% filter(!is.na(delta_marginal)) %>%
  slice_max(delta_marginal, n = 1)

if (nrow(best_single) > 0 && best_single$delta_marginal > 0) {
  sen_summary <- bind_rows(
    sen_summary,
    tibble(
      Model = paste0("+ ", best_single$label, " (best single)"),
      `R² marginal` = round(best_single$r2_marginal, 4),
      `R² conditional` = round(best_single$r2_conditional, 4)
    )
  )
}

if (exists("mod_sen_multi") && nrow(sen_sig) > 1) {
  sen_summary <- bind_rows(
    sen_summary,
    tibble(
      Model = paste0("+ all significant SEN vars (",
                     paste(sen_sig$term, collapse = " + "), ")"),
      `R² marginal` = round(r2_sen_multi$R2_marginal, 4),
      `R² conditional` = round(r2_sen_multi$R2_conditional, 4)
    )
  )
}

sen_summary %>% kable()

Table 9: R² comparison: baseline vs SEN-augmented models

Model	R² marginal	R² conditional
Baseline (no SEN)	0.6317	0.7712
+ % pupils with SEN support (best single)	0.6343	0.7724
+ all significant SEN vars (PSENELK + TSENELK)	0.6324	0.7731

SEN interpretation notes

The percentage variables (PSENELSE, PSENELK) are more interpretable than counts because they adjust for school size.
SEN prevalence is strongly correlated with deprivation (PTFSM6CLA1A), so these variables may not add much beyond what FSM% already captures.
A significant negative coefficient on PSENELSE (% with EHCP) would suggest that schools with more EHCP pupils have lower attainment even after controlling for deprivation, absence, prior attainment, and other predictors — potentially reflecting the severity of need rather than school effectiveness.
Count variables (TSENELSE, TSENELK) may partly proxy for school size and should be interpreted with caution.