####R scripts for illustrative examples in "Aberrant distortion of variance components in multilevel models under conflation of level-specific effects" ##Preliminaries: packages to load (and install if needed) library(lme4) library(lmerTest) library(rockchalk) library(r2mlm) #r2mlmlong function (will soon be available in r2mlm package, but pasted here for convenience) #see r2mlm package for documentation r2mlm_long_manual <- function(data, covs, random_covs, clusterID, gammas, Tau, sigma2, bargraph = TRUE) { if (is.null(covs) == FALSE) { centered_data <- gmc(data, covs, clusterID) phi_w <- var(centered_data[, c(paste0(covs, "_dev"))]) phi_b <- var(centered_data[, c(paste0(covs, "_mn"))]) gammas <- matrix(c(gammas), ncol = 1) f1 <- t(gammas) %*% phi_w %*% gammas f2 <- t(gammas) %*% phi_b %*% gammas } else{ f1 <- 0 f2 <- 0 } if (is.null(random_covs) == FALSE) { centered_data_rand <- gmc(data, random_covs, clusterID) Sig_w <- var(centered_data_rand[, c(paste0(random_covs, "_dev"))]) Sig_b <- var(centered_data_rand[, c(paste0(random_covs, "_mn"))]) m_mat <- matrix(c(colMeans(cbind(1, data[, c(random_covs)]))), ncol = 1) v1 <- sum(diag(Tau[2:nrow(Tau), 2:nrow(Tau)] %*% Sig_w)) v2 <- sum(diag(Tau[2:nrow(Tau), 2:nrow(Tau)] %*% Sig_b)) } else{ v1 <- 0 v2 <- 0 m_mat <- 1 } m <- t(m_mat) %*% Tau %*% m_mat sigma <- mean(sigma2) #decompositions decomp_fixed_within <- f1 / sum(f1, f2, v1, v2, m, sigma) decomp_fixed_between <- f2 / sum(f1, f2, v1, v2, m, sigma) decomp_varslopes_within <- v1 / sum(f1, f2, v1, v2, m, sigma) decomp_varslopes_between <- v2 / sum(f1, f2, v1, v2, m, sigma) decomp_varmeans <- m / sum(f1, f2, v1, v2, m, sigma) decomp_sigma <- sigma / sum(f1, f2, v1, v2, m, sigma) decomp_fixed_within_w <- f1 / sum(f1, v1, sigma) decomp_fixed_between_b <- f2 / sum(f2, v2, m) decomp_varslopes_within_w <- v1 / sum(f1, v1, sigma) decomp_varslopes_between_b <- v2 / sum(f2, v2, m) decomp_varmeans_b <- m / sum(f2, v2, m) decomp_sigma_w <- sigma / sum(f1, v1, sigma) #barchart if (bargraph == TRUE) { contributions_stacked <- matrix( c( decomp_fixed_within, decomp_fixed_between, decomp_varslopes_within, decomp_varslopes_between, decomp_varmeans, decomp_sigma, decomp_fixed_within_w, 0, decomp_varslopes_within_w, 0, 0, decomp_sigma_w, 0, decomp_fixed_between_b, 0, decomp_varslopes_between_b, decomp_varmeans_b, 0 ), 6, 3 ) colnames(contributions_stacked) <- c("total", "within", "between") rownames(contributions_stacked) <- c( "fixed slopes (within)", "fixed slopes (between)", "slope variation (within)", "slope variation (between)", "intercept variation (between)", "residual (within)" ) barplot( contributions_stacked, main = "Decomposition", horiz = FALSE, ylim = c(0, 1), col = c( "darkred", "steelblue", "darkred", "steelblue", "midnightblue", "white" ), ylab = "proportion of variance", density = c(NA, NA, 30, 40, 40, NA), angle = c(0, 45, 0, 90, 135, 0), xlim = c(0, 1.5), width = c(.3, .3) ) legend( 1.1, .65, legend = rownames(contributions_stacked), fill = c( "darkred", "steelblue", "darkred", "steelblue", "midnightblue", "white" ), cex = .7, pt.cex = 1, xpd = T, density = c(NA, NA, 30, 40, 40, NA), angle = c(0, 45, 0, 90, 135, 0) ) } #create tables for output decomp_table <- matrix( c( decomp_fixed_within, decomp_fixed_between, decomp_varslopes_within, decomp_varslopes_between, decomp_varmeans, decomp_sigma, decomp_fixed_within_w, "NA", decomp_varslopes_within_w, "NA", "NA", decomp_sigma_w, "NA", decomp_fixed_between_b, "NA", decomp_varslopes_between_b, decomp_varmeans_b, "NA" ), 6, 3 ) colnames(decomp_table) <- c("total", "within", "between") rownames(decomp_table) <- c( "fixed slopes (within)", "fixed slopes (between)", "slope variation (within)", "slope variation (between)", "intercept variation (between)", "residual (within)" ) R2_table <- matrix( c( decomp_fixed_within, decomp_fixed_between, decomp_varslopes_within, decomp_varslopes_between, decomp_varmeans, decomp_fixed_within + decomp_fixed_between, decomp_fixed_within + decomp_fixed_between + decomp_varslopes_within + decomp_varslopes_between, decomp_fixed_within + decomp_fixed_between + decomp_varslopes_within + decomp_varslopes_between + decomp_varmeans, decomp_fixed_within_w, "NA", decomp_varslopes_within_w, "NA", "NA", "NA", decomp_fixed_within_w + decomp_varslopes_within_w, "NA", "NA", decomp_fixed_between_b, "NA", decomp_varslopes_between_b, decomp_varmeans_b, "NA", decomp_fixed_between_b + decomp_varslopes_between_b, "NA" ), 8, 3 ) colnames(R2_table) <- c("total", "within", "between") rownames(R2_table) <- c("f1", "f2", "v1", "v2", "m", "f", "fv", "fvm") Output <- list(noquote(decomp_table), noquote(R2_table)) names(Output) <- c("Decompositions", "R2s") return(Output) } ##Illustration #1: level-2 variance increasing after adding predictor { ##read in "salary" data salary_exdat <- read.table("salary_exdat.txt") #salary = starting salary out of college (in thousands) #GPA = college grade point average #smcGPA = school-mean-centered GPA #smGPA = school-mean GPA #grandmcGPA = grand-mean-centered GPA #grandmcsmGPA = grand-mean-centered school-mean GPA ##fit null model fit_null_salary <- lmer(salary ~ 1 + (1 | schoolID),REML=T, data = salary_exdat) summary(fit_null_salary) ##fit unconflated model (both variances decrease from null model) fit_unconflated_salary <- lmer(salary ~ 1 + smcGPA + grandmcsmGPA + (1 | schoolID),REML=T, data=salary_exdat) summary(fit_unconflated_salary) ##fit conflated model (level-2 variance increases from null model) fit_conflated_salary <- lmer(salary ~ 1 + grandmcGPA + (1 | schoolID),REML=T, data=salary_exdat) summary(fit_conflated_salary) } ##illustration #2: level-1 variance increasing after adding predictor { ##read in "symptoms" data (available at https://www.pilesofvariance.com/index.html) symptoms_exdat <- read.table("MPLUS_Chapter8.csv", header=F, sep=",") colnames(symptoms_exdat) <- c( "PersonID","women","baseage","session","studyday","dayweek","weekend","symp","mood","stress", "PMmood","PMstr","age80","mood2","WPmood","PMmood2","WPstr","PMstr40") #group mean center time (for consistency with manuscript) #note that there is very little between-person variation in session, and hence this is nearly equivalent to grand-mean-centering symptoms_exdat <- gmc(symptoms_exdat,"session","PersonID") #symp = measure of physical symptoms severity #session_dev = session number (person-mean-centered) #mood = measure of one's negative mood at a given session #PMmood = person-mean mood #WPmood = person-mean-centered mood ##fit unconditional model (time only) fit_unconditional_symptoms <- lmer(symp~1+session_dev+(1|PersonID),REML=T, data=symptoms_exdat) summary(fit_unconditional_symptoms) ##fit unconflated model (both variances decrease from the unconditional model); PMmood grand-mean-centered in syntax fit_unconflated_symptoms <- lmer(symp~1+session_dev+I(PMmood-mean(PMmood))+WPmood+(1|PersonID),REML=T, data=symptoms_exdat,control=lmerControl(optimizer="bobyqa")) summary(fit_unconflated_symptoms) ##fit conflated model (level-1 variance increases from null model); mood grand-mean-centered in syntax fit_conflated_symptoms <- lmer(symp~1+session_dev+I(mood-mean(mood))+(1|PersonID),REML=T, data=symptoms_exdat,control=lmerControl(optimizer="bobyqa")) summary(fit_conflated_symptoms) } ##illustration #3: large residual ICC when there are no true between-cluster differences { ##read in "authority" data authority_exdat <- read.table("authority_exdat.txt") #authority = perceived authority within company (scale ranging from 1 to 10) #yearsworked = number of years spent working for company #cmcyearsworked = company-mean-centered number of years spent working for company #cmyearsworked = company-mean number of years spent working for company #grandmcyearsworked = grand-mean-centered number of years spent working for company #grandmccmyearsworked = grand-mean-centered company-mean number of years spent working for company ##fit unconflated model (estimates 0 between-company intercept variability, hence the appropriate warning message) fit_unconflated_authority <- lmer(authority ~ 1 + cmcyearsworked + cmyearsworked + (1 | companyID),REML=T, data=authority_exdat) summary(fit_unconflated_authority) #compute residual ICC (equal to 0) VarCorr(fit_unconflated_authority)$companyID[1]/(sigma(fit_unconflated_authority)^2+VarCorr(fit_unconflated_authority)$companyID[1]) ##fit conflated model (estimates relatively large degree of intercept variability) fit_conflated_authority <- lmer(authority ~ 1 + grandmcyearsworked + (1 | companyID),REML=T, data=authority_exdat) summary(fit_conflated_authority) #compute residual ICC (markedly non-zero) VarCorr(fit_conflated_authority)$companyID[1]/(sigma(fit_conflated_authority)^2+VarCorr(fit_conflated_authority)$companyID[1]) } ##illustration #4: R-squared distortion { ##fit null models (not part of above illustrations, but needed for certain R-squared computations) fit_null_symptoms <- lmer(symp ~ 1 + (1|PersonID), REML=T, data=symptoms_exdat) fit_null_authority <- lmer(authority ~ 1 + (1 | companyID), REML=T, data=authority_exdat) ##compute Rights & Sterba R-squared #unconflated salary model r2mlm_long_manual(salary_exdat,c("smcGPA","grandmcsmGPA"),NULL,"schoolID",summary(fit_unconflated_salary)$coef[2:3,1],VarCorr(fit_unconflated_salary)$schoolID[1],sigma(fit_unconflated_salary)^2) #conflated salary model r2mlm_long_manual(salary_exdat,c("grandmcGPA"),NULL,"schoolID",summary(fit_conflated_salary)$coef[2,1],VarCorr(fit_conflated_salary)$schoolID[1],sigma(fit_conflated_salary)^2) #unconflated symptoms model r2mlm_long_manual(symptoms_exdat,c("session_dev","PMmood","WPmood"),NULL,"PersonID",summary(fit_unconflated_symptoms)$coef[2:4,1],VarCorr(fit_unconflated_symptoms)$PersonID[1],sigma(fit_unconflated_symptoms)^2) #conflated symptoms model r2mlm_long_manual(symptoms_exdat,c("session_dev","mood"),NULL,"PersonID",summary(fit_conflated_symptoms)$coef[2:3,1],VarCorr(fit_conflated_symptoms)$PersonID[1],sigma(fit_conflated_symptoms)^2) #unconflated authority model r2mlm_long_manual(authority_exdat,c("cmcyearsworked","cmyearsworked"),NULL,"companyID",summary(fit_unconflated_authority)$coef[2:3,1],VarCorr(fit_unconflated_authority)$companyID[1],sigma(fit_unconflated_authority)^2) #conflated authority model r2mlm_long_manual(authority_exdat,c("grandmcyearsworked"),NULL,"companyID",summary(fit_conflated_authority)$coef[2,1],VarCorr(fit_conflated_authority)$companyID[1],sigma(fit_conflated_authority)^2) ##compute RaudenBush & Bryk L1 R-squared #unconflated salary model (sigma(fit_null_salary)^2-sigma(fit_unconflated_salary)^2)/sigma(fit_null_salary)^2 #conflated salary model (sigma(fit_null_salary)^2-sigma(fit_conflated_salary)^2)/sigma(fit_null_salary)^2 #unconflated symptoms model (sigma(fit_null_symptoms)^2-sigma(fit_unconflated_symptoms)^2)/sigma(fit_null_symptoms)^2 #conflated symptoms model (sigma(fit_null_symptoms)^2-sigma(fit_conflated_symptoms)^2)/sigma(fit_null_symptoms)^2 #unconflated authority model (sigma(fit_null_authority)^2-sigma(fit_unconflated_authority)^2)/sigma(fit_null_authority)^2 #conflated authority model (sigma(fit_null_authority)^2-sigma(fit_conflated_authority)^2)/sigma(fit_null_authority)^2 ##compute RaudenBush & Bryk L2 R-squared #unconflated salary model (VarCorr(fit_null_salary)$schoolID[1]-VarCorr(fit_unconflated_salary)$schoolID[1])/VarCorr(fit_null_salary)$schoolID[1] #conflated salary model (VarCorr(fit_null_salary)$schoolID[1]-VarCorr(fit_conflated_salary)$schoolID[1])/VarCorr(fit_null_salary)$schoolID[1] #unconflated symptoms model (VarCorr(fit_null_symptoms)$PersonID[1]-VarCorr(fit_unconflated_symptoms)$PersonID[1])/VarCorr(fit_null_symptoms)$PersonID[1] #conflated symptoms model (VarCorr(fit_null_symptoms)$PersonID[1]-VarCorr(fit_conflated_symptoms)$PersonID[1])/VarCorr(fit_null_symptoms)$PersonID[1] #unconflated authority model (VarCorr(fit_null_authority)$companyID[1]-VarCorr(fit_unconflated_authority)$companyID[1])/VarCorr(fit_null_authority)$companyID[1] #conflated authority model (VarCorr(fit_null_authority)$companyID[1]-VarCorr(fit_conflated_authority)$companyID[1])/VarCorr(fit_null_authority)$companyID[1] ##compute Snijders & Bosker R-squared #unconflated salary model 1-(sigma(fit_unconflated_salary)^2+VarCorr(fit_unconflated_salary)$schoolID[1])/(sigma(fit_null_salary)^2+VarCorr(fit_null_salary)$schoolID[1]) #conflated salary model 1-(sigma(fit_conflated_salary)^2+VarCorr(fit_conflated_salary)$schoolID[1])/(sigma(fit_null_salary)^2+VarCorr(fit_null_salary)$schoolID[1]) #unconflated symptoms model 1-(sigma(fit_unconflated_symptoms)^2+VarCorr(fit_unconflated_symptoms)$PersonID[1])/(sigma(fit_null_symptoms)^2+VarCorr(fit_null_symptoms)$PersonID[1]) #conflated symptoms model 1-(sigma(fit_conflated_symptoms)^2+VarCorr(fit_conflated_symptoms)$PersonID[1])/(sigma(fit_null_symptoms)^2+VarCorr(fit_null_symptoms)$PersonID[1]) #unconflated authority model 1-(sigma(fit_unconflated_authority)^2+VarCorr(fit_unconflated_authority)$companyID[1])/(sigma(fit_null_authority)^2+VarCorr(fit_null_authority)$companyID[1]) #conflated authority model 1-(sigma(fit_conflated_authority)^2+VarCorr(fit_conflated_authority)$companyID[1])/(sigma(fit_null_authority)^2+VarCorr(fit_null_authority)$companyID[1]) } ##illustration #5: increased variance estimation for other coefficients { ##read in "quality" data quality_exdat <- read.table("quality_exdat.txt") #quality = rating of overall quality of care (ranging from 1 to 10) #program = binary variable indicating if hospital implemented program designed to increase quality of care (1 = yes, 0 = no) #SES = socioeconomic status (standardized) #hmcSES = hospital-mean-centered SES #hmSES = hospital-mean SES #fit unconflated model (slope of program is sig at alpha = .05) fit_unconflated_quality <- lmer(quality ~ 1 + program + hmcSES + hmSES + (1 | hospitalID),REML=T, data=quality_exdat) summary(fit_unconflated_quality) #fit conflated model (slope of program is non-sig at alpha = .05) fit_conflated_quality <- lmer(quality ~ 1 + program + SES + (1 | hospitalID),REML=T, data=quality_exdat) summary(fit_conflated_quality) }