#############################################################################
###########          R Script: Visitor Intervention              ###########
#############################################################################

require(foreign)
require(ggplot2)
require(stats)
require(car)
require(plyr)
require(pastecs)
require(pscl)
require(sandwich)
require(MASS)
require(emmeans)
require(sjmisc)
require(psych)
require(lmtest)
require(AER)   # provides dispersiontest(), used for the overdispersion checks below
# Note: mean_cl_boot in the ggplot2 figures below additionally requires the Hmisc package
# to be installed (it is called internally and does not need to be attached).

setwd("C:/Users/Susanne Gaube/Dropbox/01.Promotion/02. Research/05_HH Visitors Intervention")
data <- read.spss("Data.sav", to.data.frame = TRUE, use.value.labels = FALSE)

# Looking at the data file
head(data)
table(data$Condition)

# Setting usage rates > 100 % to NA and removing intervals without traffic (missing Rate)
dat <- data
summary(dat$Rate > 100)
dat[which(dat$Rate > 100), "Rate"] <- NA
dat <- dat[!is.na(dat$Rate), ]
summary(dat$Rate)
round(sum(dat$Traffic), 0)
round(sum(dat$Usage), 0)
sum(dat$DispenserIn)
sum(dat$DispenserOut)
round(describe(dat$Rate), 1)
# (the corresponding describe() summaries for the baseline-only and experimental-only
#  rates appear in their sections below, once dat.c and dat.e have been created)

# Including a variable for the median split on traffic
round(describe(dat$Traffic), 1)
dat$H.L.Traffic <- dicho(dat$Traffic, dich.by = "median")

# Including a variable for relative increases over the mean baseline usage rate of 6.0 %
dat$Increase <- (dat$Rate * 100 / 6.0) - 100
Increase_Mean <- round(tapply(dat$Increase, dat$Condition, mean), 1)

# Labeling the Control (Baseline) and Experimental groups
is.even <- function(x) x %% 2 == 0
dat$Group <- "Control"
dat$Group[is.even(dat$Condition)] <- "Experimental"

## Looking at the dispenser usage rate
hist(dat$Rate, main = "Histogram for Dispenser Usage Rates",
     xlab = "Dispenser Usage Rates in %", ylab = "Number of 15-minute intervals",
     xlim = c(0, 100), breaks = 50)
## -> not normally distributed

#-----------------------------------------------------------------------------------------------
# Checking whether dispenser usage differs between the baseline weeks

## Creating a data file with only the baseline week data
dat.c <- dat[which(dat$Group == "Control"), ]
dat.c$Condition <- as.factor(dat.c$Condition)
dat.c$Condition <- factor(dat.c$Condition, levels = c(1, 3, 5, 7, 9, 11, 13),
                          labels = c("W1_Baseline", "W2_Baseline", "W3_Baseline",
                                     "W4_Baseline", "W5_Baseline", "W6_Baseline",
                                     "W7_Baseline"))

## Descriptive statistics
round(describe(dat.c$Rate), 1)
sum(dat.c$Traffic)
by(dat.c$Traffic, dat.c$Condition, sum)
sum(dat.c$Usage)
round(by(dat.c$Usage, dat.c$Condition, sum), 0)
round(tapply(dat.c$Rate, dat.c$Condition, mean), 1)
round(tapply(dat.c$Rate, dat.c$Condition, sd), 1)
table(dat.c$Condition)

## Distribution of the dispenser usage rate
hist(dat.c$Rate, main = "Histogram for Usage Rates", xlab = "Usage Rates",
     xlim = c(0, 100), breaks = 50)
## -> not normally distributed
shapiro.test(dat.c$Rate)  # not normally distributed

## Plotting the baseline weeks
a <- data.frame(Rate = as.numeric(dat.c$Rate), Condition = as.factor(dat.c$Condition))
### mean used for the blue reference line in the graph
round(mean(dat.c$Rate), 2)
round(sd(dat.c$Rate), 2)
graph.a <- ggplot(a, aes(Condition, Rate)) +
  stat_summary(fun.y = mean, geom = "point") +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2) +
  theme_bw() +
  labs(x = "Baseline Weeks", y = "Mean Dispenser Usage in %") +
  geom_hline(yintercept = 6.0, color = "blue", linetype = 2) +
  expand_limits(y = c(4, 9)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
graph.a
ggsave("GaubeFig1.jpeg", plot = graph.a, width = 6.885, height = 5.16, dpi = 600)
ggsave("Fig1.tiff", plot = graph.a, width = 6.885, height = 5.16, dpi = 600)
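## Optional distribution-free cross-check (a minimal sketch): because Rate is clearly
## non-normal, a Kruskal-Wallis test compares the baseline weeks without the normality
## assumption required by the linear models in the next section
kruskal.test(Rate ~ Condition, data = dat.c)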
# Regression models: baseline weeks only

## Normal linear regression -> not appropriate because the data are not normally distributed
normal_baseline <- lm(Rate ~ Condition, data = dat.c)
summary.lm(normal_baseline)

## ANOVA -> not appropriate because the data are not normally distributed
car::leveneTest(dat.c$Rate, dat.c$Condition, center = mean)
anova_aov_baseline <- aov(Rate ~ Condition, data = dat.c)
summary.lm(anova_aov_baseline)

## Poisson regression
poisson_baseline <- glm(Rate ~ Condition, data = dat.c, family = "poisson")
summary(poisson_baseline)

### Testing for overdispersion
dispersiontest(poisson_baseline)  # there is overdispersion

## Negative binomial regression
nbr_baseline <- glm.nb(Rate ~ Condition, data = dat.c)
summary(nbr_baseline)
BIC(nbr_baseline)
logLik(nbr_baseline)
ref_grid_nbr_baseline <- ref_grid(nbr_baseline)
summary(ref_grid_nbr_baseline)
post_hoc_nbr_baseline <- emmeans(nbr_baseline, ~ Condition, contr = "tukey")

### Coefficient table (b, SE, z, p, exp(b) and its CI), exported to CSV
baseline_table <- round((est <- cbind(b  = coef(nbr_baseline),
                                      se = coef(summary(nbr_baseline))[, 2],
                                      z  = coef(summary(nbr_baseline))[, 3],
                                      p  = coef(summary(nbr_baseline))[, 4],
                                      eb = exp(coef(nbr_baseline)),
                                      CI = exp(confint(nbr_baseline)))), 3)
write.csv(baseline_table, "baseline_table.csv")

#-------------------------------------------------------------------------------------------------------
# Experimental weeks

## Creating a data file with only the experimental week data
dat.e <- dat[which(dat$Group == "Experimental"), ]

## Descriptive statistics
round(describe(dat.e$Rate), 1)
round(sum(dat.e$Traffic), 0)
round(by(dat.e$Traffic, dat.e$Condition, sum), 0)
round(sum(dat.e$Usage), 0)
round(by(dat.e$Usage, dat.e$Condition, sum), 0)
round(tapply(dat.e$Rate, dat.e$Condition, mean), 1)
round(tapply(dat.e$Rate, dat.e$Condition, sd), 1)
table(dat.e$Condition)

## Plotting the experimental weeks
c <- data.frame(Rate = as.numeric(dat.e$Rate), Exp = as.factor(dat.e$Conditionnames))
graph.c <- ggplot(c, aes(Exp, Rate)) +
  stat_summary(fun.y = mean, geom = "point") +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "Experimental Weeks", y = "Mean Dispenser Usage in %") +
  geom_hline(yintercept = 6.0, color = "blue", linetype = 2) +
  expand_limits(y = c(4, 9))
graph.c
ggsave("GaubeFig2.jpeg", plot = graph.c, width = 6.885, height = 5.16, dpi = 600)
ggsave("Fig2.tiff", plot = graph.c, width = 6.885, height = 5.16, dpi = 600)

# Regression of the combined baseline against all experimental weeks

## Creating a data file with the baseline and experimental week data
dt <- data.frame(Rate      = dat$Rate,
                 Condition = dat$Condition,
                 Group     = dat$Group,
                 Exp       = as.factor(dat$Conditionnames),  # week labels treated as a factor
                 Traffic   = dat$Traffic)

## Normal linear regression -> not appropriate because the data are not normally distributed
normal <- lm(Rate ~ Exp, data = dt)
summary.lm(normal)

## ANOVA -> not appropriate because the data are not normally distributed
car::leveneTest(dt$Rate, dt$Exp, center = mean)
anova <- aov(Rate ~ Exp, data = dt)
summary.lm(anova)

## Poisson regression
poisson <- glm(Rate ~ Exp, data = dt, family = "poisson")
summary(poisson)

### Testing for overdispersion
dispersiontest(poisson)  # there is overdispersion

## Negative binomial regression without traffic
nbr <- glm.nb(Rate ~ Exp, data = dt)
summary(nbr)
BIC(nbr)
logLik(nbr)
ref_grid_nbr <- ref_grid(nbr)
summary(ref_grid_nbr)
post_hoc_nbr <- emmeans(nbr, ~ Exp, contr = "tukey")
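## Optional cross-check on the overdispersion conclusion (a minimal sketch): pscl is loaded
## above, and its odTest() runs a likelihood-ratio test of the negative binomial model
## against its Poisson restriction (dispersion parameter fixed at zero)
odTest(nbr)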
## Negative binomial regression including only the weeks in which signs were displayed
dat.e$Conditionnames <- as.factor(dat.e$Conditionnames)
nbr.s <- glm.nb(Rate ~ Conditionnames, data = dat.e)
summary(nbr.s)
ref_grid_nbr.s <- ref_grid(nbr.s)
summary(ref_grid_nbr.s)
post_hoc_nbr.s <- emmeans(nbr.s, ~ Conditionnames, contr = "tukey")

### Coefficient table for the combined model (nbr), exported to CSV
experiment_table <- round((est <- cbind(b  = coef(nbr),
                                        se = coef(summary(nbr))[, 2],
                                        z  = coef(summary(nbr))[, 3],
                                        p  = coef(summary(nbr))[, 4],
                                        eb = exp(coef(nbr)),
                                        CI = exp(confint(nbr)))), 3)
write.csv(experiment_table, "experiment_table.csv")

#-------------------------------------------------------------------------------------------------------
# Testing the effect of Traffic

## Correlation between Traffic and Usage Rate
corr.all <- cor.test(x = dat$Traffic, y = dat$Rate, method = "pearson")
corr.all
corr.c <- cor.test(x = dat.c$Traffic, y = dat.c$Rate, method = "pearson")
corr.c
corr.e <- cor.test(x = dat.e$Traffic, y = dat.e$Rate, method = "pearson")
corr.e

## Baseline
wtest.d.b <- wilcox.test(Rate ~ H.L.Traffic, data = dat.c, conf.int = TRUE)
wtest.d.b

## Reciprocity
d.re <- subset(dat, Condition == 2)
corr.re <- cor.test(x = d.re$Traffic, y = d.re$Rate, method = "pearson")
corr.re
wtest.d.re <- wilcox.test(Rate ~ H.L.Traffic, data = d.re, conf.int = TRUE)
wtest.d.re

## Consistency
d.co <- subset(dat, Condition == 4)
corr.co <- cor.test(x = d.co$Traffic, y = d.co$Rate, method = "pearson")
corr.co
wtest.d.co <- wilcox.test(Rate ~ H.L.Traffic, data = d.co, conf.int = TRUE)
wtest.d.co

## Unity
d.un <- subset(dat, Condition == 6)
corr.un <- cor.test(x = d.un$Traffic, y = d.un$Rate, method = "pearson")
corr.un
wtest.d.un <- wilcox.test(Rate ~ H.L.Traffic, data = d.un, conf.int = TRUE)
wtest.d.un

## Social Proof
d.sp <- subset(dat, Condition == 8)
corr.sp <- cor.test(x = d.sp$Traffic, y = d.sp$Rate, method = "pearson")
corr.sp
wtest.d.sp <- wilcox.test(Rate ~ H.L.Traffic, data = d.sp, conf.int = TRUE)
wtest.d.sp

## Liking
d.li <- subset(dat, Condition == 10)
corr.li <- cor.test(x = d.li$Traffic, y = d.li$Rate, method = "pearson")
corr.li
wtest.d.li <- wilcox.test(Rate ~ H.L.Traffic, data = d.li, conf.int = TRUE)
wtest.d.li

## Authority
d.au <- subset(dat, Condition == 12)
corr.au <- cor.test(x = d.au$Traffic, y = d.au$Rate, method = "pearson")
corr.au
wtest.d.au <- wilcox.test(Rate ~ H.L.Traffic, data = d.au, conf.int = TRUE)
wtest.d.au

## Scarcity
d.sc <- subset(dat, Condition == 14)
corr.sc <- cor.test(x = d.sc$Traffic, y = d.sc$Rate, method = "pearson")
corr.sc
wtest.d.sc <- wilcox.test(Rate ~ H.L.Traffic, data = d.sc, conf.int = TRUE)
wtest.d.sc

#-----------------------------------------------------------------------------------------------------
## Independent two-group Mann-Whitney U test for social proof against its baseline
dsp <- subset(dat, Condition == 7 | Condition == 8)
shapiro.test(dsp$Rate)  # not normal
wtest.dsp <- wilcox.test(Rate ~ Condition, data = dsp, conf.int = TRUE)
wtest.dsp

## Independent two-group Mann-Whitney U test for authority against its baseline
da <- subset(dat, Condition == 11 | Condition == 12)
shapiro.test(da$Rate)  # not normal
wtest.da <- wilcox.test(Rate ~ Condition, data = da, conf.int = TRUE)
wtest.da

#-------------------------------------------------------------------------------------
## Relative increases of all experimental conditions in comparison to the baseline level
Increase_Mean <- round(tapply(dat$Increase, dat$Condition, mean), 1)
if (!requireNamespace("Publish", quietly = TRUE)) install.packages("Publish")
require(Publish)
ci.mean(d.re$Increase)
ci.mean(d.co$Increase)
ci.mean(d.un$Increase)
ci.mean(d.sp$Increase)
ci.mean(d.li$Increase)
ci.mean(d.au$Increase)
ci.mean(d.sc$Increase)
ci.mean(dat.e$Increase)

## Relative increases from the predecessor (baseline) week
## (6.4 and 6.1 are the mean usage rates of the respective preceding baseline weeks)
d.au$Increase2 <- (d.au$Rate * 100 / 6.4) - 100
ci.mean(d.au$Increase2)
d.sp$Increase2 <- (d.sp$Rate * 100 / 6.1) - 100
ci.mean(d.sp$Increase2)
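## Compact alternative (a minimal sketch): the per-condition confidence intervals for
## Increase above can also be produced in one pass by splitting on the condition code
tapply(dat.e$Increase, dat.e$Condition, ci.mean)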
#----------------------------------------------------------------------
# Figures in perspective: additional dispenser uses per year implied by the rate difference
# (interpreting 17580 as the visitor traffic per week and 7.7 % as the usage rate during
#  the experimental weeks; multiplying by 52 extrapolates the weekly difference to one year)
b <- round((6.0 * 17580 / 100), 0)   # weekly dispenser uses at the 6.0 % baseline rate
a <- round((7.7 * 17580 / 100), 0)   # weekly dispenser uses at the 7.7 % rate
(a - b) * 52                         # 15548 additional uses per year
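## The same back-of-envelope calculation wrapped in a small helper (a sketch; the function
## name and arguments are illustrative, and the values simply mirror the numbers used above)
extra_uses_per_year <- function(baseline_rate, intervention_rate, weekly_traffic, weeks = 52) {
  weekly_baseline     <- round(baseline_rate * weekly_traffic / 100, 0)
  weekly_intervention <- round(intervention_rate * weekly_traffic / 100, 0)
  (weekly_intervention - weekly_baseline) * weeks
}
extra_uses_per_year(6.0, 7.7, 17580)  # 15548, matching the figure above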