GLM for Between Subjects Data - R#

GLM for Normal Distribution Data#

  • Distribution: Normal

  • Link: identity

  • Typical Uses: Linear Regression: equivalent to linear (mixed) model (lm / lmm)

  • Reporting: “Figure 12a shows an interaction plot with ±1 standard deviation error bars for X1 and X2. An analysis of variance based on linear regression indicated a statistically significant effect on Y of X1 (F(1, 56) = 7.06, p < .05), but not of X2 (F(1, 56) = 1.02, n.s.). Also, the X1×X2 interaction was not statistically significant (F(1, 56) = 2.74, n.s.).”

# Example data
# df has subjects (S), two between-Ss factors (X1,X2) each w/levels (a,b), and continuous response (Y)
df <- read.csv("data/2F2LBs_normal.csv")
head(df, 20)
A data.frame: 20 × 4
SX1X2Y
<int><chr><chr><dbl>
1 1aa 9.587838
2 2ab 9.217284
3 3ba10.663489
4 4bb14.932123
5 5aa 5.157908
6 6ab11.025094
7 7ba10.741292
8 8bb 3.169121
9 9aa10.863238
1010ab 6.244845
1111ba14.949621
1212bb10.081778
1313aa 6.167002
1414ab 7.761871
1515ba12.335185
1616bb 8.212590
1717aa 6.552768
1818ab10.075766
1919ba13.765247
2020bb 9.038398
library(car) # for Anova
df$S = factor(df$S)
df$X1 = factor(df$X1)
df$X2 = factor(df$X2)
contrasts(df$X1) <- "contr.sum"
contrasts(df$X2) <- "contr.sum"
m = glm(Y ~ X1*X2, data=df, family=gaussian)
Anova(m, type=3, test.statistic="F")
Loading required package: carData
A anova: 4 × 4
Sum SqDfF valuesPr(>F)
<dbl><dbl><dbl><dbl>
X1 61.014582 17.0613310.01024486
X2 8.787594 11.0170050.31756830
X1:X2 23.639063 12.7357930.10371815
Residuals483.87715456 NA NA
library(ggplot2)
library(ggthemes)
library(scales)
library(plyr)

# Interaction plot
# http://www.sthda.com/english/wiki/ggplot2-line-plot-quick-start-guide-r-software-and-data-visualization
# http://www.sthda.com/english/wiki/ggplot2-error-bars-quick-start-guide-r-software-and-data-visualization
# http://www.sthda.com/english/wiki/ggplot2-point-shapes
# http://www.sthda.com/english/wiki/ggplot2-line-types-how-to-change-line-types-of-a-graph-in-r-software
df2 <- ddply(df, ~ X1*X2, function(d) # make a summary data table
  c(NROW(d$Y),
    sum(is.na(d$Y)),
    sum(!is.na(d$Y)),
    mean(d$Y, na.rm=TRUE),
    sd(d$Y, na.rm=TRUE),
    median(d$Y, na.rm=TRUE),
    IQR(d$Y, na.rm=TRUE)))
colnames(df2) <- c("X1","X2","Rows","NAs","NotNAs","Mean","SD","Median","IQR")
ggplot(data=df2, aes(x=X1, y=Mean, color=X2, group=X2)) + theme_minimal() + 
  # set the font styles for the plot title and axis titles
  theme(plot.title   = element_text(face="bold",  color="black", size=18, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.x = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.y = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=90)) + 
  # set the font styles for the value labels that show on each axis
  theme(axis.text.x  = element_text(face="plain", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.text.y  = element_text(face="plain", color="black", size=12, hjust=0.0, vjust=0.5, angle=0)) + 
  # set the font styles for the legend
  theme(legend.title = element_text(face="bold", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) +
  theme(legend.text  = element_text(face="plain", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) +
  # create the plot lines, points, and error bars
  geom_line(aes(linetype=X2), position=position_dodge(0.05)) + 
  geom_point(aes(shape=X2, size=X2), position=position_dodge(0.05)) + 
  geom_errorbar(aes(ymin=Mean-SD, ymax=Mean+SD), position=position_dodge(0.05), width=0.1) + 
  # place text labels on each bar
  geom_text(aes(label=sprintf("%.2f (±%.2f)", Mean, SD)), position=position_dodge(0.05), hjust=0.0, vjust=-1.0, color="black", size=3.5) +
  # set the labels for the title and each axis
  labs(title="Y by X1, X2", x="X1", y="Y") + 
  # set the ranges and value labels for each axis
  scale_x_discrete(labels=c("a","b")) + 
  scale_y_continuous(breaks=seq(0,14,by=2), labels=seq(0,14,by=2), limits=c(0,14), oob=rescale_none) + 
  # set the name, labels, and colors for the traces
  scale_color_manual(name="X2", labels=c("a","b"), values=c("red", "blue")) +
  # set the size and shape of the points
  scale_size_manual(values=c(4,4)) +
  scale_shape_manual(values=c(16,10)) +
  # set the linetype of the lines
  scale_linetype_manual(values=c("solid", "longdash"))
../../_images/glm-bs-r_4_0.png

GLM for Binomial Distribution Data#

  • Distribution: Binomial

  • Link: logit

  • Typical Uses: Logistic Regression: dichotomous responses (i.e. nominal responses with two categories)

  • Reporting: “Figure 13a shows the number of ‘x’ and ‘y’ outcomes for each level of X1 and X2. An analysis of variance based on logistic regression indicated a statistically significant effect on Y of X1 (χ2(1, N=60) = 6.05, p < .05) and of the X1×X2 interaction (χ2(1, N=60) = 8.63, p < .01), but not of X2 (χ2(1, N=60) = 2.27, n.s.)”

# Example data
# df has subjects (S), two factors (X1,X2) each w/levels (a,b), and dichotomous response (Y)
df <- read.csv("data/2F2LBs_binomial.csv")
head(df, 20)
A data.frame: 20 × 4
SX1X2Y
<int><chr><chr><chr>
1 1aax
2 2aby
3 3bay
4 4bby
5 5aax
6 6aby
7 7bay
8 8bbx
9 9aax
1010aby
1111bax
1212bbx
1313aax
1414abx
1515bax
1616bbx
1717aay
1818abx
1919bay
2020bbx
library(car) # for Anova
df$S = factor(df$S)
df$X1 = factor(df$X1)
df$X2 = factor(df$X2)
df$Y = factor(df$Y) # nominal response
contrasts(df$X1) <- "contr.sum"
contrasts(df$X2) <- "contr.sum"
m = glm(Y ~ X1*X2, data=df, family=binomial)
Anova(m, type=3)
A anova: 3 × 3
LR ChisqDfPr(>Chisq)
<dbl><dbl><dbl>
X16.04579610.013939447
X22.26806310.132064859
X1:X28.63101810.003304868
library(ggplot2)
library(ggthemes)
library(scales)
library(plyr)

# Barplot
# http://www.sthda.com/english/wiki/ggplot2-barplots-quick-start-guide-r-software-and-data-visualization
# https://stackoverflow.com/questions/51892875/how-to-increase-the-space-between-grouped-bars-in-ggplot2
df2 <- as.data.frame(xtabs(~ X1+X2+Y, data=df))
df2$X12 = with(df2, interaction(X1, X2))
df2$X12 = factor(df2$X12, levels=c("a.a","a.b","b.a","b.b"))
df2 <- df2[order(df2$X1, df2$X2),] # sort df2 alphabetically by X1, X2
ggplot(data=df2, aes(y=Freq, x=X12, fill=Y)) + theme_minimal() + 
  # set the font styles for the plot title and axis titles
  theme(plot.title   = element_text(face="bold",  color="black", size=18, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.x = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.y = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=90)) + 
  # set the font styles for the value labels that show on each axis
  theme(axis.text.x  = element_text(face="plain", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.text.y  = element_text(face="plain", color="black", size=12, hjust=0.0, vjust=0.5, angle=0)) + 
  # set the font styles for the legend
  theme(legend.title = element_text(face="bold", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) +
  theme(legend.text  = element_text(face="plain", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) +
  # create the barplots side-by-side
  geom_col(width=0.75, position=position_dodge(width=0.75), alpha=0.60) +
  # place text labels on each bar
  geom_text(aes(label=Freq), position=position_dodge(width=0.75), vjust=1.2, color="black", size=3.5) +
  # set the labels for the title and each axis
  labs(title="Y by X1, X2", x="X1.X2", y="Count") + 
  # change the order of the bars on the x-axis
  scale_x_discrete(limits=c("a.a","a.b","b.a","b.b")) + 
  # set the scale, breaks, and labels on the y-axis
  scale_y_continuous(breaks=seq(0,17.5,by=5), labels=seq(0,17.5,by=5), limits=c(0,17.5), oob=rescale_none) + 
  # set the name, labels, and colors for the boxes
  scale_fill_manual(name="Y", labels=c("x","y"), values=c("#69b3a2","#404080"))
../../_images/glm-bs-r_8_0.png

GLM for Multinomial Distribution Data#

  • Distribution: Multinomial

  • Link: logit

  • Typical Uses: Multinomial Logistic Regression: polytomous responses (i.e. nominal responses with more two categories)

  • Reporting: “Figure 14a shows the number of ‘x’, ‘y’, and ‘z’ outcomes for each level of X1 and X2. An analysis of variance based on multinomial logistic regression indicated a statistically significant effect on Y of X1 (χ2(2, N=60) = 10.46, p < .01), of X2 (χ2(2, N=60) = 15.21, p < .001), and a marginal X1×X2 interaction (χ2(2, N=60) = 5.09, p = .078).”

# Example data
# df has subjects (S), two between-Ss factors (X1,X2) each w/levels (a,b), and polytomous response (Y)
df <- read.csv("data/2F2LBs_multinomial.csv")
head(df, 20)
A data.frame: 20 × 4
SX1X2Y
<int><chr><chr><chr>
1 1aax
2 2abz
3 3bax
4 4bbx
5 5aax
6 6abx
7 7bax
8 8bbx
9 9aay
1010abx
1111bax
1212bbz
1313aax
1414aby
1515bax
1616bbx
1717aax
1818abx
1919bax
2020bbx
library(nnet) # for multinom
library(car) # for Anova
df$S = factor(df$S)
df$X1 = factor(df$X1)
df$X2 = factor(df$X2)
df$Y = factor(df$Y) # nominal response
contrasts(df$X1) <- "contr.sum"
contrasts(df$X2) <- "contr.sum"
m = multinom(Y ~ X1*X2, data=df)
Anova(m, type=3)
# weights:  15 (8 variable)
initial  value 65.916737 
iter  10 value 41.209129
iter  20 value 41.110241
iter  30 value 41.109293
final  value 41.109253 
converged
A anova: 3 × 3
LR ChisqDfPr(>Chisq)
<dbl><dbl><dbl>
X110.45628420.0053634826
X215.21210420.0004974317
X1:X2 5.09223420.0783854422
library(ggplot2)
library(ggthemes)
library(scales)
library(plyr)

# Barplot
# http://www.sthda.com/english/wiki/ggplot2-barplots-quick-start-guide-r-software-and-data-visualization
# https://stackoverflow.com/questions/51892875/how-to-increase-the-space-between-grouped-bars-in-ggplot2
df2 <- as.data.frame(xtabs(~ X1+X2+Y, data=df)) # build a freq table
df2$X12 = with(df2, interaction(X1, X2))
df2$X12 = factor(df2$X12, levels=c("a.a","a.b","b.a","b.b"))
df2 <- df2[order(df2$X1, df2$X2),] # sort df2 alphabetically by X1, X2
ggplot(data=df2, aes(x=X12, y=Freq, fill=Y)) + theme_minimal() + 
  # set the font styles for the plot title and axis titles
  theme(plot.title   = element_text(face="bold",  color="black", size=18, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.x = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.y = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=90)) + 
  # set the font styles for the value labels that show on each axis
  theme(axis.text.x  = element_text(face="plain", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.text.y  = element_text(face="plain", color="black", size=12, hjust=0.0, vjust=0.5, angle=0)) + 
  # set the font styles for the legend
  theme(legend.title = element_text(face="bold", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) +
  theme(legend.text  = element_text(face="plain", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) +
  # create the barplots side-by-side
  geom_col(width=0.75, position=position_dodge(width=0.75), alpha=0.60) +
  # place text labels on each bar
  geom_text(aes(label=Freq), position=position_dodge(width=0.75), vjust=1.2, color="black", size=3.5) +
  # set the labels for the title and each axis
  labs(title="Y by X1, X2", x="X1.X2", y="Count") + 
  # change the order of the bars on the x-axis
  scale_x_discrete(limits=c("a.a","a.b","b.a","b.b")) + 
  # set the scale, breaks, and labels on the y-axis
  scale_y_continuous(breaks=seq(0,15,by=5), labels=seq(0,15,by=5), limits=c(0,15), oob=rescale_none) + 
  # set the name, labels, and colors for the boxes
  scale_fill_manual(name="Y", labels=c("x","y","z"), values=c("#69b3a2","#404080","#e69f00"))
../../_images/glm-bs-r_12_0.png

GLM for Ordinal Distribution Data#

  • Distribution: Ordinal

  • Link: cumulative logit

  • Typical Uses: Ordinal Logistic Regression: ordinal responses (i.e. Likert scales)

  • Reporting: “Figure 15a shows the distribution of Likert responses (1-7) for each combination of X1 and X2. An analysis of variance based on ordinal logistic regression indicated a statistically significant effect on Y of X2 (χ2(1, N=60) = 6.14, p < .05), but not of X1 (χ2(1, N=60) = 1.65, n.s.) or of the X1×X2 interaction (χ2(1, N=60) = 0.05, n.s.).”

# Example data
# df has subjects (S), two between-Ss factors (X1,X2) each w/levels (a,b), and polytomous response (Y)
df <- read.csv("data/2F2LBs_ordinal.csv")
head(df, 20)
A data.frame: 20 × 4
SX1X2Y
<int><chr><chr><int>
1 1aa7
2 2ab3
3 3ba3
4 4bb2
5 5aa5
6 6ab4
7 7ba3
8 8bb6
9 9aa4
1010ab2
1111ba6
1212bb3
1313aa2
1414ab5
1515ba5
1616bb1
1717aa7
1818ab5
1919ba2
2020bb4
library(MASS) # for polr
library(car) # for Anova
df$S = factor(df$S)
df$X1 = factor(df$X1)
df$X2 = factor(df$X2)
df$Y = ordered(df$Y) # ordinal response
contrasts(df$X1) <- "contr.sum"
contrasts(df$X2) <- "contr.sum"
m = polr(Y ~ X1*X2, data=df, Hess=TRUE)
Anova(m, type=3)
A anova: 3 × 3
LR ChisqDfPr(>Chisq)
<dbl><dbl><dbl>
X11.6510900810.19881063
X26.1412253810.01320658
X1:X20.0498017610.82340859
library(ggplot2)
library(ggthemes)
library(scales)
library(plyr)

# Ordinal histograms-as-barplots
# http://www.sthda.com/english/wiki/ggplot2-barplots-quick-start-guide-r-software-and-data-visualization
# https://stackoverflow.com/questions/51892875/how-to-increase-the-space-between-grouped-bars-in-ggplot2
df2 <- as.data.frame(xtabs(~ X1+X2+Y, data=df)) # build a freq table
df2$X12 = with(df2, interaction(X1, X2))
df2$X12 = factor(df2$X12, levels=c("a.a","a.b","b.a","b.b"))
df2 <- df2[order(df2$X1, df2$X2),] # sort df2 alphabetically by X1, X2
ggplot(data=df2, aes(x=Y, y=Freq, fill=X12)) + theme_minimal() + 
  # set the font styles for the plot title and axis titles
  theme(plot.title   = element_text(face="bold",  color="black", size=18, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.x = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.y = element_text(face="bold",  color="black", size=14, hjust=0.5, vjust=0.0, angle=90)) + 
  # set the font styles for the value labels that show on each axis
  theme(axis.text.x  = element_text(face="plain", color="black", size=12, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.text.y  = element_text(face="plain", color="black", size=12, hjust=0.0, vjust=0.5, angle=0)) + 
  # set the font styles for the legend
  theme(legend.title = element_text(face="bold", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) +
  theme(legend.text  = element_text(face="plain", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) +
  # set the font style for the facet labels
  theme(strip.text = element_text(face="bold", color="black", size=14, hjust=0.5)) + 
  # use a bar plot to just plot the value for each 1-7 in (X1,X2)
  geom_col(width=0.95, alpha=0.60) + 
  #geom_bar(stat="identity", width=0.95, alpha=0.60) + # equivalent
  # place text labels on each bar
  geom_text(aes(label=Freq), vjust=1.2, color="black", size=3.5) +
  # create a grid of plots by (X1,X2), one for each histogram
  #facet_grid(X12 ~ .) +  # 4x1 stack
  #facet_grid(. ~ X12) +  # 1x4 row
  facet_grid(X2 ~ X1) +   # 2x2 grid
  # determine the fill color values of each histogram
  scale_fill_manual(values=c("#69b3a2","#404080", "#e69f00", "darkred")) + 
  # set the labels for the title, each axis, and the legend
  labs(title="Responses by X1, X2", x="Likert (1-7)", y="Count") + 
  guides(fill=guide_legend(title="X1.X2")) + 
  # set the ranges and value labels for each axis
  scale_x_discrete(labels=seq(1,7,by=1)) +
  scale_y_continuous(breaks=seq(0,6,by=1), minor_breaks=seq(0,6,by=1), labels=seq(0,6,by=1), limits=c(0,6))
../../_images/glm-bs-r_16_0.png

GLM for Poisson Distribution Data#

  • Distribution: Poisson

  • Link: log

  • Typical Uses: Poisson Regression: counts, rare events (e.g. gesture recognition errors, 3-pointers per quarter, number of “F” grades)

  • Reporting: “Figure 16a shows an interaction plot with ±1 standard deviation error bars for X1 and X2. An analysis of variance based on Poisson regression indicated a statistically significant effect on Y of the X1×X2 interaction (χ2(1, N=60) = 3.84, p < .05), but not of either X1 (χ2(1, N=60) = 0.17, n.s.) or X2 (χ2(1, N=60) = 1.19, n.s.).”

# Example data
# df has subjects (S), two factors (X1,X2) each w/levels (a,b), and count response (Y)
df <- read.csv("data/2F2LBs_poisson.csv")
head(df, 20)
A data.frame: 20 × 4
SX1X2Y
<int><chr><chr><int>
1 1aa5
2 2ab6
3 3ba7
4 4bb4
5 5aa4
6 6ab6
7 7ba6
8 8bb4
9 9aa4
1010ab9
1111ba7
1212bb2
1313aa3
1414ab3
1515ba3
1616bb7
1717aa5
1818ab4
1919ba8
2020bb3
library(car) # for Anova
df$S = factor(df$S)
df$X1 = factor(df$X1)
df$X2 = factor(df$X2)
contrasts(df$X1) <- "contr.sum"
contrasts(df$X2) <- "contr.sum"
m = glm(Y ~ X1*X2, data=df, family=poisson)
Anova(m, type=3)
A anova: 3 × 3
LR ChisqDfPr(>Chisq)
<dbl><dbl><dbl>
X10.167382910.68244826
X21.186558510.27602485
X1:X23.842344010.04997361
library(ggplot2)
library(ggthemes)
library(scales)
library(plyr)

# Interaction plot
# http://www.sthda.com/english/wiki/ggplot2-line-plot-quick-start-guide-r-software-and-data-visualization
# http://www.sthda.com/english/wiki/ggplot2-error-bars-quick-start-guide-r-software-and-data-visualization
# http://www.sthda.com/english/wiki/ggplot2-point-shapes
# http://www.sthda.com/english/wiki/ggplot2-line-types-how-to-change-line-types-of-a-graph-in-r-software
df2 <- ddply(df, ~ X1*X2, function(d) # make a summary data table
  c(NROW(d$Y),
    sum(is.na(d$Y)),
    sum(!is.na(d$Y)),
    mean(d$Y, na.rm=TRUE),
    sd(d$Y, na.rm=TRUE),
    median(d$Y, na.rm=TRUE),
    IQR(d$Y, na.rm=TRUE)))
colnames(df2) <- c("X1","X2","Rows","NAs","NotNAs","Mean","SD","Median","IQR")
ggplot(data=df2, aes(x=X1, y=Mean, color=X2, group=X2)) + theme_minimal() + 
  # set the font styles for the plot title and axis titles
  theme(plot.title   = element_text(face="bold",  color="black", size=18, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.x = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.y = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=90)) + 
  # set the font styles for the value labels that show on each axis
  theme(axis.text.x  = element_text(face="plain", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.text.y  = element_text(face="plain", color="black", size=12, hjust=0.0, vjust=0.5, angle=0)) + 
  # set the font styles for the legend
  theme(legend.title = element_text(face="bold", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) +
  theme(legend.text  = element_text(face="plain", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) +
  # create the plot lines, points, and error bars
  geom_line(aes(linetype=X2), position=position_dodge(0.05)) + 
  geom_point(aes(shape=X2, size=X2), position=position_dodge(0.05)) + 
  geom_errorbar(aes(ymin=Mean-SD, ymax=Mean+SD), position=position_dodge(0.05), width=0.1) + 
  # place text labels on each bar
  geom_text(aes(label=sprintf("%.2f (±%.2f)", Mean, SD)), position=position_dodge(0.05), hjust=0.0, vjust=-1.0, color="black", size=3.5) +
  # set the labels for the title and each axis
  labs(title="Y by X1, X2", x="X1", y="Y") + 
  # set the ranges and value labels for each axis
  scale_x_discrete(labels=c("a","b")) + 
  scale_y_continuous(breaks=seq(0,7,by=2), labels=seq(0,7,by=2), limits=c(0,7), oob=rescale_none) + 
  # set the name, labels, and colors for the traces
  scale_color_manual(name="X2", labels=c("a","b"), values=c("red", "blue")) +
  # set the size and shape of the points
  scale_size_manual(values=c(4,4)) +
  scale_shape_manual(values=c(16,10)) +
  # set the linetype of the lines
  scale_linetype_manual(values=c("solid", "longdash"))
../../_images/glm-bs-r_20_0.png

GLM for Zero-Inflated Poisson Distribution Data#

  • Distribution: Zero-Inflated Poisson

  • Link: log

  • Typical Uses: Zero-Inflated Poisson Regression: counts, rare events with large proportion of zeroes

  • Reporting: “Figure 17a shows histograms of Y by X1 and X2. An analysis of variance based on zero-inflated Poisson regression indicated a statistically significant effect on Y of the X1×X2 interaction (χ2(1, N=60) = 8.14, p < .01), and a marginal effect of X2 (χ2(1, N=60) = 3.11, p = .078). There was no statistically significant effect of X1 on Y (χ2(1, N=60) = 0.29, n.s.).”

# Example data
# df has subjects (S), two factors (X1,X2) each w/levels (a,b), and count response (Y)
df <- read.csv("data/2F2LBs_zipoisson.csv")
head(df, 20)
A data.frame: 20 × 4
SX1X2Y
<int><chr><chr><int>
1 1aa1
2 2ab0
3 3ba0
4 4bb2
5 5aa1
6 6ab5
7 7ba6
8 8bb5
9 9aa1
1010ab0
1111ba0
1212bb8
1313aa4
1414ab1
1515ba0
1616bb0
1717aa8
1818ab5
1919ba5
2020bb3
library(pscl) # for zeroinfl
library(car) # for Anova
df$S = factor(df$S)
df$X1 = factor(df$X1)
df$X2 = factor(df$X2)
contrasts(df$X1) <- "contr.sum"
contrasts(df$X2) <- "contr.sum"
m = zeroinfl(Y ~ X1*X2, data=df, dist="poisson")
Anova(m, type=3)
Classes and Methods for R developed in the
Political Science Computational Laboratory
Department of Political Science
Stanford University
Simon Jackman
hurdle and zeroinfl functions by Achim Zeileis
A anova: 3 × 3
DfChisqPr(>Chisq)
<dbl><dbl><dbl>
X110.28960650.590472795
X213.10828550.077894917
X1:X218.14016570.004329533
library(ggplot2)
library(ggthemes)
library(scales)
library(plyr)

# Histograms
# http://www.sthda.com/english/wiki/ggplot2-histogram-plot-quick-start-guide-r-software-and-data-visualization
ggplot(data=df, aes(x=Y, fill=X1, color=X2)) + theme_minimal() + 
  # set the font styles for the plot title and axis titles
  theme(plot.title   = element_text(face="bold",  color="black", size=18, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.x = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.y = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=90)) + 
  # set the font styles for the value labels that show on each axis
  theme(axis.text.x  = element_text(face="plain", color="black", size=12, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.text.y  = element_text(face="plain", color="black", size=12, hjust=0.0, vjust=0.5, angle=0)) + 
  # set the font style for the facet labels
  theme(strip.text = element_text(face="bold", color="black", size=14, hjust=0.5)) + 
  # remove the legend
  theme(legend.position="none") + 
  # create the histogram; the alpha value ensures overlaps can be seen
  geom_histogram(binwidth=1, breaks=seq(-0.5,10.5,by=1), alpha=0.25) + 
  stat_bin(aes(y=..count.., label=..count..), binwidth=1, geom="text", vjust=-0.5) + 
  # create stacked plots by X, one for each histogram
  facet_grid(X1+X2 ~ .) + 
  # determine the outline and fill color values of each histogram
  scale_color_manual(values=c("darkblue","darkred")) + 
  scale_fill_manual(values=c("#69b3a2","#404080")) + 
  # set the labels for the title and each axis
  labs(title="Y by X1, X2", x="Y", y="Count") + 
  # set the ranges and value labels for each axis
  scale_x_continuous(breaks=seq(-0.5,10.5,by=1), labels=seq(0,11,by=1), limits=c(-0.5,10.5)) +
  scale_y_continuous(breaks=seq(0,8,by=2), labels=seq(0,8,by=2), limits=c(0,8))
Warning message:
“The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
 Please use `after_stat(count)` instead.”
../../_images/glm-bs-r_24_1.png

GLM for Negative Binomial Distribution Data#

  • Distribution: Negative Binomial

  • Link: log

  • Typical Uses: Negative Binomial Regression: Same as Poisson but for use in the presence of overdispersion

  • Reporting: “Figure 18a shows an interaction plot with ±1 standard deviation error bars for X1 and X2. An analysis of variance based on negative binomial regression indicated a statistically significant effect on Y of X1 (χ2(1, N=60) = 13.46, p < .001), but not of X2 (χ2(1, N=60) = 0.07, n.s.) or the X1×X2 interaction (χ2(1, N=60) = 0.92, n.s.).”

# Example data
# df has subjects (S), two factors (X1,X2) each w/levels (a,b), and count response (Y)
df <- read.csv("data/2F2LBs_negbin.csv")
head(df, 20)
A data.frame: 20 × 4
SX1X2Y
<int><chr><chr><int>
1 1aa 1
2 2ab 1
3 3ba 2
4 4bb 9
5 5aa 0
6 6ab 1
7 7ba 1
8 8bb 7
9 9aa 2
1010ab 2
1111ba 6
1212bb 1
1313aa 2
1414ab 3
1515ba 3
1616bb 1
1717aa 2
1818ab 3
1919ba10
2020bb 4
library(MASS) # for glm.nb
library(car) # for Anova
df$S = factor(df$S)
df$X1 = factor(df$X1)
df$X2 = factor(df$X2)
contrasts(df$X1) <- "contr.sum"
contrasts(df$X2) <- "contr.sum"
m = glm.nb(Y ~ X1*X2, data=df)
Anova(m, type=3)
A anova: 3 × 3
LR ChisqDfPr(>Chisq)
<dbl><dbl><dbl>
X113.459633910.0002437513
X2 0.066397710.7966558091
X1:X2 0.924734110.3362350241
library(ggplot2)
library(ggthemes)
library(scales)
library(plyr)

# Interaction plot
# http://www.sthda.com/english/wiki/ggplot2-line-plot-quick-start-guide-r-software-and-data-visualization
# http://www.sthda.com/english/wiki/ggplot2-error-bars-quick-start-guide-r-software-and-data-visualization
# http://www.sthda.com/english/wiki/ggplot2-point-shapes
# http://www.sthda.com/english/wiki/ggplot2-line-types-how-to-change-line-types-of-a-graph-in-r-software
df2 <- ddply(df, ~ X1*X2, function(d) # make a summary data table
  c(NROW(d$Y),
    sum(is.na(d$Y)),
    sum(!is.na(d$Y)),
    mean(d$Y, na.rm=TRUE),
    sd(d$Y, na.rm=TRUE),
    median(d$Y, na.rm=TRUE),
    IQR(d$Y, na.rm=TRUE)))
colnames(df2) <- c("X1","X2","Rows","NAs","NotNAs","Mean","SD","Median","IQR")
ggplot(data=df2, aes(x=X1, y=Mean, color=X2, group=X2)) + theme_minimal() + 
  # set the font styles for the plot title and axis titles
  theme(plot.title   = element_text(face="bold",  color="black", size=18, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.x = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.y = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=90)) + 
  # set the font styles for the value labels that show on each axis
  theme(axis.text.x  = element_text(face="plain", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.text.y  = element_text(face="plain", color="black", size=12, hjust=0.0, vjust=0.5, angle=0)) + 
  # set the font styles for the legend
  theme(legend.title = element_text(face="bold", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) +
  theme(legend.text  = element_text(face="plain", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) +
  # create the plot lines, points, and error bars
  geom_line(aes(linetype=X2), position=position_dodge(0.05)) + 
  geom_point(aes(shape=X2, size=X2), position=position_dodge(0.05)) + 
  geom_errorbar(aes(ymin=Mean-SD, ymax=Mean+SD), position=position_dodge(0.05), width=0.1) + 
  # place text labels on each bar
  geom_text(aes(label=sprintf("%.2f (±%.2f)", Mean, SD)), position=position_dodge(0.05), hjust=0.0, vjust=-1.0, color="black", size=3.5) +
  # set the labels for the title and each axis
  labs(title="Y by X1, X2", x="X1", y="Y") + 
  # set the ranges and value labels for each axis
  scale_x_discrete(labels=c("a","b")) + 
  scale_y_continuous(breaks=seq(0,10,by=2), labels=seq(0,10,by=2), limits=c(0,10), oob=rescale_none) + 
  # set the name, labels, and colors for the traces
  scale_color_manual(name="X2", labels=c("a","b"), values=c("red", "blue")) +
  # set the size and shape of the points
  scale_size_manual(values=c(4,4)) +
  scale_shape_manual(values=c(16,10)) +
  # set the linetype of the lines
  scale_linetype_manual(values=c("solid", "longdash"))
../../_images/glm-bs-r_28_0.png

GLM for Zero-Inflated Negative Binomial Distribution Data#

  • Distribution: Zero-Inflated Negative Binomial

  • Link: log

  • Typical Uses: Zero-Inflated Negative Binomial Regression: Same as Zero-Inflated Poisson but for use in the presence of overdispersion

  • Reporting: “Figure 19a shows histograms of Y by X1 and X2. An analysis of variance based on zero-inflated negative binomial regression indicated no statistically significant effects on Y of X1 (χ2(1, N=60) = 0.43, n.s.), X2 (χ2(1, N=60) = 1.28, n.s.), or the X1×X2 interaction (χ2(1, N=60) = 0.10, n.s.).”

# Example data
# df has subjects (S), two between-Ss factors (X1,X2) each w/levels (a,b), and count response (Y)
df <- read.csv("data/2F2LBs_zinegbin.csv")
head(df, 20)
A data.frame: 20 × 4
SX1X2Y
<int><chr><chr><int>
1 1aa 3
2 2ab 1
3 3ba13
4 4bb 0
5 5aa 0
6 6ab 1
7 7ba 0
8 8bb 2
9 9aa 0
1010ab 6
1111ba 0
1212bb 0
1313aa 0
1414ab 0
1515ba 2
1616bb 0
1717aa 4
1818ab 4
1919ba 7
2020bb 0
library(pscl) # for zeroinfl
library(car) # for Anova
df$S = factor(df$S)
df$X1 = factor(df$X1)
df$X2 = factor(df$X2)
contrasts(df$X1) <- "contr.sum"
contrasts(df$X2) <- "contr.sum"
m = zeroinfl(Y ~ X1*X2, data=df, dist="negbin")
Anova(m, type=3)
A anova: 3 × 3
DfChisqPr(>Chisq)
<dbl><dbl><dbl>
X110.425881520.5140168
X211.281784000.2575676
X1:X210.099110780.7528994
library(ggplot2)
library(ggthemes)
library(scales)
library(plyr)

# Histograms
# http://www.sthda.com/english/wiki/ggplot2-histogram-plot-quick-start-guide-r-software-and-data-visualization
ggplot(data=df, aes(x=Y, fill=X1, color=X2)) + theme_minimal() + 
  # set the font styles for the plot title and axis titles
  theme(plot.title   = element_text(face="bold",  color="black", size=18, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.x = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.y = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=90)) + 
  # set the font styles for the value labels that show on each axis
  theme(axis.text.x  = element_text(face="plain", color="black", size=12, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.text.y  = element_text(face="plain", color="black", size=12, hjust=0.0, vjust=0.5, angle=0)) + 
  # set the font style for the facet labels
  theme(strip.text = element_text(face="bold", color="black", size=14, hjust=0.5)) + 
  # remove the legend
  theme(legend.position="none") + 
  # create the histogram; the alpha value ensures overlaps can be seen
  geom_histogram(binwidth=1, breaks=seq(-0.5,13.5,by=1), alpha=0.25) + 
  stat_bin(aes(y=..count.., label=..count..), binwidth=1, geom="text", vjust=-0.5) + 
  # create stacked plots by X, one for each histogram
  facet_grid(X1+X2 ~ .) + 
  # determine the outline and fill color values of each histogram
  scale_color_manual(values=c("darkblue","darkred")) + 
  scale_fill_manual(values=c("#69b3a2","#404080")) + 
  # set the labels for the title and each axis
  labs(title="Y by X1, X2", x="Y", y="Count") + 
  # set the ranges and value labels for each axis
  scale_x_continuous(breaks=seq(-0.5,13.5,by=1), labels=seq(0,14,by=1), limits=c(-0.5,13.5)) +
  scale_y_continuous(breaks=seq(0,10,by=2), labels=seq(0,10,by=2), limits=c(0,10))
../../_images/glm-bs-r_32_0.png

GLM for Gamma and Expontential Distribution Data#

  • Distribution: Gamma and Exponential

  • Link: inverse or log when 1/0 is undefined

  • Typical Uses: Gamma Regression: Exponentially distributed responses (e.g. income, wait times)

  • Reporting: “Figure 20a shows an interaction plot with ±1 standard deviation error bars for X1 and X2. An analysis of variance based on Gamma regression indicated no statistically significant effect on Y of X1 (χ2(1, N=60) = 0.40, n.s.) or X2 (χ2(1, N=60) = 0.58, n.s.), but the X1×X2 interaction was marginal (χ2(1, N=60) = 3.26, p = .071).”

# Example data
# df has subjects (S), two factors (X1,X2) each w/levels (a,b), and continuous response (Y)
df <- read.csv("data/2F2LBs_gamma.csv")
head(df, 20)
A data.frame: 20 × 4
SX1X2Y
<int><chr><chr><dbl>
1 1aa 7.210222
2 2ab 4.511329
3 3ba 8.750533
4 4bb 4.636170
5 5aa 5.059694
6 6ab 3.906251
7 7ba 4.144064
8 8bb 2.890245
9 9aa11.229607
1010ab 5.079605
1111ba 4.408323
1212bb 9.266559
1313aa 3.851376
1414ab 6.323390
1515ba 3.870606
1616bb 3.884600
1717aa 4.155613
1818ab 3.056740
1919ba 6.283452
2020bb 3.060357
library(car) # for Anova
df$S = factor(df$S)
df$X1 = factor(df$X1)
df$X2 = factor(df$X2)
contrasts(df$X1) <- "contr.sum"
contrasts(df$X2) <- "contr.sum"
m = glm(Y ~ X1*X2, data=df, family=Gamma)
# family=Gamma(link="log") is often used
Anova(m, type=3)
A anova: 3 × 3
LR ChisqDfPr(>Chisq)
<dbl><dbl><dbl>
X10.397186410.52854588
X20.579167410.44663887
X1:X23.257493310.07109773
library(ggplot2)
library(ggthemes)
library(scales)
library(plyr)

# Interaction plot
# http://www.sthda.com/english/wiki/ggplot2-line-plot-quick-start-guide-r-software-and-data-visualization
# http://www.sthda.com/english/wiki/ggplot2-error-bars-quick-start-guide-r-software-and-data-visualization
# http://www.sthda.com/english/wiki/ggplot2-point-shapes
# http://www.sthda.com/english/wiki/ggplot2-line-types-how-to-change-line-types-of-a-graph-in-r-software
df2 <- ddply(df, ~ X1*X2, function(d) # make a summary data table
  c(NROW(d$Y),
    sum(is.na(d$Y)),
    sum(!is.na(d$Y)),
    mean(d$Y, na.rm=TRUE),
    sd(d$Y, na.rm=TRUE),
    median(d$Y, na.rm=TRUE),
    IQR(d$Y, na.rm=TRUE)))
colnames(df2) <- c("X1","X2","Rows","NAs","NotNAs","Mean","SD","Median","IQR")
ggplot(data=df2, aes(x=X1, y=Mean, color=X2, group=X2)) + theme_minimal() + 
  # set the font styles for the plot title and axis titles
  theme(plot.title   = element_text(face="bold",  color="black", size=18, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.x = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.title.y = element_text(face="bold",  color="black", size=16, hjust=0.5, vjust=0.0, angle=90)) + 
  # set the font styles for the value labels that show on each axis
  theme(axis.text.x  = element_text(face="plain", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) + 
  theme(axis.text.y  = element_text(face="plain", color="black", size=12, hjust=0.0, vjust=0.5, angle=0)) + 
  # set the font styles for the legend
  theme(legend.title = element_text(face="bold", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) +
  theme(legend.text  = element_text(face="plain", color="black", size=14, hjust=0.5, vjust=0.0, angle=0)) +
  # create the plot lines, points, and error bars
  geom_line(aes(linetype=X2), position=position_dodge(0.05)) + 
  geom_point(aes(shape=X2, size=X2), position=position_dodge(0.05)) + 
  geom_errorbar(aes(ymin=Mean-SD, ymax=Mean+SD), position=position_dodge(0.05), width=0.1) + 
  # place text labels on each bar
  geom_text(aes(label=sprintf("%.2f (±%.2f)", Mean, SD)), position=position_dodge(0.05), hjust=0.0, vjust=-1.0, color="black", size=3.5) +
  # set the labels for the title and each axis
  labs(title="Y by X1, X2", x="X1", y="Y") + 
  # set the ranges and value labels for each axis
  scale_x_discrete(labels=c("a","b")) + 
  scale_y_continuous(breaks=seq(0,9,by=2), labels=seq(0,9,by=2), limits=c(0,9), oob=rescale_none) + 
  # set the name, labels, and colors for the traces
  scale_color_manual(name="X2", labels=c("a","b"), values=c("red", "blue")) +
  # set the size and shape of the points
  scale_size_manual(values=c(4,4)) +
  scale_shape_manual(values=c(16,10)) +
  # set the linetype of the lines
  scale_linetype_manual(values=c("solid", "longdash"))
../../_images/glm-bs-r_36_0.png