Emorie D Beck
my_theme <- function(){
theme_classic() +
theme(
legend.position = "bottom"
, legend.title = element_text(face = "bold", size = rel(1))
, legend.text = element_text(face = "italic", size = rel(1))
, axis.text = element_text(face = "bold", size = rel(1.1), color = "black")
, axis.title = element_text(face = "bold", size = rel(1.2))
, plot.title = element_text(face = "bold", size = rel(1.2), hjust = .5)
, plot.subtitle = element_text(face = "italic", size = rel(1.2), hjust = .5)
, strip.text = element_text(face = "bold", size = rel(1.1), color = "white")
, strip.background = element_rect(fill = "black")
)
}
ggplot2
logicshiny
)cowplot
, so I’m going to teach you thatggplot2
extensions: https://exts.ggplot2.tidyverse.org/gallery/
ggExtra
cowplot
(and lots of assortments)patchwork
ggExtra
ggExtra
because it will help us create plots with distributions in the margins.load(url("https://github.com/emoriebeck/psc290-data-viz-2022/blob/main/04-week4-associations/04-data/week4-data.RData?raw=true"))
pred_data
# A tibble: 5,021 × 25
study o_value p_year SID p_value age gender grsWages parEdu race
<chr> <fct> <dbl> <chr> <dbl> <dbl> <fct> <dbl> <fct> <fct>
1 Study1 0 2005 61215 6.67 -29.9 1 1.02 2 0
2 Study1 0 2005 184965 0 -22.9 0 1.14 2 0
3 Study1 0 2005 488251 10 -3.92 1 0.717 1 0
4 Study1 0 2005 650779 7.22 -25.9 1 0.644 3 0
5 Study1 0 2005 969691 7.22 -0.925 1 0.812 2 0
6 Study1 0 2005 986687 6.11 14.1 0 1.76 2 0
7 Study1 0 2005 1054011 5.56 8.08 0 1.34 1 0
8 Study1 0 2005 1372251 7.78 5.08 1 0.842 1 0
9 Study1 0 2005 1496703 6.11 -23.9 0 1.42 2 0
10 Study1 0 2005 1897887 2.78 38.1 1 0.725 2 0
# … with 5,011 more rows, and 15 more variables: physhlthevnt <fct>,
# SRhealth <dbl>, smokes <fct>, alcohol <fct>, exercise <dbl>, BMI <dbl>,
# parDivorce <fct>, PhysFunc <fct>, religion <fct>, education <fct>,
# married <fct>, numKids <dbl>, parOccPrstg <dbl>, reliability <dbl>,
# predInt <dbl>
ggExtra
Let’s plot the association between conscientiousness and self-rated health across genders in Study 1:
p <- pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(x = p_value, y = SRhealth, color = gender, fill = gender)) +
geom_point(
color = "grey20"
, shape = 21, size = 3
) +
scale_fill_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
labs(
x = "Conscientiousness (POMP, 0-10)"
, y = "Self-Rated Health (POMP, 0-10)"
, fill = "Gender"
) +
my_theme()
p
ggExtra
ggExtra
To get marginal distributions, we can just use ggExtra::ggMarginal()
ggExtra
color
ggExtra
fill
ggExtra
ggExtra
So let’s move on to patchwork
and cowplot
where we can make these much more flexibly with just a few extra lines of code
cowplot
+ pathwork
cowplot
or patchwork
?
R
plots and ggplot2
plotspx <- pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(x = p_value, fill = gender, color = gender)) +
geom_density(alpha = .5) +
scale_color_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
scale_fill_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
labs(fill = "Gender", color = "Gender") +
theme_void()
px
py <- pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(x = SRhealth, fill = gender, color = gender)) +
geom_density(alpha = .5) +
scale_color_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
scale_fill_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
labs(fill = "Gender", color = "Gender") +
coord_flip() +
theme_void()
py
We can use the +
and /
operators to arrange them:
px <- pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(x = p_value, y = gender, fill = gender, color = gender)) +
geom_boxplot(alpha = .5) +
geom_jitter(aes(y = gender), alpha = .5) +
scale_color_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
scale_fill_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
labs(fill = "Gender", color = "Gender") +
theme_void() +
theme(legend.position = "none")
px
py <- pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(x = SRhealth, y = gender, fill = gender, color = gender)) +
geom_boxplot(alpha = .5) +
geom_jitter(aes(y = gender), alpha = .5) +
scale_color_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
scale_fill_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
labs(fill = "Gender", color = "Gender") +
coord_flip() +
theme_void() +
theme(legend.position = "none")
py
cowplot
Let me show you a couple of examples from my work that has used cowplot
And remember these models?
tidy_ci <- function(m) tidy(m, conf.int = T)
nested_m <- pred_data %>%
group_by(study) %>%
nest() %>%
ungroup() %>%
mutate(
m = map(data
, ~glm(
o_value ~ p_value
, data = .
, family = binomial(link = "logit")
)
)
, tidy = map(m, tidy_ci)
)
nested_m
# A tibble: 6 × 4
study data m tidy
<chr> <list> <list> <list>
1 Study1 <tibble [831 × 24]> <glm> <tibble [2 × 7]>
2 Study2 <tibble [1,000 × 24]> <glm> <tibble [2 × 7]>
3 Study3 <tibble [1,000 × 24]> <glm> <tibble [2 × 7]>
4 Study4 <tibble [574 × 24]> <glm> <tibble [2 × 7]>
5 Study5 <tibble [616 × 24]> <glm> <tibble [2 × 7]>
6 Study6 <tibble [1,000 × 24]> <glm> <tibble [2 × 7]>
And remember these models?
Let’s make two small changes
m_fun <- function(d) {
glm(o_value ~ p_value + married + married:p_value
, data = d
, family = binomial(link = "logit"))
}
tidy_ci <- function(m) tidy(m, conf.int = T) %>% mutate(df.resid = m$df.residual, n = nrow(m$data))
nested_m <- pred_data %>%
group_by(study) %>%
nest() %>%
ungroup() %>%
mutate(
m = map(data, m_fun)
, tidy = map(m, tidy_ci)
)
nested_m
# A tibble: 6 × 4
study data m tidy
<chr> <list> <list> <list>
1 Study1 <tibble [831 × 24]> <glm> <tibble [4 × 9]>
2 Study2 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]>
3 Study3 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]>
4 Study4 <tibble [574 × 24]> <glm> <tibble [4 × 9]>
5 Study5 <tibble [616 × 24]> <glm> <tibble [4 × 9]>
6 Study6 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]>
Here’s our unnested model terms
nested_m %>% select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp)
# A tibble: 24 × 10
study term estim…¹ std.e…² stati…³ p.value conf.…⁴ conf.…⁵ df.re…⁶ n
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 Study1 (Interc… 1.33 0.391 0.727 0.467 0.618 2.87 827 831
2 Study1 p_value 0.900 0.0572 -1.84 0.0655 0.804 1.01 827 831
3 Study1 married1 0.926 0.524 -0.147 0.883 0.330 2.59 827 831
4 Study1 p_value… 1.02 0.0745 0.238 0.812 0.880 1.18 827 831
5 Study2 (Interc… 0.705 1.59 -0.220 0.826 0.0250 18.1 992 1000
6 Study2 p_value 1.09 0.218 0.376 0.707 0.702 1.71 992 1000
7 Study2 married1 6.40 1.62 1.14 0.253 0.237 190. 992 1000
8 Study2 p_value… 0.758 0.221 -1.25 0.211 0.478 1.18 992 1000
9 Study3 (Interc… 6.03 1.20 1.49 0.135 0.581 68.2 996 1000
10 Study3 p_value 0.706 0.156 -2.23 0.0256 0.514 0.952 996 1000
# … with 14 more rows, and abbreviated variable names ¹estimate, ²std.error,
# ³statistic, ⁴conf.low, ⁵conf.high, ⁶df.resid
But maybe we are particularly interested in the interaction between marital status and personality in predicting mortality, which we want to plot as a forest plot
nested_m %>% select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp) %>%
filter(term == "p_value:married1")
# A tibble: 6 × 10
study term estim…¹ std.e…² stati…³ p.value conf.…⁴ conf.…⁵ df.re…⁶ n
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 Study1 p_value:… 1.02 0.0745 0.238 0.812 0.880 1.18 827 831
2 Study2 p_value:… 0.758 0.221 -1.25 0.211 0.478 1.18 992 1000
3 Study3 p_value:… 1.21 0.161 1.17 0.242 0.886 1.68 996 1000
4 Study4 p_value:… 0.824 0.284 -0.683 0.495 0.471 1.46 570 574
5 Study5 p_value:… 0.618 0.159 -3.04 0.00239 0.449 0.838 612 616
6 Study6 p_value:… 0.957 0.109 -0.407 0.684 0.773 1.19 996 1000
# … with abbreviated variable names ¹estimate, ²std.error, ³statistic,
# ⁴conf.low, ⁵conf.high, ⁶df.resid
Let’s add our point estimates and uncertainty intervals
But we want to order the points by the effect sizes:
p1 <- nested_m %>% select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp) %>%
filter(term == "p_value:married1")
p1 <- p1 %>%
mutate(study = factor(study, (p1 %>% arrange(desc(estimate)))$study)) %>%
ggplot(aes(x = estimate, y = study)) +
labs(
x = "Model Estimated OR (CI)"
, y = NULL
) +
my_theme()
p1
Let’s add our point estimates and uncertainty intervals back in
And add in a vertical line at OR = 1:
cowplot
or patchwork
p2 <- nested_m %>% select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp) %>%
filter(term == "p_value:married1")
stdy_levs <- tibble(num = 1:6, new = (p2 %>% arrange(desc(estimate)))$study)
p2 <- p2 %>%
arrange(desc(estimate)) %>%
mutate(study = factor(study, stdy_levs$new)
, study2 = 1:n()) %>%
mutate_at(vars(estimate, conf.low, conf.high), ~sprintf("%.2f", .)) %>%
mutate(est = sprintf("%s [%s, %s]", estimate, conf.low, conf.high)
, n = as.character(n)) %>%
select(study, study2, estimate, n, est) %>%
pivot_longer(
cols = c(est, n)
, values_to = "lab"
, names_to = "est"
)
p2
# A tibble: 12 × 5
study study2 estimate est lab
<fct> <int> <chr> <chr> <chr>
1 Study3 1 1.21 est 1.21 [0.89, 1.68]
2 Study3 1 1.21 n 1000
3 Study1 2 1.02 est 1.02 [0.88, 1.18]
4 Study1 2 1.02 n 831
5 Study6 3 0.96 est 0.96 [0.77, 1.19]
6 Study6 3 0.96 n 1000
7 Study4 4 0.82 est 0.82 [0.47, 1.46]
8 Study4 4 0.82 n 574
9 Study2 5 0.76 est 0.76 [0.48, 1.18]
10 Study2 5 0.76 n 1000
11 Study5 6 0.62 est 0.62 [0.45, 0.84]
12 Study5 6 0.62 n 616
Let’s build our base
Add in the text:
Set the theme
We’ll add a horizontal line at the top and bottom to match the forest plot:
Add the column labels:
We added an extra row at the top of the table, so we need to do that for the forest plot, too
p1 <- nested_m %>% select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp) %>%
filter(term == "p_value:married1")
stdy_levs <- tibble(num = 1:6, new = (p1 %>% arrange(desc(estimate)))$study)
p1 <- p1 %>%
arrange(desc(estimate)) %>%
mutate(study = factor(study, stdy_levs$new)
, study2 = 1:n()) %>%
ggplot(aes(x = estimate, y = study2)) +
labs(
x = "Model Estimated OR (CI)"
, y = NULL
) +
my_theme()
p1
Add our point estimates and uncertainty intervals, along with the vertical line at OR = 1
Change the y scale back
Add in that top bar
Remove the y axis line
And let’s block out where the dashed line touches the top:
patchwork
annotate()
is a great tool for thisscale_[map]_[type]
functions, especially given the labels can be anything we want!theme
elements also let us hack many more parts!ggplot2
is simply having lots of tricks up your sleeve, which come from knowledge (and StackOverflow)patchwork
is great, and a little more intuitive for simple use casespatchwork
allows you to use the +
to piece plots together and makes a lot of default assumptions about alignmentcowplot
doesn’t do (easily)patchwork
: Piecing the Plots TogetherWe can just use the +
operator!
patchwork
: Piecing the Plots TogetherWe can also add rows using the /
patchwork
: Piecing the Plots TogetherAnd change their arrangement using plot_layout()
patchwork
: Piecing the Plots TogetherAnd change their arrangement using plot_layout()
patchwork
: Piecing the Plots TogetherWe can add titles using plot_annotation()
patchwork
: Piecing the Plots TogetherWe can add labels to plot using plot_annotation()
patchwork
: Piecing the Plots TogetherWe can add labels to plot using plot_annotation()
# A tibble: 6 × 5
study data m tidy pred
<chr> <list> <list> <list> <list>
1 Study1 <tibble [831 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
2 Study2 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
3 Study3 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
4 Study4 <tibble [574 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
5 Study5 <tibble [616 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
6 Study6 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
nested_m %>%
mutate(df.resid = map_dbl(m, df.residual)) %>%
select(study, pred, df.resid) %>%
unnest(pred)
# A tibble: 1,200 × 6
study married p_value .fitted .se.fit df.resid
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 Study1 0 0 0.571 0.0957 827
2 Study1 0 0.101 0.568 0.0945 827
3 Study1 0 0.202 0.565 0.0933 827
4 Study1 0 0.303 0.563 0.0920 827
5 Study1 0 0.404 0.560 0.0908 827
6 Study1 0 0.505 0.557 0.0896 827
7 Study1 0 0.606 0.555 0.0883 827
8 Study1 0 0.707 0.552 0.0871 827
9 Study1 0 0.808 0.550 0.0858 827
10 Study1 0 0.909 0.547 0.0845 827
# … with 1,190 more rows
Let’s set up the core part of the simple effects
p3 <- nested_m %>%
mutate(df.resid = map_dbl(m, df.residual)) %>%
select(study, pred, df.resid) %>%
unnest(pred) %>%
mutate(married = factor(married, c(0,1), c("Never Married", "Married"))) %>%
ggplot(aes(x = p_value, y = .fitted, fill = study, color = study)) +
labs(x = "Conscientiousness (POMP, 0-10)"
, y = "Predicted Odds Ratio\nof Mortality (95% CI)"
, fill = NULL
, color = NULL) +
facet_grid(~married) +
my_theme()
p3
Add in our lineribbon:
patchwork
: Piecing the Plots Togetherpatchwork
: Piecing the Plots Togetherpatchwork
: Piecing the Plots Togetherpatchwork
, cowplot
also adds some other new tools to our repertoire:
ggdraw()
draw_label()
draw_plot_label()
draw_grob()
draw_image()
ggdraw()
+ draw_label()
ggdraw()
is more or a setup function that allows us to add grobs on topdraw_label()
to make our title (just some text to put on the plot)ggdraw()
+ draw_label()
It’d be nice if the title was centered, right?
ggdraw()
+ draw_label()
It’d be nice if the title was centered, right?
ggdraw()
+ draw_label()
We could use cowplot::draw_label()
to add a title
and subtitle
to our plot:
ggdraw()
+ draw_label()
draw_label()
draw_label()
is meant to be a better wrapper for geom_text()
that requires less customizationgeom_text()
would require 10+ arguments and has no easy application to figures put together with cowplot (or other packages for doing so)draw_label()
draw_plot()
Imagine you want to put a plot inside of another
inset <-
pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(y = gender, x = SRhealth, fill = gender)) +
scale_fill_manual(values = c("cornflowerblue", "coral")) +
scale_y_discrete(labels = c("Male", "Female")) +
stat_halfeye(alpha = .8) +
my_theme() +
theme(legend.position = "none") + theme_half_open(12)
draw_plot()
Imagine you want to put a plot inside of another
draw_image()
We can also add images!
draw_image()
We can also add images!
cowplot::plot_grid()
plot_grid()
cowplot
is plot_grid()
, which allows us to place differnt figures within the same figure in a grid, and it has a lot of useful arguments+
, /
in `patchworkplotlist = NULL
align = c("none", "h", "v", "hv")
axis = c("none", "l", "r", "t", "b", "lr", "tb", "tblr")
nrow = NULL
ncol = NULL
rel_widths = 1
rel_heights = 1
labels = NULL
label_size = 14
label_fontfamily = NULL
label_fontface = "bold"
label_colour = NULL
label_x = 0
label_y = 1
hjust = -0.5
vjust = 1.5
scale = 1
greedy = TRUE
byrow = TRUE
cols = NULL
rows = NULL
plot_grid()
plotlist = NULL
align = c("none", "h", "v", "hv")
axis = c("none", "l", "r", "t", "b", "lr", "tb", "tblr")
nrow = NULL
ncol = NULL
rel_widths = 1
rel_heights = 1
labels = NULL
label_size = 14
label_fontfamily = NULL
label_fontface = "bold"
label_colour = NULL
label_x = 0
label_y = 1
hjust = -0.5
vjust = 1.5
scale = 1
greedy = TRUE
byrow = TRUE
cols = NULL
rows = NULL
plot_grid()
plot_grid()
"hv"
leads to odd spacingplot_grid()
plot_grid()
plot_grid()
rel_heights
plot_grid()
: Labelsplot_grid()
: Labelsplot_grid()
: Labelsplot_grid()
: Labelsplot_grid()
: LabelsPSC 290 - Data Visualization