my_theme <- function(){
theme_bw() +
theme(
legend.position = "bottom"
, legend.title = element_text(face = "bold", size = rel(1))
, legend.text = element_text(face = "italic", size = rel(1))
, axis.text = element_text(face = "bold", size = rel(1.1), color = "black")
, axis.title = element_text(face = "bold", size = rel(1.2))
, plot.title = element_text(face = "bold", size = rel(1.2), hjust = .5)
, plot.subtitle = element_text(face = "italic", size = rel(1.2), hjust = .5)
, strip.text = element_text(face = "bold", size = rel(1.1), color = "white")
, strip.background = element_rect(fill = "black")
)
}
ggplot2
logicshiny
)There are lots of packages for piecing visualizations together
Over the years, I’ve tried:
ggExtra
cowplot
patchwork
Although patchwork
wins by a landslide (imo), each has helpful unique features, so I’ll show you elements of each
ggplot2
extensions: https://exts.ggplot2.tidyverse.org/gallery/
ggExtra
patchwork
cowplot
(and lots of assortments)ggExtra
ggExtra
because it will help us create plots with distributions in the margins.load(url("https://github.com/emoriebeck/psc290-data-viz-2022/blob/main/04-week4-associations/04-data/week4-data.RData?raw=true"))
pred_data
# A tibble: 5,021 × 25
study o_value p_year SID p_value age gender grsWages parEdu race
<chr> <fct> <dbl> <chr> <dbl> <dbl> <fct> <dbl> <fct> <fct>
1 Study1 0 2005 61215 6.67 -29.9 1 1.02 2 0
2 Study1 0 2005 184965 0 -22.9 0 1.14 2 0
3 Study1 0 2005 488251 10 -3.92 1 0.717 1 0
4 Study1 0 2005 650779 7.22 -25.9 1 0.644 3 0
5 Study1 0 2005 969691 7.22 -0.925 1 0.812 2 0
6 Study1 0 2005 986687 6.11 14.1 0 1.76 2 0
7 Study1 0 2005 1054011 5.56 8.08 0 1.34 1 0
8 Study1 0 2005 1372251 7.78 5.08 1 0.842 1 0
9 Study1 0 2005 1496703 6.11 -23.9 0 1.42 2 0
10 Study1 0 2005 1897887 2.78 38.1 1 0.725 2 0
# ℹ 5,011 more rows
# ℹ 15 more variables: physhlthevnt <fct>, SRhealth <dbl>, smokes <fct>,
# alcohol <fct>, exercise <dbl>, BMI <dbl>, parDivorce <fct>, PhysFunc <fct>,
# religion <fct>, education <fct>, married <fct>, numKids <dbl>,
# parOccPrstg <dbl>, reliability <dbl>, predInt <dbl>
ggExtra
Let’s plot the association between conscientiousness and self-rated health across genders in Study 1:
p <- pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(x = p_value, y = SRhealth, color = gender)) +
geom_point(
, size = 2
, alpha = .5
) +
scale_color_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
labs(
x = "Conscientiousness (POMP, 0-10)"
, y = "Self-Rated Health (POMP, 0-10)"
, color = "Gender"
) +
my_theme()
p
ggExtra
ggExtra
To get marginal distributions, we can just use ggExtra::ggMarginal()
ggExtra
color
ggExtra
fill
ggExtra
ggExtra
cowplot
+ pathwork
cowplot
or patchwork
?
R
plots and ggplot2
plotspx <- pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(x = p_value, fill = gender, color = gender)) +
geom_density(alpha = .5) +
scale_color_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
scale_fill_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
labs(fill = "Gender", color = "Gender") +
theme_void()
px
py <- pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(x = SRhealth, fill = gender, color = gender)) +
geom_density(alpha = .5) +
scale_color_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
scale_fill_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
labs(fill = "Gender", color = "Gender") +
coord_flip() +
theme_void()
py
We can use the +
and /
operators to arrange them:
px <- pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(x = p_value, y = gender, fill = gender, color = gender)) +
geom_boxplot(alpha = .5) +
geom_jitter(aes(y = gender), alpha = .5) +
scale_color_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
scale_fill_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
labs(fill = "Gender", color = "Gender") +
theme_void() +
theme(legend.position = "none")
px
py <- pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(x = SRhealth, y = gender, fill = gender, color = gender)) +
geom_boxplot(alpha = .5) +
geom_jitter(aes(y = gender), alpha = .5) +
scale_color_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
scale_fill_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
labs(fill = "Gender", color = "Gender") +
coord_flip() +
theme_void() +
theme(legend.position = "none")
py
Let me show you a couple of examples from my work that has used cowplot
or patchwork
And remember these models?
tidy_ci <- function(m) tidy(m, conf.int = T)
nested_m <- pred_data %>%
group_by(study) %>%
nest() %>%
ungroup() %>%
mutate(
m = map(data
, ~glm(
o_value ~ p_value
, data = .
, family = binomial(link = "logit")
)
)
, tidy = map(m, tidy_ci)
)
nested_m
# A tibble: 6 × 4
study data m tidy
<chr> <list> <list> <list>
1 Study1 <tibble [831 × 24]> <glm> <tibble [2 × 7]>
2 Study2 <tibble [1,000 × 24]> <glm> <tibble [2 × 7]>
3 Study3 <tibble [1,000 × 24]> <glm> <tibble [2 × 7]>
4 Study4 <tibble [574 × 24]> <glm> <tibble [2 × 7]>
5 Study5 <tibble [616 × 24]> <glm> <tibble [2 × 7]>
6 Study6 <tibble [1,000 × 24]> <glm> <tibble [2 × 7]>
m_fun <- function(d) {
glm(o_value ~ p_value + married + married:p_value
, data = d
, family = binomial(link = "logit"))
}
tidy_ci <- function(m) tidy(m, conf.int = T) %>% mutate(df.resid = m$df.residual, n = nrow(m$data))
nested_m <- pred_data %>%
group_by(study) %>%
nest() %>%
ungroup() %>%
mutate(
m = map(data, m_fun)
, tidy = map(m, tidy_ci)
)
nested_m
# A tibble: 6 × 4
study data m tidy
<chr> <list> <list> <list>
1 Study1 <tibble [831 × 24]> <glm> <tibble [4 × 9]>
2 Study2 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]>
3 Study3 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]>
4 Study4 <tibble [574 × 24]> <glm> <tibble [4 × 9]>
5 Study5 <tibble [616 × 24]> <glm> <tibble [4 × 9]>
6 Study6 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]>
Here’s our unnested model terms
nested_m %>%
select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp)
# A tibble: 24 × 10
study term estimate std.error statistic p.value conf.low conf.high df.resid
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 Study1 (Int… 1.33 0.391 0.727 0.467 0.618 2.87 827
2 Study1 p_va… 0.900 0.0572 -1.84 0.0655 0.804 1.01 827
3 Study1 marr… 0.926 0.524 -0.147 0.883 0.330 2.59 827
4 Study1 p_va… 1.02 0.0745 0.238 0.812 0.880 1.18 827
5 Study2 (Int… 0.705 1.59 -0.220 0.826 0.0250 18.1 992
6 Study2 p_va… 1.09 0.218 0.376 0.707 0.702 1.71 992
7 Study2 marr… 6.40 1.62 1.14 0.253 0.237 190. 992
8 Study2 p_va… 0.758 0.221 -1.25 0.211 0.478 1.18 992
9 Study3 (Int… 6.03 1.20 1.49 0.135 0.581 68.2 996
10 Study3 p_va… 0.706 0.156 -2.23 0.0256 0.514 0.952 996
# ℹ 14 more rows
# ℹ 1 more variable: n <int>
But maybe we are particularly interested in the interaction between marital status and personality in predicting mortality, which we want to plot as a forest plot
nested_m %>%
select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp) %>%
filter(term == "p_value:married1")
# A tibble: 6 × 10
study term estimate std.error statistic p.value conf.low conf.high df.resid
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 Study1 p_val… 1.02 0.0745 0.238 0.812 0.880 1.18 827
2 Study2 p_val… 0.758 0.221 -1.25 0.211 0.478 1.18 992
3 Study3 p_val… 1.21 0.161 1.17 0.242 0.886 1.68 996
4 Study4 p_val… 0.824 0.284 -0.683 0.495 0.471 1.46 570
5 Study5 p_val… 0.618 0.159 -3.04 0.00239 0.449 0.838 612
6 Study6 p_val… 0.957 0.109 -0.407 0.684 0.773 1.19 996
# ℹ 1 more variable: n <int>
Let’s add our point estimates and uncertainty intervals
But we want to order the points by the effect sizes:
p1 <- nested_m %>%
select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp) %>%
filter(term == "p_value:married1") %>%
arrange(desc(estimate)) %>%
mutate(study = fct_inorder(study)) %>%
ggplot(aes(x = estimate, y = study)) +
labs(
x = "Model Estimated OR (CI)"
, y = NULL
) +
my_theme()
p1
Let’s add our point estimates and uncertainty intervals back in
And add in a vertical line at OR = 1:
cowplot
or patchwork
p2 <- nested_m %>%
select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp) %>%
filter(term == "p_value:married1") %>%
arrange(desc(estimate)) %>%
mutate(
study = fct_inorder(study)
, study2 = 1:n()
) %>%
mutate_at(vars(estimate, conf.low, conf.high), ~sprintf("%.2f", .)) %>%
mutate(
est = sprintf("%s [%s, %s]", estimate, conf.low, conf.high)
, n = as.character(n)
) %>%
select(study, study2, estimate, n, est) %>%
pivot_longer(
cols = c(est, n)
, values_to = "lab"
, names_to = "est"
)
p2
# A tibble: 12 × 5
study study2 estimate est lab
<fct> <int> <chr> <chr> <chr>
1 Study1 1 1.02 est 1.02 [0.88, 1.18]
2 Study1 1 1.02 n 831
3 Study2 2 0.76 est 0.76 [0.48, 1.18]
4 Study2 2 0.76 n 1000
5 Study3 3 1.21 est 1.21 [0.89, 1.68]
6 Study3 3 1.21 n 1000
7 Study4 4 0.82 est 0.82 [0.47, 1.46]
8 Study4 4 0.82 n 574
9 Study5 5 0.62 est 0.62 [0.45, 0.84]
10 Study5 5 0.62 n 616
11 Study6 6 0.96 est 0.96 [0.77, 1.19]
12 Study6 6 0.96 n 1000
Let’s build our base
Add in the text:
Set the theme
We’ll add a horizontal line at the top and bottom to match the forest plot:
Add the column labels:
We need a little margin on the top and bottom:
p1 <- nested_m %>% select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp) %>%
filter(term == "p_value:married1") %>%
arrange(desc(estimate)) %>%
mutate(study = fct_inorder(study)
, study2 = 1:n()) %>%
ggplot(aes(x = estimate, y = study2)) +
labs(
x = "Model Estimated OR (CI)"
, y = NULL
) +
my_theme()
p1
Add our point estimates and uncertainty intervals, along with the vertical line at OR = 1
Change the y scale back
Add in that top bar:
Remove the y axis line
annotate()
rectangles to block out portions of the plot.patchwork
patchwork
: Piecing the Plots Togetherannotate()
is a great tool for thisscale_[map]_[type]
functions, especially given the labels can be anything we want!theme
elements also let us hack many more parts!ggplot2
is simply having lots of tricks up your sleeve, which come from knowledge (and StackOverflow)patchwork
: Piecing the Plots Togetherpatchwork
is great, and a little more intuitive for simple use casespatchwork
allows you to use the +
to piece plots together and makes a lot of default assumptions about alignmentcowplot
doesn’t do (easily)patchwork
: Piecing the Plots TogetherWe can just use the +
operator!
patchwork
: Piecing the Plots TogetherWe can also add rows using the /
patchwork
: Piecing the Plots TogetherAnd change their arrangement using plot_layout()
patchwork
: Piecing the Plots TogetherAnd change their arrangement using plot_layout()
patchwork
: Piecing the Plots TogetherWe can add titles using plot_annotation()
patchwork
: Piecing the Plots TogetherWe can add labels to plot using plot_annotation()
patchwork
: Piecing the Plots TogetherWe can add labels to plot using plot_annotation()
# A tibble: 6 × 5
study data m tidy pred
<chr> <list> <list> <list> <list>
1 Study1 <tibble [831 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
2 Study2 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
3 Study3 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
4 Study4 <tibble [574 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
5 Study5 <tibble [616 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
6 Study6 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
nested_m %>%
mutate(df.resid = map_dbl(m, df.residual)) %>%
select(study, pred, df.resid) %>%
unnest(pred)
# A tibble: 1,200 × 6
study married p_value .fitted .se.fit df.resid
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 Study1 0 0 0.571 0.0957 827
2 Study1 0 0.101 0.568 0.0945 827
3 Study1 0 0.202 0.565 0.0933 827
4 Study1 0 0.303 0.563 0.0920 827
5 Study1 0 0.404 0.560 0.0908 827
6 Study1 0 0.505 0.557 0.0896 827
7 Study1 0 0.606 0.555 0.0883 827
8 Study1 0 0.707 0.552 0.0871 827
9 Study1 0 0.808 0.550 0.0858 827
10 Study1 0 0.909 0.547 0.0845 827
# ℹ 1,190 more rows
Let’s set up the core part of the simple effects
p3 <- nested_m %>%
mutate(df.resid = map_dbl(m, df.residual)) %>%
select(study, pred, df.resid) %>%
unnest(pred) %>%
mutate(married = factor(married, c(0,1), c("Never Married", "Married"))) %>%
ggplot(aes(x = p_value, y = .fitted, fill = study, color = study)) +
labs(x = "Conscientiousness (POMP, 0-10)"
, y = "Predicted Odds Ratio\nof Mortality (95% CI)"
, fill = NULL
, color = NULL) +
facet_grid(~married) +
my_theme()
p3
Add in our lineribbon:
patchwork
, cowplot
also adds some other new tools to our repertoire:
ggdraw()
draw_label()
draw_plot_label()
draw_grob()
draw_image()
ggdraw()
+ draw_label()
ggdraw()
is more or a setup function that allows us to add grobs on topdraw_label()
to make our title (just some text to put on the plot)ggdraw()
+ draw_label()
It’d be nice if the title was centered, right?
ggdraw()
+ draw_label()
It’d be nice if the title was centered, right?
ggdraw()
+ draw_label()
We could use cowplot::draw_label()
to add a title
and subtitle
to our plot:
ggdraw()
+ draw_label()
draw_label()
draw_label()
is meant to be a better wrapper for geom_text()
that requires less customizationgeom_text()
would require 10+ arguments and has no easy application to figures put together with cowplot (or other packages for doing so)draw_label()
draw_label()
Imagine you want to put a plot inside of another
inset <-
pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(y = gender, x = SRhealth, fill = gender)) +
scale_fill_manual(values = c("cornflowerblue", "coral")) +
scale_y_discrete(labels = c("Male", "Female")) +
stat_halfeye(alpha = .8) +
my_theme() +
theme(legend.position = "none") + theme_half_open(12)
draw_label()
Imagine you want to put a plot inside of another
draw_label()
We can also add images!
draw_label()
We can also add images!
cowplot::plot_grid()
plot_grid()
cowplot
is plot_grid()
, which allows us to place differnt figures within the same figure in a grid, and it has a lot of useful arguments+
, /
in `patchworkplotlist = NULL
align = c("none", "h", "v", "hv")
axis = c("none", "l", "r", "t", "b", "lr", "tb", "tblr")
nrow = NULL
ncol = NULL
rel_widths = 1
rel_heights = 1
labels = NULL
label_size = 14
label_fontfamily = NULL
label_fontface = "bold"
label_colour = NULL
label_x = 0
label_y = 1
hjust = -0.5
vjust = 1.5
scale = 1
greedy = TRUE
byrow = TRUE
cols = NULL
rows = NULL
plot_grid()
plotlist = NULL
align = c("none", "h", "v", "hv")
axis = c("none", "l", "r", "t", "b", "lr", "tb", "tblr")
nrow = NULL
ncol = NULL
rel_widths = 1
rel_heights = 1
labels = NULL
label_size = 14
label_fontfamily = NULL
label_fontface = "bold"
label_colour = NULL
label_x = 0
label_y = 1
hjust = -0.5
vjust = 1.5
scale = 1
greedy = TRUE
byrow = TRUE
cols = NULL
rows = NULL
plot_grid()
plot_grid()
"hv"
leads to odd spacingplot_grid()
plot_grid()
plot_grid()
rel_heights
plot_grid()
: Labelsplot_grid()
: Labelsplot_grid()
: Labelsplot_grid()
: Labelsplot_grid()
: LabelsPSC 290 - Data Visualization