my_theme <- function(){
theme_bw() +
theme(
legend.position = "bottom"
, legend.title = element_text(face = "bold", size = rel(1))
, legend.text = element_text(face = "italic", size = rel(1))
, axis.text = element_text(face = "bold", size = rel(1.1), color = "black")
, axis.title = element_text(face = "bold", size = rel(1.2))
, plot.title = element_text(face = "bold", size = rel(1.2), hjust = .5)
, plot.subtitle = element_text(face = "italic", size = rel(1.2), hjust = .5)
, strip.text = element_text(face = "bold", size = rel(1.1), color = "white")
, strip.background = element_rect(fill = "black")
)
}ggplot2 logicshiny)There are lots of packages for piecing visualizations together
Over the years, I’ve tried:
ggExtracowplotpatchworkAlthough patchwork wins by a landslide (imo), each has helpful unique features, so I’ll show you elements of each
ggplot2 extensions: https://exts.ggplot2.tidyverse.org/gallery/
ggExtrapatchworkcowplot (and lots of assortments)ggExtraggExtra because it will help us create plots with distributions in the margins.load(url("https://github.com/emoriebeck/psc290-data-viz-2022/blob/main/04-week4-associations/04-data/week4-data.RData?raw=true"))
pred_data# A tibble: 5,021 × 25
study o_value p_year SID p_value age gender grsWages parEdu race
<chr> <fct> <dbl> <chr> <dbl> <dbl> <fct> <dbl> <fct> <fct>
1 Study1 0 2005 61215 6.67 -29.9 1 1.02 2 0
2 Study1 0 2005 184965 0 -22.9 0 1.14 2 0
3 Study1 0 2005 488251 10 -3.92 1 0.717 1 0
4 Study1 0 2005 650779 7.22 -25.9 1 0.644 3 0
5 Study1 0 2005 969691 7.22 -0.925 1 0.812 2 0
6 Study1 0 2005 986687 6.11 14.1 0 1.76 2 0
7 Study1 0 2005 1054011 5.56 8.08 0 1.34 1 0
8 Study1 0 2005 1372251 7.78 5.08 1 0.842 1 0
9 Study1 0 2005 1496703 6.11 -23.9 0 1.42 2 0
10 Study1 0 2005 1897887 2.78 38.1 1 0.725 2 0
# ℹ 5,011 more rows
# ℹ 15 more variables: physhlthevnt <fct>, SRhealth <dbl>, smokes <fct>,
# alcohol <fct>, exercise <dbl>, BMI <dbl>, parDivorce <fct>, PhysFunc <fct>,
# religion <fct>, education <fct>, married <fct>, numKids <dbl>,
# parOccPrstg <dbl>, reliability <dbl>, predInt <dbl>
ggExtraLet’s plot the association between conscientiousness and self-rated health across genders in Study 1:
p <- pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(x = p_value, y = SRhealth, color = gender)) +
geom_point(
, size = 2
, alpha = .5
) +
scale_color_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
labs(
x = "Conscientiousness (POMP, 0-10)"
, y = "Self-Rated Health (POMP, 0-10)"
, color = "Gender"
) +
my_theme()
pggExtraggExtraTo get marginal distributions, we can just use ggExtra::ggMarginal()
ggExtracolor
ggExtrafill
ggExtraggExtracowplot + pathwork
cowplot or patchwork?
R plots and ggplot2 plotspx <- pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(x = p_value, fill = gender, color = gender)) +
geom_density(alpha = .5) +
scale_color_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
scale_fill_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
labs(fill = "Gender", color = "Gender") +
theme_void()
pxpy <- pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(x = SRhealth, fill = gender, color = gender)) +
geom_density(alpha = .5) +
scale_color_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
scale_fill_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
labs(fill = "Gender", color = "Gender") +
coord_flip() +
theme_void()
pyWe can use the + and / operators to arrange them:
px <- pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(x = p_value, y = gender, fill = gender, color = gender)) +
geom_boxplot(alpha = .5) +
geom_jitter(aes(y = gender), alpha = .5) +
scale_color_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
scale_fill_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
labs(fill = "Gender", color = "Gender") +
theme_void() +
theme(legend.position = "none")
pxpy <- pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(x = SRhealth, y = gender, fill = gender, color = gender)) +
geom_boxplot(alpha = .5) +
geom_jitter(aes(y = gender), alpha = .5) +
scale_color_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
scale_fill_manual(
values = c("cornflowerblue", "coral")
, labels = c("Male", "Female")
) +
labs(fill = "Gender", color = "Gender") +
coord_flip() +
theme_void() +
theme(legend.position = "none")
pyLet me show you a couple of examples from my work that has used cowplot or patchwork
And remember these models?
tidy_ci <- function(m) tidy(m, conf.int = T)
nested_m <- pred_data %>%
group_by(study) %>%
nest() %>%
ungroup() %>%
mutate(
m = map(data
, ~glm(
o_value ~ p_value
, data = .
, family = binomial(link = "logit")
)
)
, tidy = map(m, tidy_ci)
)
nested_m# A tibble: 6 × 4
study data m tidy
<chr> <list> <list> <list>
1 Study1 <tibble [831 × 24]> <glm> <tibble [2 × 7]>
2 Study2 <tibble [1,000 × 24]> <glm> <tibble [2 × 7]>
3 Study3 <tibble [1,000 × 24]> <glm> <tibble [2 × 7]>
4 Study4 <tibble [574 × 24]> <glm> <tibble [2 × 7]>
5 Study5 <tibble [616 × 24]> <glm> <tibble [2 × 7]>
6 Study6 <tibble [1,000 × 24]> <glm> <tibble [2 × 7]>
m_fun <- function(d) {
glm(o_value ~ p_value + married + married:p_value
, data = d
, family = binomial(link = "logit"))
}
tidy_ci <- function(m) tidy(m, conf.int = T) %>% mutate(df.resid = m$df.residual, n = nrow(m$data))
nested_m <- pred_data %>%
group_by(study) %>%
nest() %>%
ungroup() %>%
mutate(
m = map(data, m_fun)
, tidy = map(m, tidy_ci)
)
nested_m# A tibble: 6 × 4
study data m tidy
<chr> <list> <list> <list>
1 Study1 <tibble [831 × 24]> <glm> <tibble [4 × 9]>
2 Study2 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]>
3 Study3 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]>
4 Study4 <tibble [574 × 24]> <glm> <tibble [4 × 9]>
5 Study5 <tibble [616 × 24]> <glm> <tibble [4 × 9]>
6 Study6 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]>
Here’s our unnested model terms
nested_m %>%
select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp)# A tibble: 24 × 10
study term estimate std.error statistic p.value conf.low conf.high df.resid
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 Study1 (Int… 1.33 0.391 0.727 0.467 0.618 2.87 827
2 Study1 p_va… 0.900 0.0572 -1.84 0.0655 0.804 1.01 827
3 Study1 marr… 0.926 0.524 -0.147 0.883 0.330 2.59 827
4 Study1 p_va… 1.02 0.0745 0.238 0.812 0.880 1.18 827
5 Study2 (Int… 0.705 1.59 -0.220 0.826 0.0250 18.1 992
6 Study2 p_va… 1.09 0.218 0.376 0.707 0.702 1.71 992
7 Study2 marr… 6.40 1.62 1.14 0.253 0.237 190. 992
8 Study2 p_va… 0.758 0.221 -1.25 0.211 0.478 1.18 992
9 Study3 (Int… 6.03 1.20 1.49 0.135 0.581 68.2 996
10 Study3 p_va… 0.706 0.156 -2.23 0.0256 0.514 0.952 996
# ℹ 14 more rows
# ℹ 1 more variable: n <int>
But maybe we are particularly interested in the interaction between marital status and personality in predicting mortality, which we want to plot as a forest plot
nested_m %>%
select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp) %>%
filter(term == "p_value:married1")# A tibble: 6 × 10
study term estimate std.error statistic p.value conf.low conf.high df.resid
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 Study1 p_val… 1.02 0.0745 0.238 0.812 0.880 1.18 827
2 Study2 p_val… 0.758 0.221 -1.25 0.211 0.478 1.18 992
3 Study3 p_val… 1.21 0.161 1.17 0.242 0.886 1.68 996
4 Study4 p_val… 0.824 0.284 -0.683 0.495 0.471 1.46 570
5 Study5 p_val… 0.618 0.159 -3.04 0.00239 0.449 0.838 612
6 Study6 p_val… 0.957 0.109 -0.407 0.684 0.773 1.19 996
# ℹ 1 more variable: n <int>
Let’s add our point estimates and uncertainty intervals
But we want to order the points by the effect sizes:
p1 <- nested_m %>%
select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp) %>%
filter(term == "p_value:married1") %>%
arrange(desc(estimate)) %>%
mutate(study = fct_inorder(study)) %>%
ggplot(aes(x = estimate, y = study)) +
labs(
x = "Model Estimated OR (CI)"
, y = NULL
) +
my_theme()
p1Let’s add our point estimates and uncertainty intervals back in
And add in a vertical line at OR = 1:
cowplot or patchwork
p2 <- nested_m %>%
select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp) %>%
filter(term == "p_value:married1") %>%
arrange(desc(estimate)) %>%
mutate(
study = fct_inorder(study)
, study2 = 1:n()
) %>%
mutate_at(vars(estimate, conf.low, conf.high), ~sprintf("%.2f", .)) %>%
mutate(
est = sprintf("%s [%s, %s]", estimate, conf.low, conf.high)
, n = as.character(n)
) %>%
select(study, study2, estimate, n, est) %>%
pivot_longer(
cols = c(est, n)
, values_to = "lab"
, names_to = "est"
)
p2# A tibble: 12 × 5
study study2 estimate est lab
<fct> <int> <chr> <chr> <chr>
1 Study1 1 1.02 est 1.02 [0.88, 1.18]
2 Study1 1 1.02 n 831
3 Study2 2 0.76 est 0.76 [0.48, 1.18]
4 Study2 2 0.76 n 1000
5 Study3 3 1.21 est 1.21 [0.89, 1.68]
6 Study3 3 1.21 n 1000
7 Study4 4 0.82 est 0.82 [0.47, 1.46]
8 Study4 4 0.82 n 574
9 Study5 5 0.62 est 0.62 [0.45, 0.84]
10 Study5 5 0.62 n 616
11 Study6 6 0.96 est 0.96 [0.77, 1.19]
12 Study6 6 0.96 n 1000
Let’s build our base
Add in the text:
Set the theme
We’ll add a horizontal line at the top and bottom to match the forest plot:
Add the column labels:
We need a little margin on the top and bottom:
p1 <- nested_m %>% select(study, tidy) %>%
unnest(tidy) %>%
mutate_at(vars(estimate, conf.low, conf.high), exp) %>%
filter(term == "p_value:married1") %>%
arrange(desc(estimate)) %>%
mutate(study = fct_inorder(study)
, study2 = 1:n()) %>%
ggplot(aes(x = estimate, y = study2)) +
labs(
x = "Model Estimated OR (CI)"
, y = NULL
) +
my_theme()
p1Add our point estimates and uncertainty intervals, along with the vertical line at OR = 1
Change the y scale back
Add in that top bar:
Remove the y axis line
annotate() rectangles to block out portions of the plot.patchworkpatchwork: Piecing the Plots Togetherannotate() is a great tool for thisscale_[map]_[type] functions, especially given the labels can be anything we want!theme elements also let us hack many more parts!ggplot2 is simply having lots of tricks up your sleeve, which come from knowledge (and StackOverflow)patchwork: Piecing the Plots Togetherpatchwork is great, and a little more intuitive for simple use casespatchwork allows you to use the + to piece plots together and makes a lot of default assumptions about alignmentcowplot doesn’t do (easily)patchwork: Piecing the Plots TogetherWe can just use the + operator!
patchwork: Piecing the Plots TogetherWe can also add rows using the /
patchwork: Piecing the Plots TogetherAnd change their arrangement using plot_layout()
patchwork: Piecing the Plots TogetherAnd change their arrangement using plot_layout()
patchwork: Piecing the Plots TogetherWe can add titles using plot_annotation()
patchwork: Piecing the Plots TogetherWe can add labels to plot using plot_annotation()
patchwork: Piecing the Plots TogetherWe can add labels to plot using plot_annotation()
# A tibble: 6 × 5
study data m tidy pred
<chr> <list> <list> <list> <list>
1 Study1 <tibble [831 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
2 Study2 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
3 Study3 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
4 Study4 <tibble [574 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
5 Study5 <tibble [616 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
6 Study6 <tibble [1,000 × 24]> <glm> <tibble [4 × 9]> <tibble [200 × 4]>
nested_m %>%
mutate(df.resid = map_dbl(m, df.residual)) %>%
select(study, pred, df.resid) %>%
unnest(pred) # A tibble: 1,200 × 6
study married p_value .fitted .se.fit df.resid
<chr> <fct> <dbl> <dbl> <dbl> <dbl>
1 Study1 0 0 0.571 0.0957 827
2 Study1 0 0.101 0.568 0.0945 827
3 Study1 0 0.202 0.565 0.0933 827
4 Study1 0 0.303 0.563 0.0920 827
5 Study1 0 0.404 0.560 0.0908 827
6 Study1 0 0.505 0.557 0.0896 827
7 Study1 0 0.606 0.555 0.0883 827
8 Study1 0 0.707 0.552 0.0871 827
9 Study1 0 0.808 0.550 0.0858 827
10 Study1 0 0.909 0.547 0.0845 827
# ℹ 1,190 more rows
Let’s set up the core part of the simple effects
p3 <- nested_m %>%
mutate(df.resid = map_dbl(m, df.residual)) %>%
select(study, pred, df.resid) %>%
unnest(pred) %>%
mutate(married = factor(married, c(0,1), c("Never Married", "Married"))) %>%
ggplot(aes(x = p_value, y = .fitted, fill = study, color = study)) +
labs(x = "Conscientiousness (POMP, 0-10)"
, y = "Predicted Odds Ratio\nof Mortality (95% CI)"
, fill = NULL
, color = NULL) +
facet_grid(~married) +
my_theme()
p3Add in our lineribbon:
patchwork, cowplot also adds some other new tools to our repertoire:
ggdraw()draw_label()draw_plot_label()draw_grob()draw_image()ggdraw() + draw_label()ggdraw() is more or a setup function that allows us to add grobs on topdraw_label() to make our title (just some text to put on the plot)ggdraw() + draw_label()It’d be nice if the title was centered, right?
ggdraw() + draw_label()It’d be nice if the title was centered, right?
ggdraw() + draw_label()We could use cowplot::draw_label() to add a title and subtitle to our plot:
ggdraw() + draw_label()draw_label()draw_label() is meant to be a better wrapper for geom_text() that requires less customizationgeom_text() would require 10+ arguments and has no easy application to figures put together with cowplot (or other packages for doing so)draw_label()draw_label()Imagine you want to put a plot inside of another
inset <-
pred_data %>%
filter(study == "Study1") %>%
ggplot(aes(y = gender, x = SRhealth, fill = gender)) +
scale_fill_manual(values = c("cornflowerblue", "coral")) +
scale_y_discrete(labels = c("Male", "Female")) +
stat_halfeye(alpha = .8) +
my_theme() +
theme(legend.position = "none") + theme_half_open(12)draw_label()Imagine you want to put a plot inside of another
draw_label()We can also add images!
draw_label()We can also add images!
cowplot::plot_grid()
plot_grid()cowplot is plot_grid(), which allows us to place differnt figures within the same figure in a grid, and it has a lot of useful arguments+, / in `patchworkplotlist = NULLalign = c("none", "h", "v", "hv")axis = c("none", "l", "r", "t", "b", "lr", "tb", "tblr")nrow = NULLncol = NULLrel_widths = 1rel_heights = 1labels = NULLlabel_size = 14label_fontfamily = NULLlabel_fontface = "bold"label_colour = NULLlabel_x = 0label_y = 1hjust = -0.5vjust = 1.5scale = 1greedy = TRUEbyrow = TRUEcols = NULLrows = NULLplot_grid()plotlist = NULLalign = c("none", "h", "v", "hv")axis = c("none", "l", "r", "t", "b", "lr", "tb", "tblr")nrow = NULLncol = NULLrel_widths = 1rel_heights = 1labels = NULLlabel_size = 14label_fontfamily = NULLlabel_fontface = "bold"label_colour = NULLlabel_x = 0label_y = 1hjust = -0.5vjust = 1.5scale = 1greedy = TRUEbyrow = TRUEcols = NULLrows = NULLplot_grid()plot_grid()"hv" leads to odd spacingplot_grid()plot_grid()plot_grid()rel_heights
plot_grid(): Labelsplot_grid(): Labelsplot_grid(): Labelsplot_grid(): Labelsplot_grid(): LabelsPSC 290 - Data Visualization