From last week:
- `ggplot()` and `aes()`
- `scale_x_continuous()` and `scale_fill_manual()`
- `geom_point()` and `geom_line()`
- `facet_wrap()` and `facet_grid()`
- `theme_classic()`
- `theme()` (`legend`, `strip`, `axis`, `plot`, `panel`
)
# Named colour palette (presumably colour-blind safe, going by its name).
# Each row holds a colour name and its RGB triplet as a list-column; `hex`
# is the matching hex string for use in ggplot2 scales.
cbsafe_pal <- tribble(
  ~name,            ~rgb,
  "black",          c(0, 0, 0),
  "sky blue",       c(86, 180, 233),
  "bluish green",   c(0, 158, 115),
  "yellow",         c(240, 228, 66),
  "orange",         c(230, 159, 0),
  "blue",           c(0, 114, 178),
  "vermillion",     c(213, 94, 0),
  "reddish purple", c(204, 121, 167)
) %>%
  mutate(
    hex = map_chr(
      rgb,
      # grDevices::rgb() is spelled out so the function is not confused with
      # the `rgb` column of the same name.
      function(triplet) {
        grDevices::rgb(triplet[1], triplet[2], triplet[3], maxColorValue = 255)
      }
    )
  )
cbsafe_pal
# A tibble: 8 × 3
name rgb hex
<chr> <list> <chr>
1 black <dbl [3]> #000000
2 sky blue <dbl [3]> #56B4E9
3 bluish green <dbl [3]> #009E73
4 yellow <dbl [3]> #F0E442
5 orange <dbl [3]> #E69F00
6 blue <dbl [3]> #0072B2
7 vermillion <dbl [3]> #D55E00
8 reddish purple <dbl [3]> #CC79A7
# Load the course's GSOEP .RData file straight from GitHub into the current
# environment, then print the `gsoep` tibble.
load(
  file = url(
    description = "https://github.com/emoriebeck/psc290-data-viz-2022/raw/main/03-week3-proportions/04-data/gsoep.RData"
  )
)
gsoep
# A tibble: 480,798 × 10
year SID SRhealth marital chldbrth gender yearBrth mortality job age
<dbl> <dbl> <dbl> <dbl> <dbl> <hvn_lb> <dbl> <dbl> <dbl> <dbl>
1 1984 901 NA NA NA 2 1951 0 NA 33
2 1984 1001 NA NA NA 2 1913 0 NA 71
3 1984 1101 NA NA NA 2 1906 0 NA 78
4 1984 1201 NA NA NA 1 1911 0 NA 73
5 1984 1202 NA NA NA 2 1913 0 NA 71
6 1984 1301 NA NA NA 2 1943 0 NA 41
7 1984 1302 NA NA NA 1 1965 0 NA 19
8 1984 1901 NA NA NA 2 1948 0 NA 36
9 1984 2001 NA NA NA 2 1949 0 NA 35
10 1984 2002 NA NA NA 1 1952 0 NA 32
# ℹ 480,788 more rows
`ggplot2` doesn’t specifically support pie charts. Instead, combine a bar geom with `coord_polar()`:
`geom_bar() + coord_polar()`
# Basic pie chart: 2009 counts per marital status drawn as one stacked bar,
# then wrapped into a circle with coord_polar().
gsoep %>%
  filter(year == 2009, !is.na(marital)) %>% # year flagged "random" in the original, i.e. an arbitrary pick
  count(marital) %>%
  mutate(
    marital = factor(
      marital,
      levels = 1:4,
      labels = c("Married", "Separated", "Widowed", "Never Married")
    )
  ) %>%
  ggplot(aes(x = "", y = n, fill = marital)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y", start = 0) +
  theme_void()
# Pie chart with the category name written inside each slice.
gsoep %>%
  filter(year == 2009, !is.na(marital)) %>% # year flagged "random" in the original, i.e. an arbitrary pick
  count(marital) %>%
  mutate(
    marital = factor(
      marital,
      levels = 1:4,
      labels = c("Married", "Separated", "Widowed", "Never Married")
    )
  ) %>%
  arrange(desc(marital)) %>%
  mutate(
    prop = n / sum(n) * 100,
    # running total minus half the slice = midpoint of the slice along y
    ypos = cumsum(prop) - 0.5 * prop
  ) %>%
  ggplot(aes(x = "", y = prop, fill = marital)) +
  geom_col(width = 1, color = "white") +
  geom_text(aes(y = ypos, label = marital), color = "white", size = 4) +
  coord_polar(theta = "y", start = 0) +
  theme_void()
# Pie chart labelled with category and percentage, using four colours from
# cbsafe_pal; the labels replace the legend.
gsoep %>%
  filter(year == 2009, !is.na(marital)) %>% # year flagged "random" in the original, i.e. an arbitrary pick
  count(marital) %>%
  mutate(
    marital = factor(
      marital,
      levels = 1:4,
      labels = c("Married", "Separated", "Widowed", "Never Married")
    )
  ) %>%
  arrange(desc(marital)) %>%
  mutate(
    prop = n / sum(n) * 100,
    # running total minus half the slice = midpoint of the slice along y
    ypos = cumsum(prop) - 0.5 * prop
  ) %>%
  ggplot(aes(x = "", y = prop, fill = marital)) +
  geom_col(width = 1, color = "white") +
  geom_text(
    aes(y = ypos, label = sprintf("%s\n%.1f%%", marital, prop)),
    color = "white",
    size = 4
  ) +
  scale_fill_manual(values = cbsafe_pal$hex[c(2, 8, 3, 4)]) +
  coord_polar(theta = "y", start = 0) +
  theme_void() +
  theme(legend.position = "none")
labs()
function) to the plot that makes it clear what you want the reader to see.geom_text()
for geom_label()
.
# Final pie chart: boxed labels (geom_label), three greens plus grey for the
# fill, and a title that states the takeaway.
gsoep %>%
  filter(year == 2009, !is.na(marital)) %>% # year flagged "random" in the original, i.e. an arbitrary pick
  count(marital) %>%
  mutate(
    marital = factor(
      marital,
      levels = 1:4,
      labels = c("Married", "Separated", "Widowed", "Never Married")
    )
  ) %>%
  arrange(desc(marital)) %>%
  mutate(
    prop = n / sum(n) * 100,
    # running total minus half the slice = midpoint of the slice along y
    ypos = cumsum(prop) - 0.5 * prop
  ) %>%
  ggplot(aes(x = "", y = prop, fill = marital)) +
  geom_col(width = 1, color = "black") +
  geom_label(
    aes(y = ypos, label = sprintf("%s\n%.1f%%", marital, prop)),
    color = "white",
    size = 6,
    fontface = 2
  ) +
  # darkest green first, then grey for the last level
  scale_fill_manual(values = c(brewer.pal(9, "Greens")[c(8, 6, 4)], "grey60")) +
  coord_polar(theta = "y", start = 0) +
  labs(title = "In 2009, the majority of GSOEP participants\nwere or had been married/partnered") +
  theme_void() +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold.italic", size = rel(1.4), hjust = .5)
  )
# Stacked bars: share of each marital status within every age from 18 to 26.
gsoep %>%
  filter(age %in% 18:26, !is.na(marital)) %>%
  count(age, marital) %>%
  group_by(age) %>%
  mutate(
    marital = factor(
      marital,
      levels = 1:4,
      labels = c("Married", "Separated", "Widowed", "Never Married")
    ),
    age = factor(age),
    prop = n / sum(n) # proportion within each age
  ) %>%
  ggplot(aes(x = age, y = prop, fill = marital)) +
  geom_bar(stat = "identity", color = "black") + # same as geom_col(color = "black")
  theme_classic()
Let’s improve how this looks. Let’s start by adjusting the color and labels, like last time. Do the following:
scale_y_continuous()
to improve the labels on the y-axis (i.e. change them to intuitive percentages)labs()
to add/improve the title
, subtitle
, x
, and y
labels on the plotlabs()
function) and move the legend to the bottom
# Stacked bars with reversed level order, a grey/green fill, percentage
# y-axis labels, titles, and the legend moved below the plot.
gsoep %>%
  filter(age %in% 18:26, !is.na(marital)) %>%
  count(age, marital) %>%
  group_by(age) %>%
  mutate(
    marital = factor(
      marital,
      levels = 4:1,
      labels = c("Never Married", "Widowed", "Separated", "Married")
    ),
    age = factor(age),
    prop = n / sum(n) # proportion within each age
  ) %>%
  ggplot(aes(x = age, y = prop, fill = marital)) +
  geom_col(color = "black") +
  scale_fill_manual(values = c("grey80", brewer.pal(9, "Greens")[c(2, 4, 6)])) +
  scale_y_continuous(
    limits = c(0, 1),
    breaks = seq(0, 1, .25),
    labels = paste0(seq(0, 100, 25), "%")
  ) +
  labs(
    x = "Age",
    y = "Percent of Sample",
    title = "Rates of relationships increase in emerging adulthood",
    subtitle = "But most remain unpartnered by 26",
    fill = NULL # no legend title
  ) +
  theme_classic() +
  theme(legend.position = "bottom")
# Same stacked bars, with the two largest categories labelled directly on
# the age-26 bar via annotate().
gsoep %>%
  filter(age %in% 18:26, !is.na(marital)) %>%
  count(age, marital) %>%
  group_by(age) %>%
  mutate(
    marital = factor(
      marital,
      levels = 4:1,
      labels = c("Never Married", "Widowed", "Separated", "Married")
    ),
    age = factor(age),
    prop = n / sum(n) # proportion within each age
  ) %>%
  ggplot(aes(x = age, y = prop, fill = marital)) +
  geom_col(color = "black") +
  scale_fill_manual(values = c("grey80", brewer.pal(9, "Greens")[c(2, 4, 6)])) +
  scale_y_continuous(
    limits = c(0, 1),
    breaks = seq(0, 1, .25),
    labels = paste0(seq(0, 100, 25), "%")
  ) +
  # x is a factor here, so the bar is addressed by its label "26"
  annotate("text", x = "26", y = .60, label = "Never Married", angle = 90) +
  annotate("text", x = "26", y = .13, label = "Married", angle = 90, color = "white") +
  labs(
    x = "Age",
    y = "Percent of Sample",
    title = "Rates of relationships increase in emerging adulthood",
    subtitle = "But most remain unpartnered by 26",
    fill = NULL
  ) +
  theme_classic() +
  theme(legend.position = "bottom")
\n
)
# Final stacked-bar figure: the annotated plot plus a wrapped title and
# bold/italic text styling set through theme().
gsoep %>%
  filter(age %in% 18:26, !is.na(marital)) %>%
  count(age, marital) %>%
  group_by(age) %>%
  mutate(
    marital = factor(
      marital,
      levels = 4:1,
      labels = c("Never Married", "Widowed", "Separated", "Married")
    ),
    age = factor(age),
    prop = n / sum(n) # proportion within each age
  ) %>%
  ggplot(aes(x = age, y = prop, fill = marital)) +
  geom_col(color = "black") +
  scale_fill_manual(values = c("grey80", brewer.pal(9, "Greens")[c(2, 4, 6)])) +
  scale_y_continuous(
    limits = c(0, 1),
    breaks = seq(0, 1, .25),
    labels = paste0(seq(0, 100, 25), "%")
  ) +
  annotate("text", x = "26", y = .60, label = "Never Married", angle = 90) +
  annotate("text", x = "26", y = .13, label = "Married", angle = 90, color = "white") +
  labs(
    x = "Age",
    y = "Percent of Sample",
    title = "Rates of relationships increase in\nemerging adulthood",
    subtitle = "But most remain unpartnered by 26",
    fill = NULL
  ) +
  theme_classic() +
  theme(
    legend.position = "bottom",
    axis.text = element_text(face = "bold", size = rel(1.1)),
    axis.title = element_text(face = "bold", size = rel(1.1)),
    plot.title = element_text(face = "bold", size = rel(1.2), hjust = .5),
    plot.subtitle = element_text(face = "italic", size = rel(1.1), hjust = .5)
  )
# Side-by-side (dodged) bars: marital-status proportions in four survey years.
gsoep %>%
  filter(year %in% seq(2000, 2015, 5), !is.na(marital)) %>% # years flagged "random" in the original, i.e. an arbitrary pick
  count(year, marital) %>%
  mutate(
    marital = factor(
      marital,
      levels = 1:4,
      labels = c("Married", "Separated", "Widowed", "Never Married")
    )
  ) %>%
  group_by(year) %>%
  mutate(prop = n / sum(n)) %>% # proportion within each year
  ggplot(aes(x = year, y = prop, fill = marital)) +
  geom_bar(stat = "identity", position = "dodge", color = "black") + # same as geom_col(position = "dodge", color = "black")
  theme_classic()
# Dodged bars again, with the factor levels reordered so "Never Married"
# sits next to "Married".
gsoep %>%
  filter(year %in% seq(2000, 2015, 5), !is.na(marital)) %>% # years flagged "random" in the original, i.e. an arbitrary pick
  count(year, marital) %>%
  mutate(
    marital = factor(
      marital,
      levels = c(1, 4, 2, 3),
      labels = c("Married", "Never Married", "Separated", "Widowed")
    )
  ) %>%
  group_by(year) %>%
  mutate(prop = n / sum(n)) %>% # proportion within each year
  ggplot(aes(x = year, y = prop, fill = marital)) +
  geom_col(position = "dodge", color = "black") +
  theme_classic()
We could label the bars, but let’s label the axes instead. To do so:
theme(axis.text.x = element_text()
) to adjust the angle of the x-axis labels (hint: use angle
and hjust
).scale_y_continuous()
to interpretable percentages.labs()
:
# One panel per year with marital status on the x-axis, so the axis text
# carries the category names and the legend can be dropped.
gsoep %>%
  filter(year %in% seq(2000, 2015, 5), !is.na(marital)) %>% # years flagged "random" in the original, i.e. an arbitrary pick
  count(year, marital) %>%
  mutate(
    marital = factor(
      marital,
      levels = c(1, 4, 2, 3),
      labels = c("Married", "Never Married", "Separated", "Widowed")
    )
  ) %>%
  group_by(year) %>%
  mutate(prop = n / sum(n)) %>% # proportion within each year
  ggplot(aes(x = marital, y = prop, fill = marital)) +
  geom_col(position = "dodge", color = "black") +
  scale_y_continuous(
    limits = c(0, .7),
    breaks = seq(0, .7, .2),
    labels = paste0(seq(0, 60, 20), "%")
  ) +
  facet_grid(. ~ year) +
  labs(
    x = NULL,
    y = "Percentage of Participants",
    title = "Marital Status Has Remained Consistent Throughout the 21st Century"
  ) +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
As with the last plot type, let’s use theme() to improve the appearance of our plot. Do the following:
\n
)theme(strip.background = element_rect(fill = [your argument]))
. Note that if you use a dark color, you will also have to change the text color.)
# Faceted bars with theme() polish: bold axis and strip text, a grey strip
# background, and a wrapped, centred title.
gsoep %>%
  filter(year %in% seq(2000, 2015, 5), !is.na(marital)) %>% # years flagged "random" in the original, i.e. an arbitrary pick
  count(year, marital) %>%
  mutate(
    marital = factor(
      marital,
      levels = c(1, 4, 2, 3),
      labels = c("Married", "Never Married", "Separated", "Widowed")
    )
  ) %>%
  group_by(year) %>%
  mutate(prop = n / sum(n)) %>% # proportion within each year
  ggplot(aes(x = marital, y = prop, fill = marital)) +
  geom_col(position = "dodge", color = "black") +
  scale_y_continuous(
    limits = c(0, .7),
    breaks = seq(0, .7, .2),
    labels = paste0(seq(0, 60, 20), "%")
  ) +
  facet_grid(. ~ year) +
  labs(
    x = NULL,
    y = "Percentage of Participants",
    title = "Marital Status Has Remained Consistent\nThroughout the 21st Century"
  ) +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text = element_text(face = "bold", size = rel(1.2)),
    axis.text.x = element_text(angle = 45, hjust = 1, size = rel(1)),
    axis.title = element_text(face = "bold", size = rel(1.2)),
    strip.background = element_rect(fill = "grey90", color = "black"),
    strip.text = element_text(face = "bold", size = rel(1.2)),
    plot.title = element_text(face = "bold", size = rel(1.1), hjust = .5)
  )
Exercise:
# Same themed, faceted bars, now filled with colours 2-5 of cbsafe_pal.
gsoep %>%
  filter(year %in% seq(2000, 2015, 5), !is.na(marital)) %>% # years flagged "random" in the original, i.e. an arbitrary pick
  count(year, marital) %>%
  mutate(
    marital = factor(
      marital,
      levels = c(1, 4, 2, 3),
      labels = c("Married", "Never Married", "Separated", "Widowed")
    )
  ) %>%
  group_by(year) %>%
  mutate(prop = n / sum(n)) %>% # proportion within each year
  ggplot(aes(x = marital, y = prop, fill = marital)) +
  geom_col(position = "dodge", color = "black") +
  scale_y_continuous(
    limits = c(0, .7),
    breaks = seq(0, .7, .2),
    labels = paste0(seq(0, 60, 20), "%")
  ) +
  scale_fill_manual(values = cbsafe_pal$hex[2:5]) +
  facet_grid(. ~ year) +
  labs(
    x = NULL,
    y = "Percentage of Participants",
    title = "Marital Status Has Remained Consistent\nThroughout the 21st Century"
  ) +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text = element_text(face = "bold", size = rel(1.2)),
    axis.text.x = element_text(angle = 45, hjust = 1, size = rel(1)),
    axis.title = element_text(face = "bold", size = rel(1.2)),
    strip.background = element_rect(fill = "grey90", color = "black"),
    strip.text = element_text(face = "bold", size = rel(1.2)),
    plot.title = element_text(face = "bold", size = rel(1.1), hjust = .5)
  )
# Stacked bars of self-rated health (SRhealth) proportions at each decade of
# age from 20 to 80.
gsoep %>%
  filter(age %in% seq(20, 80, 10), !is.na(SRhealth)) %>% # ages flagged "random" in the original, i.e. an arbitrary pick
  count(age, SRhealth) %>%
  mutate(
    SRhealth = factor(
      SRhealth,
      levels = 5:1,
      labels = c("Very good", "Good", "Satisfactory", "Poor", "Bad")
    )
  ) %>%
  group_by(age) %>%
  mutate(prop = n / sum(n)) %>% # proportion within each age
  ggplot(aes(x = age, y = prop, fill = SRhealth)) +
  geom_col(color = "black") +
  scale_fill_manual(values = cbsafe_pal$hex[2:6]) +
  theme_classic()
# Stacked area chart: self-rated health proportions at every age from 18 to 100.
gsoep %>%
  filter(!is.na(SRhealth), age >= 18, age <= 100) %>%
  count(age, SRhealth) %>%
  mutate(
    SRhealth = factor(
      SRhealth,
      levels = 5:1,
      labels = c("Very good", "Good", "Satisfactory", "Poor", "Bad")
    )
  ) %>%
  group_by(age) %>%
  mutate(prop = n / sum(n)) %>% # proportion within each age
  ggplot(aes(x = age, y = prop, fill = SRhealth)) +
  geom_area() +
  theme_classic()
Let’s improve the coloring of this figure. Since we have ordinal data, we want the colors to form a somewhat coherent gradient that communicates amount. But we don’t want a full (continuous) gradient, because the data are ordinal!
scale_fill_viridis_[type]()
).
# Area chart with the discrete viridis fill scale, white outlines and some
# transparency.
gsoep %>%
  filter(!is.na(SRhealth), age >= 18, age <= 100) %>%
  count(age, SRhealth) %>%
  mutate(
    SRhealth = factor(
      SRhealth,
      levels = 5:1,
      labels = c("Very good", "Good", "Satisfactory", "Poor", "Bad")
    )
  ) %>%
  group_by(age) %>%
  mutate(prop = n / sum(n)) %>% # proportion within each age
  ggplot(aes(x = age, y = prop, fill = SRhealth)) +
  geom_area(color = "white", alpha = .6) +
  scale_fill_viridis_d() +
  theme_classic()
Remember, legends tax working memory, and these plots are not the exception to the rule. Let’s add the labels to the plot directly using annotate(). (Note, we could also use geom_text() but the setup would be a pain and dubiously worth the hassle.)
The `annotate()` function is a manual powerhouse: it adds `grobs` to a plot that match any kind of `geom`
(e.g., lines, dots, rectangles, text, labels, and more).
# Area chart with each band labelled in place via annotate(), replacing the
# legend. Levels now run from "Bad" (1) to "Very good" (5).
gsoep %>%
  filter(!is.na(SRhealth), age >= 18, age <= 100) %>%
  count(age, SRhealth) %>%
  mutate(
    SRhealth = factor(
      SRhealth,
      levels = 1:5,
      labels = c("Bad", "Poor", "Satisfactory", "Good", "Very good")
    )
  ) %>%
  group_by(age) %>%
  mutate(prop = n / sum(n)) %>% # proportion within each age
  ggplot(aes(x = age, y = prop, fill = SRhealth)) +
  geom_area(color = "white", alpha = .6) +
  # Label positions are hand-picked x/y coordinates inside each band
  annotate("text", x = 85, y = .95, label = "Bad", color = "white", fontface = 2) +
  annotate("text", x = 75, y = .80, label = "Poor", color = "white", fontface = 2) +
  annotate("text", x = 62, y = .55, label = "Satisfactory", color = "white", fontface = 2) +
  annotate("text", x = 43, y = .3, label = "Good", color = "black", fontface = 2) +
  annotate("text", x = 30, y = .07, label = "Very Good", color = "black", fontface = 2) +
  scale_fill_viridis_d() +
  theme_classic() +
  theme(legend.position = "none")
Exercise:
x
and y
scale labels and titles\n
)Note how much clearer this is than if I’d just plotted the mean of self-rated health across groups!!
# Final area chart: in-plot labels, tidied x/y scales, axis titles and a
# takeaway title, with bold text styling.
gsoep %>%
  filter(!is.na(SRhealth), age >= 18, age <= 100) %>%
  count(age, SRhealth) %>%
  mutate(
    SRhealth = factor(
      SRhealth,
      levels = 1:5,
      labels = c("Bad", "Poor", "Satisfactory", "Good", "Very good")
    )
  ) %>%
  group_by(age) %>%
  mutate(prop = n / sum(n)) %>% # proportion within each age
  ggplot(aes(x = age, y = prop, fill = SRhealth)) +
  geom_area(color = "white", alpha = .6) +
  # Label positions are hand-picked x/y coordinates inside each band
  annotate("text", x = 85, y = .95, label = "Bad", color = "white", fontface = 2) +
  annotate("text", x = 75, y = .80, label = "Poor", color = "white", fontface = 2) +
  annotate("text", x = 62, y = .55, label = "Satisfactory", color = "white", fontface = 2) +
  annotate("text", x = 43, y = .3, label = "Good", color = "black", fontface = 2) +
  annotate("text", x = 30, y = .07, label = "Very Good", color = "black", fontface = 2) +
  scale_x_continuous(limits = c(18, 100), breaks = seq(20, 100, 10)) +
  scale_y_continuous(
    limits = c(0, 1),
    breaks = seq(0, 1, .25),
    labels = paste0(seq(0, 100, 25), "%")
  ) +
  scale_fill_viridis_d() +
  labs(
    x = "Age (Years)",
    y = "Percentage of Participants",
    title = "Levels of Self-Rated Health Decrease Across the Lifespan"
  ) +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text = element_text(face = "bold", size = rel(1.1)),
    axis.title = element_text(face = "bold", size = rel(1.1)),
    plot.title = element_text(face = "bold", size = rel(1.1), hjust = .5)
  )
Let’s start by using `stat_smooth()` to add a smoothed `geom_area()` of the total sample size to the figure.
# Backdrop only: a loess-smoothed grey area of the total sample size per
# age, repeated in one panel per self-rated-health level.
gsoep %>%
  filter(!is.na(SRhealth), age >= 18, age <= 100) %>%
  count(age, SRhealth) %>%
  mutate(
    SRhealth = factor(
      SRhealth,
      levels = 1:5,
      labels = c("Bad", "Poor", "Satisfactory", "Good", "Very good")
    )
  ) %>%
  group_by(age) %>%
  mutate(total_n = sum(n)) %>% # total respondents at each age
  ggplot(aes(x = age, y = n)) +
  stat_smooth(
    aes(y = total_n),
    geom = "area",
    method = "loess",
    span = 1 / 3,
    alpha = .8,
    fill = "grey"
  ) +
  facet_grid(. ~ SRhealth) +
  theme_classic()
Now let’s add each of the ordinal values
# Adds a smoothed, coloured area for each health level on top of the grey
# total, and labels the grey backdrop "Total".
gsoep %>%
  filter(!is.na(SRhealth), age >= 18, age <= 100) %>%
  count(age, SRhealth) %>%
  mutate(
    SRhealth = factor(
      SRhealth,
      levels = 1:5,
      labels = c("Bad", "Poor", "Satisfactory", "Good", "Very good")
    )
  ) %>%
  group_by(age) %>%
  mutate(total_n = sum(n)) %>% # total respondents at each age
  ggplot(aes(x = age, y = n)) +
  # grey backdrop: total sample size
  stat_smooth(
    aes(y = total_n),
    geom = "area",
    method = "loess",
    span = 1 / 3,
    alpha = .8,
    fill = "grey"
  ) +
  # coloured foreground: count for the panel's own level
  stat_smooth(
    aes(fill = SRhealth),
    geom = "area",
    method = "loess",
    span = 1 / 3,
    alpha = .8
  ) +
  annotate("text", x = 45, y = 3000, label = "Total") +
  facet_grid(. ~ SRhealth) +
  theme_classic() +
  theme(legend.position = "none")
Let’s not belabor this too much.
# Final smoothed figure: per-level areas over the grey total, with the
# viridis fill, labelled axes, title/subtitle and styled facet strips.
gsoep %>%
  filter(!is.na(SRhealth), age >= 18, age <= 100) %>%
  count(age, SRhealth) %>%
  mutate(
    SRhealth = factor(
      SRhealth,
      levels = 1:5,
      labels = c("Bad", "Poor", "Satisfactory", "Good", "Very good")
    )
  ) %>%
  group_by(age) %>%
  mutate(total_n = sum(n)) %>% # total respondents at each age
  ggplot(aes(x = age, y = n)) +
  stat_smooth(
    aes(y = total_n),
    geom = "area",
    method = "loess",
    span = 1 / 3,
    alpha = .8,
    fill = "grey"
  ) +
  stat_smooth(
    aes(fill = SRhealth),
    geom = "area",
    method = "loess",
    span = 1 / 3,
    alpha = .8
  ) +
  scale_x_continuous(limits = c(18, 100), breaks = seq(20, 100, 20)) +
  scale_fill_viridis_d() +
  annotate("text", x = 45, y = 3000, label = "Total") +
  labs(
    x = "Age (Years)",
    y = "Number of People",
    title = "Good Self-Rated Health Decreases Across the Lifespan",
    subtitle = "But bad decreases less, likely because all-cause sample drop-out"
  ) +
  facet_grid(. ~ SRhealth) +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text = element_text(face = "bold", size = rel(1.1)),
    axis.title = element_text(face = "bold", size = rel(1.1)),
    plot.title = element_text(face = "bold", size = rel(1.1), hjust = .5),
    plot.subtitle = element_text(face = "italic", size = rel(1), hjust = .5),
    strip.background = element_rect(fill = "grey90", color = "black"),
    strip.text = element_text(face = "bold", size = rel(1.2))
  )
Perfectly fine but may not communicate what we want to show as well as other methods we’ve seen (at least in this instance).
Category | Job | Code |
---|---|---|
Agriculture | Agriculture Hunting Rel.Serv.Activities | 1 |
Forestry Logging Rel.Service activities | 2 | |
Fishing Fish Hatcheries Fish Farms | 5 | |
Energy and Utilities | Mining Coal Lignite; Extraction Of Peat | 10 |
Extraction Crude Petroleum Natural Gas | 11 | |
Mining Of Uranium And Thorium Ores | 12 | |
Mining Of Metal Ores | 13 | |
Other Mining And Quarrying | 14 | |
Recycling | 37 | |
Electricity Gas Steam Hot Water Supply | 40 | |
Sewage Refuse Disposal Sanitationa.a.Re | 90 | |
Finance and Tech | Financ.Intermediat. Exc.Insur. Pens.Fund | 65 |
Insurance Pens.Funding Ex.Compuls.SocSe | 66 | |
Activ.Aux.To Financial Intermediation | 67 | |
Computer And Related Activities | 72 | |
Research And Development | 73 | |
Other Business Activities | 74 | |
Industry - NEC | 96 | |
Manufacturing | Manuf Food Products And Beverages | 15 |
Manuf Tobacco Products | 16 | |
Manuf Textiles | 17 | |
Manuf Wear. Apparel; Dressing Dyeing Fur | 18 | |
Tanning Dress.Leather; luggage Footwear | 19 | |
Manuf Wood Products Except Furniture | 20 | |
Manuf Pulp Paper And Paper Products | 21 | |
Manuf Coke Ref.Petroleum Nuclear Fuel | 23 | |
Manuf Chemicals And Chemical Products | 24 | |
Manuf Rubber And Plastic Products | 25 | |
Manuf Other Non-metallic Mineral Product | 26 | |
Manuf Basic Metals | 27 | |
Manuf Fabric.Metal Prod. Ex.Machin. Equi | 28 | |
Manuf Machinery And Equipment NEC | 29 | |
Manuf Office Machinery And Computers | 30 | |
Manuf Electrical Machinery Apparatus NE | 31 | |
Manuf Radio Television Communic.Equipmen | 32 | |
Manuf Medical Precision Optical Instrum. | 33 | |
Manuf Motor Vehicles Trailers Semi-tr. | 34 | |
Manuf Other Transport Equipment | 35 | |
Manuf Furniture; Manufacturing NEC | 36 | |
Collection Purification Distrib.Of Water | 41 | |
Handcraft Trade - NEC | 97 | |
Manufacturing - NEC | 100 | |
Other | Private Households With Employed Persons | 95 |
Extra-territorial Organizations.a.Bodies | 99 | |
Public Service | Publ.Administr. Defense; Compuls.SocSec | 75 |
Education | 80 | |
Health And Social Work | 85 | |
Activit.of.Membership Organizations NEC. | 91 | |
Other Service Activities | 93 | |
Sales and Service | Publishing Printing Recorded Media | 22 |
Construction | 45 | |
Sale Maint Rep.Mot.Vehicles;Ret.Sale Fue | 50 | |
Wholesale Commission Trade Exc.Mot.Vehic | 51 | |
Retail Trade Exc.Mot.Vehic;Mot.Cyc Repai | 52 | |
Hotels And Restaurants | 55 | |
Post And Telecommunications | 64 | |
Real Estate Property Activities | 70 | |
Rent.Machinery Equip Wo.Oper. P. HH Good | 71 | |
Recreational Cultural Sporting Activity | 92 | |
Services - NEC | 98 | |
Transportation | Land Transport; Transport Via Pipelines | 60 |
Water Transport | 61 | |
Air Transport | 62 | |
Supporting Aux.Transp.Activ;Trav.Agencie | 63 |
`ggplot2` package and use the `ggmosaic` package. (Review this on your own time!)
# Install ggmosaic only if it is missing, then attach it.
# requireNamespace() checks for this one package directly. The previous test,
# `"ggmosaic" %in% installed.packages()`, compared the name against every cell
# of the installed-packages matrix, which is slow and can match a field other
# than the package name.
if (!requireNamespace("ggmosaic", quietly = TRUE)) {
  install.packages("ggmosaic")
}
library(ggmosaic)
# One row per participant (SID): their job code in the oldest age decade for
# which a job is recorded, joined to the job name/category lookup `jobs`, and
# restricted to job codes held by at least 2% of the remaining rows.
gsoep_jobs <- gsoep %>%
  # Bin ages 20-99 into decades (20, 30, ..., 90); other ages pass through
  # mapvalues() unchanged and are removed by the filter below.
  mutate(age_gr = mapvalues(age, 20:99, rep(seq(20, 90, 10), each = 10))) %>%
  filter(!is.na(age_gr) & age >= 20 & age < 100) %>%
  group_by(SID) %>%
  filter(!is.na(job)) %>%
  filter(age_gr == max(age_gr)) %>% # keep each person's latest decade
  group_by(SID, age_gr) %>%
  # .groups = "drop" returns an ungrouped tibble explicitly, so no separate
  # ungroup() (or grouping message) is needed.
  summarize(job = max(job), .groups = "drop") %>%
  rename(code = job) %>%
  # Explicit key: join on the job code only, rather than on whatever column
  # names the two tables happen to share.
  left_join(jobs %>% rename(code = old), by = "code") %>%
  group_by(code) %>%
  # `.` is the whole piped-in table, so nrow(.) is the overall row count
  filter(n() / nrow(.) >= .02) %>%
  ungroup()
gsoep_jobs
# A tibble: 4,326 × 5
SID age_gr code job cat
<dbl> <dbl> <dbl> <chr> <chr>
1 12303 40 85 Health And Social Work Public Service
2 13603 60 80 Education Public Service
3 35905 60 85 Health And Social Work Public Service
4 71704 30 28 Manuf Fabric.Metal Prod. Ex.Machin. Equi Manufacturing
5 73605 80 29 Manuf Machinery And Equipment NEC Manufacturing
6 79603 20 55 Hotels And Restaurants Sales and Servi…
7 84805 70 45 Construction Sales and Servi…
8 85307 30 74 Other Business Activities Finance and Tech
9 89405 70 74 Other Business Activities Finance and Tech
10 112505 30 65 Financ.Intermediat. Exc.Insur. Pens.Fund Finance and Tech
# ℹ 4,316 more rows
# Treemap input: number of participants per job (each person's maximum
# recorded job code), keeping jobs held by more than 2% of participants and
# wrapping long job names for display.
gsoep_tm <- gsoep %>%
  filter(!is.na(job)) %>%
  group_by(SID) %>%
  summarize(job = max(job), .groups = "drop") %>%
  rename(code = job) %>%
  # Explicit key: join on the job code only, rather than on whatever column
  # names the two tables happen to share.
  left_join(jobs %>% rename(code = old), by = "code") %>%
  count(code, cat, job) %>%
  filter(n / sum(n) > .02) %>%
  mutate(job = str_wrap(job, 15)) # break names into lines of at most ~15 characters
gsoep_tm
# A tibble: 15 × 4
code cat job n
<dbl> <chr> <chr> <int>
1 1 Agriculture "Agriculture\nHunting\nRel.Serv.Activities" 272
2 15 Manufacturing "Manuf Food\nProducts And\nBeverages" 172
3 17 Manufacturing "Manuf Textiles" 122
4 28 Manufacturing "Manuf\nFabric.Metal\nProd.\nEx.Machin. Equi" 188
5 29 Manufacturing "Manuf Machinery\nAnd Equipment\nNEC" 144
6 31 Manufacturing "Manuf\nElectrical\nMachinery\nApparatus NE" 123
7 45 Sales and Service "Construction" 370
8 52 Sales and Service "Retail Trade\nExc.Mot.Vehic;Mot.Cyc\nRepai" 718
9 55 Sales and Service "Hotels And\nRestaurants" 305
10 60 Transportation "Land Transport;\nTransport Via\nPipelines" 162
11 65 Finance and Tech "Financ.Intermediat.\nExc.Insur.\nPens.Fund" 123
12 74 Finance and Tech "Other Business\nActivities" 286
13 75 Public Service "Publ.Administr.\nDefense;\nCompuls.SocSec" 421
14 80 Public Service "Education" 339
15 85 Public Service "Health And\nSocial Work" 587
# Basic treemap: one tile per job sized by n, filled and grouped by category.
gsoep_tm %>%
  arrange(cat, code) %>%
  ggplot(aes(area = n, fill = cat, label = job, subgroup = cat)) +
  geom_treemap(color = "white", size = 3) +
  geom_treemap_text(
    # one colour per row in the arranged order: 11 white, then 4 black
    colour = rep(c("white", "black"), times = c(11, 4)),
    place = "centre",
    size = 15,
    grow = FALSE
  ) +
  geom_treemap_subgroup_text(
    place = "bottom",
    grow = TRUE,
    alpha = 0.4,
    colour = "white",
    fontface = "italic"
  ) +
  theme(legend.position = "none")
# Treemap with the discrete viridis fill scale and all-white tile labels.
gsoep_tm %>%
  arrange(cat, code) %>%
  ggplot(aes(area = n, fill = cat, label = job, subgroup = cat)) +
  geom_treemap(color = "white", size = 3) +
  geom_treemap_text(
    colour = "white",
    place = "centre",
    size = 15,
    grow = FALSE
  ) +
  geom_treemap_subgroup_text(
    place = "bottom",
    grow = TRUE,
    alpha = 0.4,
    colour = "white",
    fontface = "italic"
  ) +
  scale_fill_viridis_d() +
  theme(legend.position = "none")
# Viridis treemap with per-row label colours (white on the first 11 rows,
# black on the last 4) and a thick white border around each category.
gsoep_tm %>%
  arrange(cat, code) %>%
  ggplot(aes(area = n, fill = cat, label = job, subgroup = cat)) +
  geom_treemap(color = "white", size = 3) +
  geom_treemap_text(
    colour = rep(c("white", "black"), times = c(11, 4)),
    place = "centre",
    size = 15,
    grow = FALSE
  ) +
  geom_treemap_subgroup_text(
    place = "bottom",
    grow = TRUE,
    alpha = 0.4,
    colour = rep(c("white", "black"), times = c(11, 4)),
    fontface = "italic"
  ) +
  geom_treemap_subgroup_border(colour = "white", size = 5) +
  scale_fill_viridis_d() +
  theme(legend.position = "none")
# Final treemap: the bordered viridis treemap plus a bold, centred title.
gsoep_tm %>%
  arrange(cat, code) %>%
  ggplot(aes(area = n, fill = cat, label = job, subgroup = cat)) +
  geom_treemap(color = "white", size = 3) +
  geom_treemap_text(
    # one colour per row in the arranged order: 11 white, then 4 black
    colour = rep(c("white", "black"), times = c(11, 4)),
    place = "centre",
    size = 15,
    grow = FALSE
  ) +
  geom_treemap_subgroup_text(
    place = "bottom",
    grow = TRUE,
    alpha = 0.4,
    colour = rep(c("white", "black"), times = c(11, 4)),
    fontface = "italic"
  ) +
  geom_treemap_subgroup_border(colour = "white", size = 5) +
  scale_fill_viridis_d() +
  labs(title = "White Collar Public Service, Sales, and\nFinance Jobs Far Outnumber Blue Collar Jobs") +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", hjust = .5)
  )
PSC 290 - Data Visualization