Emorie D Beck
“a series of values of a quantity obtained at successive times, often with equal intervals between them”
# A tibble: 100 × 2
t value
<int> <dbl>
1 1 5
2 2 5
3 3 3
4 4 4
5 5 4
6 6 4
7 7 4
8 8 3
9 9 3
10 10 2
11 11 3
12 12 4
13 13 4
14 14 3
15 15 3
# … with 85 more rows
People who:
study longitudinal change (e.g., development)
study variability (e.g., experience sampling, passive sensing)
run experiments with multiple trials
study cohort or age differences
simulations (e.g., trace plots in bayesian models)
Time is everywhere, and ignoring it can be problematic
Why visualize a time series if you don’t care about the trend?
load(url("https://github.com/emoriebeck/psc290-data-viz-2022/raw/main/05-week5-time-series/01-data/ipcs_data.RData"))
ipcs_data %>%
print(n = 6)
# A tibble: 4,222 × 70
SID Full_D…¹ afraid angry atten…² content excited goaldir guilty happy proud
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 02 2018-10… 1 2 4 4 2 5 2 3 4
2 02 2018-10… 1 1 4 3 2 5 1 3 3
3 02 2018-10… 2 1 2 3 1 2 2 3 2
4 02 2018-10… 2 2 4 3 2 4 1 3 3
5 02 2018-10… 2 1 4 4 3 4 1 3 3
6 02 2018-10… 2 1 4 4 2 4 1 3 3
# … with 4,216 more rows, 59 more variables: purposeful <dbl>,
# agreeableness_Compassion <dbl>, agreeableness_Respectfulness <dbl>,
# agreeableness_Trust <dbl>, conscientiousness_Organization <dbl>,
# conscientiousness_Productiveness <dbl>,
# conscientiousness_Responsibility <dbl>, extraversion_Assertiveness <dbl>,
# extraversion_Energy.Level <dbl>, extraversion_Sociability <dbl>,
# neuroticism_Anxiety <dbl>, neuroticism_Depression <dbl>, …
Let’s simplify a bit and say we care about 4 different states for two people:
ipcs_data %>%
filter(SID == c("216")) %>%
select(SID, Full_Date, beep, excited
, goaldir, content, guilty) %>%
print(n = 10)
# A tibble: 108 × 7
SID Full_Date beep excited goaldir content guilty
<chr> <chr> <int> <dbl> <dbl> <dbl> <dbl>
1 216 2019-12-09 12:08 1 3 4 4 2
2 216 2019-12-09 16:06 2 3 4 4 1
3 216 2019-12-09 20:14 3 3 3 3 1
4 216 2019-12-10 12:02 4 3 4 3 2
5 216 2019-12-10 16:08 5 2 4 3 2
6 216 2019-12-10 20:05 6 3 3 4 2
7 216 2019-12-10 8:01 7 2 4 3 2
8 216 2019-12-11 12:29 8 3 3 3 2
9 216 2019-12-11 16:05 9 2 3 3 2
10 216 2019-12-11 20:10 10 3 4 3 1
# … with 98 more rows
ipcs_data %>%
filter(SID == c("02")) %>%
select(SID, Full_Date, beep, excited
, goaldir, content, guilty) %>%
print(n = 10)
# A tibble: 48 × 7
SID Full_Date beep excited goaldir content guilty
<chr> <chr> <int> <dbl> <dbl> <dbl> <dbl>
1 02 2018-10-22 13:23 1 2 5 4 2
2 02 2018-10-22 17:23 2 2 5 3 1
3 02 2018-10-23 10:00 3 1 2 3 2
4 02 2018-10-23 13:24 4 2 4 3 1
5 02 2018-10-23 17:53 5 3 4 4 1
6 02 2018-10-24 10:00 6 2 4 4 1
7 02 2018-10-24 13:23 7 2 4 3 1
8 02 2018-10-24 17:29 8 4 4 4 1
9 02 2018-10-24 21:23 9 3 3 3 2
10 02 2018-10-25 13:29 10 3 3 4 2
# … with 38 more rows
It’s hard to make much sense of this because we end up trying to draw the line connecting the points with our eyes:
It’s easier to make much sense of this because we can just follow the line:
But often in time series, we won’t want / need to plot the points:
One way to highlight increases is using trend lines:
ipcs_data %>%
filter(SID == c("02")) %>%
select(SID, Full_Date, beep, excited
, goaldir, content, guilty) %>%
ggplot(aes(x = beep, y = content)) +
geom_line(color = "grey", size = .9) +
geom_smooth(method = "lm", formula = y ~ poly(x,2)) +
scale_x_continuous(limits = c(1,50), breaks = c(1,seq(5,50,5))) +
labs(
x = "Beep (1-50)"
, y = "Self-Reported Momentary Contentedness (1-5)"
, title = "Ccontentedness increased in the second week"
, subtitle = "Participant 2"
, caption = "y ~ x2"
) +
theme_classic() +
theme(
axis.text = element_text(face = "bold", color ="black", size = rel(1.1))
, axis.title = element_text(face = "bold", size = rel(1.1))
, plot.title = element_text(face = "bold", hjust = .5, size = rel(1.1))
, plot.subtitle = element_text(face = "italic", hjust = .5, size = rel(1.1))
)
Another way to highlight changes is to use area:
ipcs_data %>%
filter(SID == c("02")) %>%
select(SID, Full_Date, beep, excited
, goaldir, content, guilty) %>%
ggplot(aes(x = beep, y = content-1)) +
geom_area(fill = "purple4", alpha = .2) +
geom_line(color = "purple4", size = .9) +
scale_x_continuous(limits = c(1,50), breaks = c(1,seq(5,50,5))) +
scale_y_continuous(limits = c(0,4), breaks = seq(0,4,1), labels = 1:5) +
labs(
x = "Beep (1-50)"
, y = "Self-Reported Momentary Contentedness (1-5)"
, title = "Ccontentedness increased in the second week"
, subtitle = "Participant 2"
) +
theme_classic() +
theme(
axis.text = element_text(face = "bold", color ="black", size = rel(1.1))
, axis.title = element_text(face = "bold", size = rel(1.1))
, plot.title = element_text(face = "bold", hjust = .5, size = rel(1.1))
, plot.subtitle = element_text(face = "italic", hjust = .5, size = rel(1.1))
)
Another way to highlight changes is to use area, but let’s also add some text to highlight what we want to show:
ipcs_data %>%
filter(SID == c("02")) %>%
select(SID, Full_Date, beep, excited
, goaldir, content, guilty) %>%
ggplot(aes(x = beep, y = content-1)) +
geom_area(fill = "purple4", alpha = .2) +
geom_line(color = "purple4", size = .9) +
geom_vline(aes(xintercept = 28), linetype = "dashed", size = 1) +
annotate("text", label = "Week 1", x = 15, y = 0.1, hjust = .5) +
annotate("text", label = "Week 2", x = 40, y = 0.1, hjust = .5) +
scale_x_continuous(limits = c(1,50), breaks = c(1,seq(5,50,5))) +
scale_y_continuous(limits = c(0,4), breaks = seq(0,4,1), labels = 1:5) +
labs(
x = "Beep (1-50)"
, y = "Self-Reported Momentary Contentedness (1-5)"
, title = "Ccontentedness increased in the second week"
, subtitle = "Participant 2"
) +
theme_classic() +
theme(
axis.text = element_text(face = "bold", color ="black", size = rel(1.1))
, axis.title = element_text(face = "bold", size = rel(1.1))
, plot.title = element_text(face = "bold", hjust = .5, size = rel(1.1))
, plot.subtitle = element_text(face = "italic", hjust = .5, size = rel(1.1))
)
One way to build a multivariate time series figure is using the color
aesthetic:
Oof, this looks a little rough
One way to build a multivariate time series figure is using the color
aesthetic, improved with our own color scale:
ipcs_data %>%
filter(SID %in% c("05", "02")) %>%
select(SID, Full_Date, beep, excited
, goaldir, content, guilty) %>%
ggplot(aes(x = beep, y = content)) +
geom_line(aes(color = SID)) +
scale_color_manual(values = c("darkblue", "orange3")) +
scale_x_continuous(limits = c(1,50), breaks = c(1,seq(5,50,5))) +
scale_y_continuous(limits = c(1,5), breaks = seq(1,5,1), labels = 1:5) +
labs(
x = "Beep (1-50)"
, y = "Self-Reported Momentary Contentedness (1-5)"
, color = "Participant ID"
) +
theme_classic() +
theme(
legend.position = "bottom"
, legend.text = element_text(face = "bold", size = rel(1.1))
, legend.title = element_text(face = "bold", size = rel(1.1))
, axis.text = element_text(face = "bold", color ="black", size = rel(1.1))
, axis.title = element_text(face = "bold", size = rel(1.1))
)
One way to build a multivariate time series figure is using the color
aesthetic. Bur remember, legends add cognitive load, so let’s add labels
:
ipcs_data %>%
filter(SID %in% c("05", "02")) %>%
select(SID, Full_Date, beep, excited
, goaldir, content, guilty) %>%
ggplot(aes(x = beep, y = content)) +
geom_line(aes(color = SID), size = .8) +
annotate("label", label = "Participant 02", color = "white", fill = "darkblue", x = 33, y = 2.5, hjust = 0) +
annotate("label", label = "Participant 5", color = "white", fill = "orange3", x = 18, y = 1.75, hjust = 0) +
scale_color_manual(values = c("darkblue", "orange3")) +
scale_x_continuous(limits = c(1,50), breaks = c(1,seq(5,50,5))) +
scale_y_continuous(limits = c(1,5), breaks = seq(1,5,1), labels = 1:5) +
labs(
x = "Beep (1-50)"
, y = "Self-Reported Momentary Contentedness (1-5)"
, color = "Participant ID"
) +
theme_classic() +
theme(
legend.position = "bottom"
, legend.text = element_text(face = "bold", size = rel(1.1))
, legend.title = element_text(face = "bold", size = rel(1.1))
, axis.text = element_text(face = "bold", color ="black", size = rel(1.1))
, axis.title = element_text(face = "bold", size = rel(1.1))
)
But what’s the story here?
Let’s (1) add the story with a title, (2) change the colors to match the story, (3) add trend lines:
ipcs_data %>%
filter(SID %in% c("05", "02")) %>%
select(SID, Full_Date, beep, excited
, goaldir, content, guilty) %>%
ggplot(aes(x = beep, y = content)) +
geom_line(aes(color = SID), size = .8) +
geom_smooth(aes(color = SID), method = "lm",se = F) +
annotate("label", label = "Participant 02", color = "white", fill = "darkblue", x = 33, y = 2.5, hjust = 0) +
annotate("label", label = "Participant 5", color = "white", fill = "grey60", x = 18, y = 1.75, hjust = 0) +
scale_color_manual(values = c("darkblue", "grey60")) +
scale_x_continuous(limits = c(1,50), breaks = c(1,seq(5,50,5))) +
scale_y_continuous(limits = c(1,5), breaks = seq(1,5,1), labels = 1:5) +
labs(
x = "Beep (1-50)"
, y = "Self-Reported Momentary Contentedness (1-5)"
, color = "Participant ID"
, title = "Both Participants' Contentedness Increased"
, subtitle = "But Participant 5 remained more content on average"
) +
theme_classic() +
theme(
legend.position = "bottom"
, legend.text = element_text(face = "bold", size = rel(1.1))
, legend.title = element_text(face = "bold", size = rel(1.1))
, axis.text = element_text(face = "bold", color ="black", size = rel(1.1))
, axis.title = element_text(face = "bold", size = rel(1.1))
, plot.title = element_text(face = "bold", hjust = .5, size = rel(1.1))
, plot.subtitle = element_text(face = "italic", hjust = .5, size = rel(1.1))
)
Basic Syntax:
Let’s improve the colors and add some labels (I’m feeling lavendar haze) ::::{.columns} :::{.column}
ipcs_data %>%
filter(SID == "02") %>%
select(SID, Full_Date, beep, goaldir, guilty) %>%
pivot_longer(
cols = c(goaldir, guilty)
, names_to = "item"
, values_to = "value"
) %>%
ggplot(aes(x = beep, y = value)) +
geom_line(aes(color = item), size = .9) +
geom_point(size = .9) +
annotate("text", label = "Goal\nDirected", x = 50, y = 4, hjust = 0) +
annotate("text", label = "Guilty", x = 50, y = 2, hjust = 0) +
scale_color_manual(values = c("orchid4", "orchid2")) +
scale_x_continuous(limits = c(1,57), breaks = seq(0,50,5)) +
theme_classic() +
theme(legend.position = "none")
::: :::{.column}
::: ::::
Let’s (1) add a story with the title/subtitle and (2) highlight a core part of the time series
ipcs_data %>%
filter(SID == "02") %>%
select(SID, Full_Date, beep, goaldir, guilty) %>%
pivot_longer(
cols = c(goaldir, guilty)
, names_to = "item"
, values_to = "value"
) %>%
ggplot(aes(x = beep, y = value)) +
annotate("rect", xmin = 13, xmax = 22, ymin = 1.8, ymax = 3.2, fill = "orchid", alpha = .3) +
geom_line(aes(color = item), size = .9) +
geom_point(size = .9) +
annotate("text", label = "Goal\nDirected", x = 50, y = 4, hjust = 0) +
annotate("text", label = "Guilty", x = 50, y = 2, hjust = 0) +
scale_color_manual(values = c("orchid4", "orchid2")) +
scale_x_continuous(limits = c(1,57), breaks = seq(0,50,5)) +
labs(
x = "Beep (1-50)"
, y = "Self-Rated Momentary Value (1-5)"
, title = "When goal-directedness was high, guilt was low"
, subtitle = "Guilt was rarely equal to or higher than goal-directedness"
) +
theme_classic() +
theme(legend.position = "none")
Let’s clean up our theme elements:
ipcs_data %>%
filter(SID == "02") %>%
select(SID, Full_Date, beep, goaldir, guilty) %>%
pivot_longer(
cols = c(goaldir, guilty)
, names_to = "item"
, values_to = "value"
) %>%
ggplot(aes(x = beep, y = value)) +
annotate("rect", xmin = 13, xmax = 22, ymin = 1.8, ymax = 3.2, fill = "orchid", alpha = .3) +
geom_line(aes(color = item), size = .9) +
geom_point(size = .9) +
annotate("text", label = "Goal\nDirected", x = 50, y = 4, hjust = 0) +
annotate("text", label = "Guilty", x = 50, y = 2, hjust = 0) +
scale_color_manual(values = c("orchid4", "orchid2")) +
scale_x_continuous(limits = c(1,57), breaks = seq(0,50,5)) +
labs(
x = "Beep (1-50)"
, y = "Self-Rated Momentary Value (1-5)"
, title = "When goal-directedness was high, guilt was low"
, subtitle = "Guilt was rarely equal to or higher than goal-directedness"
) +
theme_classic() +
theme(
legend.position = "none"
, legend.text = element_text(face = "bold", size = rel(1.1))
, legend.title = element_text(face = "bold", size = rel(1.1))
, axis.text = element_text(face = "bold", color ="black", size = rel(1.1))
, axis.title = element_text(face = "bold", size = rel(1.1))
, plot.title = element_text(face = "bold", hjust = .5, size = rel(1.2))
, plot.subtitle = element_text(face = "italic", hjust = .5, size = rel(1.1))
)
Alternatively, we can facet
instead of splitting items by color
:
ipcs_data %>%
filter(SID == "02") %>%
select(SID, Full_Date, beep, goaldir, guilty) %>%
pivot_longer(
cols = c(goaldir, guilty)
, names_to = "item"
, values_to = "value"
) %>%
ggplot(aes(x = beep, y = value-1)) +
geom_area(aes(fill = item), alpha = .4) +
geom_line(color = "orchid4", size = .9) +
geom_point(size = .9) +
scale_fill_manual(values = c("orchid4", "orchid2")) +
scale_x_continuous(limits = c(1,50), breaks = seq(0,50,5)) +
scale_y_continuous(limits = c(0,4), breaks = seq(0,4,1), labels = 1:5) +
labs(
x = "Beep (1-50)"
, y = "Self-Rated Momentary Value (1-5)"
, title = "When goal-directedness was high, guilt was low"
, subtitle = "Guilt was rarely equal to or higher than goal-directedness"
) +
facet_wrap(~item, nrow = 2) +
theme_classic() +
theme(
legend.position = "none"
, legend.text = element_text(face = "bold", size = rel(1.1))
, legend.title = element_text(face = "bold", size = rel(1.1))
, axis.text = element_text(face = "bold", color ="black", size = rel(1.1))
, axis.title = element_text(face = "bold", size = rel(1.1))
, plot.title = element_text(face = "bold", hjust = .5, size = rel(1.2))
, plot.subtitle = element_text(face = "italic", hjust = .5, size = rel(1.1))
, strip.background = element_rect(fill = "orchid4")
, strip.text = element_text(face = "bold", size = rel(1.2), color = "white")
)
First, let’s wrangle our data. We’ll create a positive and negative emotion composite in order to get around the ordinal nature of the data.
ipcs_data %>%
filter(SID == "02") %>%
select(SID, Full_Date, beep, afraid:purposeful) %>%
pivot_longer(
cols = afraid:purposeful
, names_to = "item"
, values_to = "value"
) %>%
mutate(valence = ifelse(item %in% c("afraid", "angry", "guilty"), "Negative", "Positive")) %>%
group_by(SID, Full_Date, beep, valence) %>%
summarize(value = mean(value, na.rm = T)) %>%
ungroup() %>%
pivot_wider(
names_from = "valence"
, values_from = "value"
) %>%
arrange(beep)
# A tibble: 48 × 5
SID Full_Date beep Negative Positive
<chr> <chr> <int> <dbl> <dbl>
1 02 2018-10-22 13:23 1 1.67 3.71
2 02 2018-10-22 17:23 2 1 3.29
3 02 2018-10-23 10:00 3 1.67 2.14
4 02 2018-10-23 13:24 4 1.67 3
5 02 2018-10-23 17:53 5 1.33 3.57
6 02 2018-10-24 10:00 6 1.33 3.43
7 02 2018-10-24 13:23 7 1.33 3
8 02 2018-10-24 17:29 8 1.33 4
9 02 2018-10-24 21:23 9 2 2.71
10 02 2018-10-25 13:29 10 1.67 3.43
# … with 38 more rows
Let’s look at them negative and positive emotion composites using geom_path()
ipcs_data %>%
filter(SID == "02") %>%
select(SID, Full_Date, beep, afraid:purposeful) %>%
pivot_longer(
cols = afraid:purposeful
, names_to = "item"
, values_to = "value"
) %>%
mutate(valence = ifelse(item %in% c("afraid", "angry", "guilty"), "Negative", "Positive")) %>%
group_by(SID, Full_Date, beep, valence) %>%
summarize(value = mean(value, na.rm = T)) %>%
ungroup() %>%
pivot_wider(
names_from = "valence"
, values_from = "value"
) %>%
arrange(beep) %>%
ggplot(aes(x = Negative, y = Positive)) +
geom_path(aes(color = beep)) +
geom_point() +
scale_color_viridis_c() +
theme_classic() +
theme(legend.position = "bottom")
Let’s look at them negative and positive emotion composites using geom_segment()
ipcs_data %>%
filter(SID == "02") %>%
select(SID, Full_Date, beep, afraid:purposeful) %>%
pivot_longer(
cols = afraid:purposeful
, names_to = "item"
, values_to = "value"
) %>%
mutate(valence = ifelse(item %in% c("afraid", "angry", "guilty"), "Negative", "Positive")) %>%
group_by(SID, Full_Date, beep, valence) %>%
summarize(value = mean(value, na.rm = T)) %>%
ungroup() %>%
pivot_wider(
names_from = "valence"
, values_from = "value"
) %>%
ggplot(aes(x = Negative, y = Positive, label = beep)) +
geom_segment(aes(
xend=c(tail(Negative, n=-1), NA)
, yend=c(tail(Positive, n=-1), NA)
, color = beep
)
, arrow = arrow(length = unit(0.4, "cm"))
) +
geom_point() +
scale_color_viridis_c() +
theme_classic()
load(url("https://github.com/emoriebeck/psc290-data-viz-2022/blob/main/05-week5-time-series/01-data/gsoep.RData?raw=true"))
gsoep
# A tibble: 480,798 × 11
year SID SRhealth satHealth marital chldbrth gender yearB…¹ morta…² job
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl+l> <dbl> <dbl> <dbl>
1 1984 901 NA NA NA NA 2 [[2]… 1951 0 NA
2 1984 1001 NA NA NA NA 2 [[2]… 1913 0 NA
3 1984 1101 NA NA NA NA 2 [[2]… 1906 0 NA
4 1984 1201 NA NA NA NA 1 [[1]… 1911 0 NA
5 1984 1202 NA NA NA NA 2 [[2]… 1913 0 NA
6 1984 1301 NA NA NA NA 2 [[2]… 1943 0 NA
7 1984 1302 NA NA NA NA 1 [[1]… 1965 0 NA
8 1984 1901 NA NA NA NA 2 [[2]… 1948 0 NA
9 1984 2001 NA NA NA NA 2 [[2]… 1949 0 NA
10 1984 2002 NA NA NA NA 1 [[1]… 1952 0 NA
# … with 480,788 more rows, 1 more variable: age <dbl>, and abbreviated
# variable names ¹yearBrth, ²mortality
gsoep %>%
select(SID, age, SRhealth, satHealth) %>%
filter(age < 100 & age > 19) %>%
mutate(age_gr = as.numeric(mapvalues(age, seq(20,99,1), rep(seq(20,99,2), each = 2)))) %>%
drop_na() %>%
group_by(age, age_gr) %>%
summarize_at(vars(SRhealth, satHealth), mean) %>%
group_by(age_gr) %>%
summarize_at(vars(SRhealth, satHealth), mean) %>%
ungroup()
# A tibble: 40 × 3
age_gr SRhealth satHealth
<dbl> <dbl> <dbl>
1 20 3.96 7.81
2 22 3.92 7.72
3 24 3.91 7.66
4 26 3.86 7.55
5 28 3.83 7.48
6 30 3.78 7.44
7 32 3.74 7.34
8 34 3.70 7.27
9 36 3.67 7.20
10 38 3.60 7.08
# … with 30 more rows
gsoep %>%
select(SID, age, SRhealth, satHealth) %>%
filter(age < 100 & age > 19) %>%
mutate(age_gr = as.numeric(mapvalues(age, seq(20,99,1), rep(seq(20,99,2), each = 2)))) %>%
drop_na() %>%
group_by(age, age_gr) %>%
summarize_at(vars(SRhealth, satHealth), mean) %>%
group_by(age_gr) %>%
summarize_at(vars(SRhealth, satHealth), mean) %>%
ungroup() %>%
ggplot(aes(x = SRhealth, y = satHealth, label = age_gr)) +
geom_segment(aes(
xend=c(tail(SRhealth, n=-1), NA)
, yend=c(tail(satHealth, n=-1), NA)
, color = age_gr
)
, arrow = arrow(length = unit(0.4, "cm"))
) +
geom_point() +
scale_y_continuous(limits = c(0,10), breaks = seq(0,10,2)) +
scale_color_viridis_c() +
theme_classic()
Rather than a connected scatterplot showing general patterns of change, we may want to look at person-level trajectories coupled with an overall trajectory.
# A tibble: 13,336 × 4
SID age item value
<dbl> <dbl> <chr> <dbl>
1 2501 68 SRhealth 1
2 2501 68 satHealth 2
3 25801 60 SRhealth 3
4 25801 60 satHealth 6
5 27403 27 SRhealth 3
6 27403 27 satHealth 5
7 29402 29 SRhealth 5
8 29402 29 satHealth 10
9 35701 43 SRhealth 4
10 35701 43 satHealth 9
# … with 13,326 more rows
Rather than a connected scatterplot showing general patterns of change, we may want to look at person-level trajectories coupled with an overall trajectory.
gsoep %>%
select(SID, age, SRhealth, satHealth) %>%
filter(age < 100 & age > 19) %>%
drop_na() %>%
filter(SID %in% sample(unique(.$SID), 1000)) %>%
pivot_longer(
cols = c(SRhealth, satHealth)
, names_to = "item"
, values_to = "value"
) %>%
ggplot(aes(x = age, y = value, color = item)) +
geom_point(alpha = .2, size = .5, color = "grey") +
geom_smooth(
aes(group = interaction(SID, item))
, method = "lm", se = F
, alpha = .3, size = .5
) +
scale_color_manual(values = c("darkblue", "darkred")) +
facet_wrap(~item) +
theme_classic() +
theme(legend.position = "none")
But this is a longitudinal change, so we can’t just do that. We have to estimate the growth models first
The model we’ll use is:
\(Y_{ij} = \beta_{0j} + \beta_{1j}*age_{ij} + \epsilon_{ij}\)
\[\epsilon \sim \mathcal{N}(0, \sigma^2)\]
\[ \begin{align*} \Biggr[ \begin{array}{c} u_{0j}\\ u_{1j}\end{array} \Biggr] \sim \mathcal{N} \left( \Biggr[ \begin{array}{c} 0\\ 0\end{array} \Biggr], \begin{array}{cc} \tau_{00}^2 & \tau_{10} \\ \tau_{10} & \tau_{11}^2 \end{array} \right) \end{align*} \]
library(lme4)
lmer_fun <- function(d) lmer(value ~ 1 + age + (1 + age | SID), data = d)
m_nested <- gsoep %>%
select(SID, age, SRhealth, satHealth) %>%
filter(age < 100 & age > 19) %>%
drop_na() %>%
pivot_longer(
cols = c(SRhealth, satHealth)
, names_to = "item"
, values_to = "value"
) %>%
mutate(age = age/10) %>%
group_by(item) %>%
nest() %>%
ungroup() %>%
mutate(
m = map(data, lmer_fun)
, tidy = map(m, broom.mixed::tidy)); m_nested
# A tibble: 2 × 4
item data m tidy
<chr> <list> <list> <list>
1 SRhealth <tibble [227,516 × 3]> <lmerMod> <tibble [6 × 6]>
2 satHealth <tibble [227,516 × 3]> <lmerMod> <tibble [6 × 6]>
But this is a longitudinal change, so we can’t just do that. We have to estimate the growth models first
The model we’ll use is:
\(Y_{ij} = \beta_{0j} + \beta_{1j}*age_{ij} + \epsilon_{ij}\)
\[\epsilon \sim \mathcal{N}(0, \sigma^2)\]
\[ \begin{align*} \Biggr[ \begin{array}{c} u_{0j}\\ u_{1j}\end{array} \Biggr] \sim \mathcal{N} \left( \Biggr[ \begin{array}{c} 0\\ 0\end{array} \Biggr], \begin{array}{cc} \tau_{00}^2 & \tau_{10} \\ \tau_{10} & \tau_{11}^2 \end{array} \right) \end{align*} \]
# A tibble: 12 × 7
item effect group term estimate std.error statistic
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
1 SRhealth fixed <NA> (Intercept) 4.57 0.0111 410.
2 SRhealth fixed <NA> age -0.245 0.00242 -101.
3 SRhealth ran_pars SID sd__(Intercept) 1.05 NA NA
4 SRhealth ran_pars SID cor__(Intercept).age -0.822 NA NA
5 SRhealth ran_pars SID sd__age 0.214 NA NA
6 SRhealth ran_pars Residual sd__Observation 0.616 NA NA
7 satHealth fixed <NA> (Intercept) 9.04 0.0261 346.
8 satHealth fixed <NA> age -0.488 0.00575 -84.7
9 satHealth ran_pars SID sd__(Intercept) 2.42 NA NA
10 satHealth ran_pars SID cor__(Intercept).age -0.818 NA NA
11 satHealth ran_pars SID sd__age 0.510 NA NA
12 satHealth ran_pars Residual sd__Observation 1.47 NA NA
fx_pred_fun <- function(m){
tibble(age = seq(2,9.9, .1)) %>%
mutate(
pred = predict(m, newdata = ., re.form = NA)
)
}
rx_pred_fun <- function(m, d){
d %>%
select(SID, age) %>%
distinct() %>%
mutate(pred = predict(m, newdata = .))
}
m_nested <- m_nested %>%
mutate(fx_pred = map(m, fx_pred_fun)
, rx_pred = map2(m, data, rx_pred_fun))
# A tibble: 2 × 6
item data m tidy fx_pred rx_pred
<chr> <list> <list> <list> <list> <list>
1 SRhealth <tibble [227,516 × 3]> <lmerMod> <tibble [6 × 6]> <tibble> <tibble>
2 satHealth <tibble [227,516 × 3]> <lmerMod> <tibble [6 × 6]> <tibble> <tibble>
sids <- sample(unique(m_nested$data[[1]]$SID), 1000)
m_nested %>%
select(item, rx_pred) %>%
unnest(rx_pred) %>%
filter(SID %in% sids) %>%
ggplot(aes(x = age*10, y = pred, color = item)) +
geom_line(
aes(group = factor(SID))
, alpha = .25
, size = .75) +
geom_line(
data = m_nested %>%
select(item, fx_pred) %>%
unnest(fx_pred)
, size = 1
) +
scale_color_manual(values = c("darkblue", "darkred")) +
facet_wrap(~item) +
theme_classic() +
theme(legend.position = "none")
sids <- sample(unique(m_nested$data[[1]]$SID), 1000)
m_nested %>%
select(item, rx_pred) %>%
unnest(rx_pred) %>%
filter(SID %in% sids) %>%
ggplot(aes(x = age*10, y = pred, color = item)) +
geom_line(
aes(group = factor(SID))
, alpha = .25
, size = .75) +
geom_line(
data = m_nested %>%
select(item, fx_pred) %>%
unnest(fx_pred)
, size = 1
) +
scale_color_manual(values = c("darkblue", "darkred")) +
labs(x = "Age"
, y = "Model Predicted Values (0-10)"
, title = "Self-Rated Health and Health Satisfaction Decreased\nAcross the Lifespan, on Average") +
facet_wrap(~item) +
theme_classic() +
theme(legend.position = "none"
, axis.text = element_text(face = "bold", size = rel(1.1))
, axis.title = element_text(face = "bold", size = rel(1.2))
, plot.title = element_text(face = "bold", size = rel(1.1), hjust=.5)
, strip.background = element_rect(fill = "darkblue")
, strip.text = element_text(face = "bold", size = rel(1.1), color = "white"))
Often, we want to smooth our trajectories, like if we wanted to look at the trajectory of self-rated health and satisfaction with health across age groups
gsoep %>%
select(SID, age, SRhealth, satHealth, marital, gender) %>%
drop_na() %>%
filter(age >= 20 & age <= 100) %>%
group_by(age, marital, gender) %>%
summarize_at(vars(satHealth, SRhealth), mean)
# A tibble: 597 × 5
# Groups: age, marital [310]
age marital gender satHealth SRhealth
<dbl> <dbl> <dbl+lbl> <dbl> <dbl>
1 20 1 1 [[1] Male] 7 4.33
2 20 1 2 [[2] Female] 8.06 3.87
3 20 2 2 [[2] Female] 4.5 2
4 20 4 1 [[1] Male] 8.01 4.04
5 20 4 2 [[2] Female] 7.70 3.89
6 21 1 1 [[1] Male] 8.58 4.42
7 21 1 2 [[2] Female] 7.82 3.82
8 21 2 2 [[2] Female] 8.67 4.67
9 21 4 1 [[1] Male] 7.83 3.97
10 21 4 2 [[2] Female] 7.71 3.90
# … with 587 more rows
gsoep %>%
select(SID, age, SRhealth, satHealth, marital, gender) %>%
drop_na() %>%
filter(age >= 20 & age <= 100) %>%
group_by(age, marital, gender) %>%
summarize_at(vars(satHealth, SRhealth), mean) %>%
pivot_longer(
cols = c(SRhealth, satHealth)
, names_to = "item"
, values_to = "value"
) %>%
ggplot(aes(x = age, y = value)) +
geom_line(aes(color = interaction(gender, marital))) +
geom_smooth(method = "loess") +
facet_wrap(~item, nrow = 2) +
theme_classic() +
theme(legend.position = "bottom")
gsoep %>%
select(SID, age, SRhealth, satHealth, marital, gender) %>%
drop_na() %>%
filter(age >= 20 & age <= 100) %>%
group_by(age, marital, gender) %>%
summarize_at(vars(satHealth, SRhealth), mean) %>%
pivot_longer(
cols = c(SRhealth, satHealth)
, names_to = "item"
, values_to = "value"
) %>%
ggplot(aes(x = age, y = value)) +
geom_line(aes(color = interaction(gender, marital))) +
geom_smooth(method = "lm") +
facet_wrap(~item, nrow = 2) +
theme_classic() +
theme(legend.position = "bottom")
We may want to highlight overall trajectories v. stratefied ones differently to highlight their consistency:
gsoep %>%
select(SID, age, SRhealth, satHealth, marital, gender) %>%
drop_na() %>%
filter(age >= 20 & age <= 100) %>%
group_by(age, marital, gender) %>%
summarize_at(vars(satHealth, SRhealth), mean) %>%
ungroup() %>%
mutate(marital = factor(
marital
, 1:4
, c("Married", "Separated", "Widowed", "Never Married")
), gender = factor(gender, c(1,2), c("Male", "Female"))
) %>%
pivot_longer(
cols = c(SRhealth, satHealth)
, names_to = "item"
, values_to = "value"
) %>%
ggplot(aes(x = age, y = value)) +
geom_line(aes(color = interaction(gender, marital))) +
geom_smooth(method = "lm", color = "orchid3", fill = "orchid3") +
scale_color_grey(start = .01, end = .99) +
facet_wrap(~item, nrow = 2) +
theme_classic() +
theme(legend.position = "bottom")
gsoep %>%
select(SID, age, SRhealth, satHealth, marital, gender) %>%
drop_na() %>%
filter(age >= 20 & age <= 100) %>%
group_by(age, marital, gender) %>%
summarize_at(vars(satHealth, SRhealth), mean) %>%
mutate(marital = factor(
marital
, 1:4
, c("Married", "Separated", "Widowed", "Never Married")
), gender = factor(gender, c(1,2), c("Male", "Female"))
) %>%
pivot_longer(
cols = c(SRhealth, satHealth)
, names_to = "item"
, values_to = "value"
) %>%
ggplot(aes(x = age, y = value)) +
geom_smooth(
aes(color = interaction(gender, marital))
, method = "loess"
, se = F
) +
geom_smooth(method = "lm", color = "orchid3", fill = "orchid3") +
scale_color_grey(start = .01, end = .99) +
facet_wrap(~item, nrow = 2) +
labs(x = "Age", y = "Smoothed Value", color = NULL) +
guides(color = guide_legend(nrow = 4)) +
theme_classic() +
theme(legend.position = "bottom")
gsoep %>%
select(SID, age, SRhealth, satHealth, marital, gender) %>%
drop_na() %>%
filter(age >= 20 & age <= 100) %>%
group_by(age, marital, gender) %>%
summarize_at(vars(satHealth, SRhealth), mean) %>%
mutate(marital = factor(
marital
, 1:4
, c("Married", "Separated", "Widowed", "Never Married")
), gender = factor(gender, c(1,2), c("Male", "Female"))
) %>%
pivot_longer(
cols = c(SRhealth, satHealth)
, names_to = "item"
, values_to = "value"
) %>%
ggplot(aes(x = age, y = value)) +
geom_smooth(
aes(color = interaction(gender, marital))
, method = "loess"
, se = F
, span = .25
, size = .5
) +
geom_smooth(method = "lm", color = "orchid3", fill = "orchid3") +
scale_color_grey(start = .01, end = .99) +
facet_wrap(~item, nrow = 2) +
labs(x = "Age", y = "Smoothed Value", color = NULL) +
guides(color = guide_legend(nrow = 4)) +
theme_classic() +
theme(legend.position = "bottom"
, axis.text = element_text(face = "bold", size = rel(1.1))
, axis.title = element_text(face = "bold", size = rel(1.2))
, strip.background = element_rect(fill = "orchid4")
, strip.text = element_text(face = "bold", size = rel(1.2), color = "white"))
pomp <- function(x) (x - min(x, na.rm = T))/(max(x, na.rm = T) - min(x, na.rm = T))*10
gsoep_sg <- gsoep %>%
select(SID, age, satHealth, SRhealth, marital, gender) %>%
filter(age %in% c(60,70)) %>%
drop_na() %>%
group_by(SID) %>%
filter(n() == 2) %>%
group_by(age) %>%
mutate(SRhealth = pomp(SRhealth)) %>%
group_by(age, SID, marital, gender) %>%
mutate(health = mean(SRhealth, satHealth)) %>%
group_by(age, marital, gender) %>%
summarize(health = mean(health)) %>%
ungroup() %>%
mutate(marital = factor(
marital
, 1:4
, c("Married", "Separated", "Widowed", "Never Married")
), gender = factor(gender, c(1,2), c("Male", "Female"))
); gsoep_sg
# A tibble: 16 × 4
age marital gender health
<dbl> <fct> <fct> <dbl>
1 60 Married Male 5.59
2 60 Married Female 5.67
3 60 Separated Male 5.08
4 60 Separated Female 5.62
5 60 Widowed Male 5.56
6 60 Widowed Female 5.46
7 60 Never Married Male 4.75
8 60 Never Married Female 6
9 70 Married Male 5.45
10 70 Married Female 5.20
11 70 Separated Male 5
12 70 Separated Female 4.44
13 70 Widowed Male 4.81
14 70 Widowed Female 4.81
15 70 Never Married Male 5.83
16 70 Never Married Female 5
gsoep_sg %>%
ggplot(aes(x = age, y = health, group = interaction(marital, gender))) +
geom_line(aes(color = interaction(marital, gender))) +
geom_point(size = 2) +
geom_text(data = . %>% filter(age == 60)
, aes(label = paste(marital, gender, sep = ", "))
, hjust = 1
, nudge_x = -1
) +
scale_x_continuous(
limits = c(52, 72), breaks = c(60,70)
, labels = c("Age 60", "Age 70"), position = "top"
) +
scale_y_continuous(position = "right") +
scale_color_viridis_d() +
labs(x = NULL
, y = "Health"
, title = "Only widowed men perceived worsening health\nfrom 60-70") +
theme_classic() +
theme(
legend.position = "none"
, axis.line = element_blank()
, axis.text = element_text(face = "bold", size = rel(1.1), color = "black")
, axis.title = element_text(face = "bold", size = rel(1.1))
, plot.title = element_text(face = "bold", size = rel(1.2), hjust = .5)
, panel.border = element_rect(fill = NA, size = 1.5)
)
PSC 290 - Data Visualization