First, we’ll load the R packages we need to run the analyses.
library(lavaan)
library(psych)
library(knitr)
library(kableExtra)
library(qgraph)
library(graphicalVAR)
library(mlVAR)
library(animation)
library(stringr)
library(plyr)
library(tidyverse)
Now, we’ll set the data path, which will be both where we load the data from and where we save results to. Then, we’ll load the codebook that will index the variables we’ll use.
data_path <- "~/Box/network/presentations/APS 2018"
esm_codebook <- sprintf("%s/Codebook.csv", data_path) %>%
read.csv(., stringsAsFactors = F) %>%
tbl_df
The chunk below won’t run but is included for transparency. This chunk will read in the raw data files with identifying information and replace identifying information (Subject IDs) with random IDs and remove variables not used in this analysis. Then, we’ll save those data to .csv that will be open access.
subs <- sprintf("%s/subs.csv", data_path) %>% read.csv %>% tbl_df
wave1_all <- sprintf("%s/data/esm_w1_RENAMED.csv", data_path) %>%
read.csv %>%
tbl_df %>%
select(one_of(paste(esm_codebook$old_name, "w1", sep = "."))) %>%
mutate(esm.IDnum.w1 = mapvalues(esm.IDnum.w1, subs$old, subs$new))
wave4_all <- sprintf("%s/data/esm_w4_RENAMED_all.csv", data_path) %>%
read.csv %>%
tbl_df %>%
select(one_of(paste(esm_codebook$old_name, "w4", sep = "."))) %>%
mutate(esm.IDnum.w4 = mapvalues(esm.IDnum.w4, subs$old, subs$new))
wave7_all <- sprintf("%s/data/esm_w7_RENAMED_all.csv", data_path) %>%
read.csv %>%
tbl_df %>%
select(one_of(paste(esm_codebook$old_name, "w7", sep = "."))) %>%
mutate(esm.IDnum.w7 = mapvalues(esm.IDnum.w7, subs$old, subs$new))
write.csv(wave1_all, sprintf("%s/data/esm_w1_redacted.csv", data_path), row.names = F)
write.csv(wave4_all, sprintf("%s/data/esm_w4_redacted.csv", data_path), row.names = F)
write.csv(wave7_all, sprintf("%s/data/esm_w7_redacted.csv", data_path), row.names = F)
Now, we’ll read in the redacted data and clean them. Basically, we want to read in the reduced data files, rename the variables to more informative names, and do some basic cleaning.
wave1_all <- sprintf("%s/data/esm_w1_redacted.csv", data_path) %>% read.csv %>% tbl_df
wave4_all <- sprintf("%s/data/esm_w4_redacted.csv", data_path) %>% read.csv %>% tbl_df
wave7_all <- sprintf("%s/data/esm_w7_redacted.csv", data_path) %>% read.csv %>% tbl_df
old.names <- esm_codebook$old_name
new.names <- esm_codebook$new_name
#Getting necessary columns
#Keeping subject ID and all esm.BFI items
w1 <- wave1_all %>%
select(one_of(paste(old.names, "w1", sep = "."))) %>%
setNames(new.names) %>% # change column names
mutate(Wave = "S1")
w4 <- wave4_all %>%
select(one_of(paste(old.names, "w4", sep = "."))) %>%
setNames(new.names) %>% # change column names
mutate(Wave = "S4")
w7 <- wave7_all %>%
select(one_of(paste(old.names, "w7", sep = "."))) %>%
setNames(new.names) %>% # change column names
mutate(Wave = "S7")
# short column names (for plots)
varnames2 <- c("A\nrude", "E\nquiet", "C\nlazy", "N\nrelaxed", "N\ndepressed", "E\noutgoing", "A\nkind",
"C\nreliable", "N\nworried", "positive\nemotion", "negative\nemotion", "authentic", "self\nesteem",
"happy", "lonely", "academic\nmotiv", "around\nothers", "connected")
# create wave variable before combining data sets.
w4$Wave <- "4"
w7$Wave <- "7"
# merge wave 4 and 7 data sets
w2 <- w4 %>% full_join(w7)
# retain cases where all personality data are retained
w1_com <- w1[complete.cases(w1[,c(7:11, 13:23)]),]
w2_com <- w2[complete.cases(w2[,c(7:11, 13:23)]),]
for (i in unique(w1_com$SID)){
mean_A_rude <- mean(w1_com$A_rude[w1_com$SID == i], na.rm = T)
w1_com$A_rude[is.na(w1_com$A_rude) & w1_com$SID == i] <- mean_A_rude
mean_A_kind <- mean(w1_com$A_kind[w1_com$SID == i], na.rm = T)
w1_com$A_kind[is.na(w1_com$A_kind) & w1_com$SID == i] <- mean_A_kind
}
for (i in unique(w2_com$SID)){
mean_A_rude <- mean(w2_com$A_rude[w2_com$SID == i], na.rm = T)
w2_com$A_rude[is.na(w2_com$A_rude) & w2_com$SID == i] <- mean_A_rude
mean_A_kind <- mean(w2_com$A_kind[w2_com$SID == i], na.rm = T)
w2_com$A_kind[is.na(w2_com$A_kind) & w2_com$SID == i] <- mean_A_kind
}
# for waves 4 and 7, create a variable that combines wave and day of study
w2_com$waveDay <- paste(w2_com$Wave, w2_com$day, sep = ".")
# Make numeric subject IDs for each df because mlVAR won't run for factors #
w1_com$SID2 <- as.numeric(as.character(w1_com$SID))
w2_com$SID2 <- as.numeric(as.character(w2_com$SID))
# a few variable have 0 SD. This will create a problem with estimation, so we'll add
# a tiny amount of jitter. That variable won't have any edges due to regularization
# but we won't have to toss those subjects
jitter_fun <- function(df){
sd_fun <- function(x){if(sd(x, na.rm = T) == 0) jitter(x, amount = runif(1,0,.05)) else x}
df2 <- data.frame(apply(df, 2, sd_fun))
colnames(df2) <- colnames(df2)
return(df2)
}
# wave 1
# Need to keep subjects who have at least 10 assessments
w1_com <- tbl_df(w1_com) %>%
group_by(SID) %>%
arrange(day, hourBlock) %>%
mutate(beepvar3 = seq(1, n(), 1)) %>%
ungroup() %>%
select(SID, SID2, beepvar3, A_rude:connected) %>%
group_by(SID) %>%
mutate_if(is.integer, as.numeric) %>%
mutate(count = n(), wave = "1") %>%
filter(count > 10)
# adding the jitter
w1_test <- w1_com %>%
group_by(SID, SID2, count, wave) %>%
nest() %>%
mutate(data2 = map(data, jitter_fun)) %>%
unnest(data2, .drop = T)
# wave 2
# Need to keep subjects who have at least 10 assessments
w2_com <- tbl_df(w2_com) %>%
group_by(SID) %>%
arrange(waveDay, hourBlock) %>%
mutate(beepvar3 = seq(1, n(), 1)) %>%
ungroup() %>%
select(SID, SID2, beepvar3, A_rude:connected) %>%
group_by(SID) %>%
mutate_if(is.integer, as.numeric) %>%
mutate(count = n(), wave = "2") %>%
filter(count > 10)
# adding the jitter
w2_test <- w2_com %>%
group_by(SID, SID2, count, wave) %>%
nest() %>%
mutate(data2 = map(data, jitter_fun)) %>%
unnest(data2, .drop = T)
We need to detrend the data because they violate the non-stationarity assumption. We’re just going to use simple OLS although there are alternative and arguably better alternatives.
detrend <- function(df)
dat <- w1_test %>%
full_join(w2_test) %>%
gather(key = var, value = value, A_rude:connected) %>%
group_by(SID, wave, count, var) %>%
nest() %>%
mutate(m = map(data, ~lm(value ~ beepvar3, data = .)),
p = map(m, broom::augment)) %>%
unnest(p) %>%
select(SID:var, .resid, beepvar3) %>%
spread(key = var, value = .resid)
Now, we’ll fit the idiographic graphical VAR models using the graphicalVAR
package. We’re going to the eBIC hyperparameter gamma to 0 for discovery and allow the regularizing parameter lambda to vary between .025 and .25. Some of these models will throw errors because of the small amount of data. We’ll ignore them for now since the goal here is largely demonstrative.
gVAR_fun <- function(x, SID, wave){
print(paste(wave, SID))
n <- dim(x)[1]
gamma <- 0
lambda <- seq(.025, .25, .025)
x <- x %>% select(A_rude:connected, -beepvar3)
fit <-
graphicalVAR(x, gamma = gamma, maxit.in = 1000, maxit.out = 1000,
lambda_beta = lambda, lambda_kappa = lambda,
verbose = T, scale = F, centerWithin = F)
return(fit)
}
gVAR_fit <- dat %>%
filter(!(SID %in% c("97898", "13518", "64875", "74435", "83106", "97159", "64869", "37024", "12715"))) %>%
group_by(SID, wave, count) %>%
nest() %>%
mutate(gVAR_fit = pmap(list(data, SID, wave), gVAR_fun))
save(gVAR_fit, file = sprintf("%s/graphicalVAR_allSubs.RData", data_path))
Now, that the models are run, we want to extract the partial directed and contemporaneous correlations, which can be used to construct the idiographic networks. All of these will be saved into the same data nested data frame using tools from the R packages purrr
and tidyr
.
load(sprintf("%s/graphicalVAR_allSubs.RData", data_path))
############################################
############## idiographic #################
############################################
temp_fun <- function(fit, SID){
PDC <- fit$PDC
from <- row.names(PDC)
PDC.long <- tbl_df(PDC) %>%
mutate(from = from, type = "Temporal") %>%
gather(key = to, value = weight, -from, -type)
}
contemp_mat_fun <- function(fit){fit$PCC}
contemp_long_fun <- function(fit){
PCC <- fit$PCC
PCC <- PCC[,order(colnames(PCC))]
PCC <- PCC[order(rownames(PCC)),]
PCC[lower.tri(PCC, diag = T)] <- NA
vars <- rownames(PCC)
PCC.long <- tbl_df(PCC) %>%
mutate(Var1 = vars,
type = "Contemporaneous") %>%
gather(key = Var2, value = weight, -Var1, -type) %>%
filter(!is.na(weight)) %>%
unite(var, Var1, Var2, sep = ".", remove = F)
}
gVAR_fit <- gVAR_fit %>%
filter(!is.na(gVAR_fit)) %>%
mutate(temp = map2(gVAR_fit, SID, temp_fun),
contemp_mat = map(gVAR_fit, contemp_mat_fun),
contemp = map(gVAR_fit, contemp_long_fun))
edge_colors <- RColorBrewer::brewer.pal(8, "Purples")[c(4,6,8)]
idio_plot_fun <- function(data, subject, wave, type){
if(type == "Temporal"){data_mod <- data$PDC}
else{data_mod <- data$PCC}
n <- nrow(data$PDC)
if(n == 9){b5_groups <- list(A = c(1,7), E = c(2, 6), C = c(3,8), N = c(4,5,9))}
else{b5_groups <- list(A = c(1,7), E = c(2, 6), C = c(3,8), N = c(4,5,9), other = 10:n)}
subject <- ifelse(subject == "10506", "1",
ifelse(subject == "39941", "2", subject))
plot <-
qgraph(data_mod, layout = "spring", loop = .7, node.width = 1.85, edge.width = 1, esize = 7,
title = sprintf("%s Wave %s for S%s", type, wave, subject), label.font = 2, repulsion = .8,
label.fill.vertical = 1, label.fill.horizontal = 1, edge.color = "black",
groups = b5_groups, color = rev(c("#FFFFB3", t(RColorBrewer::brewer.pal(9, "Purples")[seq(3,9,2)]))),
legend = F, DoNotPlot = TRUE, mar = c(4,4,4,4))
#change lines to dashed
plot$graphAttributes$Edges$lty[plot$Edgelist$weight < 0] <- 2
#change line colors
plot$graphAttributes$Edges$color <-
ifelse(abs(plot$Edgelist$weight) <.1, edge_colors[1],
ifelse(abs(plot$Edgelist$weight) <.2, edge_colors[2], edge_colors[3]))
dark_colors <- c("#9E9AC8", "#807DBA", "#6A51A3", "#54278F", "#3F007D")
plot$graphAttributes$Nodes$label.color[plot$graphAttributes$Nodes$color %in% dark_colors] <- "white"
vars <- str_replace(colnames(data$PDC), "_", "\n")
#change variable names
plot$graphAttributes$Nodes$labels <- vars
return(plot)
}
gVAR_fit <- gVAR_fit %>%
mutate(temp_plot = pmap(list(gVAR_fit, SID, wave, "Temporal"),
possibly(idio_plot_fun, NA_real_)),
contemp_plot = pmap(list(gVAR_fit, SID, wave, "Contemporaneous"),
possibly(idio_plot_fun, NA_real_)))
First up is structure. From an idiographic perspective, structure is the stable personal tendencies within a person, or the consistent temporal and contemporaneous behavioral patterns across assessments.
The first graph just shows two example participants from the first wave.
pdf("~/Box/network/presentations/APS 2018/plots/structure_plot.pdf", width = 8, height = 4)
par(mfrow = c(1,2))
gVAR_fit %>%
filter(SID %in% c("10506", "39941") & wave == 1) %>%
select(SID, wave, temp_plot, contemp_plot) %>%
arrange(wave, desc(SID)) %>%
mutate(map(contemp_plot, plot))
dev.off()
par(mfrow = c(1,2))
gVAR_fit %>%
filter(SID %in% c("10506", "39941") & wave == 1) %>%
select(SID, wave, temp_plot, contemp_plot) %>%
arrange(wave, desc(SID)) %>%
mutate(map(contemp_plot, plot))
The code below creates a gif of each individual’s contemporaneous network in the first wave using the R package animation
.
subjects <- (gVAR_fit %>% group_by(SID) %>% summarize(n = n()) %>% filter(n == 2))$SID
draw.a.plot <- function(sid){
par(mfrow = c(1,1))
gVAR_fit %>%
filter(SID == sid & wave == 1) %>%
select(SID, wave, temp_plot, contemp_plot) %>%
arrange(wave, desc(SID)) %>%
mutate(map(contemp_plot, plot))
}
#set up function to loop through the draw.a.plot() function
loop.animate <- function() {
lapply(subjects, function(i) {
draw.a.plot(i)
})
}
# create GIF of all edges
# saveGIF(loop.animate(), interval = .5, movie.name="PCC_plots_single.gif", ani.width = 500, ani.height = 500,
# imgdir = data_path)
The code below creates a gif of the profile of edge weights of the idiographic contemporaneous networks in the first wave for each participant using the R packages ggplot2
and animation
.
draw.a.plot <- function(sid){
sub <- subjects[seq(1,which(subjects == sid))]
df <- gVAR_fit %>% unnest(contemp) %>%
filter(SID %in% sub & wave == 1)
df %>%
ggplot(aes(x = var, y = weight, color = factor(SID))) +
# scale_color_manual(values = c("royalblue", "red")) +
# geom_point(aes(shape = wave)) +
geom_line(aes(group = SID), size = .5) +
# geom_label(aes(x = (nrow(.)/2)-4, y = 0, label = sprintf("r = %.2f", unique(r))),
# fill= "royalblue", color = "white", size = 6) +
labs(x = "") +
coord_flip() +
theme_classic() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none",
legend.text = element_text(face = "bold"),
legend.title = element_text(face = "bold"),
axis.text.x = element_text(face = "bold", size = rel(1.2)),
axis.title.x = element_text(face = "bold", size = rel(1.2)),
plot.title = element_text(face = "bold", size = rel(1.5), hjust = .5),
plot.subtitle = element_text(face = "bold", size = rel(1.2), hjust = .5))
}
loop.animate <- function() {
lapply(subjects, function(i) {
print(draw.a.plot(i))
})
}
# create GIF of all edges
# saveGIF(loop.animate(), interval = .5, movie.name="PCC_plots_ew_structure.gif", ani.width = 250, ani.height = 500,
# imgdir = data_path)
Next, is the question of structure. First, we’ll look at one network with a bunch of processes thrown in. Then we’ll estimate 4 new networks for that participant:
1. Personality only (9 items from the BFI)
2. Personality + Triggering Situation
3. Personality + Expectancy
4. Personality + States / State Expressions
Then, we will extract those results and plot both the network plots and a profile of the partial directed temporal and contemporaneous networks.
pdf("~/Box/network/presentations/APS 2018/plots/process_plot.pdf", width = 4, height = 4)
par(mfrow = c(1,1))
gVAR_fit %>%
filter(SID %in% c("10506") & wave == 1) %>%
select(SID, wave, temp_plot, contemp_plot) %>%
arrange(wave, desc(SID)) %>%
mutate(map(contemp_plot, plot))
dev.off()
## quartz_off_screen
## 2
par(mfrow = c(1,1))
gVAR_fit %>%
filter(SID %in% c("10506") & wave == 1) %>%
select(SID, wave, temp_plot, contemp_plot) %>%
arrange(wave, desc(SID)) %>%
mutate(map(contemp_plot, plot))
df <- (gVAR_fit %>% filter(SID =="19497" & wave == 1))$data[[1]]
gamma <- 0
lambda <- seq(.025, .25, .025)
x <- df %>% select(A_rude:N_worried, around_others)
fit_t <- graphicalVAR(
x
, gamma = gamma
, maxit.in = 1000
, maxit.out = 1000
, lambda_beta = lambda
, lambda_kappa = lambda
, verbose = F
, scale = F
, centerWithin = F
)
x <- df %>% select(A_rude:N_worried, aca_motiv)
fit_e <- graphicalVAR(
x
, gamma = gamma
, maxit.in = 1000
, maxit.out = 1000
, lambda_beta = lambda
, lambda_kappa = lambda
, verbose = F
, scale = F
, centerWithin = F
)
x <- df %>% select(A_rude:N_worried, authentic)
fit_s <- graphicalVAR(
x
, gamma = gamma
, maxit.in = 1000
, maxit.out = 1000
, lambda_beta = lambda
, lambda_kappa = lambda
, verbose = F
, scale = F
, centerWithin = F
)
x <- df %>% select(A_rude:N_worried)
fit <- graphicalVAR(
x
, gamma = gamma
, maxit.in = 1000
, maxit.out = 1000
, lambda_beta = lambda
, lambda_kappa = lambda
, verbose = F
, scale = F
, centerWithin = F
)
temp <- temp_fun(fit, 19497)
temp_t <- temp_fun(fit_t, 19497)
temp_e <- temp_fun(fit_e, 19497)
temp_s <- temp_fun(fit_s, 19497)
# network plots
pdf("~/Box/network/presentations/APS 2018/plots/processes_plot.pdf", width = 8, height = 8)
par(mfrow = c(2,2))
plot(idio_plot_fun(fit, "19497", "1", "Temporal"))
plot(idio_plot_fun(fit_t, "19497", "1", "Temporal"))
# title("Triggering Situation")
plot(idio_plot_fun(fit_e, "19497", "1", "Temporal"))
# title("Expectancies")
plot(idio_plot_fun(fit_s, "19497", "1", "Temporal"))
# title("States / State Expression")
dev.off()
plot(idio_plot_fun(fit, "19497", "1", "Temporal"))
plot(idio_plot_fun(fit_t, "19497", "1", "Temporal"))
plot(idio_plot_fun(fit_e, "19497", "1", "Temporal"))
plot(idio_plot_fun(fit_s, "19497", "1", "Temporal"))
vars <- unique(temp$from)
# profile correlations between personality variables across the networks
(temp %>% mutate(model = "Personality") %>%
full_join(temp_t %>% mutate(model = "Triggering Situations")) %>%
full_join(temp_e %>% mutate(model = "Expectancies")) %>%
full_join(temp_s %>% mutate(model = "States / State Expressions")) %>%
filter(from %in% vars & to %in% vars) %>%
spread(key = model, value = weight) %>%
select(Personality, everything(),-from, -type, -to) %>%
as.matrix() %>% cor(., use = "pairwise"))[,1]
# join the data together for plotting
df <- temp %>% mutate(model = "Personality") %>%
full_join(temp_t %>% mutate(model = "Triggering Situations")) %>%
full_join(temp_e %>% mutate(model = "Expectancies")) %>%
full_join(temp_s %>% mutate(model = "States / State Expressions")) %>%
filter(from %in% vars & to %in% vars) %>%
mutate_at(vars(from, to), funs(str_replace(., "_", " "))) %>%
mutate(model = factor(model, levels = c("Personality", "Triggering Situations", "Expectancies", "States / State Expressions")))
# unite(var, from, to, sep = " -> ") %>%
# create a plot to iteratively add a subset of the models to a profile plot
model_fun <- function(situations){
df %>% filter(model %in% situations) %>%
ggplot(aes(x = from, y = weight, color = model, group = model)) +
geom_line() +
facet_grid(~to) +
labs(x = NULL, color = NULL)+
# coord_flip() +
theme_classic() +
theme(legend.position = c(.45, .2),
axis.text.x = element_text(face = "bold", size = rel(.65), angle = 45, hjust = 1),
axis.text.y = element_text(face = "bold", size = rel(1.2)),
legend.text = element_text(face = "bold", size = rel(.85)),
legend.background = element_rect(fill = NULL, color = NULL),
strip.text = element_text(face = "bold", color = "white"),
strip.background = element_rect(fill = "#6f30a0", color = NULL))
file <- str_remove(situations[length(situations)], "/")
ggsave(sprintf("%s/plots/processes_profile_%s.png", data_path, file), width = 9, height = 4)
}
# profile plots
model_fun("Personality")
model_fun(c("Personality", "Triggering Situations"))
model_fun(c("Personality", "Triggering Situations", "Expectancies"))
model_fun(c("Personality", "Triggering Situations", "Expectancies", "States / State Expressions"))
First, I’ll create a plot that shades the region between the edge weight profiles to highlight cross-wave consistency and change. This is an imperfect solution taken from Stack Overflow, so I’m only going to use a subset of the edges.
# set the data up
df <- gVAR_fit %>% unnest(contemp) %>% select(-count) %>%
filter(SID %in% 10506) %>%
spread(key = wave, value = weight) %>%
mutate(change = `1` - `2`) %>%
filter(change != 0) %>%
gather(key = wave, value = weight, `1`, `2`) %>%
filter(var %in% unique(var)[1:20]) %>%
group_by(SID, var) %>%
mutate(lower = min(weight, na.rm = T), upper = max(weight, na.rm = T)) %>%
ungroup()
v <- tibble(x = 1:20, y = unique(df$var))
# change the names for ordering and use in the function below
df <- df %>% mutate(v = as.numeric(plyr::mapvalues(var, v$y, v$x)))
# solution taken from stack overflow
mwave <- function(x){
x <- x[order(x$v), ]
y <- x[-c(1,2, nrow(x) -1, nrow(x)), ]
x <- rbind(x,y)
x <- x[order(x$v), ]
x$group <- rep(letters[1:(nrow(x)/4)], each = 4)
return(x)
}
df2 <- plyr::ddply(df, .(SID), mwave)
mgroup <- function(x){
x <- x[order(x$weight), ]
left <- x[x$v == min(x$v), ]
right <- x[x$v == max(x$v), ]
if(all(left$wave == right$wave)){
left <- left[order(left$weight, decreasing = T), ]
right <- right[order(right$weight, decreasing = F), ]
return(rbind(left, right))
} else{
return(x[order(x$v), ])
}
}
df2 <- ddply(df2, .(group), mgroup)
# plot the differences across wave for the example subjects
df %>%
ggplot(aes(x = v, y = weight, group = wave)) +
scale_x_continuous(breaks = seq(1,20,1), labels = v$y) +
geom_line(aes(color = factor(wave))) +
# geom_point(aes(color = factor(wave))) +
geom_polygon(data = df2, aes(y = weight, group = group), alpha = .3, fill = "springgreen3") +
labs(x = NULL, y = "Edge Weight", color = "Wave") +
theme_classic()+
theme(axis.text.x = element_text(face = "bold", angle = 45, hjust = 1, size = rel(.75)),
legend.position = c(.8,.8),
axis.text.y = element_text(face = "bold", size = rel(1.2)),
axis.title = element_text(face = "bold", size = rel(1.2)),
legend.text = element_text(face = "bold", size = rel(1)),
legend.title = element_text(face = "bold", size = rel(1)))
ggsave(sprintf("%s/plots/devel_plot.png", data_path), width = 6, height = 3)
Now, I’ll create a gif of the network plots for each subject with two waves to highlight differences in network structure across waves.
subs <- (gVAR_fit %>% group_by(SID) %>% summarize(n = n()) %>% filter(n == 2))$SID
draw.a.plot <- function(sid){
par(mfrow = c(2,1))
gVAR_fit %>%
filter(SID == sid) %>%
select(SID, wave, temp_plot, contemp_plot) %>%
arrange(wave, desc(SID)) %>%
mutate(map(contemp_plot, plot))
}
#set up function to loop through the draw.a.plot() function
loop.animate <- function() {
lapply(subs, function(i) {
draw.a.plot(i)
})
}
# create GIF of all edges
# saveGIF(loop.animate(), interval = .5, movie.name="PCC_plots.gif", ani.width = 500, ani.height = 1000,
# imgdir = data_path)
And another gif showing the profile across waves, as well as the ipsative consistency (profile correlation) across waves.
draw.a.plot <- function(sid){
df <- gVAR_fit %>% unnest(contemp) %>%
filter(SID == sid) %>%
mutate(r = cor(weight[wave == 1], weight[wave == 2], use = "pairwise"))
df %>%
ggplot(aes(x = var, y = weight)) +
scale_color_manual(values = c("royalblue", "red")) +
# geom_point(aes(shape = wave)) +
geom_line(aes(group = wave, color = wave)) +
# geom_label(aes(x = (nrow(.)/2)-4, y = 0, label = sprintf("r = %.2f", unique(r))),
# fill= "royalblue", color = "white", size = 6) +
labs(x = "", title = sprintf("Subject %s", sid), subtitle = sprintf("r = %.2f", unique(df$r))) +
coord_flip() +
theme_classic() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "bottom",
legend.text = element_text(face = "bold"),
legend.title = element_text(face = "bold"),
axis.text.x = element_text(face = "bold", size = rel(1.2)),
axis.title.x = element_text(face = "bold", size = rel(1.2)),
plot.title = element_text(face = "bold", size = rel(1.5), hjust = .5),
plot.subtitle = element_text(face = "bold", size = rel(1.2), hjust = .5))
}
loop.animate <- function() {
lapply(subjects, function(i) {
print(draw.a.plot(i))
})
}
# create GIF of all edges
# saveGIF(loop.animate(), interval = .5, movie.name="PCC_plots_ew.gif", ani.width = 250, ani.height = 500,
# imgdir = data_path)
Given that each person has an estimate of ipsative consistency, we look at individual differences in ipsative consistency by looking at a histogram of the the profile correlations.
gVAR_fit %>% unnest(contemp) %>%
full_join(gVAR_fit %>% unnest(temp) %>% rename(Var1 = from, Var2 = to)) %>%
select(SID, wave, Var1, Var2, type, weight) %>%
spread(key = wave, value = weight) %>%
group_by(type, SID) %>%
summarize(r = cor(`1`, `2`, use = "pairwise")) %>%
filter(!is.na(r)) %>%
ggplot(aes(x = r, fill = type)) +
scale_fill_manual(values = c("royalblue", "purple")) +
xlim(-1,1)+
geom_histogram(aes(y =..density..), color = "black", fill = "gray") +
geom_density(bw = .1, alpha = .3) +
geom_vline(aes(xintercept = 0), linetype = "dashed", size = .8) +
facet_grid(.~type) +
theme_classic() +
theme(legend.position = "none",
axis.text = element_text(face = "bold", size = rel(1.2)),
axis.title = element_text(face = "bold", size = rel(1.2)),
strip.text = element_text(face = "bold", size = rel(1.2), color = "white"),
strip.background = element_rect(fill = "#6f30a0"))
ggsave(file = sprintf("%s/plots/consistency_histogram.png", data_path), width = 6, height = 3)
tibble(
Hour = 1:10,
z = c(0,1,4,7,1,-2,-4,0,1,1)
) %>%
mutate(Hour = Hour*2/5) %>%
ggplot(aes(x = Hour, y = z)) +
geom_hline(aes(yintercept = 0), linetype = "dashed") +
geom_smooth(se = F, color = "blue", span = 1.5) +
theme_classic() +
theme(axis.text = element_text(face = "bold", size = rel(1.2)),
axis.title = element_text(face = "bold", size = rel(1.2)))
ggsave(sprintf("%s/plots/displacement.png", data_path), width = 6, height = 3)
To highlight how indivdiuals may manifest the same traits differently or the same manifestations may be indicators for different traits, I’m going to run some simple EFA’s to extract 2 factor models for each subject.
fa_fun <- function(df){df <- df %>% select(A_rude:connected); cor(df, use = "pairwise")}
gVAR_fit <- gVAR_fit %>%
mutate(cor = map(data, possibly(fa_fun, NA_real_)),
fa = map(cor, possibly(~fa(., nfactors = 2, rotate = "varimax"), NA_real_))#,
# vss = map(data, possibly(~vss(., n = 8, rotate = "varimax", plot = F, n.obs = nrow(.)), NA_real_))
)
Now, we’ll pull out the loadings and assign different indicators to different factors. Again, this is largely for demonstrative purposes.
eigen_fun <- function(x){
x <- sum(x$values > 1, na.rm = T)
}
Vaccounted_fun <- function(fa, nfactor){
y <- print(fa)$Vaccounted[3,]
z <- y[nfactor]
return(z)
}
loadings_fun <- function(fa){
y <- loadings(fa)[,] %>% data.frame %>% select(one_of(paste("MR", 1:2, sep = ""))) %>%
mutate(var = rownames(.)) %>%
gather(factor, weight, MR1, MR2) %>%
mutate(Retain = ifelse(abs(weight) > .4, "Keep", "Toss"))
}
sink("/dev/null")
gVAR_fit <- gVAR_fit %>%
mutate(eigen_factor = map_dbl(fa, possibly(eigen_fun, NA_real_)),
Vacc_first = map_dbl(fa, possibly(~print(.)$Vaccounted[3,1], NA_real_)),
Vacc_eigen = map2_dbl(fa, eigen_factor, possibly(Vaccounted_fun, NA_real_)), loadings = map(fa, possibly(loadings_fun, NA_real_)))
sink()
# just going to keep the first factor and loadings that meet the arbitrary minimum
keep_fun <- function(load){
(load %>% filter(factor == "MR1" & Retain == "Keep"))$var
}
gVAR_fit <- gVAR_fit %>%
mutate(keep = map(loadings, possibly(keep_fun, NA_real_)))
# going to plot two people to show how the "same" manifestations might not
# look the same for different people
gVAR_fit %>% filter(!is.na(loadings)) %>% unnest(loadings) %>%
filter(wave == 1) %>%
mutate(top = dense_rank(Vacc_first),
bottom = dense_rank(desc(Vacc_first)),
SID = as.character(SID)) %>%
filter(top == 1 | bottom == 1) %>%
filter(factor == "MR1") %>%
filter(grepl("A_", var) | grepl("E_", var) | grepl("C_", var) | grepl("N_", var)) %>%
ggplot(aes(x = var, y = weight, group = SID)) +
# scale_x_continuous(limits = c(.5,6.5), breaks = seq(1,6,1)) +
scale_color_manual(values = c("blue", "gray"))+
geom_line(aes(linetype = SID)) +
geom_point(aes(shape = SID, color = Retain), size = 3)+
geom_hline(aes(yintercept = 0)) +
labs(x = "Indicator", y = "Factor Loading")+
facet_grid(~factor) +
theme_classic() +
theme(legend.position = "bottom",
axis.text = element_text(face = "bold", size = rel(1.1)),
axis.title = element_text(face = "bold", size = rel(1.2)),
legend.title = element_text(face = "bold", size = rel(1.2)),
legend.text = element_text(face = "bold", size = rel(1.1)),
strip.background = element_rect(fill = "blue", color = NULL),
strip.text = element_text(face = "bold", size = rel(1.2), color = "white"),
axis.text.x = element_text(angle=45, hjust = 1))
ggsave(file = sprintf("%s/plots/p_filtering.png", data_path), width = 6, height = 4)
# let's plot these for people who have a lot of items that load highly on
# the first factor or few items that load highly on it
gVAR_fit %>% filter(!is.na(loadings)) %>% unnest(loadings) %>%
filter(wave == 1) %>%
mutate(top = dense_rank(Vacc_first),
bottom = dense_rank(desc(Vacc_first)),
SID = as.character(SID)) %>%
filter(top %in% 1:6 | bottom %in% 1:6) %>%
filter(factor == "MR1") %>%
filter(grepl("A_", var) | grepl("E_", var) | grepl("C_", var) | grepl("N_", var)) %>% # plot
ggplot(aes(x = var, y = weight, group = SID)) +
# scale_x_continuous(limits = c(.5,6.5), breaks = seq(1,6,1)) +
scale_color_manual(values = c("blue", "gray"))+
geom_line() +
geom_point(aes(color = Retain), size = 3) +
geom_hline(aes(yintercept = 0)) +
labs(x = "Indicator", y = "Factor Loading")+
facet_wrap(~SID, nrow = 3) +
theme_classic() +
theme(legend.position = "bottom",
axis.text = element_text(face = "bold", size = rel(1.1)),
axis.title = element_text(face = "bold", size = rel(1.2)),
legend.title = element_text(face = "bold", size = rel(1.2)),
legend.text = element_text(face = "bold", size = rel(1.1)),
strip.background = element_rect(fill = "blue", color = NULL),
strip.text = element_text(face = "bold", size = rel(1.2), color = "white"),
axis.text.x = element_text(angle=45, hjust = 1, size = rel(.7)))
ggsave(file = sprintf("%s/plots/p_filtering_agg.png", data_path), width = 11, height = 7)
Last, the question of aggregation. How do we take a bunch of individual models and link them back to the group or population? I’m going to highlight centrality and density, so I’ll keep two networks with really high density and two with really low density.
subs <- (gVAR_fit %>%
unnest(contemp) %>%
group_by(SID, wave) %>%
summarize(density = sum(weight!=0)/n()) %>%
ungroup() %>%
filter(density != 0) %>%
group_by(wave) %>%
mutate(rank = rank(density, ties.method = "first"),
rank2 = rank(desc(density), ties.method = "first")) %>%
filter((rank %in% 1:2 | rank2 %in% 1:2) & wave == 1) %>%
arrange(rank))$SID
pdf(sprintf("%s/plots/agg_nets.pdf", data_path), width = 4, height = 4)
par(mfrow = c(2,2))
gVAR_fit %>%
filter(wave == 1 & SID %in% subs) %>%
mutate(SID = factor(SID, levels = subs)) %>%
arrange(SID) %>%
select(SID, wave, temp_plot, contemp_plot) %>%
arrange(wave, desc(SID)) %>%
mutate(map(contemp_plot, plot))
dev.off()
## quartz_off_screen
## 2
par(mfrow = c(2,2))
gVAR_fit %>%
filter(wave == 1 & SID %in% subs) %>%
mutate(SID = factor(SID, levels = subs)) %>%
arrange(SID) %>%
select(SID, wave, temp_plot, contemp_plot) %>%
arrange(wave, desc(SID)) %>%
mutate(map(contemp_plot, plot))