Workspace

Packages

First, we’ll load the R packages we need to run the analyses.

library(lavaan)
library(psych)
library(knitr)
library(kableExtra)
library(qgraph)
library(graphicalVAR)
library(mlVAR)
library(animation)
library(stringr)
library(plyr)
library(tidyverse)

Now, we’ll set the data path, which will be both where we load the data from and where we save results to. Then, we’ll load the codebook that will index the variables we’ll use.

data_path <- "~/Box/network/presentations/APS 2018"

esm_codebook <- sprintf("%s/Codebook.csv", data_path) %>%
  read.csv(., stringsAsFactors = F) %>% 
  tbl_df

The chunk below won’t run but is included for transparency. This chunk will read in the raw data files with identifying information and replace identifying information (Subject IDs) with random IDs and remove variables not used in this analysis. Then, we’ll save those data to .csv that will be open access.

subs <- sprintf("%s/subs.csv", data_path) %>% read.csv %>% tbl_df

wave1_all <- sprintf("%s/data/esm_w1_RENAMED.csv",     data_path) %>% 
  read.csv %>% 
  tbl_df %>%
  select(one_of(paste(esm_codebook$old_name, "w1", sep = "."))) %>%
  mutate(esm.IDnum.w1 = mapvalues(esm.IDnum.w1, subs$old, subs$new)) 
wave4_all <- sprintf("%s/data/esm_w4_RENAMED_all.csv", data_path) %>% 
  read.csv %>% 
  tbl_df %>%
  select(one_of(paste(esm_codebook$old_name, "w4", sep = "."))) %>%
  mutate(esm.IDnum.w4 = mapvalues(esm.IDnum.w4, subs$old, subs$new)) 
wave7_all <- sprintf("%s/data/esm_w7_RENAMED_all.csv", data_path) %>% 
  read.csv %>% 
  tbl_df %>%
  select(one_of(paste(esm_codebook$old_name, "w7", sep = "."))) %>%
  mutate(esm.IDnum.w7 = mapvalues(esm.IDnum.w7, subs$old, subs$new)) 

write.csv(wave1_all, sprintf("%s/data/esm_w1_redacted.csv", data_path), row.names = F)
write.csv(wave4_all, sprintf("%s/data/esm_w4_redacted.csv", data_path), row.names = F)
write.csv(wave7_all, sprintf("%s/data/esm_w7_redacted.csv", data_path), row.names = F)

Now, we’ll read in the redacted data and clean them. Basically, we want to read in the reduced data files, rename the variables to more informative names, and do some basic cleaning.

wave1_all <- sprintf("%s/data/esm_w1_redacted.csv", data_path) %>% read.csv %>% tbl_df
wave4_all <- sprintf("%s/data/esm_w4_redacted.csv", data_path) %>% read.csv %>% tbl_df
wave7_all <- sprintf("%s/data/esm_w7_redacted.csv", data_path) %>% read.csv %>% tbl_df

old.names <- esm_codebook$old_name
new.names <- esm_codebook$new_name

#Getting necessary columns
#Keeping subject ID and all esm.BFI items
w1 <- wave1_all %>%
  select(one_of(paste(old.names, "w1", sep = "."))) %>%
  setNames(new.names) %>% # change column names
  mutate(Wave = "S1") 
w4 <- wave4_all %>%
  select(one_of(paste(old.names, "w4", sep = "."))) %>%
  setNames(new.names) %>% # change column names
  mutate(Wave = "S4")
w7 <- wave7_all %>%
  select(one_of(paste(old.names, "w7", sep = "."))) %>%
  setNames(new.names) %>% # change column names
  mutate(Wave = "S7")

# short column names (for plots)
varnames2 <- c("A\nrude", "E\nquiet", "C\nlazy", "N\nrelaxed", "N\ndepressed", "E\noutgoing", "A\nkind", 
               "C\nreliable", "N\nworried", "positive\nemotion", "negative\nemotion", "authentic", "self\nesteem", 
               "happy", "lonely", "academic\nmotiv", "around\nothers", "connected")

# create wave variable before combining data sets.
w4$Wave <- "4"
w7$Wave <- "7"
# merge wave 4 and 7 data sets
w2 <- w4 %>% full_join(w7)

# retain cases where all personality data are retained
w1_com <- w1[complete.cases(w1[,c(7:11, 13:23)]),]
w2_com <- w2[complete.cases(w2[,c(7:11, 13:23)]),]

for (i in unique(w1_com$SID)){
  mean_A_rude <- mean(w1_com$A_rude[w1_com$SID == i], na.rm = T)
  w1_com$A_rude[is.na(w1_com$A_rude) & w1_com$SID == i] <- mean_A_rude
  mean_A_kind <- mean(w1_com$A_kind[w1_com$SID == i], na.rm = T)
  w1_com$A_kind[is.na(w1_com$A_kind) & w1_com$SID == i] <- mean_A_kind
}

for (i in unique(w2_com$SID)){
  mean_A_rude <- mean(w2_com$A_rude[w2_com$SID == i], na.rm = T)
  w2_com$A_rude[is.na(w2_com$A_rude) & w2_com$SID == i] <- mean_A_rude
  mean_A_kind <- mean(w2_com$A_kind[w2_com$SID == i], na.rm = T)
  w2_com$A_kind[is.na(w2_com$A_kind) & w2_com$SID == i] <- mean_A_kind
}

# for waves 4 and 7, create a variable that combines wave and day of study
w2_com$waveDay <- paste(w2_com$Wave, w2_com$day, sep = ".")

# Make numeric subject IDs for each df because mlVAR won't run for factors #
w1_com$SID2 <- as.numeric(as.character(w1_com$SID))
w2_com$SID2 <- as.numeric(as.character(w2_com$SID))

# a few variable have 0 SD. This will create a problem with estimation, so we'll add
# a tiny amount of jitter. That variable won't have any edges due to regularization 
# but we won't have to toss those subjects 
jitter_fun <- function(df){
  sd_fun <- function(x){if(sd(x, na.rm = T) == 0) jitter(x, amount = runif(1,0,.05)) else x}
  df2 <- data.frame(apply(df, 2, sd_fun))
  colnames(df2) <- colnames(df2)
  return(df2)
}

# wave 1
# Need to keep subjects who have at least 10 assessments
w1_com <- tbl_df(w1_com) %>%
  group_by(SID) %>%
  arrange(day, hourBlock) %>%
  mutate(beepvar3 = seq(1, n(), 1)) %>%
  ungroup() %>%
  select(SID, SID2, beepvar3, A_rude:connected) %>%
  group_by(SID) %>%
  mutate_if(is.integer, as.numeric) %>%
  mutate(count = n(), wave = "1") %>%
  filter(count > 10) 

# adding the jitter 
w1_test <- w1_com %>%
  group_by(SID, SID2, count, wave) %>% 
  nest() %>%
  mutate(data2 = map(data, jitter_fun)) %>%
  unnest(data2, .drop = T)

# wave 2
# Need to keep subjects who have at least 10 assessments
w2_com <- tbl_df(w2_com) %>%
  group_by(SID) %>%
  arrange(waveDay, hourBlock) %>%
  mutate(beepvar3 = seq(1, n(), 1)) %>%
  ungroup() %>%
  select(SID, SID2, beepvar3, A_rude:connected) %>%
  group_by(SID) %>%
  mutate_if(is.integer, as.numeric) %>%
  mutate(count = n(), wave = "2") %>%
  filter(count > 10) 

# adding the jitter 
w2_test <- w2_com %>%
  group_by(SID, SID2, count, wave) %>% 
  nest() %>%
  mutate(data2 = map(data, jitter_fun)) %>%
  unnest(data2, .drop = T)

We need to detrend the data because they violate the non-stationarity assumption. We’re just going to use simple OLS although there are alternative and arguably better alternatives.

detrend <- function(df)

dat <- w1_test %>%
  full_join(w2_test) %>%
  gather(key = var, value = value, A_rude:connected) %>%
  group_by(SID, wave, count, var) %>%
  nest() %>%
  mutate(m = map(data, ~lm(value ~ beepvar3, data = .)),
         p = map(m, broom::augment)) %>%
  unnest(p) %>%
  select(SID:var, .resid, beepvar3) %>%
  spread(key = var, value = .resid)

Now, we’ll fit the idiographic graphical VAR models using the graphicalVAR package. We’re going to the eBIC hyperparameter gamma to 0 for discovery and allow the regularizing parameter lambda to vary between .025 and .25. Some of these models will throw errors because of the small amount of data. We’ll ignore them for now since the goal here is largely demonstrative.

gVAR_fun <- function(x, SID, wave){
  print(paste(wave, SID))
  n <- dim(x)[1]
  gamma <- 0
  lambda <- seq(.025, .25, .025)
  x <- x %>% select(A_rude:connected, -beepvar3)
  fit <-
    graphicalVAR(x, gamma = gamma, maxit.in = 1000, maxit.out = 1000,
                      lambda_beta = lambda, lambda_kappa = lambda, 
                      verbose = T, scale = F, centerWithin = F)
  return(fit)
}



gVAR_fit <- dat %>%
  filter(!(SID %in% c("97898", "13518", "64875", "74435", "83106", "97159", "64869", "37024", "12715"))) %>%
  group_by(SID, wave, count) %>%
  nest() %>%
  mutate(gVAR_fit = pmap(list(data, SID, wave), gVAR_fun))
save(gVAR_fit, file = sprintf("%s/graphicalVAR_allSubs.RData", data_path))

Now, that the models are run, we want to extract the partial directed and contemporaneous correlations, which can be used to construct the idiographic networks. All of these will be saved into the same data nested data frame using tools from the R packages purrr and tidyr.

load(sprintf("%s/graphicalVAR_allSubs.RData", data_path))
############################################
############## idiographic #################
############################################

temp_fun <- function(fit, SID){
  PDC <- fit$PDC
  from <- row.names(PDC)
  PDC.long <- tbl_df(PDC) %>%
    mutate(from = from, type = "Temporal") %>%
    gather(key = to, value = weight, -from, -type)
}

contemp_mat_fun <- function(fit){fit$PCC}

contemp_long_fun <- function(fit){
  PCC <- fit$PCC
  PCC <- PCC[,order(colnames(PCC))]
  PCC <- PCC[order(rownames(PCC)),]
  PCC[lower.tri(PCC, diag = T)] <- NA
  vars <- rownames(PCC)
  PCC.long <- tbl_df(PCC) %>%
    mutate(Var1 = vars,
           type = "Contemporaneous") %>%
    gather(key = Var2, value = weight, -Var1, -type) %>%
    filter(!is.na(weight)) %>%
    unite(var, Var1, Var2, sep = ".", remove = F)
}

gVAR_fit <- gVAR_fit %>%
  filter(!is.na(gVAR_fit)) %>%
  mutate(temp = map2(gVAR_fit, SID, temp_fun),
         contemp_mat = map(gVAR_fit, contemp_mat_fun),
         contemp = map(gVAR_fit, contemp_long_fun))

edge_colors <- RColorBrewer::brewer.pal(8, "Purples")[c(4,6,8)]

idio_plot_fun <- function(data, subject, wave, type){
  if(type == "Temporal"){data_mod <- data$PDC}
  else{data_mod <- data$PCC}
  n <- nrow(data$PDC)
  if(n == 9){b5_groups <- list(A = c(1,7), E = c(2, 6), C = c(3,8), N = c(4,5,9))}
  else{b5_groups <- list(A = c(1,7), E = c(2, 6), C = c(3,8), N = c(4,5,9), other = 10:n)}
  subject <- ifelse(subject == "10506", "1",
             ifelse(subject == "39941", "2", subject))
  plot <- 
    qgraph(data_mod, layout = "spring", loop = .7, node.width = 1.85, edge.width = 1, esize = 7,
           title = sprintf("%s Wave %s for S%s", type, wave, subject), label.font = 2, repulsion = .8,
                   label.fill.vertical = 1, label.fill.horizontal = 1, edge.color = "black",
                   groups = b5_groups, color = rev(c("#FFFFB3", t(RColorBrewer::brewer.pal(9, "Purples")[seq(3,9,2)]))),
                   legend = F, DoNotPlot = TRUE, mar = c(4,4,4,4))
  #change lines to dashed
  plot$graphAttributes$Edges$lty[plot$Edgelist$weight < 0] <- 2
  #change line colors
  plot$graphAttributes$Edges$color <-
    ifelse(abs(plot$Edgelist$weight) <.1, edge_colors[1],
    ifelse(abs(plot$Edgelist$weight) <.2, edge_colors[2], edge_colors[3]))
  dark_colors <- c("#9E9AC8", "#807DBA", "#6A51A3", "#54278F", "#3F007D")
  plot$graphAttributes$Nodes$label.color[plot$graphAttributes$Nodes$color %in% dark_colors] <- "white"
  vars <- str_replace(colnames(data$PDC), "_", "\n")
  #change variable names
  plot$graphAttributes$Nodes$labels <- vars
  return(plot)
}

gVAR_fit <- gVAR_fit %>%
  mutate(temp_plot = pmap(list(gVAR_fit, SID, wave, "Temporal"),
                          possibly(idio_plot_fun, NA_real_)),
         contemp_plot = pmap(list(gVAR_fit, SID, wave, "Contemporaneous"),
                          possibly(idio_plot_fun, NA_real_)))

Promises

Structure

First up is structure. From an idiographic perspective, structure is the stable personal tendencies within a person, or the consistent temporal and contemporaneous behavioral patterns across assessments.

The first graph just shows two example participants from the first wave.

pdf("~/Box/network/presentations/APS 2018/plots/structure_plot.pdf", width = 8, height = 4)
par(mfrow = c(1,2))
gVAR_fit %>%
filter(SID %in% c("10506", "39941") & wave == 1) %>%
select(SID, wave, temp_plot, contemp_plot) %>%
arrange(wave, desc(SID)) %>%
mutate(map(contemp_plot, plot))
dev.off()

par(mfrow = c(1,2))
gVAR_fit %>%
filter(SID %in% c("10506", "39941") & wave == 1) %>%
select(SID, wave, temp_plot, contemp_plot) %>%
arrange(wave, desc(SID)) %>%
mutate(map(contemp_plot, plot))

Structure Difference gifs

The code below creates a gif of each individual’s contemporaneous network in the first wave using the R package animation.

subjects <- (gVAR_fit %>% group_by(SID) %>% summarize(n = n()) %>% filter(n == 2))$SID

draw.a.plot <- function(sid){
  par(mfrow = c(1,1))
  gVAR_fit %>%
  filter(SID == sid & wave == 1) %>%
  select(SID, wave, temp_plot, contemp_plot) %>%
  arrange(wave, desc(SID)) %>%
  mutate(map(contemp_plot, plot))
}

#set up function to loop through the draw.a.plot() function
loop.animate <- function() {
    lapply(subjects, function(i) {
        draw.a.plot(i)
    })
}

# create GIF of all edges
# saveGIF(loop.animate(), interval = .5, movie.name="PCC_plots_single.gif", ani.width = 500, ani.height = 500,
#         imgdir = data_path)

The code below creates a gif of the profile of edge weights of the idiographic contemporaneous networks in the first wave for each participant using the R packages ggplot2 and animation.

draw.a.plot <- function(sid){
  sub <- subjects[seq(1,which(subjects == sid))]
  df <- gVAR_fit %>% unnest(contemp) %>%
    filter(SID %in% sub & wave == 1) 
  df %>%
    ggplot(aes(x = var, y = weight, color = factor(SID))) +
      # scale_color_manual(values = c("royalblue", "red")) +
      # geom_point(aes(shape = wave)) +
      geom_line(aes(group = SID), size = .5) +
      # geom_label(aes(x = (nrow(.)/2)-4, y = 0, label = sprintf("r = %.2f", unique(r))),
                 # fill= "royalblue", color = "white", size = 6) +
      labs(x = "") +
      coord_flip() +
      theme_classic() +
      theme(axis.text.y = element_blank(),
            axis.ticks.y = element_blank(),
            legend.position = "none",
            legend.text = element_text(face = "bold"),
            legend.title = element_text(face = "bold"),
            axis.text.x = element_text(face = "bold", size = rel(1.2)),
            axis.title.x = element_text(face = "bold", size = rel(1.2)),
            plot.title = element_text(face = "bold", size = rel(1.5), hjust = .5),
            plot.subtitle = element_text(face = "bold", size = rel(1.2), hjust = .5))
}

loop.animate <- function() {
    lapply(subjects, function(i) {
        print(draw.a.plot(i))
    })
}

# create GIF of all edges
# saveGIF(loop.animate(), interval = .5, movie.name="PCC_plots_ew_structure.gif", ani.width = 250, ani.height = 500,
#         imgdir = data_path)

Processes

Next, is the question of structure. First, we’ll look at one network with a bunch of processes thrown in. Then we’ll estimate 4 new networks for that participant:
1. Personality only (9 items from the BFI)
2. Personality + Triggering Situation
3. Personality + Expectancy
4. Personality + States / State Expressions
Then, we will extract those results and plot both the network plots and a profile of the partial directed temporal and contemporaneous networks.

pdf("~/Box/network/presentations/APS 2018/plots/process_plot.pdf", width = 4, height = 4)
par(mfrow = c(1,1))
gVAR_fit %>%
filter(SID %in% c("10506") & wave == 1) %>%
select(SID, wave, temp_plot, contemp_plot) %>%
arrange(wave, desc(SID)) %>%
mutate(map(contemp_plot, plot))

dev.off()

## quartz_off_screen 
##                 2

par(mfrow = c(1,1))
gVAR_fit %>%
filter(SID %in% c("10506") & wave == 1) %>%
select(SID, wave, temp_plot, contemp_plot) %>%
arrange(wave, desc(SID)) %>%
mutate(map(contemp_plot, plot))

df <- (gVAR_fit %>% filter(SID =="19497" & wave == 1))$data[[1]]

gamma <- 0
lambda <- seq(.025, .25, .025)
x <- df %>% select(A_rude:N_worried, around_others)
fit_t <- graphicalVAR(
  x
  , gamma = gamma
  , maxit.in = 1000
  , maxit.out = 1000
  , lambda_beta = lambda
  , lambda_kappa = lambda
  , verbose = F
  , scale = F
  , centerWithin = F
  )

x <- df %>% select(A_rude:N_worried, aca_motiv)
fit_e <- graphicalVAR(
  x
  , gamma = gamma
  , maxit.in = 1000
  , maxit.out = 1000
  , lambda_beta = lambda
  , lambda_kappa = lambda
  , verbose = F
  , scale = F
  , centerWithin = F
  )

x <- df %>% select(A_rude:N_worried, authentic)
fit_s <- graphicalVAR(
  x
  , gamma = gamma
  , maxit.in = 1000
  , maxit.out = 1000
  , lambda_beta = lambda
  , lambda_kappa = lambda
  , verbose = F
  , scale = F
  , centerWithin = F
  )

x <- df %>% select(A_rude:N_worried)
fit <- graphicalVAR(
  x
  , gamma = gamma
  , maxit.in = 1000
  , maxit.out = 1000
  , lambda_beta = lambda
  , lambda_kappa = lambda
  , verbose = F
  , scale = F
  , centerWithin = F
  )

temp   <- temp_fun(fit,   19497)
temp_t <- temp_fun(fit_t, 19497)
temp_e <- temp_fun(fit_e, 19497)
temp_s <- temp_fun(fit_s, 19497)

# network plots
pdf("~/Box/network/presentations/APS 2018/plots/processes_plot.pdf", width = 8, height = 8)
par(mfrow = c(2,2))
plot(idio_plot_fun(fit, "19497", "1", "Temporal"))
plot(idio_plot_fun(fit_t, "19497", "1", "Temporal"))
# title("Triggering Situation")
plot(idio_plot_fun(fit_e, "19497", "1", "Temporal"))
# title("Expectancies")
plot(idio_plot_fun(fit_s, "19497", "1", "Temporal"))
# title("States / State Expression")
dev.off()

plot(idio_plot_fun(fit,   "19497", "1", "Temporal"))

plot(idio_plot_fun(fit_t, "19497", "1", "Temporal"))

plot(idio_plot_fun(fit_e, "19497", "1", "Temporal"))

plot(idio_plot_fun(fit_s, "19497", "1", "Temporal"))

vars <- unique(temp$from)

# profile correlations between personality variables across the networks
(temp %>% mutate(model = "Personality") %>%
  full_join(temp_t %>% mutate(model = "Triggering Situations")) %>%
  full_join(temp_e %>% mutate(model = "Expectancies")) %>%
  full_join(temp_s %>% mutate(model = "States / State Expressions")) %>%
  filter(from %in% vars & to %in% vars) %>% 
  spread(key = model, value = weight) %>%
  select(Personality, everything(),-from, -type, -to) %>% 
  as.matrix() %>% cor(., use = "pairwise"))[,1]

# join the data together for plotting 
df <- temp %>% mutate(model = "Personality") %>%
  full_join(temp_t %>% mutate(model = "Triggering Situations")) %>%
  full_join(temp_e %>% mutate(model = "Expectancies")) %>%
  full_join(temp_s %>% mutate(model = "States / State Expressions")) %>%
  filter(from %in% vars & to %in% vars) %>%
  mutate_at(vars(from, to), funs(str_replace(., "_", " "))) %>%
  mutate(model = factor(model, levels = c("Personality", "Triggering Situations", "Expectancies", "States / State Expressions")))
  # unite(var, from, to, sep = " -> ") %>%

# create a plot to iteratively add a subset of the models to a profile plot 
model_fun <- function(situations){
  df %>% filter(model %in% situations) %>%
    ggplot(aes(x = from, y = weight, color = model, group = model)) +
      geom_line() +
      facet_grid(~to) +
      labs(x = NULL, color = NULL)+
      # coord_flip() +
      theme_classic() +
      theme(legend.position = c(.45, .2),
            axis.text.x = element_text(face = "bold", size = rel(.65), angle = 45, hjust = 1),
            axis.text.y = element_text(face = "bold", size = rel(1.2)),
            legend.text = element_text(face = "bold", size = rel(.85)),
            legend.background = element_rect(fill = NULL, color = NULL),
            strip.text = element_text(face = "bold", color = "white"),
            strip.background = element_rect(fill = "#6f30a0", color = NULL))
  file <- str_remove(situations[length(situations)], "/")
  ggsave(sprintf("%s/plots/processes_profile_%s.png", data_path, file), width = 9, height = 4)
}

# profile plots
model_fun("Personality")
model_fun(c("Personality", "Triggering Situations"))
model_fun(c("Personality", "Triggering Situations", "Expectancies"))
model_fun(c("Personality", "Triggering Situations", "Expectancies", "States / State Expressions"))

Development

Difference Plots

First, I’ll create a plot that shades the region between the edge weight profiles to highlight cross-wave consistency and change. This is an imperfect solution taken from Stack Overflow, so I’m only going to use a subset of the edges.

# set the data up
df <- gVAR_fit %>% unnest(contemp) %>% select(-count) %>%
  filter(SID %in% 10506) %>%
  spread(key = wave, value = weight) %>%
  mutate(change = `1` - `2`) %>%
  filter(change != 0) %>%
  gather(key = wave, value = weight, `1`, `2`) %>%
  filter(var %in% unique(var)[1:20]) %>%
  group_by(SID, var) %>%
  mutate(lower = min(weight, na.rm = T), upper = max(weight, na.rm = T)) %>%
  ungroup()
v <- tibble(x = 1:20, y = unique(df$var))

# change the names for ordering and use in the function below 
df <- df %>% mutate(v = as.numeric(plyr::mapvalues(var, v$y, v$x)))

# solution taken from stack overflow
mwave <- function(x){
  x <- x[order(x$v), ]
  y <- x[-c(1,2, nrow(x) -1, nrow(x)), ]
  x <- rbind(x,y)
  x <- x[order(x$v), ]
  x$group <- rep(letters[1:(nrow(x)/4)], each = 4)
  return(x)
}

df2 <- plyr::ddply(df, .(SID), mwave)

mgroup <- function(x){
  x <- x[order(x$weight), ]
  left <- x[x$v == min(x$v), ]
  right <- x[x$v == max(x$v), ]
  if(all(left$wave == right$wave)){
    left <- left[order(left$weight, decreasing = T), ]
    right <- right[order(right$weight, decreasing = F), ]
    return(rbind(left, right))
  } else{
    return(x[order(x$v), ])
  }
}

df2 <- ddply(df2, .(group), mgroup)

# plot the differences across wave for the example subjects 
df %>%
  ggplot(aes(x = v, y = weight, group = wave)) +
  scale_x_continuous(breaks = seq(1,20,1), labels = v$y) +
  geom_line(aes(color = factor(wave))) +
  # geom_point(aes(color = factor(wave))) +
  geom_polygon(data = df2, aes(y = weight, group = group), alpha = .3, fill = "springgreen3") +
  labs(x = NULL, y = "Edge Weight", color = "Wave") +
  theme_classic()+
  theme(axis.text.x = element_text(face = "bold", angle = 45, hjust = 1, size = rel(.75)),
        legend.position = c(.8,.8),
        axis.text.y = element_text(face = "bold", size = rel(1.2)),
        axis.title = element_text(face = "bold", size = rel(1.2)),
        legend.text = element_text(face = "bold", size = rel(1)),
        legend.title = element_text(face = "bold", size = rel(1)))

ggsave(sprintf("%s/plots/devel_plot.png", data_path), width = 6, height = 3)

Cross-Wave gifs

Now, I’ll create a gif of the network plots for each subject with two waves to highlight differences in network structure across waves.

subs <- (gVAR_fit %>% group_by(SID) %>% summarize(n = n()) %>% filter(n == 2))$SID

draw.a.plot <- function(sid){
  par(mfrow = c(2,1))
  gVAR_fit %>%
  filter(SID == sid) %>%
  select(SID, wave, temp_plot, contemp_plot) %>%
  arrange(wave, desc(SID)) %>%
  mutate(map(contemp_plot, plot))
}

#set up function to loop through the draw.a.plot() function
loop.animate <- function() {
    lapply(subs, function(i) {
        draw.a.plot(i)
    })
}

# create GIF of all edges
# saveGIF(loop.animate(), interval = .5, movie.name="PCC_plots.gif", ani.width = 500, ani.height = 1000,
#         imgdir = data_path)

And another gif showing the profile across waves, as well as the ipsative consistency (profile correlation) across waves.

draw.a.plot <- function(sid){
  df <- gVAR_fit %>% unnest(contemp) %>%
    filter(SID == sid) %>%
    mutate(r = cor(weight[wave == 1], weight[wave == 2], use = "pairwise")) 
  df %>%
    ggplot(aes(x = var, y = weight)) +
      scale_color_manual(values = c("royalblue", "red")) +
      # geom_point(aes(shape = wave)) +
      geom_line(aes(group = wave, color = wave)) +
      # geom_label(aes(x = (nrow(.)/2)-4, y = 0, label = sprintf("r = %.2f", unique(r))),
                 # fill= "royalblue", color = "white", size = 6) +
      labs(x = "", title = sprintf("Subject %s", sid), subtitle = sprintf("r = %.2f", unique(df$r))) +
      coord_flip() +
      theme_classic() +
      theme(axis.text.y = element_blank(),
            axis.ticks.y = element_blank(),
            legend.position = "bottom",
            legend.text = element_text(face = "bold"),
            legend.title = element_text(face = "bold"),
            axis.text.x = element_text(face = "bold", size = rel(1.2)),
            axis.title.x = element_text(face = "bold", size = rel(1.2)),
            plot.title = element_text(face = "bold", size = rel(1.5), hjust = .5),
            plot.subtitle = element_text(face = "bold", size = rel(1.2), hjust = .5))
}

loop.animate <- function() {
    lapply(subjects, function(i) {
        print(draw.a.plot(i))
    })
}

# create GIF of all edges
# saveGIF(loop.animate(), interval = .5, movie.name="PCC_plots_ew.gif", ani.width = 250, ani.height = 500,
#         imgdir = data_path)

Given that each person has an estimate of ipsative consistency, we look at individual differences in ipsative consistency by looking at a histogram of the the profile correlations.

gVAR_fit %>% unnest(contemp) %>%
  full_join(gVAR_fit %>% unnest(temp) %>% rename(Var1 = from, Var2 = to)) %>%
  select(SID, wave, Var1, Var2, type, weight) %>%
  spread(key = wave, value = weight) %>% 
  group_by(type, SID) %>% 
  summarize(r = cor(`1`, `2`, use = "pairwise")) %>% 
  filter(!is.na(r)) %>%
    ggplot(aes(x = r, fill = type)) +
    scale_fill_manual(values = c("royalblue", "purple")) +
    xlim(-1,1)+
    geom_histogram(aes(y =..density..), color = "black", fill = "gray") +
    geom_density(bw = .1, alpha = .3) +
    geom_vline(aes(xintercept = 0), linetype = "dashed", size = .8) +
    facet_grid(.~type) +
    theme_classic() +
    theme(legend.position = "none",
          axis.text = element_text(face = "bold", size = rel(1.2)),
          axis.title = element_text(face = "bold", size = rel(1.2)),
          strip.text = element_text(face = "bold", size = rel(1.2), color = "white"),
          strip.background = element_rect(fill = "#6f30a0"))

ggsave(file = sprintf("%s/plots/consistency_histogram.png", data_path), width = 6, height = 3)

Challenges

The Time Interval Problem

tibble(
  Hour = 1:10,
  z = c(0,1,4,7,1,-2,-4,0,1,1)
) %>%
  mutate(Hour = Hour*2/5) %>%
  ggplot(aes(x = Hour, y = z)) +
  geom_hline(aes(yintercept = 0), linetype = "dashed") +
  geom_smooth(se = F, color = "blue", span = 1.5) +
  theme_classic() +
  theme(axis.text = element_text(face = "bold", size = rel(1.2)),
        axis.title = element_text(face = "bold", size = rel(1.2)))

ggsave(sprintf("%s/plots/displacement.png", data_path), width = 6, height = 3)

Psychometrics

Idiographic Factor Analyses

To highlight how indivdiuals may manifest the same traits differently or the same manifestations may be indicators for different traits, I’m going to run some simple EFA’s to extract 2 factor models for each subject.

fa_fun <- function(df){df <- df %>% select(A_rude:connected); cor(df, use = "pairwise")}
gVAR_fit <- gVAR_fit %>%
  mutate(cor = map(data, possibly(fa_fun, NA_real_)),
         fa = map(cor, possibly(~fa(., nfactors = 2, rotate = "varimax"), NA_real_))#,
         # vss = map(data, possibly(~vss(., n = 8, rotate = "varimax", plot = F, n.obs = nrow(.)), NA_real_))
         )

Now, we’ll pull out the loadings and assign different indicators to different factors. Again, this is largely for demonstrative purposes.

eigen_fun <- function(x){
  x <- sum(x$values > 1, na.rm = T)
}

Vaccounted_fun <- function(fa, nfactor){
  y <- print(fa)$Vaccounted[3,]
  z <- y[nfactor]
  return(z)
}

loadings_fun <- function(fa){
  y <- loadings(fa)[,] %>% data.frame %>% select(one_of(paste("MR", 1:2, sep = ""))) %>%
    mutate(var = rownames(.)) %>%
    gather(factor, weight, MR1, MR2) %>%
    mutate(Retain = ifelse(abs(weight) > .4, "Keep", "Toss"))
}

sink("/dev/null")
gVAR_fit <- gVAR_fit %>%
  mutate(eigen_factor = map_dbl(fa, possibly(eigen_fun, NA_real_)),
         Vacc_first = map_dbl(fa, possibly(~print(.)$Vaccounted[3,1], NA_real_)),
         Vacc_eigen = map2_dbl(fa, eigen_factor, possibly(Vaccounted_fun, NA_real_)),           loadings = map(fa, possibly(loadings_fun, NA_real_)))
sink()

# just going to keep the first factor and loadings that meet the arbitrary minimum
keep_fun <- function(load){
  (load %>% filter(factor == "MR1" & Retain == "Keep"))$var
}

gVAR_fit <- gVAR_fit %>%
  mutate(keep = map(loadings, possibly(keep_fun, NA_real_)))

# going to plot two people to show how the "same" manifestations might not 
# look the same for different people 
gVAR_fit %>% filter(!is.na(loadings)) %>% unnest(loadings) %>%
  filter(wave == 1) %>%
  mutate(top = dense_rank(Vacc_first),
         bottom = dense_rank(desc(Vacc_first)),
         SID = as.character(SID)) %>% 
  filter(top == 1 | bottom == 1) %>%
  filter(factor == "MR1") %>%
  filter(grepl("A_", var) | grepl("E_", var) | grepl("C_", var) | grepl("N_", var)) %>%
  ggplot(aes(x = var, y = weight, group = SID)) +
  # scale_x_continuous(limits = c(.5,6.5), breaks = seq(1,6,1)) +
  scale_color_manual(values = c("blue", "gray"))+
    geom_line(aes(linetype = SID)) +
  geom_point(aes(shape = SID, color = Retain), size = 3)+
  geom_hline(aes(yintercept = 0)) +
  labs(x = "Indicator", y = "Factor Loading")+
  facet_grid(~factor) +
  theme_classic() +
  theme(legend.position = "bottom",
        axis.text = element_text(face = "bold", size = rel(1.1)),
        axis.title = element_text(face = "bold", size = rel(1.2)),
        legend.title = element_text(face = "bold", size = rel(1.2)),
        legend.text = element_text(face = "bold", size = rel(1.1)),
        strip.background = element_rect(fill = "blue", color = NULL),
        strip.text = element_text(face = "bold", size = rel(1.2), color = "white"),
        axis.text.x = element_text(angle=45, hjust = 1))

ggsave(file = sprintf("%s/plots/p_filtering.png", data_path), width = 6, height = 4)  

# let's plot these for people who have a lot of items that load highly on
# the first factor or few items that load highly on it
gVAR_fit %>% filter(!is.na(loadings)) %>% unnest(loadings) %>%
  filter(wave == 1) %>%
  mutate(top = dense_rank(Vacc_first),
         bottom = dense_rank(desc(Vacc_first)),
         SID = as.character(SID)) %>% 
  filter(top %in% 1:6 | bottom %in% 1:6) %>%
  filter(factor == "MR1") %>%
  filter(grepl("A_", var) | grepl("E_", var) | grepl("C_", var) | grepl("N_", var)) %>% # plot
  ggplot(aes(x = var, y = weight, group = SID)) +
  # scale_x_continuous(limits = c(.5,6.5), breaks = seq(1,6,1)) +
    scale_color_manual(values = c("blue", "gray"))+
    geom_line() +
    geom_point(aes(color = Retain), size = 3) +
    geom_hline(aes(yintercept = 0)) +
    labs(x = "Indicator", y = "Factor Loading")+
    facet_wrap(~SID, nrow = 3) +
    theme_classic() +
    theme(legend.position = "bottom",
          axis.text = element_text(face = "bold", size = rel(1.1)),
          axis.title = element_text(face = "bold", size = rel(1.2)),
          legend.title = element_text(face = "bold", size = rel(1.2)),
          legend.text = element_text(face = "bold", size = rel(1.1)),
          strip.background = element_rect(fill = "blue", color = NULL),
          strip.text = element_text(face = "bold", size = rel(1.2), color = "white"),
          axis.text.x = element_text(angle=45, hjust = 1, size = rel(.7)))

ggsave(file = sprintf("%s/plots/p_filtering_agg.png", data_path), width = 11, height = 7)

Aggregation

Last, the question of aggregation. How do we take a bunch of individual models and link them back to the group or population? I’m going to highlight centrality and density, so I’ll keep two networks with really high density and two with really low density.

subs <- (gVAR_fit %>% 
  unnest(contemp) %>% 
  group_by(SID, wave) %>% 
  summarize(density = sum(weight!=0)/n()) %>% 
  ungroup() %>% 
  filter(density != 0) %>%
  group_by(wave) %>% 
  mutate(rank = rank(density, ties.method = "first"), 
         rank2 = rank(desc(density), ties.method = "first")) %>% 
  filter((rank %in% 1:2 | rank2 %in% 1:2) & wave == 1) %>% 
  arrange(rank))$SID

pdf(sprintf("%s/plots/agg_nets.pdf", data_path), width = 4, height = 4)
par(mfrow = c(2,2))
gVAR_fit %>%
  filter(wave == 1 & SID %in% subs) %>%
  mutate(SID = factor(SID, levels = subs)) %>%
  arrange(SID) %>%
  select(SID, wave, temp_plot, contemp_plot) %>%
  arrange(wave, desc(SID)) %>%
  mutate(map(contemp_plot, plot))

dev.off()

## quartz_off_screen 
##                 2

par(mfrow = c(2,2))
gVAR_fit %>%
  filter(wave == 1 & SID %in% subs) %>%
  mutate(SID = factor(SID, levels = subs)) %>%
  arrange(SID) %>%
  select(SID, wave, temp_plot, contemp_plot) %>%
  arrange(wave, desc(SID)) %>%
  mutate(map(contemp_plot, plot))

Idiographic Personality: A Methodological Perspective on Measuring and Modeling Individuals

Association for Psychological Science Conference 2018

Emorie D Beck

26. May 2018