Chapter 5 Specification Curve Analysis
5.1 Load Data
5.1.1 Covariates
loadRData <- function(fileName, type){
#loads an RData file, and returns it
path <- sprintf("%s/data/imputed/%s_imputed_small.RData", wd, fileName)
load(path)
get(ls()[grepl(type, ls())])
}
studies <- c("addhealth", "bhps", "gsoep", "hilda", "hrs", "liss", "midus", "nlsy", "shp", "wls")
studies_long <- c("Add Health", "BHPS", "GSOEP", "HILDA", "HRS", "LISS", "MIDUS", "NLSY", "SHP", "WLS")
studies_num <- 1:10
sca_cov <- tibble(study = studies) %>%
mutate(data = map(study, ~loadRData(., "SCA_imp")),
data = map(data, ~(.) %>% mutate(
p_year = as.numeric(p_year), SID = as.character(SID))),
study = mapvalues(study, studies, studies_long)) %>%
unnest(data) %>%
filter(age < 100)
sca_cov_fun <- function(item){
p_waves %>%
filter(p_item == item) %>%
select(study = Study, Used) %>%
full_join(sca_cov) %>%
filter(p_year == Used) %>%
distinct() %>%
select(-Used) %>%
mutate(BMI = ifelse(is.nan(BMI) | is.infinite(BMI), NA, BMI))
}
sca_cov <- tibble(Trait = unique(p_waves$p_item)) %>%
mutate(data = map(Trait, sca_cov_fun))
5.1.2 Outcomes
loadRData <- function(fileName, type){
#loads an RData file, and returns it
path <- sprintf("%s/data/clean/%s_cleaned.RData", wd, fileName)
load(path)
get(ls()[grepl(type, ls())])
}
sca_out <- tibble(study = studies) %>%
mutate(data = map(study, ~loadRData(., "out")),
data = map(data, ~(.) %>% mutate(p_year = as.numeric(p_year), SID = as.character(SID))),
study = mapvalues(study, studies, studies_long)) %>%
unnest(data) %>%
filter(!grepl("sep", name)) %>%
select(study, SID, name, p_year, value) %>%
filter(complete.cases(.)) %>%
distinct() %>%
group_by(study, p_year, SID, name) %>%
summarize(value = max(value, na.rm = T)) %>%
ungroup() %>%
spread(name, value) %>%
full_join(p_waves %>% select(study = Study, p_item, Used)) %>%
filter(p_year == Used) %>%
select(-p_year, -Used) %>%
group_by(p_item) %>%
nest() %>%
ungroup() %>%
mutate(data = map(data, ~(.) %>% gather(Outcome, value, -study, -SID, na.rm = T))) %>%
rename(Trait = p_item)
5.1.3 Personality
sca_pers <- tibble(study = studies) %>%
mutate(data = map(study, ~loadRData(., "pers")),
data = map(data, ~(.) %>% ungroup() %>% mutate(year = as.numeric(year), SID = as.character(SID))),
study = mapvalues(study, studies, studies_long)) %>%
unnest(data) %>%
filter(!is.na(name) & !is.na(value)) %>%
full_join(p_waves %>% select(study = Study, name = p_item, Used)) %>%
filter(year == Used) %>%
select(study:value) %>%
group_by(name) %>%
nest() %>%
ungroup() %>%
rename(Trait = name)
5.1.4 Moderators
mod_data <- tibble(study = studies) %>%
mutate(data = map(study, ~loadRData(., "alpha")),
data = map(data, ~(.) %>% ungroup() %>% mutate(year = as.numeric(year))),
study = mapvalues(study, studies, studies_long)) %>%
unnest(data) %>%
mutate(alpha = map(alpha, possibly(~(.)$total[[1]], NA_real_))) %>%
filter(!is.na(name)) %>%
select(study, p_item = name, year, alpha) %>%
unnest(alpha) %>%
full_join(p_waves %>% select(p_item, study = Study, Used, predInt)) %>%
filter(year == Used) %>%
select(-Used) %>%
rename(Trait = p_item, reliability = alpha)
5.2 SCA Data Setup
factor_fun <- function(x){if(is.numeric(x)){diff(range(x, na.rm = T)) %in% 1:2} else{F}}
sca_data_fun <- function(trait, outcome){
df1 <- (sca_pers %>% filter(Trait == trait))$data[[1]] %>% select(study, SID, p_value = value) %>%
full_join((sca_out %>% filter(Trait == trait))$data[[1]] %>% filter(Outcome == outcome) %>% select(study, SID, o_value = value)) %>%
filter(complete.cases(.)) %>%
mutate(o_value = factor(ifelse(as.numeric(as.character(o_value)) > 1, 1, o_value))) %>%
full_join((sca_cov %>% filter(Trait == trait))$data[[1]] %>% select(-p_year)) %>%
full_join(mod_data %>% filter(Trait == trait) %>% select(-Trait, -year)) %>%
mutate_if(factor_fun, as.factor) %>%
filter(!is.na(o_value) & !is.na(p_value)) %>%
group_by(study) %>%
mutate_at(vars(p_value, grsWages, SRhealth, BMI, exercise, parOccPrstg,
parOccPrstg, numKids),
~((. - min(., na.rm = T))/(max(., na.rm = T) - min(., na.rm = T))*10)) %>%
mutate_if(is.numeric, ~ifelse(is.nan(.) | is.infinite(.), NA, .)) %>%
mutate(age = scale(age, center = T, scale = F)) %>%
ungroup() #%>%
if(trait == "IQ"){df1<-df1 %>% mutate_at(vars(physhlthevnt,numKids), ~ifelse(study == "NLSY" & is.na(.), 0, .))}
# mutate_at(vars(education, parEdu), ~ifelse(!is.na(.), as.numeric(as.character(.)) - 1, 0))
save(df1, file = sprintf("%s/data/sca/sca_%s_%s.RData", wd, trait, outcome))
}
5.3 Functions
5.3.1 Permutation Samples
5.3.2 Permutation Models
perm_fun <- function(i, df3, f1, po){
n <- df3$n
po <- po[n]
which(n %in% 1:length(po))
x <- df3 %>% select(-p_value) %>% mutate(p_value = po)
start <- Sys.time()
fit2 <- femlm(formula(f1), data = x, family = "logit")
print(Sys.time() - start)
par2 <- coef(fit2)["p_value"]
ci2 <- as.matrix(confint(fit2, parm="p_value")[1,])
# tidy2 <- tidy(fit2, effects = "fixed", conf.int = T) %>%
# filter(term == "p_value") %>%
# select(estimate, conf.low, conf.high) %>%
# as.matrix()
tidy2 <- c(par2, ci2)
names(tidy2) <- c("p_value", "ci_lower", "ci_upper")
rm(list = c("x", "df3", "fit2"))
gc()
return(tidy2)
}
5.3.3 Terms and Formulas
sca_term_fun <- function(trait, outcome){
terms <- c("p_value", (specifications %>% select(name, Effect, one_of(outcome)) %>% filter(complete.cases(.)) %>%
mutate(name = ifelse(Effect == "moderator", paste(name, ": p_value", sep = " "), name)))$name)
}
sca_formula_fun <- function(terms){
f <- paste("o_value ~ ", # outcome
paste(terms, collapse = " + "), # fixed effects
"| study", collapse = "") # random effects
}
5.3.4 Full Models and Workspace Setup
sca_setup_fun <- function(trait, outcome, fixed = "p_value"){
terms <- sca_term_fun(trait, outcome)
terms <- if(trait == "OP" & outcome == "mvInPrtnr"){terms[terms != "alcohol"]}
f <- sca_formula_fun(terms)
# wd <- "~/selection"
load(sprintf("%s/data/sca/sca_%s_%s.RData", wd, trait, outcome))
df1 <- df1
std <- df1 %>%
group_by(study, o_value) %>%
tally() %>%
full_join(crossing(study = unique(.$study), o_value = factor(c(0,1))))
std <- unique(std$study[std$n < 50 | is.na(std$n)])
df1 <- df1 %>%
filter(!study %in% std) %>%
mutate(o_value = as.numeric(as.character(o_value)),
n = row_number())
df2 <- df1 %>%
select(study, SID, p_value, o_value, n, one_of(terms)) %>%
filter(complete.cases(.)) #%>%
# mutate_if(is.factor, factor)
start <- Sys.time()
# optCtrl=list(maxfun=2e5))
full_mod <- femlm(formula(f), data = df2, family = "logit")
# full_mod <- glmer(formula(f), data = df1, family = "binomial", na.action = "na.omit", control = cntrl)
print(Sys.time() - start)
gmCall <- get_call(full_mod)
gmEnv <- parent.frame()
gmFormulaEnv <- environment(as.formula(formula(full_mod), env = gmEnv))
# set up terms
allTerms <- allTerms0 <- getAllTerms.fixest(full_mod, terms)
deps <- attr(allTerms0, "deps")
fixed <- union(fixed, rownames(deps)[rowSums(deps, na.rm = TRUE) ==
ncol(deps)])
fixed <- c(fixed, allTerms[allTerms %in% "(Intercept)"])
nFixed <- length(fixed)
gmCoefNames <- gmCoefNames0 <- names(coef(full_mod))
# this chunk makes sure that interactions include main effects
varsOrder <- order(allTerms %in% fixed)
termsOrder <- order(gmCoefNames %in% fixed)
allTerms <- allTerms[varsOrder]
gmCoefNames <- gmCoefNames[termsOrder]
di <- match(allTerms, rownames(deps))
deps <- deps[di, di, drop = FALSE]
# setup output
nVars <- length(allTerms)
nTerms <- length(gmCoefNames)
# rvNcol <- nVars + 3L + 2
# rtNcol <- nTerms + 3L + 2
# resultChunkSize <- 1000000L
# rval <- matrix(NA_real_, ncol = rtNcol, nrow = resultChunkSize)
nFixed <- 2 # p_value & intercept
nVariants <- 1L
nov <- as.integer(nVars - nFixed)
ncomb <- (2L^nov)
comb.sfx <- rep(TRUE, nFixed)
comb.seq <- if (nov != 0L) {seq_len(nov)}
# ord <- integer(resultChunkSize)
pval_out <- sapply(1:500, function(x){sample_fun(x, df1)}) # change back to 500
# perm_res <- array(NA_real_, c(10, 3, resultChunkSize)) # change back to 500
# no_cores <- detectCores() - 1
k <- 0L
save(list = ls(all.names = TRUE),
file = sprintf("%s/results/sca_workspace/sca_workspace_%s_%s.RData", wd, outcome, trait))
return(ncomb)
}
5.3.5 Specifications and Permutations for Cluster
sca_run_fun <- function(trait, outcome, min, max){
load(sprintf("/scratch/edbeck/sca_workspace/sca_workspace_%s_%s.RData", wd, outcome, trait))
k <- 0L
seq_comb <- seq(min, max, 1)
# seq_comb <- seq_comb[1:2] # remove after testing
res <- lapply(seq_comb, function(iComb){
varComb <- iComb%%nVariants
jComb <- (iComb - varComb)%/%nVariants
if (varComb == 0L) {
isok <- TRUE
comb <- c(as.logical(intToBits(jComb)[comb.seq]),
comb.sfx)
nvar <- sum(comb) - 1
if (!formula_margin_check(comb, deps)) {
isok <- FALSE
res <- NA
}
new_terms <- allTerms[comb]
if(sum(grepl(":", new_terms)) > 1){
isok <- FALSE
res <- NA
}
}
if (!isok) {
res <- NA
} else{
f1 <- paste("o_value ~ ", # outcome
paste(new_terms[new_terms != "(Intercept)"], collapse = " + "), # fixed effects
" | study", collapse = "")
std <- df1 %>%
group_by(study, o_value) %>%
tally() %>%
full_join(crossing(study = unique(.$study), o_value = c(0,1)))
std <- unique(std$study[std$n < 50 | is.na(std$n)])
df3 <- df1 %>%
filter(!study %in% std) %>%
select(study:o_value, n, one_of(new_terms)) %>%
mutate(o_value = as.numeric(as.character(o_value))) %>%
filter(complete.cases(.))
# filter(study %in% std)
fit <- femlm(formula(f1), data = df3, family = "logit")
print("made it to creating fit")
mci1 <- confint(fit, parm = "p_value")
mcoef1 <- matchCoef(fit, all.terms = gmCoefNames)
ll1 <- logLik(fit)
bic1 <- BIC(fit)
nobs1 <- nobs(fit)
psr1 <- fit$pseudo_r2
row1 <- c(mcoef1[gmCoefNames], ci.lower = mci1[1,1], ci.upper = mci1[1,2],
bic = bic1, psr2 = psr1, ll = ll1)
perm1 <- t(sapply(1:500, function(ii) perm_fun(ii, df3, f1, as.numeric(pval_out[,ii]))))
print(Sys.time() - start)
k <- k + 1L
print(sprintf("%s %s %s", iComb, f1, k))
res <- list(rval = row1, perm_res = perm1, ord = iComb)
rm(list = c("row1", "perm1", "fit", "mci1", "mcoef1", "ll1", "nobs1"))
gc()
}
return(res)
})
res <- res[!is.na(res)]
rval <- ldply(res, `[[`, 1)
perm_res <- llply(res, `[[`, 2)
ord <- laply(res, `[[`, 3)
perm_res <- abind::abind(perm_res, along = 3)
# colnames(rval) <- c(gmCoefNames, "ci.lower", "ci.upper", "df", "ll")
row.names(rval) <- ord
# rval[, seq_along(gmCoefNames)] <- rval[, v <- order(termsOrder)]
save(rval, file = sprintf("/scratch/edbeck/raw/raw_%s_%s_%s_%s.RData", trait, outcome, min, max))
save(perm_res, file = sprintf("/scratch/edbeck/perm/perm_%s_%s_%s_%s.RData", trait, outcome, min, max))
}
5.4 Setup
5.4.1 Workspace
5.4.2 Job Arrays for the Cluster
spl_fun <- function(trait, outcome){
print(paste(trait, outcome))
load(sprintf("%s/results/sca_workspace/sca_workspace_%s_%s.RData", wd, outcome, trait))
x <- seq(0,ncomb,1)
seq_comb = split(x, sort(x%%150))
}
done <- tibble(file = list.files(sprintf("%s/results/sca/perm", wd))) %>%
separate(file, c("scrap", "Trait", "Outcome", "min", "max"), sep = "_") %>%
mutate(perm = "done", max = str_remove_all(max, ".RData")) %>%
select(-scrap) %>%
mutate_at(vars(min, max), as.numeric)
sca_jobs <- crossing(
Trait = unique(p_waves$p_item),
Outcome = (codebook$codebook[[2]] %>% filter(category == "out"))$name
) %>%
# filter(Outcome %in% c("mvInPrtnr", "frstjob") |
# (Trait %in% c("E", "SS", "SWL") & Outcome %in% c("physhlthevnt", "mntlhlthevnt"))) %>%
mutate(data = map2(Trait, Outcome, possibly(spl_fun, NA_real_))) %>%
unnest(data) %>%
mutate(min = map_dbl(data, min),
max = map_dbl(data, max)) %>%
select(trait = Trait, outcome = Outcome, min, max) %>%
write.table(., file = sprintf("%s/scripts/sca_cluster/sca_args.txt", wd), row.names = F, col.names = F)
5.5 Re-Compiling
5.5.1 Raw Data Functions
# read raw data
raw_read_fun <- function(file){
load(sprintf("%s/results/sca/raw/%s", wd, file))
rval <- rval %>%
mutate(ci.lower = ifelse(is.na(ci.lower), -.01, ci.lower),
ci.upper = ifelse(is.na(ci.upper), -.01, ci.upper))
return(rval)
}
# determine dominant sign for personality-outcome association
dom_sign_fun <- function(p){
signs <- sign(p)
dom<-if(sum(signs == 1, na.rm = T) > sum(signs == -1, na.rm = T)){1} else {-1}
num_dom <- sum(signs == dom, na.rm = T)
return(c("dom_sign" = dom, "num_dom" = num_dom))
}
# determine number of significant dominant signs
num_sig_fun <- function(l, u, p, dom){
sum(sign(l) == sign(u) & sign(p) == dom, na.rm = T)
}
5.5.2 Perm Data Functions
# read in permuted data
perm_read_fun <- function(file){
file <- str_replace(file, "raw", "perm")
load(sprintf("%s/results/sca/perm/%s", wd, file))
if(any(is.na(perm_res))){
perm_res[,2,] <- apply(perm_res[,2,], 1, function(x) {x[is.na(x)] <- -.01; x}) %>% t()
perm_res[,3,] <- apply(perm_res[,3,], 1, function(x) {x[is.na(x)] <- -.01; x}) %>% t()
}
return(perm_res)
}
# calculate median and quantiles of permuted personality-outcome associations
sum_fun <- function(x){
t(apply(x[,1,], 2, function(y) c(median(y, na.rm = T), quantile(y, prob = c(.15, .85), na.rm = T)))) %>%
data.frame() %>%
setNames(c("perm_median", "perm_lower", "perm_upper"))
}
5.5.3 Inferential-Based Tests
Simonsohn et al. define 3 inferential-based tests:
1. the median overall point estimate within each specification curve.
2. the percentage of specifications that are of the dominant sign.
3. the percentage of specifications with the dominant sign that are also significant.
# median permuted personality-outcome association for each model
perm_med_fun <- function(m){
apply(m[, 1, ], 1, function(x){ # m[, 1, ] = p_value column for each specification with length = 500 perm
median(x, na.rm = T)
})
}
# number of (out of 500) dominant permuted sign of personality-outcome associations for each model
perm_dom_sign_fun <- function(m){
sapply(1:500, function(i){
x <- m[i, 1, ]
signs <- sign(x)
dom <- if(sum(signs == 1, na.rm = T) > sum(signs == -1, na.rm = T)){1} else {-1}
num_dom <- sum(signs == dom, na.rm = T)
return(c("dom_sign" = dom, "num_dom" = num_dom))
# return(dom)
}, simplify = T) %>% t()
}
# number of (out of 500) dominant and significant permuted sign of personality-outcome associations for each model
perm_sig_fun <- function(m, dom){
apply(m, 1, function(x){
sum(sign(x[2,]) == sign(x[3,]) & sign(x[1,]) == dom, na.rm = T)
})
}
# combine permuted results across different models
# previously split up to run smaller batches of models
combine_fun <- function(d1){
m<-abind::abind(d1, along = 3)
perm_med <- perm_med_fun(m)
perm_sign <- perm_dom_sign_fun(m)
perm_num_sign <- perm_sign[,"num_dom"]; perm_sign <- perm_sign[,"dom_sign"]
perm_sig <- perm_sig_fun(m, perm_sign["dom_sign"])
tibble(perm_med, perm_sign, perm_num_sign, perm_sig)
}
# compute inferential permutation tests by comparing raw results to each of the permutations
perm_inf_test_fun <- function(perm,med, sign, sig){
tibble(med_test = sum(abs(perm$perm_med) > abs(med))/500,
sign_test = sum(perm$perm_num_sign > sign)/500,
sig = sum(perm$perm_sig > sig)/500)
}
5.5.4 Plotting Function
# plot the results
sca_plot_fun <- function(d1, type, trait, outcome){
d <- round(max(abs(1-min(exp(d1$p_value))), abs(1-exp(max(d1$p_value)))), 2)
lim <- c(1-d-(d/2.5), 1+d+(d/2.5))
brk <- round(c(1-d-(d/5), 1, 1+d+(d/5)),2)
lab <- str_remove(round(c(1-d-(d/5), 1, 1+d+(d/5)),2), "^0")
ns <- diff(lim) / 100; sg <- diff(lim) / 50
top <- d1 %>%
arrange(p_value) %>%
mutate(sig = ifelse(sign(ci.lower) == sign(ci.upper), "sig", "ns"),
specification = 1:n()) %>%
mutate_at(vars(-(Min:Max), -n, -(bic:specification)), exp) %>%
mutate(p_min = ifelse(sig == "sig", p_value - sg, p_value - ns),
p_max = ifelse(sig == "sig", p_value + sg, p_value + ns)) %>%
ggplot(aes(x = specification, y = p_value)) +
scale_y_continuous(limits = lim, breaks = brk, labels = lab) +
scale_color_manual(values = c("black", "red")) +
geom_hline(aes(yintercept = 1), linetype = "dashed") +
geom_linerange(aes(ymin = p_min, ymax = p_max, color = sig), size = .05) +
# geom_errorbar(aes(color = sig, ymin = ci.lower, ymax = ci.upper),
# width = 0, size = .01, alpha = .4) +
# geom_point(shape = "|", aes(color = sig), size = .25) +
labs(x = "\nSpecification Number",
y = str_wrap(sprintf("Effect of %s on %s (OR)\n", trait, outcome), 30),
title = bquote(.(trait) %->% .(outcome))) +
theme_minimal(base_size = 11) +
theme(legend.title = element_text(size = 10),
legend.text = element_text(size = 9),
axis.text = element_text(color = "black"),
axis.line = element_line(colour = "black"),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
plot.title = element_text(hjust = .5, face = "bold"))
if(type == "perm"){
top <- top +
geom_smooth(aes(y = perm_lower), color = "gray", linetype = "dotted", se = F) +
geom_smooth(aes(y = perm_upper), color = "gray", linetype = "dotted", se = F) +
geom_smooth(aes(y = perm_median), color = "lightblue", linetype = "dashed", se = F)
}
bottom <- d1 %>%
arrange(p_value) %>%
mutate(sig = ifelse(sign(ci.lower) == sign(ci.upper), "sig", "ns"),
specification = 1:n(),
# model = fct_reorder(specification, .x = p_value, .fun = min)
) %>%
select(-(perm_median:perm_upper), -(bic:ll)) %>%
gather(variable, value, -(Min:n), -(ci.lower:specification)) %>%
mutate(value = ifelse(!is.na(value), "|", "")) %>%
ggplot(aes(specification, variable, color = sig)) +
# geom_point(aes(size = sig), shape = "|") +
geom_text(aes(label = value), size = rel(2)) +
scale_color_manual(values = c("black", "red")) +
# scale_size_manual(values = c(2,3))+
labs(x = "\nSpecification Number", y = "Variables\n") +
theme_minimal(base_size = 11) +
theme(legend.title = element_text(size = 10),
legend.text = element_text(size = 9),
axis.text = element_text(color = "black"),
axis.line = element_line(colour = "black"),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
p_val = cowplot::plot_grid(top, bottom, ncol = 1, align = "v", labels = c('A', 'B'))
# file <- sprintf("%s/results/sca/plots/%s/sca_%s_%s.pdf", res_path, type, outcome, trait)
# ggsave(p_val, width = 12, height = 8, file = file)
outcome <- mapvalues(outcome, outcomes$long_name, outcomes$short_name, warn_missing = F)
trait <- mapvalues(trait, traits$long_name, traits$short_name, warn_missing = F)
file <- sprintf("%s/results/sca/plots/png/%s_%s_%s.png", res_path, type, outcome, trait)
ggsave(p_val, width = 12, height = 8, file = file)
}
5.5.5 Compiling Function
# Compile the results
# written as below because of RAM issues. Everything done at once.
sca_compile_fun <- function(trait, outcome, d){
# bring in raw data
raw <- d %>%
mutate(raw = map(files, raw_read_fun),
Min = as.numeric(Min)) %>%
select(-files, -type, -scrap) %>%
arrange(Min) %>%
unnest(raw) %>%
mutate(n = 1:n())
# get values for inferential tests
med_size <- median(raw$p_value)
dom_sign <- dom_sign_fun(raw$p_value)
sig_sign <- with(raw, num_sig_fun(ci.lower, ci.upper, p_value, dom_sign["dom_sign"]))
raw_test <- c(med_size, dom_sign, sig_sign)
names(raw_test) <- c("med_size", "dom_sign", "num_dom", "sig_sign")
# bring in permutations
perm <- d %>%
mutate(data = map(files, perm_read_fun),
quant = map(data, sum_fun),
Min = as.numeric(Min)) %>%
select(-files, -type, -scrap) %>%
arrange(Min)
# get values for inferential test
perm_test <- combine_fun(perm$data)
# perform inferential permutation test
test <- perm_inf_test_fun(perm_test, med_size, dom_sign["num_dom"], sig_sign)
# plotting
d <- perm %>% select(-data) %>% unnest(quant) %>% mutate(n = 1:n()) %>%
full_join(raw)
sca_plot_fun(d, "raw", trait, outcome)
sca_plot_fun(d, "perm", trait, outcome)
results <- list(inf_test = test, raw_test = raw_test, perm_test = perm_test)
save(results, file = sprintf("%s/results/sca/inf_test/%s_%s.RData", res_path, outcome, trait))
rm(list = c("d", "test", "raw_test", "perm_test", "perm", "raw"))
gc()
return(results)
}
done <- tibble(
files = sprintf("%s/results/sca/plots/png", res_path) %>% list.files()
) %>%
separate(files, c("scrap", "Outcome", "Trait"), sep = "_") %>%
mutate(Trait = str_remove(Trait, ".png"),
done = "done") %>%
group_by(Outcome,Trait) %>%
filter(n()==2) %>%
ungroup() %>%
select(-scrap) %>%
distinct()
plan(multiprocess(workers = 3L))
sca_res_nested <- tibble(
files = sprintf("%s/results/sca/raw", wd) %>% list.files()
) %>%
separate(files, c("type", "Trait", "Outcome", "Min", "Max", "scrap"), remove = F) %>%
arrange(Min) %>%
group_by(Trait, Outcome) %>%
nest() %>%
filter(map(data, nrow) == 150) %>%
ungroup() %>%
filter(Trait %in% c("E", "A", "C", "N", "O")) %>%
# full_join(done) %>%
# filter(is.na(done)) %>%
mutate(Trait = mapvalues(Trait, traits$short_name, traits$long_name),
Outcome = mapvalues(Outcome, outcomes$short_name, outcomes$long_name)) %>%
mutate(#results = pmap(list(Trait, Outcome, data), sca_compile_fun))
results = future_pmap(list(Trait, Outcome, data)
, sca_compile_fun
, .progress = T
, .options = future_options(
globals = c("combine_fun", "dom_sign_fun", "num_sig_fun",
"perm_dom_sign_fun", "perm_inf_test_fun",
"perm_med_fun", "perm_read_fun", "wd", "res_path",
"perm_sig_fun", "raw_read_fun", "read_fun",
"sca_plot_fun", "sum_fun", "traits", "outcomes")
, packages = c("plyr", "tidyverse", "cowplot", "abind"))))
closeAllConnections()
5.6 Results
5.6.1 Tables
read_fun <- function(file){
load(sprintf("%s/results/sca/inf_test/%s", res_path, file))
return(results)
}
sca_res <- tibble(
files = sprintf("%s/results/sca/inf_test", res_path) %>% list.files()
) %>%
separate(files, c("Outcome", "Trait"), sep = "_", remove = F) %>%
mutate(Trait = str_remove(Trait, ".RData"),
data = map(files, read_fun),
inf_test = map(data, ~(.)$inf_test),
raw_test = map(data, ~(.)$raw_test),
perm_test = map(data, ~(.)$perm_test))
sca_res %>%
select(Outcome, Trait, inf_test) %>%
unnest(inf_test) %>%
mutate(num_sig = rowSums(cbind(med_test, sign_test, sig) < (.05/3))) %>%
group_by(Outcome, num_sig) %>%
tally() %>%
spread(num_sig, n)
## # A tibble: 14 x 4
## # Groups: Outcome [14]
## Outcome `1` `2` `3`
## <chr> <int> <int> <int>
## 1 Child Birth 3 NA 2
## 2 Child Moves Out 5 NA NA
## 3 Criminality 1 NA 4
## 4 Divorce NA NA 5
## 5 First Job 2 2 1
## 6 Higher Ed 3 1 1
## 7 Major Health Event 4 1 NA
## 8 Marriage 1 1 3
## 9 Mental Health Event NA 1 4
## 10 Mortality 1 3 1
## 11 Move in with a partner 2 NA 3
## 12 Retirement 1 1 3
## 13 Unemployment NA NA 5
## 14 Volunteering NA NA 5
t_names <- c(1, rep(3, 5)); names(t_names) <- c(" ", traits$long_name)
cols_names <- paste(rep(c("med_test", "sign_test", "sig"), times = 5),
rep(traits$short_name, each = 3), sep = "_")
sca_res %>%
select(Outcome, Trait, inf_test) %>%
unnest(inf_test) %>%
# pivot_longer(cols = c("med_test", "sign_test", "sig")
# , names_to = "Test"
# , values_to = "val") %>%
mutate(Trait = mapvalues(Trait, traits$long_name, traits$short_name)) %>%
pivot_wider(names_from = "Trait"
, values_from = c("med_test", "sign_test", "sig")) %>%
select(Outcome, all_of(cols_names)) %>%
# mutate(Test = mapvalues(Test, c("med_test", "sign_test", "sig"), c("Median", "Sign", "Significance"))) %>%
mutate_at(vars(-Outcome), ~ifelse(. < .05, sprintf("<strong>%.2f</strong",.), sprintf("%.2f",.))) %>%
kable(.
, "html"
, escape = F
, col.names = c("Outcome", rep(c("Median", "Sign", "Signif"), times = 5))
, align = c("r", rep("c", 15))
, caption = "Results of the Permuation Based Inference Tests from Specification Curve Analyses") %>%
kable_styling(full_width = F) %>%
collapse_rows(1
, valign = "top"
, row_group_label_position = "stack") %>%
add_header_above(t_names) %>%
footnote("Median = Percentage of permutations in which the raw median effect size of the dominant sign was larger than the permuted median effect size; Sign = Percentage of permutations in which the total number of raw specifications of the dominant sign was greater than permuted. Signifiance = Percentage of permutations in which the total number of raw, significant specifications of the dominant sign was greater than the permuted.") %>%
save_kable(., file = sprintf("%s/results/sca/inferential_tests.html", res_path))