Chapter 1 Workspace

In this section, we’ll set up everything we need to clean data in the next section. This includes:

  1. Loading in all packages
  2. Loading in the codebook
  3. Setting up data frames for personality traits / well-being, outcomes, covariates, and moderators, so that we can more easily rename their shorthand names to production-ready ones later
  4. Loading in and rendering html tables of some descriptives, measures, etc.

1.1 Packages

library(psych)       # psychometrics
library(knitr)       # knit documents
library(kableExtra)  # formatted tables
library(brms)        # bayesian models
library(readxl)      # read excel files
library(haven)       # read spss files
library(estimatr)    # robust standard error regression
library(lme4)        # Frequentist MLM
library(broom.mixed) # summaries of models
library(bootpredictlme4)    # for calculating prediction intervals
library(effectsize)  # effect sizes for meta-analysis
library(metafor)     # Frequentist meta-analysis
library(rstan)       # bayes underpinnings
library(tidybayes)   # pretty bayes draws and plots
library(cowplot)     # Plotting and faceting
library(plyr)        # data wrangling
library(tidyverse)   # data wrangling
library(furrr)       # parallel purrr mapping

# Names of every package used in this project, in the same order as the
# library() calls above. The vector is reused below to install anything that
# is missing and to report citations and versions.
pkg <- c(
  "psych", "knitr", "kableExtra", "brms", "readxl", "haven",
  "estimatr", "lme4", "broom.mixed", "bootpredictlme4", "effectsize",
  "metafor", "rstan", "tidybayes", "cowplot", "plyr", "tidyverse",
  "furrr"
)

# Install whichever of the listed packages are not yet in the local library.
# NOTE(review): this runs after the library() calls above, so a missing
# package would already have raised an error there -- confirm intended order.
# NOTE(review): the bootpredictlme4 citation printed below lists no CRAN URL;
# it may need to be installed from another source -- confirm.
lapply(setdiff(pkg, rownames(installed.packages())), install.packages)
## list()
# Print the citation (including BibTeX entries) for every package in `pkg`.
# lapply() also returns the citation objects, so at top level the resulting
# list is auto-printed after the explicit print() calls.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
lapply(pkg, function(x) print(citation(x), bibtex = TRUE))
## To cite package 'psych' in publications use:
## 
##   William Revelle (2024). _psych: Procedures for Psychological, Psychometric, and Personality Research_.
##   Northwestern University, Evanston, Illinois. R package version 2.4.1,
##   <https://CRAN.R-project.org/package=psych>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {psych: Procedures for Psychological, Psychometric, and Personality Research},
##     author = {{William Revelle}},
##     organization = {Northwestern University},
##     address = {Evanston, Illinois},
##     year = {2024},
##     note = {R package version 2.4.1},
##     url = {https://CRAN.R-project.org/package=psych},
##   }
## To cite package 'knitr' in publications use:
## 
##   Xie Y (2023). _knitr: A General-Purpose Package for Dynamic Report Generation in R_. R package version 1.45,
##   <https://yihui.org/knitr/>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {knitr: A General-Purpose Package for Dynamic Report Generation in R},
##     author = {Yihui Xie},
##     year = {2023},
##     note = {R package version 1.45},
##     url = {https://yihui.org/knitr/},
##   }
## 
##   Yihui Xie (2015) Dynamic Documents with R and knitr. 2nd edition. Chapman and Hall/CRC. ISBN 978-1498716963
## 
## A BibTeX entry for LaTeX users is
## 
##   @Book{,
##     title = {Dynamic Documents with {R} and knitr},
##     author = {Yihui Xie},
##     publisher = {Chapman and Hall/CRC},
##     address = {Boca Raton, Florida},
##     year = {2015},
##     edition = {2nd},
##     note = {ISBN 978-1498716963},
##     url = {https://yihui.org/knitr/},
##   }
## 
##   Yihui Xie (2014) knitr: A Comprehensive Tool for Reproducible Research in R. In Victoria Stodden, Friedrich
##   Leisch and Roger D. Peng, editors, Implementing Reproducible Computational Research. Chapman and Hall/CRC.
##   ISBN 978-1466561595
## 
## A BibTeX entry for LaTeX users is
## 
##   @InCollection{,
##     booktitle = {Implementing Reproducible Computational Research},
##     editor = {Victoria Stodden and Friedrich Leisch and Roger D. Peng},
##     title = {knitr: A Comprehensive Tool for Reproducible Research in {R}},
##     author = {Yihui Xie},
##     publisher = {Chapman and Hall/CRC},
##     year = {2014},
##     note = {ISBN 978-1466561595},
##   }
## To cite package 'kableExtra' in publications use:
## 
##   Zhu H (2024). _kableExtra: Construct Complex Table with 'kable' and Pipe Syntax_. R package version 1.4.0,
##   <https://CRAN.R-project.org/package=kableExtra>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {kableExtra: Construct Complex Table with 'kable' and Pipe Syntax},
##     author = {Hao Zhu},
##     year = {2024},
##     note = {R package version 1.4.0},
##     url = {https://CRAN.R-project.org/package=kableExtra},
##   }
## To cite brms in publications use:
## 
##   Paul-Christian Bürkner (2017). brms: An R Package for Bayesian Multilevel Models Using Stan. Journal of
##   Statistical Software, 80(1), 1-28. doi:10.18637/jss.v080.i01
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {{brms}: An {R} Package for {Bayesian} Multilevel Models Using {Stan}},
##     author = {Paul-Christian Bürkner},
##     journal = {Journal of Statistical Software},
##     year = {2017},
##     volume = {80},
##     number = {1},
##     pages = {1--28},
##     doi = {10.18637/jss.v080.i01},
##     encoding = {UTF-8},
##   }
## 
## Paul-Christian Bürkner (2018). Advanced Bayesian Multilevel Modeling with the R Package brms. The R Journal,
## 10(1), 395-411. doi:10.32614/RJ-2018-017
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {Advanced {Bayesian} Multilevel Modeling with the {R} Package {brms}},
##     author = {Paul-Christian Bürkner},
##     journal = {The R Journal},
##     year = {2018},
##     volume = {10},
##     number = {1},
##     pages = {395--411},
##     doi = {10.32614/RJ-2018-017},
##     encoding = {UTF-8},
##   }
## 
## Paul-Christian Bürkner (2021). Bayesian Item Response Modeling in R with brms and Stan. Journal of Statistical
## Software, 100(5), 1-54. doi:10.18637/jss.v100.i05
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {Bayesian Item Response Modeling in {R} with {brms} and {Stan}},
##     author = {Paul-Christian Bürkner},
##     journal = {Journal of Statistical Software},
##     year = {2021},
##     volume = {100},
##     number = {5},
##     pages = {1--54},
##     doi = {10.18637/jss.v100.i05},
##     encoding = {UTF-8},
##   }
## To cite package 'readxl' in publications use:
## 
##   Wickham H, Bryan J (2023). _readxl: Read Excel Files_. R package version 1.4.3,
##   <https://CRAN.R-project.org/package=readxl>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {readxl: Read Excel Files},
##     author = {Hadley Wickham and Jennifer Bryan},
##     year = {2023},
##     note = {R package version 1.4.3},
##     url = {https://CRAN.R-project.org/package=readxl},
##   }
## To cite package 'haven' in publications use:
## 
##   Wickham H, Miller E, Smith D (2023). _haven: Import and Export 'SPSS', 'Stata' and 'SAS' Files_. R package
##   version 2.5.4, <https://CRAN.R-project.org/package=haven>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {haven: Import and Export 'SPSS', 'Stata' and 'SAS' Files},
##     author = {Hadley Wickham and Evan Miller and Danny Smith},
##     year = {2023},
##     note = {R package version 2.5.4},
##     url = {https://CRAN.R-project.org/package=haven},
##   }
## To cite package 'estimatr' in publications use:
## 
##   Blair G, Cooper J, Coppock A, Humphreys M, Sonnet L (2024). _estimatr: Fast Estimators for Design-Based
##   Inference_. R package version 1.0.2, <https://CRAN.R-project.org/package=estimatr>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {estimatr: Fast Estimators for Design-Based Inference},
##     author = {Graeme Blair and Jasper Cooper and Alexander Coppock and Macartan Humphreys and Luke Sonnet},
##     year = {2024},
##     note = {R package version 1.0.2},
##     url = {https://CRAN.R-project.org/package=estimatr},
##   }
## To cite lme4 in publications use:
## 
##   Douglas Bates, Martin Maechler, Ben Bolker, Steve Walker (2015). Fitting Linear Mixed-Effects Models Using
##   lme4. Journal of Statistical Software, 67(1), 1-48. doi:10.18637/jss.v067.i01.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {Fitting Linear Mixed-Effects Models Using {lme4}},
##     author = {Douglas Bates and Martin M{\"a}chler and Ben Bolker and Steve Walker},
##     journal = {Journal of Statistical Software},
##     year = {2015},
##     volume = {67},
##     number = {1},
##     pages = {1--48},
##     doi = {10.18637/jss.v067.i01},
##   }
## To cite package 'broom.mixed' in publications use:
## 
##   Bolker B, Robinson D (2022). _broom.mixed: Tidying Methods for Mixed Models_. R package version 0.2.9.4,
##   <https://CRAN.R-project.org/package=broom.mixed>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {broom.mixed: Tidying Methods for Mixed Models},
##     author = {Ben Bolker and David Robinson},
##     year = {2022},
##     note = {R package version 0.2.9.4},
##     url = {https://CRAN.R-project.org/package=broom.mixed},
##   }
## To cite package 'bootpredictlme4' in publications use:
## 
##   Duursma R (2024). _bootpredictlme4: Predict Method For lme4 With Bootstrap_. R package version 0.1.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {bootpredictlme4: Predict Method For lme4 With Bootstrap},
##     author = {Remko Duursma},
##     year = {2024},
##     note = {R package version 0.1},
##   }
## 
## ATTENTION: This citation information has been auto-generated from the package DESCRIPTION file and may need
## manual editing, see 'help("citation")'.
## To cite effectsize in publications use:
## 
##   Ben-Shachar M, Lüdecke D, Makowski D (2020). effectsize: Estimation of Effect Size Indices and Standardized
##   Parameters. Journal of Open Source Software, 5(56), 2815. doi: 10.21105/joss.02815
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {{e}ffectsize: Estimation of Effect Size Indices and Standardized Parameters},
##     author = {Mattan S. Ben-Shachar and Daniel Lüdecke and Dominique Makowski},
##     year = {2020},
##     journal = {Journal of Open Source Software},
##     volume = {5},
##     number = {56},
##     pages = {2815},
##     publisher = {The Open Journal},
##     doi = {10.21105/joss.02815},
##     url = {https://doi.org/10.21105/joss.02815},
##   }
## To cite the metafor package in publications, please use:
## 
##   Viechtbauer, W. (2010). Conducting meta-analyses in R with the metafor package. Journal of Statistical
##   Software, 36(3), 1-48. https://doi.org/10.18637/jss.v036.i03
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {Conducting meta-analyses in {R} with the {metafor} package},
##     author = {Wolfgang Viechtbauer},
##     journal = {Journal of Statistical Software},
##     year = {2010},
##     volume = {36},
##     number = {3},
##     pages = {1--48},
##     doi = {10.18637/jss.v036.i03},
##   }
## To cite RStan in publications use:
## 
##   Stan Development Team (2024). RStan: the R interface to Stan. R package version 2.32.5. https://mc-stan.org/.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Misc{,
##     title = {{RStan}: the {R} interface to {Stan}},
##     author = {{Stan Development Team}},
##     note = {R package version 2.32.5},
##     year = {2024},
##     url = {https://mc-stan.org/},
##   }
## To cite package 'tidybayes' in publications use:
## 
##   Kay M (2023). _tidybayes: Tidy Data and Geoms for Bayesian Models_. doi:10.5281/zenodo.1308151
##   <https://doi.org/10.5281/zenodo.1308151>, R package version 3.0.6, <http://mjskay.github.io/tidybayes/>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {{tidybayes}: Tidy Data and Geoms for {Bayesian} Models},
##     author = {Matthew Kay},
##     year = {2023},
##     note = {R package version 3.0.6},
##     url = {http://mjskay.github.io/tidybayes/},
##     doi = {10.5281/zenodo.1308151},
##   }
## To cite package 'cowplot' in publications use:
## 
##   Wilke C (2024). _cowplot: Streamlined Plot Theme and Plot Annotations for 'ggplot2'_. R package version
##   1.1.3, <https://CRAN.R-project.org/package=cowplot>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {cowplot: Streamlined Plot Theme and Plot Annotations for 'ggplot2'},
##     author = {Claus O. Wilke},
##     year = {2024},
##     note = {R package version 1.1.3},
##     url = {https://CRAN.R-project.org/package=cowplot},
##   }
## To cite package 'plyr' in publications use:
## 
##   Hadley Wickham (2011). The Split-Apply-Combine Strategy for Data Analysis. Journal of Statistical Software,
##   40(1), 1-29. URL https://www.jstatsoft.org/v40/i01/.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {The Split-Apply-Combine Strategy for Data Analysis},
##     author = {Hadley Wickham},
##     journal = {Journal of Statistical Software},
##     year = {2011},
##     volume = {40},
##     number = {1},
##     pages = {1--29},
##     url = {https://www.jstatsoft.org/v40/i01/},
##   }
## To cite package 'tidyverse' in publications use:
## 
##   Wickham H, Averick M, Bryan J, Chang W, McGowan LD, François R, Grolemund G, Hayes A, Henry L, Hester J, Kuhn
##   M, Pedersen TL, Miller E, Bache SM, Müller K, Ooms J, Robinson D, Seidel DP, Spinu V, Takahashi K, Vaughan D,
##   Wilke C, Woo K, Yutani H (2019). "Welcome to the tidyverse." _Journal of Open Source Software_, *4*(43),
##   1686. doi:10.21105/joss.01686 <https://doi.org/10.21105/joss.01686>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {Welcome to the {tidyverse}},
##     author = {Hadley Wickham and Mara Averick and Jennifer Bryan and Winston Chang and Lucy D'Agostino McGowan and Romain François and Garrett Grolemund and Alex Hayes and Lionel Henry and Jim Hester and Max Kuhn and Thomas Lin Pedersen and Evan Miller and Stephan Milton Bache and Kirill Müller and Jeroen Ooms and David Robinson and Dana Paige Seidel and Vitalie Spinu and Kohske Takahashi and Davis Vaughan and Claus Wilke and Kara Woo and Hiroaki Yutani},
##     year = {2019},
##     journal = {Journal of Open Source Software},
##     volume = {4},
##     number = {43},
##     pages = {1686},
##     doi = {10.21105/joss.01686},
##   }
## To cite package 'furrr' in publications use:
## 
##   Vaughan D, Dancho M (2022). _furrr: Apply Mapping Functions in Parallel using Futures_. R package version
##   0.3.1, <https://CRAN.R-project.org/package=furrr>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {furrr: Apply Mapping Functions in Parallel using Futures},
##     author = {Davis Vaughan and Matt Dancho},
##     year = {2022},
##     note = {R package version 0.3.1},
##     url = {https://CRAN.R-project.org/package=furrr},
##   }
## [[1]]
## To cite package 'psych' in publications use:
## 
##   William Revelle (2024). _psych: Procedures for Psychological, Psychometric, and Personality Research_.
##   Northwestern University, Evanston, Illinois. R package version 2.4.1,
##   <https://CRAN.R-project.org/package=psych>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {psych: Procedures for Psychological, Psychometric, and Personality Research},
##     author = {{William Revelle}},
##     organization = {Northwestern University},
##     address = {Evanston, Illinois},
##     year = {2024},
##     note = {R package version 2.4.1},
##     url = {https://CRAN.R-project.org/package=psych},
##   }
## 
## [[2]]
## To cite package 'knitr' in publications use:
## 
##   Xie Y (2023). _knitr: A General-Purpose Package for Dynamic Report Generation in R_. R package version 1.45,
##   <https://yihui.org/knitr/>.
## 
##   Yihui Xie (2015) Dynamic Documents with R and knitr. 2nd edition. Chapman and Hall/CRC. ISBN 978-1498716963
## 
##   Yihui Xie (2014) knitr: A Comprehensive Tool for Reproducible Research in R. In Victoria Stodden, Friedrich
##   Leisch and Roger D. Peng, editors, Implementing Reproducible Computational Research. Chapman and Hall/CRC.
##   ISBN 978-1466561595
## 
## To see these entries in BibTeX format, use 'print(<citation>, bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
## 
## [[3]]
## To cite package 'kableExtra' in publications use:
## 
##   Zhu H (2024). _kableExtra: Construct Complex Table with 'kable' and Pipe Syntax_. R package version 1.4.0,
##   <https://CRAN.R-project.org/package=kableExtra>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {kableExtra: Construct Complex Table with 'kable' and Pipe Syntax},
##     author = {Hao Zhu},
##     year = {2024},
##     note = {R package version 1.4.0},
##     url = {https://CRAN.R-project.org/package=kableExtra},
##   }
## 
## [[4]]
## To cite brms in publications use:
## 
##   Paul-Christian Bürkner (2017). brms: An R Package for Bayesian Multilevel Models Using Stan. Journal of
##   Statistical Software, 80(1), 1-28. doi:10.18637/jss.v080.i01
## 
## Paul-Christian Bürkner (2018). Advanced Bayesian Multilevel Modeling with the R Package brms. The R Journal,
## 10(1), 395-411. doi:10.32614/RJ-2018-017
## 
## Paul-Christian Bürkner (2021). Bayesian Item Response Modeling in R with brms and Stan. Journal of Statistical
## Software, 100(5), 1-54. doi:10.18637/jss.v100.i05
## 
## To see these entries in BibTeX format, use 'print(<citation>, bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
## 
## [[5]]
## To cite package 'readxl' in publications use:
## 
##   Wickham H, Bryan J (2023). _readxl: Read Excel Files_. R package version 1.4.3,
##   <https://CRAN.R-project.org/package=readxl>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {readxl: Read Excel Files},
##     author = {Hadley Wickham and Jennifer Bryan},
##     year = {2023},
##     note = {R package version 1.4.3},
##     url = {https://CRAN.R-project.org/package=readxl},
##   }
## 
## [[6]]
## To cite package 'haven' in publications use:
## 
##   Wickham H, Miller E, Smith D (2023). _haven: Import and Export 'SPSS', 'Stata' and 'SAS' Files_. R package
##   version 2.5.4, <https://CRAN.R-project.org/package=haven>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {haven: Import and Export 'SPSS', 'Stata' and 'SAS' Files},
##     author = {Hadley Wickham and Evan Miller and Danny Smith},
##     year = {2023},
##     note = {R package version 2.5.4},
##     url = {https://CRAN.R-project.org/package=haven},
##   }
## 
## [[7]]
## To cite package 'estimatr' in publications use:
## 
##   Blair G, Cooper J, Coppock A, Humphreys M, Sonnet L (2024). _estimatr: Fast Estimators for Design-Based
##   Inference_. R package version 1.0.2, <https://CRAN.R-project.org/package=estimatr>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {estimatr: Fast Estimators for Design-Based Inference},
##     author = {Graeme Blair and Jasper Cooper and Alexander Coppock and Macartan Humphreys and Luke Sonnet},
##     year = {2024},
##     note = {R package version 1.0.2},
##     url = {https://CRAN.R-project.org/package=estimatr},
##   }
## 
## [[8]]
## To cite lme4 in publications use:
## 
##   Douglas Bates, Martin Maechler, Ben Bolker, Steve Walker (2015). Fitting Linear Mixed-Effects Models Using
##   lme4. Journal of Statistical Software, 67(1), 1-48. doi:10.18637/jss.v067.i01.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {Fitting Linear Mixed-Effects Models Using {lme4}},
##     author = {Douglas Bates and Martin M{\"a}chler and Ben Bolker and Steve Walker},
##     journal = {Journal of Statistical Software},
##     year = {2015},
##     volume = {67},
##     number = {1},
##     pages = {1--48},
##     doi = {10.18637/jss.v067.i01},
##   }
## 
## [[9]]
## To cite package 'broom.mixed' in publications use:
## 
##   Bolker B, Robinson D (2022). _broom.mixed: Tidying Methods for Mixed Models_. R package version 0.2.9.4,
##   <https://CRAN.R-project.org/package=broom.mixed>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {broom.mixed: Tidying Methods for Mixed Models},
##     author = {Ben Bolker and David Robinson},
##     year = {2022},
##     note = {R package version 0.2.9.4},
##     url = {https://CRAN.R-project.org/package=broom.mixed},
##   }
## 
## [[10]]
## To cite package 'bootpredictlme4' in publications use:
## 
##   Duursma R (2024). _bootpredictlme4: Predict Method For lme4 With Bootstrap_. R package version 0.1.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {bootpredictlme4: Predict Method For lme4 With Bootstrap},
##     author = {Remko Duursma},
##     year = {2024},
##     note = {R package version 0.1},
##   }
## 
## ATTENTION: This citation information has been auto-generated from the package DESCRIPTION file and may need
## manual editing, see 'help("citation")'.
## 
## [[11]]
## To cite effectsize in publications use:
## 
##   Ben-Shachar M, Lüdecke D, Makowski D (2020). effectsize: Estimation of Effect Size Indices and Standardized
##   Parameters. Journal of Open Source Software, 5(56), 2815. doi: 10.21105/joss.02815
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {{e}ffectsize: Estimation of Effect Size Indices and Standardized Parameters},
##     author = {Mattan S. Ben-Shachar and Daniel Lüdecke and Dominique Makowski},
##     year = {2020},
##     journal = {Journal of Open Source Software},
##     volume = {5},
##     number = {56},
##     pages = {2815},
##     publisher = {The Open Journal},
##     doi = {10.21105/joss.02815},
##     url = {https://doi.org/10.21105/joss.02815},
##   }
## 
## [[12]]
## To cite the metafor package in publications, please use:
## 
##   Viechtbauer, W. (2010). Conducting meta-analyses in R with the metafor package. Journal of Statistical
##   Software, 36(3), 1-48. https://doi.org/10.18637/jss.v036.i03
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {Conducting meta-analyses in {R} with the {metafor} package},
##     author = {Wolfgang Viechtbauer},
##     journal = {Journal of Statistical Software},
##     year = {2010},
##     volume = {36},
##     number = {3},
##     pages = {1--48},
##     doi = {10.18637/jss.v036.i03},
##   }
## 
## [[13]]
## To cite RStan in publications use:
## 
##   Stan Development Team (2024). RStan: the R interface to Stan. R package version 2.32.5. https://mc-stan.org/.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Misc{,
##     title = {{RStan}: the {R} interface to {Stan}},
##     author = {{Stan Development Team}},
##     note = {R package version 2.32.5},
##     year = {2024},
##     url = {https://mc-stan.org/},
##   }
## 
## [[14]]
## To cite package 'tidybayes' in publications use:
## 
##   Kay M (2023). _tidybayes: Tidy Data and Geoms for Bayesian Models_. doi:10.5281/zenodo.1308151
##   <https://doi.org/10.5281/zenodo.1308151>, R package version 3.0.6, <http://mjskay.github.io/tidybayes/>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {{tidybayes}: Tidy Data and Geoms for {Bayesian} Models},
##     author = {Matthew Kay},
##     year = {2023},
##     note = {R package version 3.0.6},
##     url = {http://mjskay.github.io/tidybayes/},
##     doi = {10.5281/zenodo.1308151},
##   }
## 
## [[15]]
## To cite package 'cowplot' in publications use:
## 
##   Wilke C (2024). _cowplot: Streamlined Plot Theme and Plot Annotations for 'ggplot2'_. R package version
##   1.1.3, <https://CRAN.R-project.org/package=cowplot>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {cowplot: Streamlined Plot Theme and Plot Annotations for 'ggplot2'},
##     author = {Claus O. Wilke},
##     year = {2024},
##     note = {R package version 1.1.3},
##     url = {https://CRAN.R-project.org/package=cowplot},
##   }
## 
## [[16]]
## To cite package 'plyr' in publications use:
## 
##   Hadley Wickham (2011). The Split-Apply-Combine Strategy for Data Analysis. Journal of Statistical Software,
##   40(1), 1-29. URL https://www.jstatsoft.org/v40/i01/.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {The Split-Apply-Combine Strategy for Data Analysis},
##     author = {Hadley Wickham},
##     journal = {Journal of Statistical Software},
##     year = {2011},
##     volume = {40},
##     number = {1},
##     pages = {1--29},
##     url = {https://www.jstatsoft.org/v40/i01/},
##   }
## 
## [[17]]
## To cite package 'tidyverse' in publications use:
## 
##   Wickham H, Averick M, Bryan J, Chang W, McGowan LD, François R, Grolemund G, Hayes A, Henry L, Hester J, Kuhn
##   M, Pedersen TL, Miller E, Bache SM, Müller K, Ooms J, Robinson D, Seidel DP, Spinu V, Takahashi K, Vaughan D,
##   Wilke C, Woo K, Yutani H (2019). "Welcome to the tidyverse." _Journal of Open Source Software_, *4*(43),
##   1686. doi:10.21105/joss.01686 <https://doi.org/10.21105/joss.01686>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {Welcome to the {tidyverse}},
##     author = {Hadley Wickham and Mara Averick and Jennifer Bryan and Winston Chang and Lucy D'Agostino McGowan and Romain François and Garrett Grolemund and Alex Hayes and Lionel Henry and Jim Hester and Max Kuhn and Thomas Lin Pedersen and Evan Miller and Stephan Milton Bache and Kirill Müller and Jeroen Ooms and David Robinson and Dana Paige Seidel and Vitalie Spinu and Kohske Takahashi and Davis Vaughan and Claus Wilke and Kara Woo and Hiroaki Yutani},
##     year = {2019},
##     journal = {Journal of Open Source Software},
##     volume = {4},
##     number = {43},
##     pages = {1686},
##     doi = {10.21105/joss.01686},
##   }
## 
## [[18]]
## To cite package 'furrr' in publications use:
## 
##   Vaughan D, Dancho M (2022). _furrr: Apply Mapping Functions in Parallel using Futures_. R package version
##   0.3.1, <https://CRAN.R-project.org/package=furrr>.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {furrr: Apply Mapping Functions in Parallel using Futures},
##     author = {Davis Vaughan and Matt Dancho},
##     year = {2022},
##     note = {R package version 0.3.1},
##     url = {https://CRAN.R-project.org/package=furrr},
##   }
# Report the installed version of each package as "<name> version <x.y.z>".
# Each label is printed as it is built and is also collected in the list that
# lapply() returns.
lapply(pkg, function(pkg_name) {
  version_label <- paste(pkg_name, "version", packageVersion(pkg_name))
  print(version_label)
})
## [1] "psych version 2.4.1"
## [1] "knitr version 1.45"
## [1] "kableExtra version 1.4.0"
## [1] "brms version 2.20.4"
## [1] "readxl version 1.4.3"
## [1] "haven version 2.5.4"
## [1] "estimatr version 1.0.2"
## [1] "lme4 version 1.1.35.1"
## [1] "broom.mixed version 0.2.9.4"
## [1] "bootpredictlme4 version 0.1"
## [1] "effectsize version 0.8.6"
## [1] "metafor version 4.4.0"
## [1] "rstan version 2.32.5"
## [1] "tidybayes version 3.0.6"
## [1] "cowplot version 1.1.3"
## [1] "plyr version 1.8.9"
## [1] "tidyverse version 2.0.0"
## [1] "furrr version 0.3.1"
## [[1]]
## [1] "psych version 2.4.1"
## 
## [[2]]
## [1] "knitr version 1.45"
## 
## [[3]]
## [1] "kableExtra version 1.4.0"
## 
## [[4]]
## [1] "brms version 2.20.4"
## 
## [[5]]
## [1] "readxl version 1.4.3"
## 
## [[6]]
## [1] "haven version 2.5.4"
## 
## [[7]]
## [1] "estimatr version 1.0.2"
## 
## [[8]]
## [1] "lme4 version 1.1.35.1"
## 
## [[9]]
## [1] "broom.mixed version 0.2.9.4"
## 
## [[10]]
## [1] "bootpredictlme4 version 0.1"
## 
## [[11]]
## [1] "effectsize version 0.8.6"
## 
## [[12]]
## [1] "metafor version 4.4.0"
## 
## [[13]]
## [1] "rstan version 2.32.5"
## 
## [[14]]
## [1] "tidybayes version 3.0.6"
## 
## [[15]]
## [1] "cowplot version 1.1.3"
## 
## [[16]]
## [1] "plyr version 1.8.9"
## 
## [[17]]
## [1] "tidyverse version 2.0.0"
## 
## [[18]]
## [1] "furrr version 0.3.1"

1.2 Directory Path

# ---- Project locations ----

# Presumably the local root of the raw study data (not used in this section).
# NOTE(review): absolute path on a mounted volume; adjust per machine.
data_path <- "/Volumes/Emorie/data"

# Local project folder; rendered tables are saved beneath it.
# Alternative (disabled):
# local_path <- "/Volumes/Emorie/projects/data synthesis/crystallized"
local_path <- "~/Documents/projects/data synthesis/crystallized"

# Base URL for files hosted in the tutorial's GitHub repository.
# Alternative (disabled):
# res_path <- "https://github.com/emoriebeck/big-five-prediction/blob/master"
res_path <- "https://github.com/emoriebeck/data-synthesis-tutorial/raw/main"

1.3 Introduction

A key part of the scientific enterprise involves establishing robust, replicable, and generalizable relationships among diverse phenomena. For the better part of a century, meta-analytic techniques, in which effect sizes of relationships among phenomena are pulled from the published or unpublished literature and statistically pooled, have been the cornerstone of testing the robustness and generalizability of those relationships. However, as more data become publicly available, it is becoming increasingly easy and important to synthesize data sources, rather than just the results of those sources. In response, a number of new techniques, including pooled analyses, individual participant meta-analyses, and coordinated analyses, have emerged to synthesize such diverse data sources.

Despite the promises of each of these techniques, there has been little to no systematic review of the methods available or how to carry them out. As a result, many researchers are unaware of the wealth of methods available for data synthesis. But understanding what methods are available and how to best carry them out is critical for guiding future research using different data synthesis techniques. The present study aims to fill this gap.

In addition, in recent years, the links between personality and cognitive ability, and their links to aging, have received increasing attention as researchers look to use them to understand how aging processes unfold. However, less research has looked at links between personality and specific domains of cognitive functioning, particularly in a multi-study format, which is critical for understanding how the interplay among personality, cognitive function, and aging unfolds in a more nuanced manner.

To demonstrate how to conduct a variety of data synthesis techniques, as well as their utility and challenges, the proposed study investigates whether the Big Five prospectively predict the crystallized / knowledge domain of cognitive ability in 13 longitudinal panel studies. Because of the many options available for synthesizing the data from these studies to test the association, we will detail five broad data synthesis methods: (1) pooled analysis of individual participant data (IPD), (2) pooled analysis of IPD using dummy codes or random effects, (3) coordinated analyses followed by random effects meta-analysis, (4) coordinated analyses reported together, and (5) traditional meta-analysis of effect sizes from the published and unpublished literature. In addition, we will demonstrate how to carry out four of these five methods (excluding traditional meta-analyses).

Each of these methods will be explained in more detail later, but key features and differences across methods are summarized in Table 1 below.

# Download the Excel workbook holding the tables rendered below.
# The URL is built from `res_path` so that the repository location is defined
# in one place only; the resulting string is the same as the former literal.
url <- sprintf("%s/codebooks/crystallized_tables.xlsx", res_path)
destfile <- "tables.xlsx" # written to the current working directory
curl::curl_download(url, destfile)

# Table 1: read the "Table 1" sheet of the downloaded workbook, drop the
# ModNum column, and render the rest as an HTML table. Rows 1-2 and 3-5 are
# packed under the "Single Model" and "Multiple Models" group labels.
# The piped data frame is passed as kable()'s first argument, so no explicit
# `.` placeholder is needed.
tab1 <- readxl::read_xlsx(destfile, sheet = "Table 1") %>%
  select(-ModNum) %>%
  kable(
    format = "html",
    align = c("r", "c", "l", "l", "c", "l"),
    caption = "<strong>Table 1</strong><br><em>Key Features of Five Levels of Data Synthesis</em>"
  ) %>%
  kable_classic(full_width = FALSE, html_font = "Times New Roman") %>%
  kableExtra::group_rows("Single Model", 1, 2) %>%
  kableExtra::group_rows("Multiple Models", 3, 5)
tab1
(#tab:data synth tables)Table 1
Key Features of Five Levels of Data Synthesis
Names Individual Participant Data Number of Models Study-Specific Estimates Degree of Harmonization Examples
Single Model
Pooled Analysis of Individual Participant Data (IPD) Yes One No High Jokela et al., 2013
Pooled Analysis of Individual Participant Data using Contrasts or Random Effects Yes One Yes, random effects or dummy variables High Beck & Jackson, 2020; Paige et al., 2017
Multiple Models
Coordinated Analyses Followed by Meta-Analysis Yes Number of studies + meta-analysis model Yes, original study effect sizes Moderate Graham et al., 2020; Wood et al., 2018
Coordinated Analyses Reported Together Yes Number of studies Yes, original study effect sizes Moderate Graham et al., 2017
Traditional Meta-Analyses No One (Meta-analysis model) Yes, original study effect sizes None Bogg & Roberts, 2004
# Write the rendered Table 1 to an HTML file under <local_path>/results/tables
tab1 %>%
  save_kable(file = sprintf("%s/results/tables/tab-1-taxonomy.html", local_path))
# Table 2: sample characteristics and sample-level moderators.
# Reads the "Sheet1" sheet of the tables workbook, turns in-cell CRLF line
# breaks into HTML <br> tags, and renders a styled HTML table with a spanning
# header row and an explanatory footnote.
tab2 <- readxl::read_xlsx(destfile, sheet = "Sheet1") %>%
  # across(everything(), ...) replaces the superseded mutate_all()
  mutate(across(everything(), ~ str_replace_all(.x, "\\r\\n", "<br>"))) %>%
  kable(
    format = "html",
    align = rep("c", 10),
    caption = "<strong>Table 2</strong><br><em>Sample characteristics and sample-level moderators</em>",
    # cells and headers contain raw HTML (<br>, <strong>), so do not escape
    escape = FALSE,
    col.names = c(
      "Sample", "Country (Continent)", "Prediction Interval", "Measure",
      "Scale", "Domains", "Median Year (SD)", "Baseline Age", "Measure(s)",
      "Median Year (SD)"
    )
  ) %>%
  kable_classic(full_width = FALSE, html_font = "Times New Roman") %>%
  # spanning header: 3 unlabeled columns, 5 personality columns, 2 outcome columns
  add_header_above(
    c(
      " " = 3,
      "<strong>Personality Characteristics</strong>" = 5,
      "<strong>Crystallized / Knowledge Domain Cognitive Ability</strong>" = 2
    ),
    escape = FALSE
  ) %>%
  footnote("E = Extraversion; A = Agreeableness; C = Conscientiousness; N = Neuroticism; O = Openness. NEO-FFI = 60 item NEO Five Factor Inventory (Costa & McCrae, 1992); IPIP NEO = International Item Pool in Personality NEO (Johnson, 2014); BFI-S = Big Five Inventory, Short Form (German; Hahn et al., 2012); TDA-40 = Trait Descriptive Adjectives-40 (Saucier, 1994); MIDI = The Midlife Development Inventory (Lachman & Weaver, 1997); DPQ = Dutch Personality Questionnaire (Barelds & Luteijn, 2002); Eysenck = Eysenck Personality Questionnaire (Eysenck & Eysenck, 1965). Prediction interval was calculated by taking each participants’ first personality measurement year from their last cognitive ability measurement year. Baseline age is the average participant age at their first personality assessment. ")
tab2
Table 2
Sample characteristics and sample-level moderators
Personality Characteristics
Crystallized / Knowledge Domain Cognitive Ability
Sample Country (Continent) Prediction Interval Measure Scale Domains Median Year (SD) Baseline Age Measure(s) Median Year (SD)
BASE-I Germany
(Europe)
8.56
(-3.51)
NEO-FFI 1-5 E, N, O 1990
(0)
78.23
(6.66)
Vocabulary
Spot a Word
1997
(3.51)
EAS United States
(North America)
2.15
(2.41)
IPIP NEO 1-5 E, A, C, N, O 2011
(3.18)
79.47
(5.36)
Boston Naming Test
Information
2014
(3.09)
GSOEP Germany
(Europe)
7.00
(0)
BFI-S 1-7 E, A, C, N, O 2005
(0)
49.83
(15.83)
Vocabulary 2012
(0)
HILDA Australia
(Australia)
7.00
(0)
TDA-40 1-7 E, A, C, N, O 2005
(0)
44.51
(16.92)
Vocabulary 2012
(0)
HRS United States
(North America)
4.00
(0)
MIDI 1-4 E, A, C, N, O 2006/8
(0)
71.71
(6.97)
Vocabulary 2010
(0)
LASA The Netherlands
(Europe)
8.39
(9.45)
DPQ 1-3 N 1992
(1.19)
61.46
(15.71)
Vocabulary 1995
(9.09)
MAP United States
(North America)
6.75
(4.53)
NEO-FFI 1-5 E, A, C, N 79.45
(7.32)
Boston Naming Test
MARS United States
(North America)
6.46
(3.86)
NEO-FFI 1-5 N, O 73.60
(2.66)
Boston Naming Test
OCTO-TWIN Sweden
(Europe)
6.21
(2.82)
Eysenck 0-1 E, N 1991
(0)
82.99
(2.66)
Information 1997
ROS United States
(North America)
9.53
(6.42)
NEO-FFI 1-5 E, A, C, N, O 75.87
(7.38)
Boston Naming Test
SATSA Sweden
(Europe)
15.00
(0)
Eysenck 1-5 E, A, C, N, O 1984
(0)
54.77
(9.84)
Information 1999
(0)
Note:
E = Extraversion; A = Agreeableness; C = Conscientiousness; N = Neuroticism; O = Openness. NEO-FFI = 60 item NEO Five Factor Inventory (Costa & McCrae, 1992); IPIP NEO = International Item Pool in Personality NEO (Johnson, 2014); BFI-S = Big Five Inventory, Short Form (German; Hahn et al., 2012); TDA-40 = Trait Descriptive Adjectives-40 (Saucier, 1994); MIDI = The Midlife Development Inventory (Lachman & Weaver, 1997); DPQ = Dutch Personality Questionnaire (Barelds & Luteijn, 2002); Eysenck = Eysenck Personality Questionnaire (Eysenck & Eysenck, 1965). Prediction interval was calculated by taking each participants’ first personality measurement year from their last cognitive ability measurement year. Baseline age is the average participant age at their first personality assessment.
# Write the rendered Table 2 to an HTML file under <local_path>/results/tables
tab2 %>%
  save_kable(file = sprintf("%s/results/tables/tab-2-samples.html", local_path))

1.4 Codebook

Each study has a separate codebook indexing matching, covariate, personality, and outcome variables. Moreover, these codebooks contain information about the original scale of the variable, any recoding of the variable (including binarizing outcomes, changing the scale, and removing missing data), reverse coding of scale variables, categories, etc.

# Fetch the codebook workbook and store it in the working directory under
# its original file name.
url <- "https://github.com/emoriebeck/data-synthesis-tutorial/raw/main/codebooks/crystallized_codebook_10.02.20.xlsx"
destfile2 <- "crystallized_codebook_10.02.20.xlsx"
curl::curl_download(url = url, destfile = destfile2)

# names of every sheet in the codebook workbook
sheets <- excel_sheets(path = destfile2)

# Read one sheet of an Excel workbook, echoing the sheet name first.
#
# x:    the sheet to read (passed to read_xlsx() as `sheet`).
# path: workbook to read from. Defaults to the downloaded codebook file
#       (`destfile2`), so existing calls such as map(sheets, read_fun) behave
#       exactly as before; pass another path to reuse this on other workbooks.
# Returns whatever read_xlsx() returns for that sheet.
read_fun <- function(x, path = destfile2) {
  print(x) # progress trace: shows which sheet is being read
  read_xlsx(path, sheet = x)
}

# One row per workbook sheet: the sheet name in `study` and the sheet's
# contents, as read by read_fun(), in the list-column `codebook`
codebook <- tibble(study = sheets) %>%
  mutate(codebook = map(study, read_fun))
## [1] "Overview"
## [1] "Key"
## [1] "Datasets"
## [1] "Sample"
## [1] "BASE-I"
## [1] "CNLSY"
## [1] "EAS"
## [1] "GSOEP"
## [1] "HILDA"
## [1] "HRS"
## [1] "LASA"
## [1] "RADC-MAP"
## [1] "MARS"
## [1] "OCTO-TWIN"
## [1] "ROS"
## [1] "SATSA"
## [1] "SLS"
## study labels reused later: `studies` holds the short names, `studies_long`
## the display names, and `studies_sp` the display names left-padded with spaces
studies <- c(
  "BASE-I", "CNLSY", "EAS", "GSOEP", "HILDA", "HRS", "LASA",
  "MAP", "MARS", "OCTO-TWIN", "ROS", "SATSA", "SLS"
)
studies_long <- c(
  "BASE", "CNLSY", "EAS", "GSOEP", "HILDA", "HRS", "LASA",
  "MAP", "MARS", "OCTO-Twin", "ROS", "SATSA", "SLS"
)
studies_sp <- c(
  "   BASE", "   CNLSY", "     EAS", "   GSOEP", "   HILDA", "     HRS",
  "    LASA", "     MAP", "    MARS", "OCTO-Twin", "     ROS", "   SATSA",
  "     SLS"
)

# Lookup tables pairing short variable names with display names. Each is a
# subset of the second sheet stored in `codebook`, renamed to a common set of
# column names.

# personality rows (category "pers")
traits <- codebook$codebook[[2]] %>%
  filter(category == "pers") %>%
  select(long_name = Construct, short_name = name)

# outcome rows (category "outcome")
outcomes <- codebook$codebook[[2]] %>%
  filter(category == "outcome") %>%
  select(long_name = Construct, short_name = name)

# rows with category "moder", keeping the old and new model-term labels too
covars <- codebook$codebook[[2]] %>%
  filter(category == "moder") %>%
  select(
    long_name = Construct,
    short_name = name,
    long_term = new_terms,
    short_term = old_terms
  )

# same rows as covars, with the "Unadjusted" label recoded to "None"
moders <- covars %>%
  mutate(long_name = mapvalues(long_name, from = "Unadjusted", to = "None"))

# rows with category "metaMod", with three lengths of term label
stdyModers <- codebook$codebook[[2]] %>%
  filter(category == "metaMod") %>%
  select(
    long_name = Construct,
    short_name = name,
    long_term = new_terms,
    medium_term = short_term,
    short_term = old_terms
  )

# rows whose Category is "Methods"
# NOTE(review): this filters on capitalised `Category`, unlike the lowercase
# `category` used above - confirm the sheet really has both columns
mthds <- codebook$codebook[[2]] %>%
  filter(Category == "Methods") %>%
  select(long_name = Construct, short_name = name, old_name = old_terms)

# One row for "Overall" plus one per entry of studies_long (14 rows):
#   studies  - display name
#   std_text - display name with hyphens removed
#   colors   - colour name / hex code; rows 9-14 repeat the hex codes of rows 2-7
#   lt       - "solid" for the first 8 rows, "dotted" for the remaining 6, so
#              each colour/line-type pair is unique
stdcolors <- tibble(
  studies = c("Overall", studies_long),
  std_text = str_remove_all(studies, "[-]"),
  colors = c(
    "black",
    "#332288", "#88ccee", "#44aa99", "#117733", "#999933", "#ddcc77", "#cc6677",
    "#332288", "#88ccee", "#44aa99", "#117733", "#999933", "#ddcc77"
  ),
  lt = rep(c("solid", "dotted"), times = c(8, 6))
)

# personality waves used, read from the "Table 2" sheet of the tables workbook
p_waves <- read_xlsx(path = destfile, sheet = "Table 2")