Chapter 3 Propensity Score Matching

Study 1 tests whether personality still predicts outcomes following propensity score matching to control for selection bias. The analyses will proceed in several parts: 1. Data cleaning and combining
2. Running the matching procedure
3. Creating Balance Plots and Tables
4. Getting descriptive statistics of the matched and unmatched samples
5. Combining matched and unmatched data across studies into 14 personality characteristics x 14 outcomes x 9 models (one unmoderator + 8 moderated) x 5 imputations
6. Running matched and unmatched models in brms using the High Performance Computing Cluster
7. Recompiling and combining imputations and pulling model terms, random effects, predictions
8. Creating a series of tables and figures describing the results of all the models

3.1 Part 1: Data

First, the data for the matching procedure need to be set up, so we’ll load in the matching, outcome, and personality data, and combine data from the same study in the year into one for each personality-outcome-moderator combination.
### Matching
First, let’s load in the matching data.

3.2 Part 2: Run Matching

Now that we have data, we can run the matching. But first, we need a series of functions to do so.
### Functions

# takes the data and generates combinationsn for the moderators
mod_fun <- function(outcome, imp, Study, year, trait){
  tibble(moderator = c("none", "age", "gender", "race", "parEdu", "grsWages", "parOccPrstg")) %>% #, list(c("parEdu", "grsWages", "parOccPrstg"))
    mutate(psm = pmap(list(outcome, imp, Study, year, moderator, trait),  psm_fun))
}

# function to run the propensity score matching procedure
psm_fun <- function(outcome, imp, Study, year, mod, trait){
  m <- ifelse(length(mod) > 1, "SES", mod)  # indexing the moderator
  # file path
  file <- sprintf("%s_%s_%s_%s_%s_%s.RData", Study, trait, outcome, year, imp, m)
  # check the year to make sure personality was measured in this year
  pw <- (p_waves %>% filter(p_item == trait & study == Study))$Used
  if(!file %in% done & pw == year){
  Ratio <- 4 # set the ratio
  # load the data
  load(sprintf("%s/data/psm_raw/%s_%s_%s_%s_%s.RData", wd, Study, year, outcome, trait, imp))
  # short function to find all missing columns 
  not_all_na <- function(x) any(!is.na(x))
  # remove columns with all missing data
  x <- x %>% filter(!is.na(o_value)) %>% select_if(not_all_na) %>% select(-one_of(c("study", "p_year", ".id")), -one_of(c(unique(p_waves$p_item), "NegAff")))
  # find columns with too high percentage missing data 
  # typically happens due to reshaping
  sum_na <- function(x) sum(is.na(x))/length(x)*100
  min <- ifelse(Study %in% c("GSOEP", "HILDA", "BHPS"), 97, ifelse(Study == "HRS", 100, 55))
  pos <- which(apply(x, 2, sum_na) < min)
  x <- x %>% select(one_of(names(pos)))
  # keep only complete cases for psm
  x <- x[complete.cases(x),]
  # make sure all data has variance 
  has_var <- function(x) if(class(x) %in% c("factor", "character")) !all(duplicated(x)[-1L]) else sd(x, na.rm = T) != 0 
  x <- x %>% select_if(has_var) 
  x <- data.frame(unclass(x)) # unclass because tibbles 
  # vector of variables not to match on
  no.match <- c("yearBrth", "o_value", "weight", "height", "SID", unique(p_waves$p_item), mod)
  # vector of variables to match on
  to.match <- colnames(x)[-which(colnames(x) %in% no.match)]
  # matching formula
  match.formula <- as.formula(paste("o_value ~ ", paste(to.match, collapse=" + "), sep = " "))
  # call the matching procedure
  y <- matchit(match.formula, data = x, method = "nearest", ratio = Ratio, caliper = .25)
  # get the psm df
  d <- psm_df(y)
  # get the balance plots 
  u <- unbalanced_fun(y)
  # save the matching data
  save(d, file = sprintf("%s/data/psm_matched/%s_%s_%s_%s_%s_%s.RData", wd, Study, trait, outcome, year, imp, m))
  # save the balance plots
  save(u, file = sprintf("%s/results/psm/bal_tabs/%s_%s_%s_%s_%s_%s.RData", wd, Study, trait, outcome, year, imp, m))
  # clean up the environment
  rm(list = c("y", "d", "u", "x"))
  gc()}
  return(NULL)
}

# creates the matched data frames
psm_df <- function(psm){
  data.frame(match.data(psm)) %>% tbl_df %>% select(SID, o_value, distance)
}

# this function creates the balance table of the psm weights and filters 
# the results into variables the matching procedure did not fix and 
# those that it did
unbalanced_fun <- function(x){
  #x <- bal.table(psm)
  y <- summary(x, standardize = T)
  raw <- y$sum.all %>%
    mutate(var = rownames(.)) %>%
    select(var, `Means Treated`, `Means Control`, `Std. Mean Diff.`)
  smalldiff.var <- raw %>% filter(abs(`Std. Mean Diff.`) <= .05)
  matched <- y$sum.matched %>%
    mutate(var = rownames(.)) %>%
    select(var, `Means Treated`, `Means Control`, `Std. Mean Diff.`)
  unbalanced.var <- matched %>% filter(abs(`Std. Mean Diff.`) >= .2)
  return(list(raw = raw, matched = matched, 
              unbalanced = unbalanced.var,smalldiff = smalldiff.var))
}

3.2.1 Run Models

The code below will activate and the matching procedure described in the functions above.

The number of, a sample list of, and a sample file of the data and balance files the above functions create is below:

## 51523 total propensity score matched data sets.
##  [1] "Add Health_A_chldbrth_1995_1_age.RData"         "Add Health_A_chldbrth_1995_1_gender.RData"      "Add Health_A_chldbrth_1995_1_grsWages.RData"   
##  [4] "Add Health_A_chldbrth_1995_1_none.RData"        "Add Health_A_chldbrth_1995_1_parEdu.RData"      "Add Health_A_chldbrth_1995_1_parOccPrstg.RData"
##  [7] "Add Health_A_chldbrth_1995_1_race.RData"        "Add Health_A_chldbrth_1995_1_SES.RData"         "Add Health_A_chldbrth_1995_2_age.RData"        
## [10] "Add Health_A_chldbrth_1995_2_gender.RData"      "Add Health_A_chldbrth_1995_2_grsWages.RData"    "Add Health_A_chldbrth_1995_2_none.RData"       
## [13] "Add Health_A_chldbrth_1995_2_parEdu.RData"      "Add Health_A_chldbrth_1995_2_parOccPrstg.RData" "Add Health_A_chldbrth_1995_2_race.RData"
## # A tibble: 7,869 x 3
##    SID   o_value distance
##    <fct> <fct>      <dbl>
##  1 1602  1          0.713
##  2 1603  0          0.625
##  3 1705  0          0.609
##  4 2304  0          0.728
##  5 8303  0          0.713
##  6 8604  1          0.741
##  7 8606  1          0.643
##  8 9103  1          0.692
##  9 9206  0          0.656
## 10 9403  1          0.630
## # … with 7,859 more rows
## 51670 total propensity score matched balance tables.
##  [1] "Add Health_A_chldbrth_1995_1_age.RData"         "Add Health_A_chldbrth_1995_1_gender.RData"      "Add Health_A_chldbrth_1995_1_grsWages.RData"   
##  [4] "Add Health_A_chldbrth_1995_1_none.RData"        "Add Health_A_chldbrth_1995_1_parEdu.RData"      "Add Health_A_chldbrth_1995_1_parOccPrstg.RData"
##  [7] "Add Health_A_chldbrth_1995_1_race.RData"        "Add Health_A_chldbrth_1995_1_SES.RData"         "Add Health_A_chldbrth_1995_2_age.RData"        
## [10] "Add Health_A_chldbrth_1995_2_gender.RData"      "Add Health_A_chldbrth_1995_2_grsWages.RData"    "Add Health_A_chldbrth_1995_2_none.RData"       
## [13] "Add Health_A_chldbrth_1995_2_parEdu.RData"      "Add Health_A_chldbrth_1995_2_parOccPrstg.RData" "Add Health_A_chldbrth_1995_2_race.RData"
## $raw
##         var Means Treated Means Control Std. Mean Diff.
## 1  distance  7.229429e-01  6.974633e-01      0.37021081
## 2       age  3.828463e+01  3.576200e+01      0.13025100
## 3  drVisits  9.123587e+00  9.438600e+00     -0.03188327
## 4  exercise  2.457522e+00  2.406367e+00      0.04532784
## 5  grsWages  3.464262e+04  3.731640e+04     -0.07724024
## 6 satHealth  7.167131e+00  7.031542e+00      0.08087773
## 
## $matched
##         var Means Treated Means Control Std. Mean Diff.
## 1  distance      0.715749  6.978787e-01      0.25964913
## 2       age     37.184545  3.575470e+01      0.07382703
## 3  drVisits      9.334570  9.438977e+00     -0.01056727
## 4  exercise      2.453304  2.406168e+00      0.04176619
## 5  grsWages  35534.978656  3.608431e+04     -0.01586910
## 6 satHealth      7.150223  7.030899e+00      0.07117581
## 
## $unbalanced
##        var Means Treated Means Control Std. Mean Diff.
## 1 distance      0.715749     0.6978787       0.2596491
## 
## $smalldiff
##           var Means Treated Means Control Std. Mean Diff.
## 1    drVisits     9.1235867     9.4386003    -0.031883271
## 2    exercise     2.4575219     2.4063669     0.045327843
## 3       satHH     3.6968789     3.6912472     0.015429692
## 4    SRhealth     3.6124892     3.5791317     0.045109309
## 5 religiosity     1.1911660     1.1958667    -0.005129461
## 6 disability0     0.8890122     0.8808738     0.025907658

3.3 Part 3: Balance

Once we’ve run the propensity score matching procedure, we’re ready to check the balance of the procedure using balance plots and tables.
### Functions
The functions below will load create balance plots and tables for each study-personality-outcome-moderator-imputation combination in the matched and unmatched data sets.
#### Data
These functions will load in the matched and unmatched data.

3.3.0.2 Balance Plots

These functions will calculate Cohen’s d across groups in the matched and unmatched data and run the balance plots.

# identify variables that aren't factors
not_factor <- function(x) !is.factor(x)

# calculate cohen's d
cohens_d <- function(x, y) {
    x <- x$value; y <- y$value
    lx <- length(x)- 1; ly <- length(y)- 1
    md  <- mean(x, na.rm = T) - mean(y, na.rm = T)        ## mean difference (numerator)
    csd <- lx * var(x, na.rm = T) + ly * var(y, na.rm = T)
    csd <- csd/(lx + ly); csd <- sqrt(csd)                     ## common sd computation
    cd  <- md/csd                        ## cohen's d
    return(cd)
}

# create the plots
plot_fun <- function(d, outcome, trait, mod){
  ## setup variables for titles, prettified ##
  # setup nicer names for plotting
  o <- mapvalues(outcome, outcomes$short_name, outcomes$long_name, warn_missing = F)
  md <- mapvalues(mod, c("none", moderators$short_name), c("No", moderators$long_name), warn_missing = F)
  trt <- mapvalues(trait, traits$short_name, traits$long_name, warn_missing = F)
  ttl <- sprintf("Outcome = %s, %s Set, %s Moderation", o, trt, md) 
  
  ## load matched and unmatched data ##
  d1 <- d %>%
    mutate(m = map(m_file, m_read_fun),
           um = map(um_file, um_read_fun)) %>%
    select(-m_file, -year, -um_file) 
  
  ## clean matched data ##
  m <- d1 %>% 
    mutate(m = map2(m, um, ~(.x) %>% left_join(.y %>% select(-o_value)))) %>%
    select(-um) %>%
    mutate(type = "Matched",
           m = map(m, ~(.) %>% 
                   gather(variable, value, -SID, -o_value, -distance) %>%
                   select(-SID, -distance) %>% 
                   group_by(o_value, variable) %>% 
                   nest() %>% ungroup() %>% 
                   spread(o_value, data))) %>%
    unnest(m) %>%
    
    ## calculate cohen's d ##
    
    mutate(d = map2_dbl(`0`, `1`, cohens_d)) %>%
    select(-`0`, -`1`)
  
  ## clean unmatched data ##
  um <- d1 %>% 
    select(study, imp, um) %>% 
    mutate(type = "Unmatched",
           um = map(um, ~(.) %>% 
                   gather(variable, value, -SID, -o_value) %>%
                   select(-SID) %>% 
                   group_by(o_value, variable) %>% 
                   nest() %>% ungroup() %>% 
                   spread(o_value, data))) %>%
    unnest(um) %>%
    
    ## calculate cohen's d ##
    
    mutate(d = map2_dbl(`0`, `1`, cohens_d)) %>%
    select(-`0`, -`1`)
  
  ## join together d's and plot them for all studies ##
  std <- ceiling(length(unique(um$study))/2)
  p <- um %>% full_join(m) %>%
    group_by(study, variable, type) %>%
    summarize(d = mean(d, na.rm = T)) %>%
    ungroup() %>%
    mutate(study = mapvalues(study, studies, studies_long, warn_missing = F)) %>%
    filter(!variable %in% c(mod, "yearBrth")) %>%
    ggplot(aes(x = variable, y = d)) +
      scale_shape_manual(values = c(19,1)) +
      scale_y_continuous(limits = c(-1.5, 1.5), breaks = seq(-1, 1, 1)) +
      geom_hline(aes(yintercept = 0), linetype = "dashed", size = .25) +
      geom_point(aes(shape = type), size = 1.5) +
      labs(y = "Cohen's d", x = NULL, shape = NULL, title = ttl) +
      facet_wrap(study~., ncol = 2, scales = "free") +
      theme_classic() +
      theme(legend.position = "bottom"
            , axis.text.x = element_text(face = "bold", size = rel(.7), angle = 45, hjust = 1)
            , axis.text.y = element_text(face = "bold", size = rel(1.2))
            , axis.title = element_text(face = "bold", size = rel(1.2)),
          strip.text = element_text(face = "bold")
          , plot.title = element_text(face = "bold", size = rel(1.2), hjust = .5)
          , legend.text = element_text(face = "bold")
          , legend.title = element_text(face = "bold", size = rel(1.2)))
  ggsave(p, filename = sprintf("%s/results/psm/bal_plots/%s/%s_%s.pdf", wd, mod, outcome, trait), width = 10, height = 2.5*std)
  # ggsave(p, filename = sprintf("%s/results/psm/bal_plots/png/%s_%s_%s.png", wd, mod, outcome, trait), width = 10, height = 2.5*std)
  return(T)
} 

3.3.1 Run

Run the above functions to create the balance tables and plots.

## # A tibble: 7,660 x 6
##    um_file                            study      year  Outcome  Trait imp  
##    <chr>                              <chr>      <chr> <chr>    <chr> <chr>
##  1 Add Health_1995_chldbrth_A_1.RData Add Health 1995  chldbrth A     1    
##  2 Add Health_1995_chldbrth_A_2.RData Add Health 1995  chldbrth A     2    
##  3 Add Health_1995_chldbrth_A_3.RData Add Health 1995  chldbrth A     3    
##  4 Add Health_1995_chldbrth_A_4.RData Add Health 1995  chldbrth A     4    
##  5 Add Health_1995_chldbrth_A_5.RData Add Health 1995  chldbrth A     5    
##  6 Add Health_1995_chldbrth_C_1.RData Add Health 1995  chldbrth C     1    
##  7 Add Health_1995_chldbrth_C_2.RData Add Health 1995  chldbrth C     2    
##  8 Add Health_1995_chldbrth_C_3.RData Add Health 1995  chldbrth C     3    
##  9 Add Health_1995_chldbrth_C_4.RData Add Health 1995  chldbrth C     4    
## 10 Add Health_1995_chldbrth_C_5.RData Add Health 1995  chldbrth C     5    
## # … with 7,650 more rows
## # A tibble: 51,523 x 7
##    m_file                                         study      Trait Outcome  year  imp   Moderator  
##    <chr>                                          <chr>      <chr> <chr>    <chr> <chr> <chr>      
##  1 Add Health_A_chldbrth_1995_1_age.RData         Add Health A     chldbrth 1995  1     age        
##  2 Add Health_A_chldbrth_1995_1_gender.RData      Add Health A     chldbrth 1995  1     gender     
##  3 Add Health_A_chldbrth_1995_1_grsWages.RData    Add Health A     chldbrth 1995  1     grsWages   
##  4 Add Health_A_chldbrth_1995_1_none.RData        Add Health A     chldbrth 1995  1     none       
##  5 Add Health_A_chldbrth_1995_1_parEdu.RData      Add Health A     chldbrth 1995  1     parEdu     
##  6 Add Health_A_chldbrth_1995_1_parOccPrstg.RData Add Health A     chldbrth 1995  1     parOccPrstg
##  7 Add Health_A_chldbrth_1995_1_race.RData        Add Health A     chldbrth 1995  1     race       
##  8 Add Health_A_chldbrth_1995_1_SES.RData         Add Health A     chldbrth 1995  1     SES        
##  9 Add Health_A_chldbrth_1995_2_age.RData         Add Health A     chldbrth 1995  2     age        
## 10 Add Health_A_chldbrth_1995_2_gender.RData      Add Health A     chldbrth 1995  2     gender     
## # … with 51,513 more rows

3.4 Part 4: Descriptives

Next, let’s get the descriptives of the matched and unmatched samples, including sample sizes, average age and gender breakdown.

Because there are so many different sets, I’ll create separate tables for each outcome that includes info for each study and trait. But for the manuscript, I’ll also create a table that has ranges for each outcome across traits for simplicity.

# slightly different function for unnmatched data reading
# in order to incorporate age and gender.
um_read_fun <- function(file){
  load(sprintf("%s/data/psm_raw/%s", wd, file))
  x %>% tbl_df %>% 
    mutate(o_value = as.numeric(as.character(o_value)),
           age = as.numeric(as.character(age))) %>%
    select(SID, o_value, age, gender) 
} 

desc_fun <- function(d, outcome){
  o <- mapvalues(outcome, outcomes$short_name, outcomes$long_name, warn_missing = F)
  # read in the matched and unmatched data
  d1 <- d %>%
    mutate(m = map(m_file, m_read_fun),
           um = map(um_file, um_read_fun)) %>%
    select(-m_file, -year, -um_file) 
  
  ## clean matched data ##
  m <- d1 %>% 
    mutate(m = map2(m, um, ~(.x) %>% left_join(.y %>% select(-o_value)))) %>%
    select(-um) %>%
    unnest(m) %>%
    mutate(type = "Matched")
  
  ## clean unmatched data ##
  um <- d1 %>%  
    select(-m) %>% 
    unnest(um) %>%
    mutate(type = "Unmatched")
  
  rm(d1)
  
  d1 <- m %>% 
    full_join(um %>% mutate(o_value = factor(o_value))) 
  # sample sizes of matched and unmatched samples
  d1 %>%
    group_by(study, Trait, o_value, type) %>%
    tally() %>%
    ungroup() %>%
    spread(o_value, n) %>%
    mutate(freq = sprintf("%i (%i)", `0`, `1`)) %>%
    select(study, Trait, type, freq) %>%
  # percentages of gender
    full_join(
      d1 %>% filter(!is.na(gender)) %>%
        group_by(study, Trait, type, gender) %>%
        summarize(Gender = n()) %>% 
        group_by(study, Trait, type) %>%
        summarize(Gender = (Gender[gender == 1]/(sum(Gender))*100))
  # mean and sd of age
      ) %>% full_join(
      d1 %>% group_by(study, Trait, type) %>%
        summarize_at(vars(age), lst(m = mean, sd = sd), na.rm = T) %>%
        ungroup() 
      ) %>%
    # spread out for the table
    pivot_wider(values_from = c("freq", "Gender", "m", "sd")
                , names_from = "type"
                , names_sep = "_") %>%
    # reorder the columns
    select(study, Trait, contains("freq"), contains("m_"), contains("sd"), contains("gender")) %>%
    # reformat the Trait names
    mutate(Trait = factor(Trait, levels = traits$short_name, labels = traits$long_name)) %>%
    arrange(study, Trait) %>% # reorder the rows
    # create this wild table
    kable(.
          , "html"
          , digits = 1
          , col.names = c("Study", "Trait", rep(c("Matched", "Raw"), times = 4))
          , align = c("r", "r", rep("c", 8))
          , caption = sprintf("Descriptive Statistics of Matched and Raw Samples for Those Who Experienced %s", o)) %>% 
    kable_styling(full_width = F) %>%
    collapse_rows(1, valign = "top") %>%
    add_header_above(c(" " = 2, "Frequency" = 2, "M" = 2, "SD" = 2, "% Women" = 2)) %>%
    add_header_above(c(" " = 4, "Age at Baseline" = 4, " " = 2)) %>%
    footnote("Frequency = Experienced (Did not Experience); M = Mean age at baseline; SD = Standard deviation of age at baseline"
             , escape = F) %>%
    save_kable(., file = sprintf("%s/results/psm/descriptives/%s.html", wd, outcome))
  
  # return the range of sample sizes for each study and outcome.
  d2 <- d1 %>%
    group_by(study, Trait, o_value, type) %>%
    tally() %>%
    ungroup() %>%
    group_by(study, o_value, type) %>%
    summarize_at(vars(n), lst(min, max), na.rm = T) %>%
    mutate(range = sprintf("%i-%i", min, max)) %>%
    ungroup() %>%
    select(-min, -max) %>%
    spread(o_value, range) %>%
    mutate(freq = sprintf("%s (%s)", `0`, `1`)) %>%
    select(study, type, freq) %>%
    full_join(
      d1 %>% filter(!is.na(gender)) %>%
        group_by(study, Trait, type, gender) %>%
        summarize(Gender = n()) %>% 
        group_by(study, Trait, type) %>%
        summarize(Gender = (Gender[gender == 1]/(sum(Gender))*100)) %>%
        group_by(study, type) %>%
        summarize_at(vars(Gender), lst(min, max), na.rm = T) %>%
        mutate(Gender = sprintf("%.1f-%.1f", min, max)) %>%
        ungroup() %>%
        select(-min, -max)
      ) %>% full_join(
      d1 %>% group_by(study, Trait, type) %>%
        summarize_at(vars(age), lst(m = mean, sd = sd), na.rm = T) %>%
        ungroup() %>%
        group_by(study, type) %>%
        summarize_at(vars(m, sd), lst(min, max), na.rm = T) %>%
        mutate(m = sprintf("%.1f-%.1f", m_min, m_max),
               sd = sprintf("%.1f-%.1f", sd_min, sd_max)) %>%
        ungroup() %>%
        select(-m_min, -m_max, -sd_max, -sd_min)
      ) %>%
    pivot_wider(values_from = c("freq", "Gender", "m", "sd")
                , names_from = "type"
                , names_sep = "_") %>%
    select(study, contains("freq"), contains("m_"), contains("sd"), contains("Gender")) 
    rm(d1)
    gc()
    return(d2)
}
load(sprintf("%s/results/psm/descriptives/psm_comb.RData", wd))
psm_comb %>%
  select(-Moderator) %>%
  mutate(Outcome = factor(Outcome, levels = outcomes$short_name, labels = outcomes$long_name)) %>%
  # arrange(Outcome)
  unnest(data) %>%
  arrange(Outcome, study) %>%
  select(-Outcome) %>%
  kable(.
          , "html"
          , digits = 1
          , col.names = c("Study", rep(c("Matched", "Raw"), times = 4))
          , align = c("r", rep("c", 8))
          , caption = sprintf("Descriptive Statistics of Matched and Raw Samples for Those Who Experienced Outcomes")) %>% 
    kable_styling(full_width = F) %>%
    add_header_above(c(" " = 1, "Frequency" = 2, "M" = 2, "SD" = 2, "% Women" = 2)) %>%
    add_header_above(c(" " = 3, "Age at Baseline" = 4, " " = 2)) %>%
    kableExtra::group_rows("Mortality", 1, 8, label_row_css = NULL) %>%
    kableExtra::group_rows("Major Health Event", 9, 18, label_row_css = NULL) %>%
    kableExtra::group_rows("Mental Health Event", 19, 28, label_row_css = NULL) %>%
    kableExtra::group_rows("Child Birth", 29, 36, label_row_css = NULL) %>%
    kableExtra::group_rows("Move in with a partner", 37, 44, label_row_css = NULL) %>%
    kableExtra::group_rows("Marriage", 45, 53, label_row_css = NULL) %>%
    kableExtra::group_rows("Divorce", 54, 63, label_row_css = NULL) %>%
    kableExtra::group_rows("Child Moves Out", 64, 67, label_row_css = NULL) %>%
    kableExtra::group_rows("Higher Education", 68, 76, label_row_css = NULL) %>%
    kableExtra::group_rows("First Job", 77, 81, label_row_css = NULL) %>%
    kableExtra::group_rows("Unemployment", 82, 91, label_row_css = NULL) %>%
    kableExtra::group_rows("Retirement", 92, 99, label_row_css = NULL) %>%
    kableExtra::group_rows("Volunteering", 100, 107, label_row_css = NULL) %>%
    kableExtra::group_rows("Criminality", 108, 110, label_row_css = NULL) %>%
    
    footnote("All results presented as a range. Frequency = Experienced (Did not Experience); M = Mean age at baseline; SD = Standard deviation of age at baseline"
             , escape = F) %>%
    save_kable(., file = sprintf("%s/results/psm/descriptives/ranges.html", wd))