Splitting cohorts

For this example we’ll use the Eunomia synthetic data from the CDMConnector package.

# Connect to a DuckDB database holding the Eunomia synthetic data.
# eunomiaDir() is namespaced like the other calls in this chunk, so the
# example does not rely on CDMConnector being attached.
con <- DBI::dbConnect(duckdb::duckdb(), dbdir = CDMConnector::eunomiaDir())

# Create the cdm reference. Both the CDM tables and the write location use the
# "main" schema; "my_study_" is passed as the write prefix.
cdm <- CDMConnector::cdmFromCon(
  con,
  cdmSchema = "main",
  writeSchema = "main",
  writePrefix = "my_study_"
)

Let’s start by creating two drug cohorts, one for users of diclofenac and another for users of acetaminophen.

# Build one cohort per named concept set and store the result in the cdm
# reference under the name "medications".
cdm$medications <- conceptCohort(
  cdm = cdm,
  conceptSet = list(
    diclofenac = 1124300,
    acetaminophen = 1127433
  ),
  name = "medications"
)

# Number of records and subjects in each cohort.
cohortCount(cdm$medications)
#> # A tibble: 2 × 3
#>   cohort_definition_id number_records number_subjects
#>                  <int>          <int>           <int>
#> 1                    1           9365            2580
#> 2                    2            830             830

# Cohort names and metadata; here acetaminophen has id 1 and diclofenac id 2.
settings(cdm$medications)
#> # A tibble: 2 × 4
#>   cohort_definition_id cohort_name   cdm_version vocabulary_version
#>                  <int> <chr>         <chr>       <chr>             
#> 1                    1 acetaminophen 5.3         v5.0 18-JAN-19    
#> 2                    2 diclofenac    5.3         v5.0 18-JAN-19

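We can stratify cohorts based on specified columns using the function stratifyCohorts(). In this example, let’s stratify the medications cohort by age group and by sex. Because each column is passed as a separate element of strata, a new cohort is created for each sex and for each age group, rather than for each combination of the two.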

# Add the age group and sex columns, then create one new cohort per level of
# each strata column. "sex" and "age_group" are separate elements of `strata`,
# so cohorts are split by sex and by age group, not by their combination.
cdm$stratified <- cdm$medications |>
  addAge(
    ageGroup = list(
      "Child" = c(0, 17),
      # Named after its c(18, 64) bounds so the label does not overlap with
      # "65 and Over".
      "18 to 64" = c(18, 64),
      "65 and Over" = c(65, Inf)
    )
  ) |>
  addSex(name = "stratified") |>
  stratifyCohorts(strata = list("sex", "age_group"), name = "stratified")

# Number of records and subjects in each stratified cohort.
cohortCount(cdm$stratified)
#> # A tibble: 10 × 3
#>    cohort_definition_id number_records number_subjects
#>                   <int>          <int>           <int>
#>  1                    1           4718            2264
#>  2                    2           4647            2215
#>  3                    3            435             435
#>  4                    4            395             395
#>  5                    5           5857            2331
#>  6                    6            716             397
#>  7                    7           2792            1751
#>  8                    8            830             830
#>  9                    9              0               0
#> 10                   10              0               0

# Each new cohort is named after its target cohort and stratum level.
settings(cdm$stratified)
#> # A tibble: 10 × 10
#>    cohort_definition_id cohort_name          target_cohort_id target_cohort_name
#>                   <int> <chr>                           <int> <chr>             
#>  1                    1 acetaminophen_female                1 acetaminophen     
#>  2                    2 acetaminophen_male                  1 acetaminophen     
#>  3                    3 diclofenac_female                   2 diclofenac        
#>  4                    4 diclofenac_male                     2 diclofenac        
#>  5                    5 acetaminophen_18_to…                1 acetaminophen     
#>  6                    6 acetaminophen_65_an…                1 acetaminophen     
#>  7                    7 acetaminophen_child                 1 acetaminophen     
#>  8                    8 diclofenac_18_to_64                 2 diclofenac        
#>  9                    9 diclofenac_65_and_o…                2 diclofenac        
#> 10                   10 diclofenac_child                    2 diclofenac        
#> # ℹ 6 more variables: cdm_version <chr>, vocabulary_version <chr>,
#> #   target_cohort_table_name <chr>, strata_columns <chr>, sex <chr>,
#> #   age_group <chr>