Split a data.frame by any number of columns and apply a function to each subset.

stratiply(
    data,
    f,
    by,
    ...
)

Arguments

data

A data.frame.

f

A function that takes a data.frame as an argument.

by

A vector of quoted/unquoted columns, positions, and/or tidyselect::select_helpers identifying the columns to split data by.

...

Additional arguments passed to f.

Value

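A list containing the result of f for each subset, named by the values of the by column(s). When by selects more than one column, the list is nested, one level per column (e.g. $Female$`TRUE`).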

Author

Alex Zajichek

Examples

#Unquoted selection
heart_disease %>%
    stratiply(
        head,
        Sex
    )
#> $Female
#> # A tibble: 6 × 8
#>     Age ChestPain      BP Cholesterol BloodSugar MaximumHR ExerciseInducedAngina
#>   <dbl> <fct>       <dbl>       <dbl> <lgl>          <dbl> <fct>                
#> 1    41 Atypical a…   130         204 FALSE            172 No                   
#> 2    62 Asymptomat…   140         268 FALSE            160 No                   
#> 3    57 Asymptomat…   120         354 FALSE            163 Yes                  
#> 4    56 Atypical a…   140         294 FALSE            153 No                   
#> 5    48 Non-angina…   130         275 FALSE            139 No                   
#> 6    58 Typical an…   150         283 TRUE             162 No                   
#> # ℹ 1 more variable: HeartDisease <fct>
#> 
#> $Male
#> # A tibble: 6 × 8
#>     Age ChestPain      BP Cholesterol BloodSugar MaximumHR ExerciseInducedAngina
#>   <dbl> <fct>       <dbl>       <dbl> <lgl>          <dbl> <fct>                
#> 1    63 Typical an…   145         233 TRUE             150 No                   
#> 2    67 Asymptomat…   160         286 FALSE            108 Yes                  
#> 3    67 Asymptomat…   120         229 FALSE            129 Yes                  
#> 4    37 Non-angina…   130         250 FALSE            187 No                   
#> 5    56 Atypical a…   120         236 FALSE            178 No                   
#> 6    63 Asymptomat…   130         254 FALSE            147 No                   
#> # ℹ 1 more variable: HeartDisease <fct>
#> 

#Select helper
heart_disease %>%
    stratiply(
        f = head,
        by = starts_with("S")
    )
#> $Female
#> # A tibble: 6 × 8
#>     Age ChestPain      BP Cholesterol BloodSugar MaximumHR ExerciseInducedAngina
#>   <dbl> <fct>       <dbl>       <dbl> <lgl>          <dbl> <fct>                
#> 1    41 Atypical a…   130         204 FALSE            172 No                   
#> 2    62 Asymptomat…   140         268 FALSE            160 No                   
#> 3    57 Asymptomat…   120         354 FALSE            163 Yes                  
#> 4    56 Atypical a…   140         294 FALSE            153 No                   
#> 5    48 Non-angina…   130         275 FALSE            139 No                   
#> 6    58 Typical an…   150         283 TRUE             162 No                   
#> # ℹ 1 more variable: HeartDisease <fct>
#> 
#> $Male
#> # A tibble: 6 × 8
#>     Age ChestPain      BP Cholesterol BloodSugar MaximumHR ExerciseInducedAngina
#>   <dbl> <fct>       <dbl>       <dbl> <lgl>          <dbl> <fct>                
#> 1    63 Typical an…   145         233 TRUE             150 No                   
#> 2    67 Asymptomat…   160         286 FALSE            108 Yes                  
#> 3    67 Asymptomat…   120         229 FALSE            129 Yes                  
#> 4    37 Non-angina…   130         250 FALSE            187 No                   
#> 5    56 Atypical a…   120         236 FALSE            178 No                   
#> 6    63 Asymptomat…   130         254 FALSE            147 No                   
#> # ℹ 1 more variable: HeartDisease <fct>
#> 
    
#Use additional arguments for the function
heart_disease %>%
  stratiply(
        f = glm,
        by = Sex,
        formula = HeartDisease ~ .,
        family = "binomial"
  )
#> $Female
#> 
#> Call:  .f(formula = ..1, family = "binomial", data = .x[[i]])
#> 
#> Coefficients:
#>               (Intercept)                        Age  
#>                -26.201202                   0.063173  
#>  ChestPainAtypical angina  ChestPainNon-anginal pain  
#>                 16.672159                  14.881710  
#>     ChestPainAsymptomatic                         BP  
#>                 18.613467                   0.045810  
#>               Cholesterol             BloodSugarTRUE  
#>                  0.002053                   1.766565  
#>                 MaximumHR   ExerciseInducedAnginaYes  
#>                 -0.021872                   1.040728  
#> 
#> Degrees of Freedom: 96 Total (i.e. Null);  87 Residual
#> Null Deviance:	    110.7 
#> Residual Deviance: 56.66 	AIC: 76.66
#> 
#> $Male
#> 
#> Call:  .f(formula = ..1, family = "binomial", data = .x[[i]])
#> 
#> Coefficients:
#>               (Intercept)                        Age  
#>                  0.141398                   0.024326  
#>  ChestPainAtypical angina  ChestPainNon-anginal pain  
#>                 -0.470775                   0.107361  
#>     ChestPainAsymptomatic                         BP  
#>                  1.552618                   0.011718  
#>               Cholesterol             BloodSugarTRUE  
#>                  0.009023                  -0.343345  
#>                 MaximumHR   ExerciseInducedAnginaYes  
#>                 -0.037817                   0.524487  
#> 
#> Degrees of Freedom: 205 Total (i.e. Null);  196 Residual
#> Null Deviance:	    283.2 
#> Residual Deviance: 191.5 	AIC: 211.5
#> 

#Use mixed selections to split by desired columns
heart_disease %>%
  stratiply(
        f = glm,
        by = c(Sex, where(is.logical)),
        formula = HeartDisease ~ Age,
        family = "binomial"
  ) 
#> $Female
#> $Female$`FALSE`
#> 
#> Call:  .f(formula = ..1, family = "binomial", data = .x[[i]])
#> 
#> Coefficients:
#> (Intercept)          Age  
#>    -5.17802      0.06904  
#> 
#> Degrees of Freedom: 84 Total (i.e. Null);  83 Residual
#> Null Deviance:	    90.33 
#> Residual Deviance: 84.84 	AIC: 88.84
#> 
#> $Female$`TRUE`
#> 
#> Call:  .f(formula = ..1, family = "binomial", data = .x[[i]])
#> 
#> Coefficients:
#> (Intercept)          Age  
#>     4.29409     -0.07303  
#> 
#> Degrees of Freedom: 11 Total (i.e. Null);  10 Residual
#> Null Deviance:	    16.64 
#> Residual Deviance: 15.96 	AIC: 19.96
#> 
#> 
#> $Male
#> $Male$`FALSE`
#> 
#> Call:  .f(formula = ..1, family = "binomial", data = .x[[i]])
#> 
#> Coefficients:
#> (Intercept)          Age  
#>    -3.58959      0.07275  
#> 
#> Degrees of Freedom: 172 Total (i.e. Null);  171 Residual
#> Null Deviance:	    236.8 
#> Residual Deviance: 220.3 	AIC: 224.3
#> 
#> $Male$`TRUE`
#> 
#> Call:  .f(formula = ..1, family = "binomial", data = .x[[i]])
#> 
#> Coefficients:
#> (Intercept)          Age  
#>    -4.28261      0.07483  
#> 
#> Degrees of Freedom: 32 Total (i.e. Null);  31 Residual
#> Null Deviance:	    45.72 
#> Residual Deviance: 43.66 	AIC: 47.66
#> 
#>