Split a data.frame by any number of columns and apply a function to each subset.

stratiply(
    data,
    f,
    by,
    ...
)

Arguments

data

A data.frame.

f

A function that takes a data.frame as an argument.

by

A vector of quoted/unquoted columns, positions, and/or tidyselect::select_helpers.

...

Additional arguments passed to f.

Value

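A list containing the result of f for each subset, with elements named by the levels of the splitting column. When splitting by more than one column, the list is nested, with one level of nesting per column.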

Author

Alex Zajichek

Examples

#Unquoted selection
heart_disease %>%
    stratiply(
        head,
        Sex
    )
#> $Female
#> # A tibble: 6 × 8
#>     Age ChestPain           BP Cholesterol BloodSugar MaximumHR Exerci…¹ Heart…²
#>   <dbl> <fct>            <dbl>       <dbl> <lgl>          <dbl> <fct>    <fct>  
#> 1    41 Atypical angina    130         204 FALSE            172 No       No     
#> 2    62 Asymptomatic       140         268 FALSE            160 No       Yes    
#> 3    57 Asymptomatic       120         354 FALSE            163 Yes      No     
#> 4    56 Atypical angina    140         294 FALSE            153 No       No     
#> 5    48 Non-anginal pain   130         275 FALSE            139 No       No     
#> 6    58 Typical angina     150         283 TRUE             162 No       No     
#> # … with abbreviated variable names ¹​ExerciseInducedAngina, ²​HeartDisease
#> 
#> $Male
#> # A tibble: 6 × 8
#>     Age ChestPain           BP Cholesterol BloodSugar MaximumHR Exerci…¹ Heart…²
#>   <dbl> <fct>            <dbl>       <dbl> <lgl>          <dbl> <fct>    <fct>  
#> 1    63 Typical angina     145         233 TRUE             150 No       No     
#> 2    67 Asymptomatic       160         286 FALSE            108 Yes      Yes    
#> 3    67 Asymptomatic       120         229 FALSE            129 Yes      Yes    
#> 4    37 Non-anginal pain   130         250 FALSE            187 No       No     
#> 5    56 Atypical angina    120         236 FALSE            178 No       No     
#> 6    63 Asymptomatic       130         254 FALSE            147 No       Yes    
#> # … with abbreviated variable names ¹​ExerciseInducedAngina, ²​HeartDisease
#> 

#Select helper
heart_disease %>%
    stratiply(
        f = head,
        by = starts_with("S")
    )
#> $Female
#> # A tibble: 6 × 8
#>     Age ChestPain           BP Cholesterol BloodSugar MaximumHR Exerci…¹ Heart…²
#>   <dbl> <fct>            <dbl>       <dbl> <lgl>          <dbl> <fct>    <fct>  
#> 1    41 Atypical angina    130         204 FALSE            172 No       No     
#> 2    62 Asymptomatic       140         268 FALSE            160 No       Yes    
#> 3    57 Asymptomatic       120         354 FALSE            163 Yes      No     
#> 4    56 Atypical angina    140         294 FALSE            153 No       No     
#> 5    48 Non-anginal pain   130         275 FALSE            139 No       No     
#> 6    58 Typical angina     150         283 TRUE             162 No       No     
#> # … with abbreviated variable names ¹​ExerciseInducedAngina, ²​HeartDisease
#> 
#> $Male
#> # A tibble: 6 × 8
#>     Age ChestPain           BP Cholesterol BloodSugar MaximumHR Exerci…¹ Heart…²
#>   <dbl> <fct>            <dbl>       <dbl> <lgl>          <dbl> <fct>    <fct>  
#> 1    63 Typical angina     145         233 TRUE             150 No       No     
#> 2    67 Asymptomatic       160         286 FALSE            108 Yes      Yes    
#> 3    67 Asymptomatic       120         229 FALSE            129 Yes      Yes    
#> 4    37 Non-anginal pain   130         250 FALSE            187 No       No     
#> 5    56 Atypical angina    120         236 FALSE            178 No       No     
#> 6    63 Asymptomatic       130         254 FALSE            147 No       Yes    
#> # … with abbreviated variable names ¹​ExerciseInducedAngina, ²​HeartDisease
#> 
    
#Use additional arguments for the function
heart_disease %>%
  stratiply(
        f = glm,
        by = Sex,
        formula = HeartDisease ~ .,
        family = "binomial"
  )
#> $Female
#> 
#> Call:  .f(formula = ..1, family = "binomial", data = .x[[i]])
#> 
#> Coefficients:
#>               (Intercept)                        Age  
#>                -26.201202                   0.063173  
#>  ChestPainAtypical angina  ChestPainNon-anginal pain  
#>                 16.672159                  14.881710  
#>     ChestPainAsymptomatic                         BP  
#>                 18.613467                   0.045810  
#>               Cholesterol             BloodSugarTRUE  
#>                  0.002053                   1.766565  
#>                 MaximumHR   ExerciseInducedAnginaYes  
#>                 -0.021872                   1.040728  
#> 
#> Degrees of Freedom: 96 Total (i.e. Null);  87 Residual
#> Null Deviance:	    110.7 
#> Residual Deviance: 56.66 	AIC: 76.66
#> 
#> $Male
#> 
#> Call:  .f(formula = ..1, family = "binomial", data = .x[[i]])
#> 
#> Coefficients:
#>               (Intercept)                        Age  
#>                  0.141398                   0.024326  
#>  ChestPainAtypical angina  ChestPainNon-anginal pain  
#>                 -0.470775                   0.107361  
#>     ChestPainAsymptomatic                         BP  
#>                  1.552618                   0.011718  
#>               Cholesterol             BloodSugarTRUE  
#>                  0.009023                  -0.343345  
#>                 MaximumHR   ExerciseInducedAnginaYes  
#>                 -0.037817                   0.524487  
#> 
#> Degrees of Freedom: 205 Total (i.e. Null);  196 Residual
#> Null Deviance:	    283.2 
#> Residual Deviance: 191.5 	AIC: 211.5
#> 

#Use mixed selections to split by desired columns
heart_disease %>%
  stratiply(
        f = glm,
        by = c(Sex, where(is.logical)),
        formula = HeartDisease ~ Age,
        family = "binomial"
  ) 
#> $Female
#> $Female$`FALSE`
#> 
#> Call:  .f(formula = ..1, family = "binomial", data = .x[[i]])
#> 
#> Coefficients:
#> (Intercept)          Age  
#>    -5.17802      0.06904  
#> 
#> Degrees of Freedom: 84 Total (i.e. Null);  83 Residual
#> Null Deviance:	    90.33 
#> Residual Deviance: 84.84 	AIC: 88.84
#> 
#> $Female$`TRUE`
#> 
#> Call:  .f(formula = ..1, family = "binomial", data = .x[[i]])
#> 
#> Coefficients:
#> (Intercept)          Age  
#>     4.29409     -0.07303  
#> 
#> Degrees of Freedom: 11 Total (i.e. Null);  10 Residual
#> Null Deviance:	    16.64 
#> Residual Deviance: 15.96 	AIC: 19.96
#> 
#> 
#> $Male
#> $Male$`FALSE`
#> 
#> Call:  .f(formula = ..1, family = "binomial", data = .x[[i]])
#> 
#> Coefficients:
#> (Intercept)          Age  
#>    -3.58959      0.07275  
#> 
#> Degrees of Freedom: 172 Total (i.e. Null);  171 Residual
#> Null Deviance:	    236.8 
#> Residual Deviance: 220.3 	AIC: 224.3
#> 
#> $Male$`TRUE`
#> 
#> Call:  .f(formula = ..1, family = "binomial", data = .x[[i]])
#> 
#> Coefficients:
#> (Intercept)          Age  
#>    -4.28261      0.07483  
#> 
#> Degrees of Freedom: 32 Total (i.e. Null);  31 Residual
#> Null Deviance:	    45.72 
#> Residual Deviance: 43.66 	AIC: 47.66
#> 
#>