Shuffle any of the columns of a data.frame to artificially distort relationships.

muddle(
    data,
    at,
    ...
)

Arguments

data

A data.frame.

at

A vector of quoted/unquoted column names, column positions, and/or tidyselect::select_helpers identifying the columns to shuffle. Defaults to all columns.

...

Additional arguments passed to sample (e.g., size, replace).

Value

A tibble::tibble containing the data with the selected columns shuffled.

Author

Alex Zajichek

Examples

#Set a seed
set.seed(123)

#Default permutes all columns
heart_disease %>%
  muddle
#> # A tibble: 303 × 9
#>      Age Sex    ChestPain           BP Cholest…¹ Blood…² Maxim…³ Exerc…⁴ Heart…⁵
#>    <dbl> <fct>  <fct>            <dbl>     <dbl> <lgl>     <dbl> <fct>   <fct>  
#>  1    43 Female Non-anginal pain   130       214 FALSE       120 Yes     No     
#>  2    44 Male   Asymptomatic       130       204 FALSE       143 Yes     Yes    
#>  3    68 Female Asymptomatic       128       242 FALSE       125 Yes     No     
#>  4    35 Female Asymptomatic       120       269 FALSE       163 Yes     Yes    
#>  5    45 Female Atypical angina    138       240 FALSE       169 No      No     
#>  6    54 Male   Asymptomatic       128       209 FALSE       182 Yes     No     
#>  7    61 Male   Asymptomatic       100       258 FALSE       152 Yes     Yes    
#>  8    57 Female Asymptomatic       152       229 FALSE       142 No      No     
#>  9    67 Male   Non-anginal pain   110       283 FALSE       138 No      Yes    
#> 10    51 Female Atypical angina    125       204 TRUE        161 No      No     
#> # … with 293 more rows, and abbreviated variable names ¹​Cholesterol,
#> #   ²​BloodSugar, ³​MaximumHR, ⁴​ExerciseInducedAngina, ⁵​HeartDisease

#Permute select columns
heart_disease %>%
  muddle(
    at = c(Age, Sex)
  )
#> # A tibble: 303 × 9
#>      Age Sex    ChestPain           BP Cholest…¹ Blood…² Maxim…³ Exerc…⁴ Heart…⁵
#>    <dbl> <fct>  <fct>            <dbl>     <dbl> <lgl>     <dbl> <fct>   <fct>  
#>  1    47 Male   Typical angina     145       233 TRUE        150 No      No     
#>  2    49 Female Asymptomatic       160       286 FALSE       108 Yes     Yes    
#>  3    59 Male   Asymptomatic       120       229 FALSE       129 Yes     Yes    
#>  4    35 Male   Non-anginal pain   130       250 FALSE       187 No      No     
#>  5    44 Female Atypical angina    130       204 FALSE       172 No      No     
#>  6    29 Female Atypical angina    120       236 FALSE       178 No      No     
#>  7    45 Male   Asymptomatic       140       268 FALSE       160 No      Yes    
#>  8    59 Male   Asymptomatic       120       354 FALSE       163 Yes     No     
#>  9    46 Male   Asymptomatic       130       254 FALSE       147 No      Yes    
#> 10    54 Male   Asymptomatic       140       203 TRUE        155 Yes     Yes    
#> # … with 293 more rows, and abbreviated variable names ¹​Cholesterol,
#> #   ²​BloodSugar, ³​MaximumHR, ⁴​ExerciseInducedAngina, ⁵​HeartDisease

#Using a select helper
heart_disease %>%
  muddle(
    at = matches("^S")
  )
#> # A tibble: 303 × 9
#>      Age Sex    ChestPain           BP Cholest…¹ Blood…² Maxim…³ Exerc…⁴ Heart…⁵
#>    <dbl> <fct>  <fct>            <dbl>     <dbl> <lgl>     <dbl> <fct>   <fct>  
#>  1    63 Male   Typical angina     145       233 TRUE        150 No      No     
#>  2    67 Male   Asymptomatic       160       286 FALSE       108 Yes     Yes    
#>  3    67 Male   Asymptomatic       120       229 FALSE       129 Yes     Yes    
#>  4    37 Male   Non-anginal pain   130       250 FALSE       187 No      No     
#>  5    41 Female Atypical angina    130       204 FALSE       172 No      No     
#>  6    56 Female Atypical angina    120       236 FALSE       178 No      No     
#>  7    62 Male   Asymptomatic       140       268 FALSE       160 No      Yes    
#>  8    57 Male   Asymptomatic       120       354 FALSE       163 Yes     No     
#>  9    63 Male   Asymptomatic       130       254 FALSE       147 No      Yes    
#> 10    53 Male   Asymptomatic       140       203 TRUE        155 Yes     Yes    
#> # … with 293 more rows, and abbreviated variable names ¹​Cholesterol,
#> #   ²​BloodSugar, ³​MaximumHR, ⁴​ExerciseInducedAngina, ⁵​HeartDisease

#Pass other arguments
heart_disease %>%
  muddle(
    size = 5,
    replace = TRUE
  )
#> # A tibble: 5 × 9
#>     Age Sex   ChestPain           BP Cholesterol Blood…¹ Maxim…² Exerc…³ Heart…⁴
#>   <dbl> <fct> <fct>            <dbl>       <dbl> <lgl>     <dbl> <fct>   <fct>  
#> 1    41 Male  Atypical angina    135         321 FALSE       180 Yes     Yes    
#> 2    42 Male  Atypical angina    128         212 TRUE        172 Yes     No     
#> 3    63 Male  Typical angina     152         234 FALSE       115 No      Yes    
#> 4    37 Male  Non-anginal pain   130         289 FALSE        71 No      No     
#> 5    41 Male  Atypical angina    150         160 FALSE       192 No      Yes    
#> # … with abbreviated variable names ¹​BloodSugar, ²​MaximumHR,
#> #   ³​ExerciseInducedAngina, ⁴​HeartDisease