51  dplyr Package


52 Creating function to configrure tables

Code
tbl_style <- function(df){
    df %>% 
        gt::gt() %>% 
        gt::tab_options(
            table.font.size = 14, 
            table.font.names = "serif", 
            data_row.padding = gt::px(2)
        ) %>% 
        gt::opt_stylize(style = 5)
}

53 Manual data entry

Code
dat <- tribble(~"name", ~"day", ~"month", ~"year", ~"bp",
               "Ama", 12, 05, 2020, "120/80",
               "Kwame", 14, 02, 2019, "132/66",
               "Akosua", 21, 12, 2010, "110/76",
               "Yaw", 13, 03, 1982, "144/98",
               "Yaa", 19, 08, 2000, "117/77")
dat %>% 
tbl_style()
name day month year bp
Ama 12 5 2020 120/80
Kwame 14 2 2019 132/66
Akosua 21 12 2010 110/76
Yaw 13 3 1982 144/98
Yaa 19 8 2000 117/77

54 Reading in data

Code
dataF <-
    readxl::read_xlsx(
        "C:/Dataset/SBPDATA.xlsx"
    ) %>% 
    janitor::clean_names() %>% 
    rename(
        ageyrs = a3_how_old_are_you_years,
        dxs_class = disease_class,
        gender = a1_gender
    ) %>% 
    mutate(
        dxs_class = factor(dxs_class),
        gender = factor(gender, levels = c(0, 1), labels = c("Male", "Female"))
    )

55 dplyr functions

55.1 arrange

Code
dat %>% 
    arrange(name, desc(day))
# A tibble: 5 × 5
  name     day month  year bp    
  <chr>  <dbl> <dbl> <dbl> <chr> 
1 Akosua    21    12  2010 110/76
2 Ama       12     5  2020 120/80
3 Kwame     14     2  2019 132/66
4 Yaa       19     8  2000 117/77
5 Yaw       13     3  1982 144/98

55.2 unite()

Code
dat %>% 
    unite(col = "dob", c(day, month, year), sep="/") %>% 
    tbl_style()
name dob bp
Ama 12/5/2020 120/80
Kwame 14/2/2019 132/66
Akosua 21/12/2010 110/76
Yaw 13/3/1982 144/98
Yaa 19/8/2000 117/77

55.3 seperate()

Code
dat %>% 
    separate(col = bp, into = c("sbp", "dbp"), sep = "/") %>% 
    tbl_style()
name day month year sbp dbp
Ama 12 5 2020 120 80
Kwame 14 2 2019 132 66
Akosua 21 12 2010 110 76
Yaw 13 3 1982 144 98
Yaa 19 8 2000 117 77
Code
dat %>% 
    separate(col = bp, into = c("sbp", "dbp"), sep = "/") %>% 
    unite(col = "dob", c(day, month, year), sep="/") %>% 
    mutate(dob_new = lubridate::dmy(dob)) %>% 
    tbl_style()
name dob sbp dbp dob_new
Ama 12/5/2020 120 80 2020-05-12
Kwame 14/2/2019 132 66 2019-02-14
Akosua 21/12/2010 110 76 2010-12-21
Yaw 13/3/1982 144 98 1982-03-13
Yaa 19/8/2000 117 77 2000-08-19

56 Visualise and summarize the data

Code
dataF %>% glimpse()
Rows: 3,296
Columns: 14
$ sid       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1…
$ dxs_class <fct> HPT, DM+HPT, HPT, HPT, HPT, DM+HPT, DM+HPT, DM, HPT, HPT, HP…
$ sbp_0     <dbl> 139, 155, 109, 130, 124, 140, 137, 160, 153, 135, 112, 137, …
$ sbp_2     <dbl> 124, NA, 123, NA, 120, 114, 135, 130, 218, 130, 118, 150, 14…
$ sbp_4     <dbl> 130, NA, 109, NA, 146, 163, 132, NA, NA, 118, 112, 130, 138,…
$ sbp_6     <dbl> 130, NA, 126, NA, 144, 117, 147, NA, NA, 150, 141, 112, 120,…
$ sbp_8     <dbl> 104, NA, 108, NA, 157, 124, 130, NA, NA, NA, 120, 129, 148, …
$ sbp_10    <dbl> 129, NA, 115, NA, 123, 121, NA, NA, 218, 127, NA, 148, 143, …
$ sbp_12    <dbl> 80, NA, 115, NA, 120, 128, 124, NA, NA, NA, NA, 142, NA, 116…
$ sbp_14    <dbl> 129, NA, 122, NA, 131, 119, 142, NA, NA, 123, NA, 131, NA, 1…
$ sbp_16    <dbl> 126, NA, 131, NA, 120, 100, 144, NA, NA, 149, 96, 119, NA, 2…
$ sbp_18    <dbl> 135, NA, 102, NA, 123, 127, 128, NA, NA, 132, 119, 119, 167,…
$ ageyrs    <dbl> 75, 60, 62, 70, 72, 56, 51, 73, 61, 59, 75, 53, 43, 64, 70, …
$ gender    <fct> Male, Male, Male, Male, Male, Male, Male, Male, Female, Male…
Code
dataF %>% skimr::skim()
Data summary
Name Piped data
Number of rows 3296
Number of columns 14
_______________________
Column type frequency:
factor 2
numeric 12
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
dxs_class 2 1 FALSE 3 HPT: 1867, DM+: 1005, DM: 422
gender 6 1 FALSE 2 Mal: 2520, Fem: 770

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
sid 0 1.00 1648.50 951.62 1 824.75 1648.5 2472.25 3296 ▇▇▇▇▇
sbp_0 6 1.00 141.20 22.21 70 125.00 139.0 155.00 230 ▁▇▇▂▁
sbp_2 499 0.85 139.49 23.34 70 123.00 137.0 153.00 250 ▁▇▅▁▁
sbp_4 671 0.80 138.46 23.42 70 122.00 136.0 151.00 238 ▁▇▆▂▁
sbp_6 751 0.77 137.82 22.55 70 122.00 135.0 151.00 234 ▁▇▇▂▁
sbp_8 846 0.74 137.15 22.97 73 120.25 134.0 150.00 277 ▂▇▂▁▁
sbp_10 867 0.74 136.83 22.12 74 121.00 134.0 150.00 250 ▁▇▃▁▁
sbp_12 960 0.71 135.46 21.82 71 120.75 133.0 148.00 250 ▁▇▃▁▁
sbp_14 1063 0.68 135.49 21.94 81 120.00 133.0 148.00 228 ▂▇▅▁▁
sbp_16 1276 0.61 134.93 22.06 79 120.00 132.0 147.00 243 ▂▇▃▁▁
sbp_18 1483 0.55 134.74 21.93 83 120.00 132.0 147.00 239 ▂▇▃▁▁
ageyrs 5 1.00 57.53 12.71 1 49.00 58.0 66.00 99 ▁▂▇▇▁
Code
dataF %>% summarytools::dfSummary(graph.col = FALSE)
Data Frame Summary  
dataF  
Dimensions: 3296 x 14  
Duplicates: 0  

-----------------------------------------------------------------------------------------
No   Variable    Stats / Values               Freqs (% of Valid)     Valid      Missing  
---- ----------- ---------------------------- ---------------------- ---------- ---------
1    sid         Mean (sd) : 1648.5 (951.6)   3296 distinct values   3296       0        
     [numeric]   min < med < max:                                    (100.0%)   (0.0%)   
                 1 < 1648.5 < 3296                                                       
                 IQR (CV) : 1647.5 (0.6)                                                 

2    dxs_class   1. DM                         422 (12.8%)           3294       2        
     [factor]    2. DM+HPT                    1005 (30.5%)           (99.9%)    (0.1%)   
                 3. HPT                       1867 (56.7%)                               

3    sbp_0       Mean (sd) : 141.2 (22.2)     137 distinct values    3290       6        
     [numeric]   min < med < max:                                    (99.8%)    (0.2%)   
                 70 < 139 < 230                                                          
                 IQR (CV) : 30 (0.2)                                                     

4    sbp_2       Mean (sd) : 139.5 (23.3)     140 distinct values    2797       499      
     [numeric]   min < med < max:                                    (84.9%)    (15.1%)  
                 70 < 137 < 250                                                          
                 IQR (CV) : 30 (0.2)                                                     

5    sbp_4       Mean (sd) : 138.5 (23.4)     144 distinct values    2625       671      
     [numeric]   min < med < max:                                    (79.6%)    (20.4%)  
                 70 < 136 < 238                                                          
                 IQR (CV) : 29 (0.2)                                                     

6    sbp_6       Mean (sd) : 137.8 (22.6)     137 distinct values    2545       751      
     [numeric]   min < med < max:                                    (77.2%)    (22.8%)  
                 70 < 135 < 234                                                          
                 IQR (CV) : 29 (0.2)                                                     

7    sbp_8       Mean (sd) : 137.1 (23)       134 distinct values    2450       846      
     [numeric]   min < med < max:                                    (74.3%)    (25.7%)  
                 73 < 134 < 277                                                          
                 IQR (CV) : 29.8 (0.2)                                                   

8    sbp_10      Mean (sd) : 136.8 (22.1)     133 distinct values    2429       867      
     [numeric]   min < med < max:                                    (73.7%)    (26.3%)  
                 74 < 134 < 250                                                          
                 IQR (CV) : 29 (0.2)                                                     

9    sbp_12      Mean (sd) : 135.5 (21.8)     133 distinct values    2336       960      
     [numeric]   min < med < max:                                    (70.9%)    (29.1%)  
                 71 < 133 < 250                                                          
                 IQR (CV) : 27.2 (0.2)                                                   

10   sbp_14      Mean (sd) : 135.5 (21.9)     132 distinct values    2233       1063     
     [numeric]   min < med < max:                                    (67.7%)    (32.3%)  
                 81 < 133 < 228                                                          
                 IQR (CV) : 28 (0.2)                                                     

11   sbp_16      Mean (sd) : 134.9 (22.1)     129 distinct values    2020       1276     
     [numeric]   min < med < max:                                    (61.3%)    (38.7%)  
                 79 < 132 < 243                                                          
                 IQR (CV) : 27 (0.2)                                                     

12   sbp_18      Mean (sd) : 134.7 (21.9)     126 distinct values    1813       1483     
     [numeric]   min < med < max:                                    (55.0%)    (45.0%)  
                 83 < 132 < 239                                                          
                 IQR (CV) : 27 (0.2)                                                     

13   ageyrs      Mean (sd) : 57.5 (12.7)      76 distinct values     3291       5        
     [numeric]   min < med < max:                                    (99.8%)    (0.2%)   
                 1 < 58 < 99                                                             
                 IQR (CV) : 17 (0.2)                                                     

14   gender      1. Male                      2520 (76.6%)           3290       6        
     [factor]    2. Female                     770 (23.4%)           (99.8%)    (0.2%)   
-----------------------------------------------------------------------------------------
Code
dataF %>% visdat::vis_dat(palette = "cb_safe")

Code
dataF %>% visdat::vis_miss()

57 The relocate function

Code
dataF %>% 
    relocate(ageyrs, gender, .before = sbp_0) %>% 
    slice_head(n=10) %>% 
    tbl_style()
sid dxs_class ageyrs gender sbp_0 sbp_2 sbp_4 sbp_6 sbp_8 sbp_10 sbp_12 sbp_14 sbp_16 sbp_18
1 HPT 75 Male 139 124 130 130 104 129 80 129 126 135
2 DM+HPT 60 Male 155 NA NA NA NA NA NA NA NA NA
3 HPT 62 Male 109 123 109 126 108 115 115 122 131 102
4 HPT 70 Male 130 NA NA NA NA NA NA NA NA NA
5 HPT 72 Male 124 120 146 144 157 123 120 131 120 123
6 DM+HPT 56 Male 140 114 163 117 124 121 128 119 100 127
7 DM+HPT 51 Male 137 135 132 147 130 NA 124 142 144 128
8 DM 73 Male 160 130 NA NA NA NA NA NA NA NA
9 HPT 61 Female 153 218 NA NA NA 218 NA NA NA NA
10 HPT 59 Male 135 130 118 150 NA 127 NA 123 149 132
Code
dataF %>% 
    relocate(sid, .after = last_col()) %>% 
    slice_head(n=10) %>% 
    tbl_style()
dxs_class sbp_0 sbp_2 sbp_4 sbp_6 sbp_8 sbp_10 sbp_12 sbp_14 sbp_16 sbp_18 ageyrs gender sid
HPT 139 124 130 130 104 129 80 129 126 135 75 Male 1
DM+HPT 155 NA NA NA NA NA NA NA NA NA 60 Male 2
HPT 109 123 109 126 108 115 115 122 131 102 62 Male 3
HPT 130 NA NA NA NA NA NA NA NA NA 70 Male 4
HPT 124 120 146 144 157 123 120 131 120 123 72 Male 5
DM+HPT 140 114 163 117 124 121 128 119 100 127 56 Male 6
DM+HPT 137 135 132 147 130 NA 124 142 144 128 51 Male 7
DM 160 130 NA NA NA NA NA NA NA NA 73 Male 8
HPT 153 218 NA NA NA 218 NA NA NA NA 61 Female 9
HPT 135 130 118 150 NA 127 NA 123 149 132 59 Male 10
Code
dataF %>% 
    relocate(where(is.numeric)) %>% 
    slice_head(n=10) %>% 
    tbl_style()
sid sbp_0 sbp_2 sbp_4 sbp_6 sbp_8 sbp_10 sbp_12 sbp_14 sbp_16 sbp_18 ageyrs dxs_class gender
1 139 124 130 130 104 129 80 129 126 135 75 HPT Male
2 155 NA NA NA NA NA NA NA NA NA 60 DM+HPT Male
3 109 123 109 126 108 115 115 122 131 102 62 HPT Male
4 130 NA NA NA NA NA NA NA NA NA 70 HPT Male
5 124 120 146 144 157 123 120 131 120 123 72 HPT Male
6 140 114 163 117 124 121 128 119 100 127 56 DM+HPT Male
7 137 135 132 147 130 NA 124 142 144 128 51 DM+HPT Male
8 160 130 NA NA NA NA NA NA NA NA 73 DM Male
9 153 218 NA NA NA 218 NA NA NA NA 61 HPT Female
10 135 130 118 150 NA 127 NA 123 149 132 59 HPT Male
Code
dataF %>% 
    relocate(contains("sbp")) %>% 
    slice_head(n=10) %>% 
    tbl_style()
sbp_0 sbp_2 sbp_4 sbp_6 sbp_8 sbp_10 sbp_12 sbp_14 sbp_16 sbp_18 sid dxs_class ageyrs gender
139 124 130 130 104 129 80 129 126 135 1 HPT 75 Male
155 NA NA NA NA NA NA NA NA NA 2 DM+HPT 60 Male
109 123 109 126 108 115 115 122 131 102 3 HPT 62 Male
130 NA NA NA NA NA NA NA NA NA 4 HPT 70 Male
124 120 146 144 157 123 120 131 120 123 5 HPT 72 Male
140 114 163 117 124 121 128 119 100 127 6 DM+HPT 56 Male
137 135 132 147 130 NA 124 142 144 128 7 DM+HPT 51 Male
160 130 NA NA NA NA NA NA NA NA 8 DM 73 Male
153 218 NA NA NA 218 NA NA NA NA 9 HPT 61 Female
135 130 118 150 NA 127 NA 123 149 132 10 HPT 59 Male

58 The summarize and across functions

Code
dataF %>% 
    summarize(across(sbp_0:sbp_8, .fns = mean, na.rm=T)) %>% 
    tbl_style()
Warning: There was 1 warning in `summarize()`.
ℹ In argument: `across(sbp_0:sbp_8, .fns = mean, na.rm = T)`.
Caused by warning:
! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.

  # Previously
  across(a:b, mean, na.rm = TRUE)

  # Now
  across(a:b, \(x) mean(x, na.rm = TRUE))
sbp_0 sbp_2 sbp_4 sbp_6 sbp_8
141.2021 139.4923 138.457 137.8216 137.149
Code
dataF %>% 
    na.omit() %>% 
    group_by(dxs_class) %>% 
    summarize(across(where(is.numeric), ~quantile(.x))) %>% 
    ungroup() %>% 
    tbl_style()
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
dplyr 1.1.0.
ℹ Please use `reframe()` instead.
ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
  always returns an ungrouped data frame and adjust accordingly.
`summarise()` has grouped output by 'dxs_class'. You can override using the
`.groups` argument.
dxs_class sid sbp_0 sbp_2 sbp_4 sbp_6 sbp_8 sbp_10 sbp_12 sbp_14 sbp_16 sbp_18 ageyrs
DM 23.00 81.00 70.0 84.0 94.0 87 74.00 71.0 85.0 79 86 27
DM 980.50 114.50 115.5 113.0 114.5 112 113.00 113.0 112.0 112 110 44
DM 1650.00 124.00 125.0 125.0 126.0 127 124.00 123.0 122.0 120 121 50
DM 2340.00 134.00 139.0 137.5 140.5 138 138.00 136.0 136.5 130 135 58
DM 3284.00 189.00 194.0 199.0 187.0 187 175.00 183.0 196.0 185 197 75
DM+HPT 6.00 98.00 81.0 88.0 70.0 82 90.00 83.0 87.0 87 83 25
DM+HPT 825.75 129.00 127.0 125.0 126.0 127 124.00 125.0 124.0 124 122 53
DM+HPT 1566.00 142.00 141.0 142.0 139.0 140 139.00 139.5 138.0 140 137 60
DM+HPT 2441.75 158.25 157.0 158.0 155.0 157 156.25 153.0 154.0 152 154 66
DM+HPT 3283.00 216.00 231.0 231.0 234.0 277 240.00 232.0 220.0 228 226 89
HPT 1.00 90.00 71.0 78.0 88.0 73 87.00 80.0 81.0 79 86 30
HPT 783.00 126.00 124.0 120.0 120.0 120 120.00 120.0 120.0 120 120 51
HPT 1593.00 138.00 135.0 132.0 131.0 131 132.00 130.0 131.0 130 130 59
HPT 2510.50 151.00 150.0 147.0 146.0 146 144.00 144.0 144.0 145 142 66
HPT 3294.00 219.00 221.0 220.0 209.0 202 217.00 228.0 215.0 201 213 99

59 Distinct observations

Code
dataF %>% 
    summarise(across(where(is.numeric), n_distinct)) %>% 
    tbl_style()
sid sbp_0 sbp_2 sbp_4 sbp_6 sbp_8 sbp_10 sbp_12 sbp_14 sbp_16 sbp_18 ageyrs
3296 138 141 145 138 135 134 134 133 130 127 77

60 Using list of functions, defining column names and removing NA

Code
dataF %>% 
    filter(!is.na(dxs_class)&!is.na(gender)) %>% 
    group_by(dxs_class, gender) %>%
    summarise(
        across(
            starts_with("sbp"), 
            list(
                AVG = mean, 
                SD = sd, 
                N_missing = ~sum(is.na(.x), na.rm=TRUE)
                )
            )
        ) %>%
    ungroup() %>% 
    tbl_style()
`summarise()` has grouped output by 'dxs_class'. You can override using the
`.groups` argument.
dxs_class gender sbp_0_AVG sbp_0_SD sbp_0_N_missing sbp_2_AVG sbp_2_SD sbp_2_N_missing sbp_4_AVG sbp_4_SD sbp_4_N_missing sbp_6_AVG sbp_6_SD sbp_6_N_missing sbp_8_AVG sbp_8_SD sbp_8_N_missing sbp_10_AVG sbp_10_SD sbp_10_N_missing sbp_12_AVG sbp_12_SD sbp_12_N_missing sbp_14_AVG sbp_14_SD sbp_14_N_missing sbp_16_AVG sbp_16_SD sbp_16_N_missing sbp_18_AVG sbp_18_SD sbp_18_N_missing
DM Male NA NA 1 NA NA 57 NA NA 70 NA NA 75 NA NA 74 NA NA 79 NA NA 79 NA NA 85 NA NA 94 NA NA 110
DM Female 128.3596 19.83209 0 NA NA 17 NA NA 23 NA NA 29 NA NA 30 NA NA 29 NA NA 32 NA NA 36 NA NA 50 NA NA 55
DM+HPT Male 144.8366 22.95160 0 NA NA 84 NA NA 113 NA NA 119 NA NA 145 NA NA 153 NA NA 153 NA NA 186 NA NA 204 NA NA 247
DM+HPT Female 146.9735 21.95934 0 NA NA 26 NA NA 36 NA NA 39 NA NA 45 NA NA 43 NA NA 55 NA NA 57 NA NA 68 NA NA 84
HPT Male NA NA 3 NA NA 237 NA NA 309 NA NA 354 NA NA 401 NA NA 414 NA NA 473 NA NA 522 NA NA 639 NA NA 741
HPT Female 143.9884 20.86011 0 NA NA 74 NA NA 116 NA NA 131 NA NA 147 NA NA 145 NA NA 165 NA NA 173 NA NA 216 NA NA 241

61 Summarizing by anonymous functions

Code
dataF %>% 
    filter(!is.na(dxs_class)) %>% 
    group_by(dxs_class) %>%
    summarise(across(.cols = c(sbp_0, sbp_18), 
                     .fns = list("Mean" = ~mean(.x, na.rm=T), 
                                 "UpperCI" = ~(mean(.x, na.rm=T) + 1.96*sd(.x, na.rm=T)/sqrt(n())) ,
                                 "LowerCI" = ~(mean(.x, na.rm=T) - 1.96*sd(.x, na.rm=T)/sqrt(n()))))) %>%
    ungroup() %>% 
    tbl_style()
dxs_class sbp_0_Mean sbp_0_UpperCI sbp_0_LowerCI sbp_18_Mean sbp_18_UpperCI sbp_18_LowerCI
DM 125.8385 127.5809 124.0960 123.5214 125.2375 121.8053
DM+HPT 145.3254 146.7299 143.9209 139.2685 140.7099 137.8272
HPT 142.4490 143.4152 141.4829 134.5420 135.4799 133.6040

62 Generating all combinations of variables: expand

Code
dataF %>% 
    filter(!is.na(dxs_class) & !is.na(gender)) %>% 
    expand(dxs_class, gender) %>% 
    tbl_style()
dxs_class gender
DM Male
DM Female
DM+HPT Male
DM+HPT Female
HPT Male
HPT Female
Code
dataF %>% 
    filter(!is.na(dxs_class) & !is.na(gender)) %>% 
    select(dxs_class, gender) %>% 
    crossing() %>% 
    tbl_style()
dxs_class gender
DM Male
DM Female
DM+HPT Male
DM+HPT Female
HPT Male
HPT Female

63 Adding a running id to data

Code
dataF %>% 
    filter(!is.na(dxs_class) & !is.na(gender)) %>% 
    select(dxs_class, gender) %>%
    mutate(running_id = row_number()) %>% 
    slice_head(n=10) %>% 
    tbl_style()
dxs_class gender running_id
HPT Male 1
DM+HPT Male 2
HPT Male 3
HPT Male 4
HPT Male 5
DM+HPT Male 6
DM+HPT Male 7
DM Male 8
HPT Female 9
HPT Male 10

64 Reshaping data pivot_longer & pivot_wider

Code
dataF_long <-
    dataF %>% 
    select(gender, dxs_class, sbp_0:sbp_18) %>% 
    pivot_longer(cols = starts_with("sbp"),
                 names_to = "measure",
                 values_to = "sbp",
                 values_drop_na = TRUE)
dataF_long %>% 
    slice_head(n=10) %>% 
    tbl_style()
gender dxs_class measure sbp
Male HPT sbp_0 139
Male HPT sbp_2 124
Male HPT sbp_4 130
Male HPT sbp_6 130
Male HPT sbp_8 104
Male HPT sbp_10 129
Male HPT sbp_12 80
Male HPT sbp_14 129
Male HPT sbp_16 126
Male HPT sbp_18 135
Code
dataF %>% 
    select(dxs_class, gender, sbp_0, sbp_2, sbp_4) %>%
    na.omit() %>% 
    group_by(dxs_class) %>% 
    pivot_wider(names_from = gender, 
                values_from = c(sbp_0, sbp_2, sbp_4), 
                values_fn = ~mean(.x, na.rm = TRUE)) %>% 
    ungroup() %>% 
    slice_head(n=10) %>% 
    tbl_style()
dxs_class sbp_0_Male sbp_0_Female sbp_2_Male sbp_2_Female sbp_4_Male sbp_4_Female
HPT 141.1099 143.7839 138.6872 141.8000 136.3622 140.6677
DM+HPT 144.6651 146.3641 143.5340 145.5707 143.3594 146.3587
DM 126.3578 130.2381 126.5642 131.6548 124.9266 134.2857

65 tidyquant Tabulation

Code
dataF %>% 
    select(dxs_class, gender) %>% 
    na.omit() %>% 
    tidyquant::pivot_table(
        .rows = gender, .columns = dxs_class, .values = ~n()
    ) %>% 
    tbl_style()
Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 
gender DM DM+HPT HPT
Male 308 777 1434
Female 114 226 430
Code
dataF %>% 
    select(dxs_class, gender, sbp_0, sbp_2) %>% 
    na.omit() %>% 
    tidyquant::pivot_table(.rows = gender, 
                           .columns = dxs_class, 
                           .values = ~quantile(sbp_0)) %>% 
    unnest(cols = c("DM","HPT","DM+HPT")) %>% 
    tbl_style()
Warning: Values from `quantile(sbp_0)` are not uniquely identified; output will contain
list-cols.
• Use `values_fn = list` to suppress this warning.
• Use `values_fn = {summary_fun}` to summarise duplicates.
• Use the following dplyr code to identify duplicates.
  {data} |>
  dplyr::summarise(n = dplyr::n(), .by = c(gender, dxs_class)) |>
  dplyr::filter(n > 1L)
gender DM DM+HPT HPT
Male 90.00 95 70
Male 113.25 128 126
Male 124.00 142 140
Male 134.00 158 154
Male 182.00 228 224
Female 81.00 98 98
Female 117.00 131 129
Female 127.00 145 141
Female 138.00 161 159
Female 208.00 220 210

66 rowwise manipulations

Code
dataF %>% 
    rowwise() %>% 
    mutate(sbp_mean = mean(c(sbp_0,sbp_2,sbp_4,sbp_6,sbp_8, sbp_10, sbp_12,
                             sbp_14,sbp_16, sbp_18), na.rm=T),
           sbp_sd = sd(c(sbp_0,sbp_2,sbp_4,sbp_6,sbp_8, sbp_10, sbp_12,sbp_14,
                         sbp_16, sbp_18), na.rm=T),
           n=n()) %>% 
    ungroup() %>% 
    relocate(sid, dxs_class, ageyrs, gender, sbp_mean, sbp_sd, contains("sbp")) %>% 
    slice_head(n=10) %>% 
    tbl_style()
sid dxs_class ageyrs gender sbp_mean sbp_sd sbp_0 sbp_2 sbp_4 sbp_6 sbp_8 sbp_10 sbp_12 sbp_14 sbp_16 sbp_18 n
1 HPT 75 Male 122.6000 17.601768 139 124 130 130 104 129 80 129 126 135 1
2 DM+HPT 60 Male 155.0000 NA 155 NA NA NA NA NA NA NA NA NA 1
3 HPT 62 Male 116.0000 9.249625 109 123 109 126 108 115 115 122 131 102 1
4 HPT 70 Male 130.0000 NA 130 NA NA NA NA NA NA NA NA NA 1
5 HPT 72 Male 130.8000 13.373274 124 120 146 144 157 123 120 131 120 123 1
6 DM+HPT 56 Male 125.3000 16.813024 140 114 163 117 124 121 128 119 100 127 1
7 DM+HPT 51 Male 135.4444 7.747760 137 135 132 147 130 NA 124 142 144 128 1
8 DM 73 Male 145.0000 21.213203 160 130 NA NA NA NA NA NA NA NA 1
9 HPT 61 Female 196.3333 37.527767 153 218 NA NA NA 218 NA NA NA NA 1
10 HPT 59 Male 133.0000 11.464230 135 130 118 150 NA 127 NA 123 149 132 1

67 Glue function via str_glue functions

Code
x <- c("Ama", "is", "a", "Girl")
cat(x)
Ama is a Girl
Code
name <- "Fred"
str_glue('My name is {name}.')
My name is Fred.
Code
stringr_fcn <- "`stringr::str_glue()`"
glue_fcn    <- "`glue::glue()`"

str_glue('{stringr_fcn} is essentially an alias for {glue_fcn}.')
`stringr::str_glue()` is essentially an alias for `glue::glue()`.
Code
name <- "Fred"
age <- 50
anniversary <- as.Date("1991-10-12")
str_glue('My name is {name},',
  ' my age next year is {age + 1},',
  ' my anniversary is {format(anniversary, "%A, %B %d, %Y")}.')
My name is Fred, my age next year is 51, my anniversary is Saturday, October 12, 1991.
Code
str_glue('My name is {name},',
  ' my age next year is {age + 1},',
  ' my anniversary is {format(anniversary, "%A, %B %d, %Y")}.',
  name = "Joe",
  age = 40,
  anniversary = as.Date("2001-10-12"))
My name is Joe, my age next year is 41, my anniversary is Friday, October 12, 2001.
Code
mtcars %>% 
    head() %>% 
    tbl_style()
mpg cyl disp hp drat wt qsec vs am gear carb
21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
Code
head(mtcars) %>% 
    glue::glue_data("{rownames(.)} has {hp} hp")
Mazda RX4 has 110 hp
Mazda RX4 Wag has 110 hp
Datsun 710 has 93 hp
Hornet 4 Drive has 110 hp
Hornet Sportabout has 175 hp
Valiant has 105 hp
Code
head(iris) %>%
  mutate(
      description = str_glue(
          "This {Species} has a petal length of {Petal.Length}"
          )
      ) %>% 
    tbl_style()
Sepal.Length Sepal.Width Petal.Length Petal.Width Species description
5.1 3.5 1.4 0.2 setosa This setosa has a petal length of 1.4
4.9 3.0 1.4 0.2 setosa This setosa has a petal length of 1.4
4.7 3.2 1.3 0.2 setosa This setosa has a petal length of 1.3
4.6 3.1 1.5 0.2 setosa This setosa has a petal length of 1.5
5.0 3.6 1.4 0.2 setosa This setosa has a petal length of 1.4
5.4 3.9 1.7 0.4 setosa This setosa has a petal length of 1.7
Code
str_glue("
    A formatted string
    Can have multiple lines
      with additional indention preserved
    ")
A formatted string
Can have multiple lines
  with additional indention preserved
Code
str_glue("

  leading or trailing newlines can be added explicitly

  ")

leading or trailing newlines can be added explicitly
Code
str_glue("
    A formatted string \\
    can also be on a \\
    single line
    ")
A formatted string can also be on a single line
Code
name <- "Fred"
str_glue("My name is {name}, not {{name}}.")
My name is Fred, not {name}.
Code
one <- "1"
str_glue("The value of $e^{2\\pi i}$ is $<<one>>$.", .open = "<<", .close = ">>")
The value of $e^{2\pi i}$ is $1$.
Code
dataF %>% 
    filter(!is.na(sbp_0)) %>% 
    ggplot(aes(x=sbp_0)) +
    geom_histogram(col = "grey", fill = "wheat") +
    labs(title = str_glue("Histogram with Mean = {mean_sbp0}mmHg and \\
                          Standard Deviation = {sd_sbp0}",
                          mean_sbp0 = mean(dataF$sbp_0, na.rm=T) %>% round(1),
                          sd_sbp0   = sd(dataF$sbp_0,   na.rm=T) %>% round(1)),
         x = "Systolic Blood Pressure (mmHg)",
         y = "Frequency") +
    theme_light(base_size = 12, base_family = "serif")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.