52  gtsummary

53 Reading in the data

Code
df_cint_all <- dget("C:/Dataset/cint_data_clean")
df_mbu <- readxl::read_xlsx("C:/Dataset/mbu.xlsx")

54 Formatting and cleaning the cint data

Code
df_cint <- 
    df_cint_all %>%
    select(sex, bulb_0, bulb_12, bmi, sbp, dbp) %>% 
    mutate(
      bmicat = case_when(bmi < 25 ~ "Normal", bmi > 25 ~ "High") %>% 
        factor(levels = c("Normal", "High")),
      hpt = case_when(
        sbp > 120 | dbp > 80 ~ "Yes", sbp <= 120 & dbp <= 80 ~ "No" ) %>% 
        factor()) %>% 
    labelled::set_variable_labels(
        sex = "Sex",
        bulb_0 ="Bulb diameter at time 0",
        bulb_12 ="Bulb diameter at 12 months",
        bmicat = "Categorized BMI",
        sbp = "Systolic Blood Pressure",
        dbp = "Diastolic Blood Pressure",
        bmi = "Body Mass Index",
        hpt = "Hypertension present")

55 Formatting and cleaning the MBU data

Yes it is here, and maybe here too

Code
df_mbu_clean <-
    df_mbu %>% 
    select(-c(pt_name, starts_with("diag"), address, Field1, ip, disweight)) %>% 
    mutate(wt = ifelse(wt > 8, NA, wt),
           age = ifelse(age > 60, NA, age),
           sex = ifelse(sex == "Missing", NA, sex) %>% factor(),
           place = ifelse((place == "Missing" | place == "Self"), NA, place),
           apgar1 = ifelse(apgar1 >10, NA, apgar1),
           apgar5 = ifelse(apgar2 >10, NA, apgar2), 
           dura_adm = difftime( disDateTime, admDateTime,units = "days"),
           dura_adm = ifelse((dura_adm <=0 | dura_adm > 90), NA, dura_adm),
           died = case_when(outcome == "Discharged" ~ "No",
                            outcome == "Died" ~ "Yes") %>% factor(),
           gestage = ifelse(gestage < 26 |gestage > 48, NA, gestage),
           del = case_when(del == "SVD" ~ "SVD",
                           del == "C/S" ~ "C/S",
                           del == "Forceps" ~ "Vacuum",
                           del == "Vacuum" ~ "Vacuum"))%>% 
    select(-c(apgar2, admDateTime, disDateTime, delDateTime, outcome)) %>% 
    drop_na() %>% 
    labelled::set_variable_labels(
      wt = "Weight (kgs)",
      sex = "Sex",
      age = "Age (days)",
      apgar1 = "APGAR (min 1)",
      apgar5 = "APGAR (min 5)",
      gestage = "Gestational Age",
      place = "Place of birth",
      del = "Mode of delivery",
      dura_adm = "Admission duration",
      died = "mortality")

56 Summarizing data

Code
df_cint %>% 
  summarytools::dfSummary(graph.col = F, labels.col = F)
Data Frame Summary  
df_cint  
Dimensions: 712 x 8  
Duplicates: 2  

-------------------------------------------------------------------------------------
No   Variable    Stats / Values            Freqs (% of Valid)    Valid      Missing  
---- ----------- ------------------------- --------------------- ---------- ---------
1    sex         1. Female                 554 (77.8%)           712        0        
     [factor]    2. Male                   158 (22.2%)           (100.0%)   (0.0%)   

2    bulb_0      Mean (sd) : 0.9 (0.2)     50 distinct values    712        0        
     [numeric]   min < med < max:                                (100.0%)   (0.0%)   
                 0.5 < 0.9 < 2.8                                                     
                 IQR (CV) : 0.3 (0.2)                                                

3    bulb_12     Mean (sd) : 0.9 (0.2)     46 distinct values    363        349      
     [numeric]   min < med < max:                                (51.0%)    (49.0%)  
                 0.4 < 0.8 < 2.6                                                     
                 IQR (CV) : 0.2 (0.3)                                                

4    bmi         Mean (sd) : 26.6 (5.6)    589 distinct values   712        0        
     [numeric]   min < med < max:                                (100.0%)   (0.0%)   
                 13.3 < 26.1 < 45.3                                                  
                 IQR (CV) : 7.6 (0.2)                                                

5    sbp         Mean (sd) : 125.1 (24)    118 distinct values   712        0        
     [numeric]   min < med < max:                                (100.0%)   (0.0%)   
                 66 < 121 < 231                                                      
                 IQR (CV) : 29 (0.2)                                                 

6    dbp         Mean (sd) : 79.6 (14.1)   71 distinct values    712        0        
     [numeric]   min < med < max:                                (100.0%)   (0.0%)   
                 43 < 79 < 135                                                       
                 IQR (CV) : 19 (0.2)                                                 

7    bmicat      1. Normal                 318 (44.7%)           711        1        
     [factor]    2. High                   393 (55.3%)           (99.9%)    (0.1%)   

8    hpt         1. No                     303 (42.6%)           712        0        
     [factor]    2. Yes                    409 (57.4%)           (100.0%)   (0.0%)   
-------------------------------------------------------------------------------------

57 Summary statistics tables

57.1 Table 1

Code
df_cint %>% 
    tbl_summary(
        by = sex,                    # aggregate table by sex
        missing_text = "(Missing)",  # Label missing data as such
        type = sbp ~ "continuous2",  # Report sbp with 2 or more statistics
        statistic = list(sbp ~ c("{mean},({sd})",
                                 "({min},{max})"),
                         bmicat ~ "{n}/{N} ({p}%)",
                         bulb_0 ~ c("{mean} ({sd})")),
        label = bmi ~ "BMI (Kg/m sq.)",   # To modify labels
        digits = dbp ~ 1) %>%         # Force dbp to have one decimal
    add_overall(last = T) %>%           # Add overall column
    modify_spanning_header(all_stat_cols() ~ "**Sex of Participants**") %>% 
    add_p(pvalue_fun = ~ gtsummary::style_pvalue(.x, digits = 3)) %>%    # Adds p-value column
    add_q(method = "fdr",                   #Add p-value for multiple comparison
          pvalue_fun = ~ gtsummary::style_pvalue(.x, digits = 3)) %>% 
    add_stat_label() %>%        # Add specific stats to each variable
    add_n() %>%                     # Add valid observation to each variable
    bold_p() %>%       # Bold significant p-values
    add_significance_stars()   # add significance stars
Characteristic N
Sex of Participants
p-value1 q-value2
Female
N = 554
Male
N = 158
Overall
N = 712
Bulb diameter at time 0, Mean (SD) 712 0.90 (0.22) 0.93 (0.20) 0.91 (0.22) 0.035* 0.082
Bulb diameter at 12 months, Median (Q1, Q3) 363 0.83 (0.70, 0.95) 0.85 (0.68, 0.98) 0.83 (0.70, 0.95) 0.683 0.683
    (Missing)
262 87 349

BMI (Kg/m sq.), Median (Q1, Q3) 712 27.1 (23.2, 31.2) 23.0 (21.0, 25.6) 26.1 (22.4, 30.1) <0.001*** <0.001
Systolic Blood Pressure 712


0.242 0.423
    Mean,(SD)
125,(24) 127,(24) 125,(24)

    (Min,Max)
(66,231) (82,199) (66,231)

Diastolic Blood Pressure, Median (Q1, Q3) 712 79.0 (70.0, 88.0) 78.0 (69.0, 90.0) 79.0 (70.0, 89.0) 0.563 0.657
Categorized BMI, n/N (%) 711


<0.001*** <0.001
    Normal
208/553 (38%) 110/158 (70%) 318/711 (45%)

    High
345/553 (62%) 48/158 (30%) 393/711 (55%)

    (Missing)
1 0 1

Hypertension present, n (%) 712 314 (57%) 95 (60%) 409 (57%) 0.439 0.615
1 p<0.05; p<0.01; p<0.001
2 False discovery rate correction for multiple testing

57.2 Table 2

Code
df_mbu_clean %>% 
    tbl_summary(by = died) %>% 
    add_overall(last = T) %>%
    add_p() %>% 
    modify_spanning_header(all_stat_cols() ~ "**Mortality**") %>% 
    modify_caption(caption = "**Table 2**: MBU data by outcome") %>% 
    bold_labels()
Table 2: MBU data by outcome
Characteristic
Mortality
p-value2
No
N = 4,2571
Yes
N = 7271
Overall
N = 4,9841
Weight (kgs) 2.80 (2.00, 3.30) 1.60 (1.10, 2.70) 2.70 (1.90, 3.30) <0.001
Age (days) 0.00 (0.00, 1.00) 0.00 (0.00, 1.00) 0.00 (0.00, 1.00) 0.008
Sex


0.6
    Female 1,887 (44%) 329 (45%) 2,216 (44%)
    Male 2,370 (56%) 398 (55%) 2,768 (56%)
Place of birth


<0.001
    Clinic/Hospital 601 (14%) 195 (27%) 796 (16%)
    Home 55 (1.3%) 9 (1.2%) 64 (1.3%)
    KATH 3,594 (84%) 520 (72%) 4,114 (83%)
    Maternity Home 7 (0.2%) 3 (0.4%) 10 (0.2%)
APGAR (min 1) 6.00 (4.00, 7.00) 4.00 (2.00, 6.00) 6.00 (4.00, 7.00) <0.001
Mode of delivery


<0.001
    C/S 2,344 (55%) 318 (44%) 2,662 (53%)
    SVD 1,790 (42%) 398 (55%) 2,188 (44%)
    Vacuum 123 (2.9%) 11 (1.5%) 134 (2.7%)
Gestational Age 38.0 (35.0, 40.0) 34.0 (29.0, 39.0) 38.0 (34.0, 40.0) <0.001
APGAR (min 5) 8.00 (7.00, 9.00) 6.00 (5.00, 7.00) 8.00 (7.00, 9.00) <0.001
Admission duration 5 (3, 8) 1 (0, 4) 5 (2, 8) <0.001
1 Median (Q1, Q3); n (%)
2 Wilcoxon rank sum test; Pearson’s Chi-squared test; Fisher’s exact test

57.3 Table 3

This is table 3 of many😉

1

Code
df_cint %>% 
    tbl_summary(
        by = sex,                    # aggregate table by sex
        statistic = all_continuous() ~ "{mean} ({sd})") %>% # Stats touse for all continuous variables
    add_stat_label(label = all_continuous()~ "Mean(StD)") %>% # Label to give statistics
    add_difference() %>%    # Add a difeference, ci and p-value column
    modify_spanning_header(all_stat_cols()~ "**Sex**") %>%  # Add spanning header
    modify_caption("**Table 1. Patient Characteristics**") %>% # Add table title
    italicize_levels() %>%  # Italics for the levels
    bold_labels()   # Bold fo rhe labels
The following errors were returned during `modify_caption()`:
✖ For variable `bmicat` (`sex`) and "estimate" statistic: The package "smd" (>=
  0.6.6) is required.
✖ For variable `hpt` (`sex`) and "estimate", "statistic", "p.value",
  "parameter", "conf.low", and "conf.high" statistics: Expecting `variable` to
  be either <logical> or <numeric/integer> coded as 0 and 1.
Table 1. Patient Characteristics
Characteristic
Sex
Difference1 95% CI1,2 p-value1
Female
N = 554
Male
N = 158
Bulb diameter at time 0, Mean(StD) 0.90 (0.22) 0.93 (0.20) -0.03 -0.07, 0.01 0.095
Bulb diameter at 12 months, Mean(StD) 0.85 (0.23) 0.87 (0.29) -0.02 -0.10, 0.05 0.5
    Unknown 262 87


Body Mass Index, Mean(StD) 27.4 (5.7) 23.7 (4.0) 3.7 3.0, 4.5 <0.001
Systolic Blood Pressure, Mean(StD) 125 (24) 127 (24) -2.5 -6.8, 1.8 0.3
Diastolic Blood Pressure, Mean(StD) 80 (14) 79 (15) 0.35 -2.3, 3.0 0.8
Categorized BMI, n (%)




    Normal 208 (38%) 110 (70%)


    High 345 (62%) 48 (30%)


    Unknown 1 0


Hypertension present, n (%) 314 (57%) 95 (60%)


1 Welch Two Sample t-test
2 CI = Confidence Interval

57.4 Stratified table

Code
df_cint %>% 
    drop_na(bmicat) %>% 
    tbl_strata(
        strata = bmicat, ~.x %>%      # Add a strata to the table
            tbl_summary(
                by = sex
            ) %>% 
            add_p()      # P value for each strata
    ) 
Characteristic
Normal
High
Female
N = 2081
Male
N = 1101
p-value2 Female
N = 3451
Male
N = 481
p-value2
Bulb diameter at time 0 0.85 (0.75, 0.99) 0.88 (0.78, 1.03) 0.074 0.85 (0.75, 1.03) 0.91 (0.80, 1.05) 0.13
Bulb diameter at 12 months 0.80 (0.70, 0.90) 0.84 (0.68, 0.98) 0.2 0.85 (0.70, 0.95) 0.88 (0.68, 0.95) 0.9
    Unknown 97 60
164 27
Body Mass Index 22.22 (20.09, 23.51) 21.85 (20.31, 23.11) 0.4 29.8 (27.5, 33.3) 27.0 (25.9, 30.9) <0.001
Systolic Blood Pressure 118 (100, 133) 120 (107, 132) 0.3 123 (111, 140) 136 (122, 158) <0.001
Diastolic Blood Pressure 76 (67, 84) 74 (66, 84) 0.7 80 (71, 90) 85 (77, 95) 0.035
Hypertension present 97 (47%) 57 (52%) 0.4 216 (63%) 38 (79%) 0.025
1 Median (Q1, Q3); n (%)
2 Wilcoxon rank sum test; Pearson’s Chi-squared test

57.5 Specifying tables tests

Code
df_cint %>% 
    tbl_summary(by = sex,
                statistic = list(all_continuous() ~ "{mean} ({sd})")
    ) %>% 
    add_p(
        test = list(
            all_continuous()~ "t.test",     # Specify T test for all continuous variables
            all_categorical() ~ "fisher.test"     # Specify fisher's text for all categorical variables
        )
    ) %>% 
    separate_p_footnotes()     # Specific p-value labelled
Characteristic Female
N = 5541
Male
N = 1581
p-value
Bulb diameter at time 0 0.90 (0.22) 0.93 (0.20) 0.0952
Bulb diameter at 12 months 0.85 (0.23) 0.87 (0.29) 0.52
    Unknown 262 87
Body Mass Index 27.4 (5.7) 23.7 (4.0) <0.0012
Systolic Blood Pressure 125 (24) 127 (24) 0.32
Diastolic Blood Pressure 80 (14) 79 (15) 0.82
Categorized BMI

<0.0013
    Normal 208 (38%) 110 (70%)
    High 345 (62%) 48 (30%)
    Unknown 1 0
Hypertension present 314 (57%) 95 (60%) 0.53
1 Mean (SD); n (%)
2 Welch Two Sample t-test
3 Fisher’s exact test

57.6 Customised tables

Code
df_cint %>% 
    select(bmi, hpt, bulb_0, sbp, bmicat) %>% 
    tbl_summary(
        by = hpt,
        statistic = list(
            bmi ~ "{mean} ({sd})",
            all_categorical() ~ "{n} ({p})",
            sbp ~ "{median} ({p25}, {p75})",
            bulb_0 ~ "{mean} ({sd})"),
         missing_text = "(Missing",
        digits = list(all_categorical() ~ c(0, 1))
    ) %>% 
    add_stat_label(label = list(
        bmi = "Mean(SD)",
        bulb_0 = "Mean(SD)",
        all_categorical() ~ "n(%)",
        sbp = "Median(IQR)"
    )) %>% 
    add_p(
        test = list(
            bmi ~ "t.test",
            bulb_0 ~ "t.test",
            all_categorical() ~ "fisher.test",
            sbp ~ "wilcox.test"
        )
    ) %>% 
    separate_p_footnotes()
Characteristic No
N = 303
Yes
N = 409
p-value
Body Mass Index, Mean(SD) 25.3 (5.4) 27.5 (5.5) <0.0011
Bulb diameter at time 0, Mean(SD) 0.88 (0.22) 0.92 (0.22) 0.0131
Systolic Blood Pressure, Median(IQR) 108 (98, 113) 134 (125, 150) <0.0012
Categorized BMI, n(%)

<0.0013
    Normal 164 (54.1) 154 (37.7)
    High 139 (45.9) 254 (62.3)
    (Missing 0 1
1 Welch Two Sample t-test
2 Wilcoxon rank sum test
3 Fisher’s exact test

57.7 Creating data for the paired table

Code
df_paired_cint <- 
    df_cint_all %>% 
    select(sid, cart, cca_0, cca_12, ica_0, ica_12) %>%
    mutate(
        ica_12 = case_when(ica_12 > median(ica_12, na.rm=T) ~ "High",
                               ica_12 <= median(ica_12, na.rm=T) ~ "Low") %>% 
            factor(),
        ica_0 = case_when(ica_0 > median(ica_0, na.rm=T) ~ "High",
                               ica_0 <= median(ica_0, na.rm=T) ~ "Low") %>% 
            factor()
    ) %>% 
    select(sid, cart, cca_0, cca_12, ica_0, ica_12)

df_A <-
    df_paired_cint %>% 
    pivot_longer(cols =  c(cca_0, cca_12), 
                 names_to = c("cca", "period"), 
                 names_sep = "_", 
                 values_to  = "cca_measure") %>% 
    select(sid, cart, period, cca_measure)

df_paired_long <-
    df_paired_cint %>%
    pivot_longer(cols =  c(ica_0, ica_12), 
                 names_to = c("ica", "period"), 
                 names_sep = "_", 
                 values_to  = "ica_measure") %>% 
    select(sid, cart, period, ica_measure) %>% 
    full_join(df_A, by = c("sid", "period", "cart"))

57.8 Paired table

Code
df_paired_long %>% 
    mutate(period = case_when(period == "0" ~ "Month 0",
                              period == "12" ~ "Month 12")) %>% 
    filter(complete.cases(.)) %>% 
    group_by(sid) %>% 
    filter(n()==2) %>% 
    ungroup() %>%
    tbl_strata(strata = cart, ~.x %>%
        tbl_summary(by = period, 
                    include = -sid,
                    statistic = list(cca_measure ~ "{mean} ({sd})",
                                     ica_measure ~ "{n} ({p})"),
                    label = list(ica_measure = "ICA",
                                 cca_measure = "CCA(mm)"),
                    digits = list(all_categorical() ~ c(0, 1)))%>% 
            add_p(test = list(ica_measure ~ "mcnemar.test",
                              cca_measure ~ "paired.t.test"),
                              group = sid,
                  pvalue_fun = ~ gtsummary::style_pvalue(.x, digits = 3)) %>% 
            separate_p_footnotes() %>% 
            add_stat_label(label = list(ica_measure = "n(%)",
                                        cca_measure = "mean(SD)"))
    ) %>% 
    modify_caption("**Table 2: Comparative month 0 and 12 measures**")
Table 2: Comparative month 0 and 12 measures
Characteristic
Naive
cART
CONTROL
Month 0
N = 86
Month 12
N = 86
p-value Month 0
N = 237
Month 12
N = 237
p-value Month 0
N = 40
Month 12
N = 40
p-value
ICA, n(%)

0.7281

0.7591

0.0021
    High 32 (37.2) 29 (33.7)
154 (65.0) 150 (63.3)
14 (35.0) 1 (2.5)
    Low 54 (62.8) 57 (66.3)
83 (35.0) 87 (36.7)
26 (65.0) 39 (97.5)
CCA(mm), mean(SD) 0.83 (0.13) 0.77 (0.14) 0.0022 0.93 (0.17) 0.88 (0.16) <0.0012 0.84 (0.13) 0.70 (0.11) <0.0012
1 McNemar’s Chi-squared test with continuity correction
2 Paired t-test

58 Cross table

Code
df_cint %>% 
    gtsummary::tbl_cross(row = bmicat, 
                         col = sex, 
                         percent = "row", 
                         digits = c(0,1),
                         missing = "ifany",
                         missing_text = "[Missing]") %>% 
    add_p() %>% 
    modify_caption(caption = "**Table X: tbl_cross Example**")
Table X: tbl_cross Example
Sex
Total p-value1
Female Male
Categorized BMI


<0.001
    Normal 208 (65.4%) 110 (34.6%) 318 (100.0%)
    High 345 (87.8%) 48 (12.2%) 393 (100.0%)
    [Missing] 1 (100.0%) 0 (0.0%) 1 (100.0%)
Total 554 (77.8%) 158 (22.2%) 712 (100.0%)
1 Fisher’s exact test

59 Regression models

Univariate regression

59.0.1 Univariate linear regression

Code
tbl1 <- 
    df_cint %>% 
    tbl_uvregression(y = sbp, method = lm) %>% 
    modify_header(update = list(estimate ~ "**Estimate**",
                                label ~ "**Variable**")) %>% 
    modify_caption(caption = "**Table XI:** Univariate linear regression")
Warning: The `update` argument of `modify_header()` is deprecated as of gtsummary 2.0.0.
ℹ Use `modify_header(...)` input instead. Dynamic dots allow for syntax like
  `modify_header(!!!list(...))`.
ℹ The deprecated feature was likely used in the gtsummary package.
  Please report the issue at <https://github.com/ddsjoberg/gtsummary/issues>.
Code
tbl1 %>% show_header_names() # Show header names of table
Column Name   Header            
label         "**Variable**"    
stat_n        "**N**"           
estimate      "**Estimate**"    
conf.low      "**95% CI**"      
p.value       "**p-value**"     
* These values may be dynamically placed into headers (and other locations).
ℹ Review the `modify_header()` (`?gtsummary::modify_header()`) help for
  examples.
Code
tbl1
Table XI: Univariate linear regression
Variable N Estimate 95% CI1 p-value
Sex 712


    Female

    Male
2.5 -1.7, 6.7 0.2
Bulb diameter at time 0 712 22 14, 30 <0.001
Bulb diameter at 12 months 363 18 7.7, 27 <0.001
Body Mass Index 712 0.85 0.53, 1.2 <0.001
Diastolic Blood Pressure 712 1.4 1.3, 1.4 <0.001
Categorized BMI 711


    Normal

    High
9.5 6.0, 13 <0.001
Hypertension present 712


    No

    Yes
35 32, 37 <0.001
1 CI = Confidence Interval

59.0.2 Univariate logistic regression

Code
df_mbu_clean %>% 
    select(-dura_adm) %>% 
    tbl_uvregression(
        method = glm,
        y = died,
        method.args = family(binomial),
        exponentiate = TRUE,
        pvalue_fun = function(x) style_pvalue(x, digits = 3)
    ) %>% 
    modify_header(update = list(estimate ~ "**Estimate**",
                                label ~ "**Variable**"))
Variable N Estimate1 95% CI1 p-value
Weight (kgs) 4,984 0.34 0.31, 0.38 <0.001
Age (days) 4,984 0.99 0.94, 1.04 0.808
Sex 4,984


    Female

    Male
0.96 0.82, 1.13 0.642
Place of birth 4,984


    Clinic/Hospital

    Home
0.50 0.23, 0.99 0.064
    KATH
0.45 0.37, 0.54 <0.001
    Maternity Home
1.32 0.28, 4.80 0.689
APGAR (min 1) 4,984 0.67 0.64, 0.70 <0.001
Mode of delivery 4,984


    C/S

    SVD
1.64 1.40, 1.92 <0.001
    Vacuum
0.66 0.33, 1.18 0.193
Gestational Age 4,984 0.82 0.81, 0.84 <0.001
APGAR (min 5) 4,984 0.56 0.53, 0.58 <0.001
1 OR = Odds Ratio, CI = Confidence Interval

59.0.3 Univariate survival analysis

Code
library(survival)

df_mbu_clean %>% 
    tbl_uvregression(
        method = coxph,
        y = Surv(dura_adm, died=="Yes"),
        exponentiate = TRUE,
        pvalue_fun = function(x) style_pvalue(x, digits = 3)
    ) %>% 
    modify_caption(caption = "**Table 10**: Univariate Cox's Proportional Model")
Table 10: Univariate Cox’s Proportional Model
Characteristic N HR1 95% CI1 p-value
Weight (kgs) 4,984 0.45 0.41, 0.50 <0.001
Age (days) 4,984 1.00 0.96, 1.04 0.957
Sex 4,984


    Female

    Male
1.03 0.89, 1.19 0.682
Place of birth 4,984


    Clinic/Hospital

    Home
0.57 0.29, 1.11 0.098
    KATH
0.54 0.46, 0.64 <0.001
    Maternity Home
1.09 0.35, 3.42 0.877
APGAR (min 1) 4,984 0.72 0.69, 0.74 <0.001
Mode of delivery 4,984


    C/S

    SVD
1.48 1.28, 1.71 <0.001
    Vacuum
0.75 0.41, 1.36 0.342
Gestational Age 4,984 0.87 0.86, 0.88 <0.001
APGAR (min 5) 4,984 0.65 0.63, 0.67 <0.001
1 HR = Hazard Ratio, CI = Confidence Interval

59.1 Multivariate regression

59.1.1 Multiple liner regreession

Code
df_cint %>% 
    lm(sbp ~ ., data = .) %>% 
    tbl_regression(pvalue_fun = function(x) style_pvalue(x, digits = 3)) %>% 
    modify_header(update = list(estimate ~ "**Estimate**",
                                label ~ "**Variable**")) %>% 
    modify_caption(caption = "**Table XI:** Multivariate linear regression") %>%
    bold_labels() %>% 
    as_gt() %>% 
    gt::tab_options(
        table.font.size = 14,
        table.font.names = "Times New Roman",
        data_row.padding = 1
    ) 
Table XI: Multivariate linear regression
Variable Estimate 95% CI1 p-value
Sex


    Female
    Male -0.25 -3.6, 3.1 0.883
Bulb diameter at time 0 4.6 -1.2, 10 0.122
Bulb diameter at 12 months 7.4 1.7, 13 0.011
Body Mass Index 0.05 -0.32, 0.43 0.781
Diastolic Blood Pressure 0.92 0.79, 1.0 <0.001
Categorized BMI


    Normal
    High 0.19 -4.0, 4.3 0.930
Hypertension present


    No
    Yes 17 13, 20 <0.001
1 CI = Confidence Interval

59.1.2 Multiple variable logistic regression

Code
df_mbu_clean %>% 
    select(-dura_adm) %>% 
    glm(died ~ ., data = ., family = binomial) %>% 
    tbl_regression(exponentiate = T) %>% 
    add_n(location = "level") %>% 
    add_nevent(location = "level") %>% 
    add_global_p() %>% 
    add_q() %>% 
    add_significance_stars(
        hide_p = FALSE,
        hide_ci = FALSE,
        hide_se = TRUE) %>% 
    add_vif() %>% 
    modify_header(label = "**Predictor**") %>% 
    modify_caption(caption = "**Table 5: Highly customised logistic regression**") %>% 
    modify_footnote(ci = "CI = My 95%CI", abbreviation = TRUE) %>% 
    sort_p() %>% 
    bold_p(t=0.1, q=TRUE) %>% 
    bold_labels() %>% 
    italicize_levels()
Warning: Use of the "ci" column was deprecated in gtsummary v2.0, and the column will
eventually be removed from the tables.
! Review `?deprecated_ci_column()` for details on how to update your code.
ℹ The "ci" column has been replaced by the merged "conf.low" and "conf.high"
  columns (merged with `modify_column_merge()`).
ℹ In most cases, a simple update from `ci = 'a new label'` to `conf.low = 'a
  new label'` is sufficient.
Table 5: Highly customised logistic regression
Predictor N Event N OR1,2 95% CI2 p-value q-value3 GVIF2 Adjusted GVIF4,2
APGAR (min 5) 4,984 727 0.57*** 0.51, 0.63 <0.001 <0.001 3.8 2.0
Weight (kgs) 4,984 727 0.51*** 0.43, 0.62 <0.001 <0.001 2.9 1.7
Place of birth


***

<0.001 <0.001 1.1 1.0
    Clinic/Hospital 796 195



    Home 64 9 0.66 0.27, 1.48



    KATH 4,114 520 0.56 0.45, 0.70



    Maternity Home 10 3 1.68 0.30, 8.14



Gestational Age 4,984 727 0.94*** 0.91, 0.97 <0.001 0.001 2.9 1.7
Age (days) 4,984 727 1.05* 1.00, 1.09 0.032 0.052 1.0 1.0
Mode of delivery


*

0.045 0.060 1.2 1.0
    C/S 2,662 318



    SVD 2,188 398 0.78 0.64, 0.95



    Vacuum 134 11 1.00 0.48, 1.90



Sex



0.095 0.11 1.0 1.0
    Female 2,216 329



    Male 2,768 398 1.17 0.97, 1.41



APGAR (min 1) 4,984 727 1.00 0.92, 1.10 >0.9 >0.9 3.9 2.0
1 p<0.05; p<0.01; p<0.001
2 OR = Odds Ratio, CI = Confidence Interval, GVIF = Generalized Variance Inflation Factor
3 False discovery rate correction for multiple testing
4 GVIF2

59.1.3 Cox proportional hazard regression

Code
df_mbu_clean %>% 
    coxph(Surv(dura_adm, died == "Yes")~., data = .) %>% 
    tbl_regression(exponentiate = T)
Characteristic HR1 95% CI1 p-value
Weight (kgs) 0.63 0.54, 0.74 <0.001
Age (days) 1.04 1.01, 1.07 0.021
Sex


    Female
    Male 1.11 0.95, 1.28 0.2
Place of birth


    Clinic/Hospital
    Home 0.55 0.28, 1.08 0.083
    KATH 0.66 0.55, 0.78 <0.001
    Maternity Home 1.08 0.34, 3.39 0.9
APGAR (min 1) 0.96 0.90, 1.03 0.3
Mode of delivery


    C/S
    SVD 0.77 0.66, 0.90 0.001
    Vacuum 0.96 0.52, 1.77 >0.9
Gestational Age 0.97 0.94, 1.00 0.031
APGAR (min 5) 0.68 0.64, 0.74 <0.001
1 HR = Hazard Ratio, CI = Confidence Interval

60 Combining tables

60.1 Merging

60.2 Stacking

60.3 Multiple comparison table

Code
library(titanic)
library(plotrix) #has a std.error function

# create smaller version of the dataset
df <- 
  titanic_train %>%
  select(Sex, Embarked, Age, Fare) %>%
  filter(Embarked != "") # deleting empty Embarked status

# first, write a little function to get the 2-way ANOVA p-values in a table
# function to get 2-way ANOVA p-values in tibble
twoway_p <- function(variable) {
  paste(variable, "~ Sex * Embarked") %>%
    as.formula() %>%
    aov(data = df) %>% 
    broom::tidy() %>%
    select(term, p.value) %>%
    filter(complete.cases(.)) %>%
    pivot_wider(names_from = term, values_from = p.value) %>%
    mutate(
      variable = .env$variable,
      row_type = "label"
    )
}

# add all results to a single table (will be merged with gtsummary table in next step)
twoway_results <-
  bind_rows(
    twoway_p("Age"),
    twoway_p("Fare")
  )
twoway_results
# A tibble: 2 × 5
           Sex Embarked `Sex:Embarked` variable row_type
         <dbl>    <dbl>          <dbl> <chr>    <chr>   
1 0.00823      3.97e- 1         0.611  Age      label   
2 0.0000000191 4.27e-16         0.0958 Fare     label   
Code
tbl <-
  # first build a stratified `tbl_summary()` table to get summary stats by two variables
  df %>%
  tbl_strata(
    strata =  Sex,
    .tbl_fun =
      ~.x %>%
      tbl_summary(
        by = Embarked,
        missing = "no",
        statistic = all_continuous() ~ "{mean} ({std.error})",
        digits = everything() ~ 1
      ) %>%
      modify_header(all_stat_cols() ~ "**{level}**")
  ) %>%
  # merge the 2way ANOVA results into tbl_summary table
  modify_table_body(
    ~.x %>%
      left_join(
        twoway_results,
        by = c("variable", "row_type")
      )
  ) %>%
  # by default the new columns are hidden, add a header to unhide them
  modify_header(list(
    Sex ~ "**Sex**", 
    Embarked ~ "**Embarked**", 
    `Sex:Embarked` ~ "**Sex * Embarked**"
  )) %>%
  # adding spanning header to analysis results
  modify_spanning_header(c(Sex, Embarked, `Sex:Embarked`) ~ "**Two-way ANOVA p-values**") %>%
  # format the p-values with a pvalue formatting function
  modify_fmt_fun(c(Sex, Embarked, `Sex:Embarked`) ~ style_pvalue) %>%
  # update the footnote to be nicer looking
  modify_footnote(all_stat_cols() ~ "Mean (SE)")

61 hh

Code
# standardized difference
tbl1 <- 
  trial %>% 
  select(trt, age, marker) %>%
  tbl_summary(by = trt, missing = "no",
              statistic = all_continuous() ~ "{mean} ({sd})") %>%
  add_difference(all_continuous() ~ "cohens_d")

# table with p-value and corrected p-values
tbl2 <- 
  trial %>% 
  select(trt, age, marker) %>%
  tbl_summary(by = trt, missing = "no") %>%
  add_p(all_continuous() ~ "t.test") %>%
  add_q(method = "bonferroni") %>%
  modify_column_hide(all_stat_cols())

# merge tbls together
tbl_final <- 
  tbl_merge(list(tbl1, tbl2)) %>%
  # remove spanning headers
  modify_spanning_header(everything() ~ NA)

tbl1
Characteristic Drug A
N = 981
Drug B
N = 1021
Difference2 95% CI2,3
Age 47 (15) 47 (14) -0.03 -0.32, 0.25
Marker Level (ng/mL) 1.02 (0.89) 0.82 (0.83) 0.23 -0.06, 0.51
1 Mean (SD)
2 Cohen’s D
3 CI = Confidence Interval
Code
tbl2
Characteristic p-value1 q-value2
Age 0.8 >0.9
Marker Level (ng/mL) 0.12 0.2
1 Welch Two Sample t-test
2 Bonferroni correction for multiple testing
Code
tbl_final
Characteristic Drug A
N = 981
Drug B
N = 1021
Difference2 95% CI2,3 p-value4 q-value5
Age 47 (15) 47 (14) -0.03 -0.32, 0.25 0.8 >0.9
Marker Level (ng/mL) 1.02 (0.89) 0.82 (0.83) 0.23 -0.06, 0.51 0.12 0.2
1 Mean (SD)
2 Cohen’s D
3 CI = Confidence Interval
4 Welch Two Sample t-test
5 Bonferroni correction for multiple testing

  1. Need to insert this k3k3↩︎

  2. 1/(2*df)↩︎