52 `gtsummary`

53 Reading in the data

Code

df_cint_all <- dget("C:/Dataset/cint_data_clean")
df_mbu <- readxl::read_xlsx("C:/Dataset/mbu.xlsx")

54 Formatting and cleaning the cint data

Code

df_cint <- 
    df_cint_all %>%
    select(sex, bulb_0, bulb_12, bmi, sbp, dbp) %>% 
    mutate(
      bmicat = case_when(bmi < 25 ~ "Normal", bmi > 25 ~ "High") %>% 
        factor(levels = c("Normal", "High")),
      hpt = case_when(
        sbp > 120 | dbp > 80 ~ "Yes", sbp <= 120 & dbp <= 80 ~ "No" ) %>% 
        factor()) %>% 
    labelled::set_variable_labels(
        sex = "Sex",
        bulb_0 ="Bulb diameter at time 0",
        bulb_12 ="Bulb diameter at 12 months",
        bmicat = "Categorized BMI",
        sbp = "Systolic Blood Pressure",
        dbp = "Diastolic Blood Pressure",
        bmi = "Body Mass Index",
        hpt = "Hypertension present")

55 Formatting and cleaning the MBU data

Yes it is here, and maybe here too

Code

df_mbu_clean <-
    df_mbu %>% 
    select(-c(pt_name, starts_with("diag"), address, Field1, ip, disweight)) %>% 
    mutate(wt = ifelse(wt > 8, NA, wt),
           age = ifelse(age > 60, NA, age),
           sex = ifelse(sex == "Missing", NA, sex) %>% factor(),
           place = ifelse((place == "Missing" | place == "Self"), NA, place),
           apgar1 = ifelse(apgar1 >10, NA, apgar1),
           apgar5 = ifelse(apgar2 >10, NA, apgar2), 
           dura_adm = difftime( disDateTime, admDateTime,units = "days"),
           dura_adm = ifelse((dura_adm <=0 | dura_adm > 90), NA, dura_adm),
           died = case_when(outcome == "Discharged" ~ "No",
                            outcome == "Died" ~ "Yes") %>% factor(),
           gestage = ifelse(gestage < 26 |gestage > 48, NA, gestage),
           del = case_when(del == "SVD" ~ "SVD",
                           del == "C/S" ~ "C/S",
                           del == "Forceps" ~ "Vacuum",
                           del == "Vacuum" ~ "Vacuum"))%>% 
    select(-c(apgar2, admDateTime, disDateTime, delDateTime, outcome)) %>% 
    drop_na() %>% 
    labelled::set_variable_labels(
      wt = "Weight (kgs)",
      sex = "Sex",
      age = "Age (days)",
      apgar1 = "APGAR (min 1)",
      apgar5 = "APGAR (min 5)",
      gestage = "Gestational Age",
      place = "Place of birth",
      del = "Mode of delivery",
      dura_adm = "Admission duration",
      died = "mortality")

56 Summarizing data

Code

df_cint %>% 
  summarytools::dfSummary(graph.col = F, labels.col = F)

Data Frame Summary  
df_cint  
Dimensions: 712 x 8  
Duplicates: 2  

-------------------------------------------------------------------------------------
No   Variable    Stats / Values            Freqs (% of Valid)    Valid      Missing  
---- ----------- ------------------------- --------------------- ---------- ---------
1    sex         1. Female                 554 (77.8%)           712        0        
     [factor]    2. Male                   158 (22.2%)           (100.0%)   (0.0%)   

2    bulb_0      Mean (sd) : 0.9 (0.2)     50 distinct values    712        0        
     [numeric]   min < med < max:                                (100.0%)   (0.0%)   
                 0.5 < 0.9 < 2.8                                                     
                 IQR (CV) : 0.3 (0.2)                                                

3    bulb_12     Mean (sd) : 0.9 (0.2)     46 distinct values    363        349      
     [numeric]   min < med < max:                                (51.0%)    (49.0%)  
                 0.4 < 0.8 < 2.6                                                     
                 IQR (CV) : 0.2 (0.3)                                                

4    bmi         Mean (sd) : 26.6 (5.6)    589 distinct values   712        0        
     [numeric]   min < med < max:                                (100.0%)   (0.0%)   
                 13.3 < 26.1 < 45.3                                                  
                 IQR (CV) : 7.6 (0.2)                                                

5    sbp         Mean (sd) : 125.1 (24)    118 distinct values   712        0        
     [numeric]   min < med < max:                                (100.0%)   (0.0%)   
                 66 < 121 < 231                                                      
                 IQR (CV) : 29 (0.2)                                                 

6    dbp         Mean (sd) : 79.6 (14.1)   71 distinct values    712        0        
     [numeric]   min < med < max:                                (100.0%)   (0.0%)   
                 43 < 79 < 135                                                       
                 IQR (CV) : 19 (0.2)                                                 

7    bmicat      1. Normal                 318 (44.7%)           711        1        
     [factor]    2. High                   393 (55.3%)           (99.9%)    (0.1%)   

8    hpt         1. No                     303 (42.6%)           712        0        
     [factor]    2. Yes                    409 (57.4%)           (100.0%)   (0.0%)   
-------------------------------------------------------------------------------------

57 Summary statistics tables

57.1 Table 1

Code

df_cint %>% 
    tbl_summary(
        by = sex,                    # aggregate table by sex
        missing_text = "(Missing)",  # Label missing data as such
        type = sbp ~ "continuous2",  # Report sbp with 2 or more statistics
        statistic = list(sbp ~ c("{mean},({sd})",
                                 "({min},{max})"),
                         bmicat ~ "{n}/{N} ({p}%)",
                         bulb_0 ~ c("{mean} ({sd})")),
        label = bmi ~ "BMI (Kg/m sq.)",   # To modify labels
        digits = dbp ~ 1) %>%         # Force dbp to have one decimal
    add_overall(last = T) %>%           # Add overall column
    modify_spanning_header(all_stat_cols() ~ "**Sex of Participants**") %>% 
    add_p(pvalue_fun = ~ gtsummary::style_pvalue(.x, digits = 3)) %>%    # Adds p-value column
    add_q(method = "fdr",                   #Add p-value for multiple comparison
          pvalue_fun = ~ gtsummary::style_pvalue(.x, digits = 3)) %>% 
    add_stat_label() %>%        # Add specific stats to each variable
    add_n() %>%                     # Add valid observation to each variable
    bold_p() %>%       # Bold significant p-values
    add_significance_stars()   # add significance stars

Characteristic	N	Sex of Participants			p-value¹	q-value²
Characteristic	N	Female N = 554	Male N = 158	Overall N = 712	p-value¹	q-value²
Bulb diameter at time 0, Mean (SD)	712	0.90 (0.22)	0.93 (0.20)	0.91 (0.22)	0.035*	0.082
Bulb diameter at 12 months, Median (Q1, Q3)	363	0.83 (0.70, 0.95)	0.85 (0.68, 0.98)	0.83 (0.70, 0.95)	0.683	0.683
(Missing)		262	87	349
BMI (Kg/m sq.), Median (Q1, Q3)	712	27.1 (23.2, 31.2)	23.0 (21.0, 25.6)	26.1 (22.4, 30.1)	<0.001***	<0.001
Systolic Blood Pressure	712				0.242	0.423
Mean,(SD)		125,(24)	127,(24)	125,(24)
(Min,Max)		(66,231)	(82,199)	(66,231)
Diastolic Blood Pressure, Median (Q1, Q3)	712	79.0 (70.0, 88.0)	78.0 (69.0, 90.0)	79.0 (70.0, 89.0)	0.563	0.657
Categorized BMI, n/N (%)	711				<0.001***	<0.001
Normal		208/553 (38%)	110/158 (70%)	318/711 (45%)
High		345/553 (62%)	48/158 (30%)	393/711 (55%)
(Missing)		1	0	1
Hypertension present, n (%)	712	314 (57%)	95 (60%)	409 (57%)	0.439	0.615
¹ p<0.05; p<0.01; p<0.001
² False discovery rate correction for multiple testing

57.2 Table 2

Code

df_mbu_clean %>% 
    tbl_summary(by = died) %>% 
    add_overall(last = T) %>%
    add_p() %>% 
    modify_spanning_header(all_stat_cols() ~ "**Mortality**") %>% 
    modify_caption(caption = "**Table 2**: MBU data by outcome") %>% 
    bold_labels()

**Table 2**: MBU data by outcome
Characteristic	Mortality			p-value²
Characteristic	No N = 4,257¹	Yes N = 727¹	Overall N = 4,984¹	p-value²
Weight (kgs)	2.80 (2.00, 3.30)	1.60 (1.10, 2.70)	2.70 (1.90, 3.30)	<0.001
Age (days)	0.00 (0.00, 1.00)	0.00 (0.00, 1.00)	0.00 (0.00, 1.00)	0.008
Sex				0.6
Female	1,887 (44%)	329 (45%)	2,216 (44%)
Male	2,370 (56%)	398 (55%)	2,768 (56%)
Place of birth				<0.001
Clinic/Hospital	601 (14%)	195 (27%)	796 (16%)
Home	55 (1.3%)	9 (1.2%)	64 (1.3%)
KATH	3,594 (84%)	520 (72%)	4,114 (83%)
Maternity Home	7 (0.2%)	3 (0.4%)	10 (0.2%)
APGAR (min 1)	6.00 (4.00, 7.00)	4.00 (2.00, 6.00)	6.00 (4.00, 7.00)	<0.001
Mode of delivery				<0.001
C/S	2,344 (55%)	318 (44%)	2,662 (53%)
SVD	1,790 (42%)	398 (55%)	2,188 (44%)
Vacuum	123 (2.9%)	11 (1.5%)	134 (2.7%)
Gestational Age	38.0 (35.0, 40.0)	34.0 (29.0, 39.0)	38.0 (34.0, 40.0)	<0.001
APGAR (min 5)	8.00 (7.00, 9.00)	6.00 (5.00, 7.00)	8.00 (7.00, 9.00)	<0.001
Admission duration	5 (3, 8)	1 (0, 4)	5 (2, 8)	<0.001
¹ Median (Q1, Q3); n (%)
² Wilcoxon rank sum test; Pearson’s Chi-squared test; Fisher’s exact test

57.3 Table 3

This is table 3 of many😉

Code

df_cint %>% 
    tbl_summary(
        by = sex,                    # aggregate table by sex
        statistic = all_continuous() ~ "{mean} ({sd})") %>% # Stats touse for all continuous variables
    add_stat_label(label = all_continuous()~ "Mean(StD)") %>% # Label to give statistics
    add_difference() %>%    # Add a difeference, ci and p-value column
    modify_spanning_header(all_stat_cols()~ "**Sex**") %>%  # Add spanning header
    modify_caption("**Table 1. Patient Characteristics**") %>% # Add table title
    italicize_levels() %>%  # Italics for the levels
    bold_labels()   # Bold fo rhe labels

The following errors were returned during `modify_caption()`:
✖ For variable `bmicat` (`sex`) and "estimate" statistic: The package "smd" (>=
  0.6.6) is required.
✖ For variable `hpt` (`sex`) and "estimate", "statistic", "p.value",
  "parameter", "conf.low", and "conf.high" statistics: Expecting `variable` to
  be either <logical> or <numeric/integer> coded as 0 and 1.

**Table 1. Patient Characteristics**
Characteristic	Sex		Difference¹	95% CI^1,2	p-value¹
Characteristic	Female N = 554	Male N = 158	Difference¹	95% CI^1,2	p-value¹
Bulb diameter at time 0, Mean(StD)	0.90 (0.22)	0.93 (0.20)	-0.03	-0.07, 0.01	0.095
Bulb diameter at 12 months, Mean(StD)	0.85 (0.23)	0.87 (0.29)	-0.02	-0.10, 0.05	0.5
Unknown	262	87
Body Mass Index, Mean(StD)	27.4 (5.7)	23.7 (4.0)	3.7	3.0, 4.5	<0.001
Systolic Blood Pressure, Mean(StD)	125 (24)	127 (24)	-2.5	-6.8, 1.8	0.3
Diastolic Blood Pressure, Mean(StD)	80 (14)	79 (15)	0.35	-2.3, 3.0	0.8
Categorized BMI, n (%)
Normal	208 (38%)	110 (70%)
High	345 (62%)	48 (30%)
Unknown	1	0
Hypertension present, n (%)	314 (57%)	95 (60%)
¹ Welch Two Sample t-test
² CI = Confidence Interval

57.4 Stratified table

Code

df_cint %>% 
    drop_na(bmicat) %>% 
    tbl_strata(
        strata = bmicat, ~.x %>%      # Add a strata to the table
            tbl_summary(
                by = sex
            ) %>% 
            add_p()      # P value for each strata
    )

Characteristic	Normal			High
Characteristic	Female N = 208¹	Male N = 110¹	p-value²	Female N = 345¹	Male N = 48¹	p-value²
Bulb diameter at time 0	0.85 (0.75, 0.99)	0.88 (0.78, 1.03)	0.074	0.85 (0.75, 1.03)	0.91 (0.80, 1.05)	0.13
Bulb diameter at 12 months	0.80 (0.70, 0.90)	0.84 (0.68, 0.98)	0.2	0.85 (0.70, 0.95)	0.88 (0.68, 0.95)	0.9
Unknown	97	60		164	27
Body Mass Index	22.22 (20.09, 23.51)	21.85 (20.31, 23.11)	0.4	29.8 (27.5, 33.3)	27.0 (25.9, 30.9)	<0.001
Systolic Blood Pressure	118 (100, 133)	120 (107, 132)	0.3	123 (111, 140)	136 (122, 158)	<0.001
Diastolic Blood Pressure	76 (67, 84)	74 (66, 84)	0.7	80 (71, 90)	85 (77, 95)	0.035
Hypertension present	97 (47%)	57 (52%)	0.4	216 (63%)	38 (79%)	0.025
¹ Median (Q1, Q3); n (%)
² Wilcoxon rank sum test; Pearson’s Chi-squared test

57.5 Specifying tables tests

Code

df_cint %>% 
    tbl_summary(by = sex,
                statistic = list(all_continuous() ~ "{mean} ({sd})")
    ) %>% 
    add_p(
        test = list(
            all_continuous()~ "t.test",     # Specify T test for all continuous variables
            all_categorical() ~ "fisher.test"     # Specify fisher's text for all categorical variables
        )
    ) %>% 
    separate_p_footnotes()     # Specific p-value labelled

Characteristic	Female N = 554¹	Male N = 158¹	p-value
Bulb diameter at time 0	0.90 (0.22)	0.93 (0.20)	0.095²
Bulb diameter at 12 months	0.85 (0.23)	0.87 (0.29)	0.5²
Unknown	262	87
Body Mass Index	27.4 (5.7)	23.7 (4.0)	<0.001²
Systolic Blood Pressure	125 (24)	127 (24)	0.3²
Diastolic Blood Pressure	80 (14)	79 (15)	0.8²
Categorized BMI			<0.001³
Normal	208 (38%)	110 (70%)
High	345 (62%)	48 (30%)
Unknown	1	0
Hypertension present	314 (57%)	95 (60%)	0.5³
¹ Mean (SD); n (%)
² Welch Two Sample t-test
³ Fisher’s exact test

57.6 Customised tables

Code

df_cint %>% 
    select(bmi, hpt, bulb_0, sbp, bmicat) %>% 
    tbl_summary(
        by = hpt,
        statistic = list(
            bmi ~ "{mean} ({sd})",
            all_categorical() ~ "{n} ({p})",
            sbp ~ "{median} ({p25}, {p75})",
            bulb_0 ~ "{mean} ({sd})"),
         missing_text = "(Missing",
        digits = list(all_categorical() ~ c(0, 1))
    ) %>% 
    add_stat_label(label = list(
        bmi = "Mean(SD)",
        bulb_0 = "Mean(SD)",
        all_categorical() ~ "n(%)",
        sbp = "Median(IQR)"
    )) %>% 
    add_p(
        test = list(
            bmi ~ "t.test",
            bulb_0 ~ "t.test",
            all_categorical() ~ "fisher.test",
            sbp ~ "wilcox.test"
        )
    ) %>% 
    separate_p_footnotes()

Characteristic	No N = 303	Yes N = 409	p-value
Body Mass Index, Mean(SD)	25.3 (5.4)	27.5 (5.5)	<0.001¹
Bulb diameter at time 0, Mean(SD)	0.88 (0.22)	0.92 (0.22)	0.013¹
Systolic Blood Pressure, Median(IQR)	108 (98, 113)	134 (125, 150)	<0.001²
Categorized BMI, n(%)			<0.001³
Normal	164 (54.1)	154 (37.7)
High	139 (45.9)	254 (62.3)
(Missing	0	1
¹ Welch Two Sample t-test
² Wilcoxon rank sum test
³ Fisher’s exact test

57.7 Creating data for the paired table

Code

df_paired_cint <- 
    df_cint_all %>% 
    select(sid, cart, cca_0, cca_12, ica_0, ica_12) %>%
    mutate(
        ica_12 = case_when(ica_12 > median(ica_12, na.rm=T) ~ "High",
                               ica_12 <= median(ica_12, na.rm=T) ~ "Low") %>% 
            factor(),
        ica_0 = case_when(ica_0 > median(ica_0, na.rm=T) ~ "High",
                               ica_0 <= median(ica_0, na.rm=T) ~ "Low") %>% 
            factor()
    ) %>% 
    select(sid, cart, cca_0, cca_12, ica_0, ica_12)

df_A <-
    df_paired_cint %>% 
    pivot_longer(cols =  c(cca_0, cca_12), 
                 names_to = c("cca", "period"), 
                 names_sep = "_", 
                 values_to  = "cca_measure") %>% 
    select(sid, cart, period, cca_measure)

df_paired_long <-
    df_paired_cint %>%
    pivot_longer(cols =  c(ica_0, ica_12), 
                 names_to = c("ica", "period"), 
                 names_sep = "_", 
                 values_to  = "ica_measure") %>% 
    select(sid, cart, period, ica_measure) %>% 
    full_join(df_A, by = c("sid", "period", "cart"))

57.8 Paired table

Code

df_paired_long %>% 
    mutate(period = case_when(period == "0" ~ "Month 0",
                              period == "12" ~ "Month 12")) %>% 
    filter(complete.cases(.)) %>% 
    group_by(sid) %>% 
    filter(n()==2) %>% 
    ungroup() %>%
    tbl_strata(strata = cart, ~.x %>%
        tbl_summary(by = period, 
                    include = -sid,
                    statistic = list(cca_measure ~ "{mean} ({sd})",
                                     ica_measure ~ "{n} ({p})"),
                    label = list(ica_measure = "ICA",
                                 cca_measure = "CCA(mm)"),
                    digits = list(all_categorical() ~ c(0, 1)))%>% 
            add_p(test = list(ica_measure ~ "mcnemar.test",
                              cca_measure ~ "paired.t.test"),
                              group = sid,
                  pvalue_fun = ~ gtsummary::style_pvalue(.x, digits = 3)) %>% 
            separate_p_footnotes() %>% 
            add_stat_label(label = list(ica_measure = "n(%)",
                                        cca_measure = "mean(SD)"))
    ) %>% 
    modify_caption("**Table 2: Comparative month 0 and 12 measures**")

**Table 2: Comparative month 0 and 12 measures**
Characteristic	Naive			cART			CONTROL
Characteristic	Month 0 N = 86	Month 12 N = 86	p-value	Month 0 N = 237	Month 12 N = 237	p-value	Month 0 N = 40	Month 12 N = 40	p-value
ICA, n(%)			0.728¹			0.759¹			0.002¹
High	32 (37.2)	29 (33.7)		154 (65.0)	150 (63.3)		14 (35.0)	1 (2.5)
Low	54 (62.8)	57 (66.3)		83 (35.0)	87 (36.7)		26 (65.0)	39 (97.5)
CCA(mm), mean(SD)	0.83 (0.13)	0.77 (0.14)	0.002²	0.93 (0.17)	0.88 (0.16)	<0.001²	0.84 (0.13)	0.70 (0.11)	<0.001²
¹ McNemar’s Chi-squared test with continuity correction
² Paired t-test

58 Cross table

Code

df_cint %>% 
    gtsummary::tbl_cross(row = bmicat, 
                         col = sex, 
                         percent = "row", 
                         digits = c(0,1),
                         missing = "ifany",
                         missing_text = "[Missing]") %>% 
    add_p() %>% 
    modify_caption(caption = "**Table X: tbl_cross Example**")

**Table X: tbl_cross Example**
	Sex		Total	p-value¹
	Female	Male	Total	p-value¹
Categorized BMI				<0.001
Normal	208 (65.4%)	110 (34.6%)	318 (100.0%)
High	345 (87.8%)	48 (12.2%)	393 (100.0%)
[Missing]	1 (100.0%)	0 (0.0%)	1 (100.0%)
Total	554 (77.8%)	158 (22.2%)	712 (100.0%)
¹ Fisher’s exact test

59 Regression models

Univariate regression

59.0.1 Univariate linear regression

Code

tbl1 <- 
    df_cint %>% 
    tbl_uvregression(y = sbp, method = lm) %>% 
    modify_header(update = list(estimate ~ "**Estimate**",
                                label ~ "**Variable**")) %>% 
    modify_caption(caption = "**Table XI:** Univariate linear regression")

Warning: The `update` argument of `modify_header()` is deprecated as of gtsummary 2.0.0.
ℹ Use `modify_header(...)` input instead. Dynamic dots allow for syntax like
  `modify_header(!!!list(...))`.
ℹ The deprecated feature was likely used in the gtsummary package.
  Please report the issue at <https://github.com/ddsjoberg/gtsummary/issues>.

Code

tbl1 %>% show_header_names() # Show header names of table

Column Name   Header            
label         "**Variable**"    
stat_n        "**N**"           
estimate      "**Estimate**"    
conf.low      "**95% CI**"      
p.value       "**p-value**"

* These values may be dynamically placed into headers (and other locations).
ℹ Review the `modify_header()` (`?gtsummary::modify_header()`) help for
  examples.

Code

tbl1

**Table XI:** Univariate linear regression
Variable	N	Estimate	95% CI¹	p-value
Sex	712
Female		—	—
Male		2.5	-1.7, 6.7	0.2
Bulb diameter at time 0	712	22	14, 30	<0.001
Bulb diameter at 12 months	363	18	7.7, 27	<0.001
Body Mass Index	712	0.85	0.53, 1.2	<0.001
Diastolic Blood Pressure	712	1.4	1.3, 1.4	<0.001
Categorized BMI	711
Normal		—	—
High		9.5	6.0, 13	<0.001
Hypertension present	712
No		—	—
Yes		35	32, 37	<0.001
¹ CI = Confidence Interval

59.0.2 Univariate logistic regression

Code

df_mbu_clean %>% 
    select(-dura_adm) %>% 
    tbl_uvregression(
        method = glm,
        y = died,
        method.args = family(binomial),
        exponentiate = TRUE,
        pvalue_fun = function(x) style_pvalue(x, digits = 3)
    ) %>% 
    modify_header(update = list(estimate ~ "**Estimate**",
                                label ~ "**Variable**"))

Variable	N	Estimate¹	95% CI¹	p-value
Weight (kgs)	4,984	0.34	0.31, 0.38	<0.001
Age (days)	4,984	0.99	0.94, 1.04	0.808
Sex	4,984
Female		—	—
Male		0.96	0.82, 1.13	0.642
Place of birth	4,984
Clinic/Hospital		—	—
Home		0.50	0.23, 0.99	0.064
KATH		0.45	0.37, 0.54	<0.001
Maternity Home		1.32	0.28, 4.80	0.689
APGAR (min 1)	4,984	0.67	0.64, 0.70	<0.001
Mode of delivery	4,984
C/S		—	—
SVD		1.64	1.40, 1.92	<0.001
Vacuum		0.66	0.33, 1.18	0.193
Gestational Age	4,984	0.82	0.81, 0.84	<0.001
APGAR (min 5)	4,984	0.56	0.53, 0.58	<0.001
¹ OR = Odds Ratio, CI = Confidence Interval

59.0.3 Univariate survival analysis

Code

library(survival)

df_mbu_clean %>% 
    tbl_uvregression(
        method = coxph,
        y = Surv(dura_adm, died=="Yes"),
        exponentiate = TRUE,
        pvalue_fun = function(x) style_pvalue(x, digits = 3)
    ) %>% 
    modify_caption(caption = "**Table 10**: Univariate Cox's Proportional Model")

**Table 10**: Univariate Cox’s Proportional Model
Characteristic	N	HR¹	95% CI¹	p-value
Weight (kgs)	4,984	0.45	0.41, 0.50	<0.001
Age (days)	4,984	1.00	0.96, 1.04	0.957
Sex	4,984
Female		—	—
Male		1.03	0.89, 1.19	0.682
Place of birth	4,984
Clinic/Hospital		—	—
Home		0.57	0.29, 1.11	0.098
KATH		0.54	0.46, 0.64	<0.001
Maternity Home		1.09	0.35, 3.42	0.877
APGAR (min 1)	4,984	0.72	0.69, 0.74	<0.001
Mode of delivery	4,984
C/S		—	—
SVD		1.48	1.28, 1.71	<0.001
Vacuum		0.75	0.41, 1.36	0.342
Gestational Age	4,984	0.87	0.86, 0.88	<0.001
APGAR (min 5)	4,984	0.65	0.63, 0.67	<0.001
¹ HR = Hazard Ratio, CI = Confidence Interval

59.1 Multivariate regression

59.1.1 Multiple liner regreession

Code

df_cint %>% 
    lm(sbp ~ ., data = .) %>% 
    tbl_regression(pvalue_fun = function(x) style_pvalue(x, digits = 3)) %>% 
    modify_header(update = list(estimate ~ "**Estimate**",
                                label ~ "**Variable**")) %>% 
    modify_caption(caption = "**Table XI:** Multivariate linear regression") %>%
    bold_labels() %>% 
    as_gt() %>% 
    gt::tab_options(
        table.font.size = 14,
        table.font.names = "Times New Roman",
        data_row.padding = 1
    )

**Table XI:** Multivariate linear regression
Variable	Estimate	95% CI¹	p-value
Sex
Female	—	—
Male	-0.25	-3.6, 3.1	0.883
Bulb diameter at time 0	4.6	-1.2, 10	0.122
Bulb diameter at 12 months	7.4	1.7, 13	0.011
Body Mass Index	0.05	-0.32, 0.43	0.781
Diastolic Blood Pressure	0.92	0.79, 1.0	<0.001
Categorized BMI
Normal	—	—
High	0.19	-4.0, 4.3	0.930
Hypertension present
No	—	—
Yes	17	13, 20	<0.001
¹ CI = Confidence Interval

59.1.2 Multiple variable logistic regression

Code

df_mbu_clean %>% 
    select(-dura_adm) %>% 
    glm(died ~ ., data = ., family = binomial) %>% 
    tbl_regression(exponentiate = T) %>% 
    add_n(location = "level") %>% 
    add_nevent(location = "level") %>% 
    add_global_p() %>% 
    add_q() %>% 
    add_significance_stars(
        hide_p = FALSE,
        hide_ci = FALSE,
        hide_se = TRUE) %>% 
    add_vif() %>% 
    modify_header(label = "**Predictor**") %>% 
    modify_caption(caption = "**Table 5: Highly customised logistic regression**") %>% 
    modify_footnote(ci = "CI = My 95%CI", abbreviation = TRUE) %>% 
    sort_p() %>% 
    bold_p(t=0.1, q=TRUE) %>% 
    bold_labels() %>% 
    italicize_levels()

Warning: Use of the "ci" column was deprecated in gtsummary v2.0, and the column will
eventually be removed from the tables.
! Review `?deprecated_ci_column()` for details on how to update your code.
ℹ The "ci" column has been replaced by the merged "conf.low" and "conf.high"
  columns (merged with `modify_column_merge()`).
ℹ In most cases, a simple update from `ci = 'a new label'` to `conf.low = 'a
  new label'` is sufficient.

**Table 5: Highly customised logistic regression**
Predictor	N	Event N	OR^1,2	95% CI²	p-value	q-value³	GVIF²	Adjusted GVIF^4,2
APGAR (min 5)	4,984	727	0.57***	0.51, 0.63	<0.001	<0.001	3.8	2.0
Weight (kgs)	4,984	727	0.51***	0.43, 0.62	<0.001	<0.001	2.9	1.7
Place of birth			***		<0.001	<0.001	1.1	1.0
Clinic/Hospital	796	195	—	—
Home	64	9	0.66	0.27, 1.48
KATH	4,114	520	0.56	0.45, 0.70
Maternity Home	10	3	1.68	0.30, 8.14
Gestational Age	4,984	727	0.94***	0.91, 0.97	<0.001	0.001	2.9	1.7
Age (days)	4,984	727	1.05*	1.00, 1.09	0.032	0.052	1.0	1.0
Mode of delivery			*		0.045	0.060	1.2	1.0
C/S	2,662	318	—	—
SVD	2,188	398	0.78	0.64, 0.95
Vacuum	134	11	1.00	0.48, 1.90
Sex					0.095	0.11	1.0	1.0
Female	2,216	329	—	—
Male	2,768	398	1.17	0.97, 1.41
APGAR (min 1)	4,984	727	1.00	0.92, 1.10	>0.9	>0.9	3.9	2.0
¹ p<0.05; p<0.01; p<0.001
² OR = Odds Ratio, CI = Confidence Interval, GVIF = Generalized Variance Inflation Factor
³ False discovery rate correction for multiple testing
⁴ GVIF²

59.1.3 Cox proportional hazard regression

Code

df_mbu_clean %>% 
    coxph(Surv(dura_adm, died == "Yes")~., data = .) %>% 
    tbl_regression(exponentiate = T)

Characteristic	HR¹	95% CI¹	p-value
Weight (kgs)	0.63	0.54, 0.74	<0.001
Age (days)	1.04	1.01, 1.07	0.021
Sex
Female	—	—
Male	1.11	0.95, 1.28	0.2
Place of birth
Clinic/Hospital	—	—
Home	0.55	0.28, 1.08	0.083
KATH	0.66	0.55, 0.78	<0.001
Maternity Home	1.08	0.34, 3.39	0.9
APGAR (min 1)	0.96	0.90, 1.03	0.3
Mode of delivery
C/S	—	—
SVD	0.77	0.66, 0.90	0.001
Vacuum	0.96	0.52, 1.77	>0.9
Gestational Age	0.97	0.94, 1.00	0.031
APGAR (min 5)	0.68	0.64, 0.74	<0.001
¹ HR = Hazard Ratio, CI = Confidence Interval

60 Combining tables

60.1 Merging

60.2 Stacking

60.3 Multiple comparison table

Code

library(titanic)
library(plotrix) #has a std.error function

# create smaller version of the dataset
df <- 
  titanic_train %>%
  select(Sex, Embarked, Age, Fare) %>%
  filter(Embarked != "") # deleting empty Embarked status

# first, write a little function to get the 2-way ANOVA p-values in a table
# function to get 2-way ANOVA p-values in tibble
twoway_p <- function(variable) {
  paste(variable, "~ Sex * Embarked") %>%
    as.formula() %>%
    aov(data = df) %>% 
    broom::tidy() %>%
    select(term, p.value) %>%
    filter(complete.cases(.)) %>%
    pivot_wider(names_from = term, values_from = p.value) %>%
    mutate(
      variable = .env$variable,
      row_type = "label"
    )
}

# add all results to a single table (will be merged with gtsummary table in next step)
twoway_results <-
  bind_rows(
    twoway_p("Age"),
    twoway_p("Fare")
  )
twoway_results

# A tibble: 2 × 5
           Sex Embarked `Sex:Embarked` variable row_type
         <dbl>    <dbl>          <dbl> <chr>    <chr>   
1 0.00823      3.97e- 1         0.611  Age      label   
2 0.0000000191 4.27e-16         0.0958 Fare     label

Code

tbl <-
  # first build a stratified `tbl_summary()` table to get summary stats by two variables
  df %>%
  tbl_strata(
    strata =  Sex,
    .tbl_fun =
      ~.x %>%
      tbl_summary(
        by = Embarked,
        missing = "no",
        statistic = all_continuous() ~ "{mean} ({std.error})",
        digits = everything() ~ 1
      ) %>%
      modify_header(all_stat_cols() ~ "**{level}**")
  ) %>%
  # merge the 2way ANOVA results into tbl_summary table
  modify_table_body(
    ~.x %>%
      left_join(
        twoway_results,
        by = c("variable", "row_type")
      )
  ) %>%
  # by default the new columns are hidden, add a header to unhide them
  modify_header(list(
    Sex ~ "**Sex**", 
    Embarked ~ "**Embarked**", 
    `Sex:Embarked` ~ "**Sex * Embarked**"
  )) %>%
  # adding spanning header to analysis results
  modify_spanning_header(c(Sex, Embarked, `Sex:Embarked`) ~ "**Two-way ANOVA p-values**") %>%
  # format the p-values with a pvalue formatting function
  modify_fmt_fun(c(Sex, Embarked, `Sex:Embarked`) ~ style_pvalue) %>%
  # update the footnote to be nicer looking
  modify_footnote(all_stat_cols() ~ "Mean (SE)")

61 hh

Code

# standardized difference
tbl1 <- 
  trial %>% 
  select(trt, age, marker) %>%
  tbl_summary(by = trt, missing = "no",
              statistic = all_continuous() ~ "{mean} ({sd})") %>%
  add_difference(all_continuous() ~ "cohens_d")

# table with p-value and corrected p-values
tbl2 <- 
  trial %>% 
  select(trt, age, marker) %>%
  tbl_summary(by = trt, missing = "no") %>%
  add_p(all_continuous() ~ "t.test") %>%
  add_q(method = "bonferroni") %>%
  modify_column_hide(all_stat_cols())

# merge tbls together
tbl_final <- 
  tbl_merge(list(tbl1, tbl2)) %>%
  # remove spanning headers
  modify_spanning_header(everything() ~ NA)

tbl1

Characteristic	Drug A N = 98¹	Drug B N = 102¹	Difference²	95% CI^2,3
Age	47 (15)	47 (14)	-0.03	-0.32, 0.25
Marker Level (ng/mL)	1.02 (0.89)	0.82 (0.83)	0.23	-0.06, 0.51
¹ Mean (SD)
² Cohen’s D
³ CI = Confidence Interval

Code

tbl2

Characteristic	p-value¹	q-value²
Age	0.8	>0.9
Marker Level (ng/mL)	0.12	0.2
¹ Welch Two Sample t-test
² Bonferroni correction for multiple testing

Code

tbl_final

Characteristic	Drug A N = 98¹	Drug B N = 102¹	Difference²	95% CI^2,3	p-value⁴	q-value⁵
Age	47 (15)	47 (14)	-0.03	-0.32, 0.25	0.8	>0.9
Marker Level (ng/mL)	1.02 (0.89)	0.82 (0.83)	0.23	-0.06, 0.51	0.12	0.2
¹ Mean (SD)
² Cohen’s D
³ CI = Confidence Interval
⁴ Welch Two Sample t-test
⁵ Bonferroni correction for multiple testing

Need to insert this k3k3↩︎
1/(2*df)↩︎