67 Error Plot

67.1 Introduction

An error plot (confidence interval plot) visually represents a statistic with its uncertainty. It is often drawn to show the range within which one can reasonably expect to see the statistic if the experiment is repeated on multiple occasions. Key components include:

Point Estimate: This is the central value, such as the mean, often represented by a dot or a small line.
Error Bars: These lines extend from the point estimate to indicate the upper and lower bounds of the confidence interval.

67.2 Plots

We begin by creating one from fictitious data. The data is as shown below:

Code

# Fictitious example: six labelled point estimates, each with the lower and
# upper limit of its interval.
label <- paste0("X", seq_len(6))
mean  <- c(1.29, 0.76, 2.43, 1.68, 1.22, 1.70)
lower <- c(0.84, 0.50, 1.58, 1.10, 0.80, 1.11)
upper <- c(1.95, 1.16, 3.67, 2.54, 1.85, 2.56)

df <- data.frame(label = label, mean = mean, lower = lower, upper = upper)

# Reverse the factor levels so that, once the axes are flipped in the plot
# below, the first label (X1) is drawn at the top rather than the bottom.
df[["label"]] <- factor(df[["label"]], levels = rev(df[["label"]]))

kableExtra::kable(df)

label	mean	lower	upper
X1	1.29	0.84	1.95
X2	0.76	0.50	1.16
X3	2.43	1.58	3.67
X4	1.68	1.10	2.54
X5	1.22	0.80	1.85
X6	1.70	1.11	2.56

We then plot the data below.

Code

library(ggplot2)
# Forest-style error plot: one point per label with a range from `lower` to
# `upper`, plus a dashed reference line at 1. coord_flip() puts the labels on
# the vertical axis.
fp <- ggplot(df, aes(x = label, y = mean, ymin = lower, ymax = upper)) +
  geom_pointrange() +
  geom_hline(yintercept = 1, linetype = "dashed") +
  coord_flip() +
  labs(x = "Label", y = "Mean (95% CI)") +
  theme_bw()
print(fp)

In this example, we will construct the error plots from raw data. The first few rows are shown below.

Code

# Read the blood-pressure workbook, keep the disease class, sex and the four
# SBP measurements, and reshape to one row per (subject, visit) measurement.
df_data <- "C:\\Dataset\\SBPDATA.xlsx" %>%
  readxl::read_xlsx() %>%
  janitor::clean_names() %>%
  select(disease_class, a1_gender, sbp_0, sbp_6, sbp_12, sbp_18) %>%
  mutate(
    # Sex is coded 0/1 in the workbook; attach readable labels.
    a1_gender = factor(a1_gender, levels = c(0, 1), labels = c("Female", "Male"))
  ) %>%
  pivot_longer(
    cols = c(sbp_0, sbp_6, sbp_12, sbp_18),
    names_to = "month",
    values_to = "sbp"
  ) %>%
  # With no columns named, drop_na() removes rows with a missing value in ANY
  # column, not only `sbp`.
  drop_na()

# Show the first rows of the long-format data.
kableExtra::kable(head(df_data))

disease_class	a1_gender	month	sbp
HPT	Female	sbp_0	139
HPT	Female	sbp_6	130
HPT	Female	sbp_12	80
HPT	Female	sbp_18	135
DM+HPT	Female	sbp_0	155
HPT	Female	sbp_0	109

For this example, we summarise the raw data into means and standard errors, and plot the mean systolic blood pressure at each visit with one standard error on either side. Note that the plot is stratified by sex.

Code

# Mean systolic BP with +/- one standard error at each visit, coloured by sex.
df_data %>%
  group_by(a1_gender, month) %>%
  # Summarise `sbp` per group; the summary is stored as a data-frame column
  # named `sbp`, which unnest() then spreads into ordinary columns (the `mean`
  # and `se` columns are used by the aesthetics below).
  reframe(across(sbp, function(v) epiDisplay::ci.numeric(v))) %>%
  unnest(sbp) %>%
  mutate(
    # Fix the visit order; as plain text the months would sort alphabetically.
    month = factor(month, levels = c("sbp_0", "sbp_6", "sbp_12", "sbp_18"))
  ) %>%
  ggplot(
    aes(
      x = month,
      y = mean,
      ymin = mean - se,
      ymax = mean + se,
      color = a1_gender
    )
  ) +
  geom_errorbar(width = 0.2, position = position_dodge2(width = 0.4)) +
  geom_point(size = 3, position = position_dodge2(width = 0.2)) +
  scale_x_discrete(
    breaks = c("sbp_0", "sbp_6", "sbp_12", "sbp_18"),
    labels = c(
      "Month\nZero", "Month\nSix",
      "Month\nTwelve", "Month\nEighteen"
    )
  ) +
  scale_y_continuous(
    limits = c(132, 144),
    breaks = seq(132, 144, by = 2),
    # Hand-written scientific-notation labels; the <sup> markup is rendered
    # by element_markdown() on axis.text.y in the theme below.
    labels = c(
      "1.32 x10<sup>2</sup>", "1.34 x10<sup>2</sup>",
      "1.36 x10<sup>2</sup>", "1.38 x10<sup>2</sup>",
      "1.40 x10<sup>2</sup>", "1.42 x10<sup>2</sup>",
      "1.44 x10<sup>2</sup>"
    )
  ) +
  scale_color_manual(
    breaks = c("Female", "Male"),
    values = c("#C952B9", "dodgerblue")
  ) +
  labs(
    x = NULL,
    y = "Systolic Blood Pressure (mmHg)",
    color = NULL
  ) +
  theme_bw() +
  theme(
    # Axes
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_markdown(),
    panel.grid = element_blank(),
    # Legend: boxed, placed inside the panel near the top-right corner
    legend.position = "inside",
    legend.position.inside = c(0.9, 0.9),
    legend.background = element_rect(color = "black"),
    legend.margin = margin(t = 1, b = 2, r = 3, l = 3),
    legend.spacing = unit(0, "pt"),
    legend.key.spacing = unit(0, "pt")
  )

The final plot passes a custom summary function to stat_summary(), which computes the plotted values (mean and 95% confidence limits) directly from the raw data, stratified by disease class.

Code

# Summary function intended for stat_summary(fun.data = ...).
#
# x: numeric vector of observations for a single group.
#
# Returns a one-row data frame with columns `y`, `ymin` and `ymax`, the
# aesthetics a "pointrange" geom draws.
#
# The summary is computed once and reused; previously
# epiDisplay::ci.numeric() was called three times on the same vector.
fun_one <- function(x) {
  ci <- epiDisplay::ci.numeric(x)

  # Positional extraction is unchanged: elements 2, 5 and 6 are presumably
  # the mean and the lower/upper 95% confidence limits -- confirm against the
  # epiDisplay documentation.
  data.frame(
    y = ci[[2]],
    ymin = ci[[5]],
    ymax = ci[[6]]
  )
}

# Point-and-range plot of systolic BP per visit, coloured by disease class.
# The plotted summary is computed from the raw rows by stat_summary() using
# fun_one().
df_data %>%
  mutate(
    # Fix the visit order; as plain text the months would sort alphabetically.
    month = factor(month, levels = c("sbp_0", "sbp_6", "sbp_12", "sbp_18"))
  ) %>%
  ggplot(aes(x = month, y = sbp, color = disease_class)) +
  stat_summary(
    fun.data = fun_one,
    geom = "pointrange",
    # Offset the classes sideways so their ranges do not overlap.
    position = position_dodge(width = 0.3)
  ) +
  scale_x_discrete(
    breaks = c("sbp_0", "sbp_6", "sbp_12", "sbp_18"),
    labels = c(
      "Month\nZero", "Month\nSix",
      "Month\nTwelve", "Month\nEighteen"
    )
  ) +
  scale_y_continuous(name = "Systolic Blood Pressure (mmHg)") +
  scale_color_manual(values = c("#1A73E8", "#EC6ACF", "#7ED4AD")) +
  labs(x = NULL, color = NULL) +
  theme_bw() +
  theme(
    legend.position = "top",
    legend.background = element_rect(color = "blue")
  )