1 Data Example

N = c(
  NA, 98333, 160871, 7408,
  NA, 141694, 51864, 7152, 5856, 49909, 10137,
  NA, 54643, 58171, 84435, 69363,
  NA, 70238, 78440, 108806, 9128,
  NA, 53724, 20969, 27352, 39551, 68469, 56547,
  NA, 112322, 68148, 77794, 8348,
  NA, 113752, 64575, 79026, 9259,
  NA, 14770, 242936, 8906
)

n = c(
  NA, 1970, 7737, 275,
  NA, 5855, 1779, 211, 195, 1513, 429,
  NA, 929, 1832, 3562, 3659,
  NA, 2447, 2786, 4475, 274,
  NA, 1970, 861, 1165, 1553, 2414, 2019,
  NA, 4351, 2914, 2436, 281,
  NA, 4480, 1816, 3392, 294,
  NA, 272, 9432, 278
)

category = c(
  "Gender",
  "Male", "Female", "Unknown",
  "Race/ethnicity",
  "NonHisp White", "NonHisp Black", "NonHisp Asian", "NonHisp other", "Hispanic", "Unknown",
  "Age",
  "<40", "40-54", "55-69", "70+",
  "BMI",
  "Normal", "Overweight", "Obese", "Unknown",
  "Income",
  "100k+", "[75k,100k)", "[50k,75k)", "[25k,50k)", "<25k", "Unknown",
  "Education",
  "College degree", "Some college", "HS/GED or less", "Unknown",
  "Marital status",
  "Married", "Never married", "Other", "Unknown",
  "Insurance",
  "Uninsured", "Insured", "Unknown"
)

p = n / N
se = sqrt(p * (1 - p) / N)
ll = p - 1.96 * se
ul = p + 1.96 * se

dat = data.frame(
  category = category,
  rate = p,
  ll = ll,
  ul = ul
)

2 Manuscript-Ready Prevalence Rate Plots

2.1 Prevalence Rate by Category with Background Highlights

y_pos = nrow(dat):1
valid_idx = !is.na(dat$rate)
plot(dat[, 2], y_pos, xlab = "Prevalence rate", ylab = "", yaxt = "n", 
     xlim = c(-0.005, 0.075), type = "n", xaxt = "n", main = "", frame.plot = FALSE)

gray_box_height = 0.9
for(i in which(valid_idx)){
  rect(xleft = 0, 
       xright = 0.075,
       ybottom = y_pos[i] - gray_box_height / 2,
       ytop = y_pos[i] + gray_box_height / 2,
       col = "gray90", border = NA)
}

points(dat[, 2], y_pos, cex = 1.2, pch = 18)
segments(dat[, 3], y_pos, dat[, 4], y_pos, col = "black", lwd = 1)
eps = 0.08
segments(dat[, 3], y_pos - eps, dat[, 3], y_pos + eps, col = "black", lwd = 1)
segments(dat[, 4], y_pos - eps, dat[, 4], y_pos + eps, col = "black", lwd = 1)
axis(1, at = seq(0, 0.075, length.out = 4))

loc = -0.002
index = nrow(dat) - which(is.na(dat[, 2])) + 1
text(loc, index[1], labels = substitute(paste(bold("Gender"))), xpd = TRUE, adj = 1)
text(loc, index[2], labels = substitute(paste(bold("Race/ethnicity"))), xpd = TRUE, adj = 1)
text(loc, index[3], labels = substitute(paste(bold("Age"))), xpd = TRUE, adj = 1)
text(loc, index[4], labels = substitute(paste(bold("BMI"))), xpd = TRUE, adj = 1)
text(loc, index[5], labels = substitute(paste(bold("Income"))), xpd = TRUE, adj = 1)
text(loc, index[6], labels = substitute(paste(bold("Education"))), xpd = TRUE, adj = 1)
text(loc, index[7], labels = substitute(paste(bold("Marital status"))), xpd = TRUE, adj = 1)
text(loc, index[8], labels = substitute(paste(bold("Insurance"))), xpd = TRUE, adj = 1)

text(loc, (nrow(dat) - which(!is.na(dat[, 2])) + 1), 
     labels = dat[-c(1, which(is.na(dat[, 2]))), 1], xpd = TRUE, adj = 1)

2.2 Prevalence Rate Bars with Confidence Intervals and Category Labels

y_pos = nrow(dat):1
subcat_idx = which(valid_idx)
plot(dat[, 2], y_pos, xlab = "Prevalence rate", ylab = "", yaxt = "n", 
     xlim = c(-0.005, 0.075), type = "n", xaxt = "n", main = "", frame.plot = FALSE)

for(i in subcat_idx){
  rect(xleft = 0,
       xright = dat$rate[i],
       ybottom = y_pos[i] - 0.3,
       ytop = y_pos[i] + 0.3,
       col = "gray80",
       border = "black")
}

points(dat$rate, y_pos, cex = 1.2, pch = 18)
segments(dat$ll, y_pos, dat$ul, y_pos, col = "black", lwd = 1)
segments(dat$ll, y_pos - eps, dat$ll, y_pos + eps, lwd = 1)
segments(dat$ul, y_pos - eps, dat$ul, y_pos + eps, lwd = 1)
axis(1, at = seq(0, 0.075, length.out = 4))

text(loc, index[1], labels = substitute(paste(bold("Gender"))), xpd = TRUE, adj = 1)
text(loc, index[2], labels = substitute(paste(bold("Race/ethnicity"))), xpd = TRUE, adj = 1)
text(loc, index[3], labels = substitute(paste(bold("Age"))), xpd = TRUE, adj = 1)
text(loc, index[4], labels = substitute(paste(bold("BMI"))), xpd = TRUE, adj = 1)
text(loc, index[5], labels = substitute(paste(bold("Income"))), xpd = TRUE, adj = 1)
text(loc, index[6], labels = substitute(paste(bold("Education"))), xpd = TRUE, adj = 1)
text(loc, index[7], labels = substitute(paste(bold("Marital status"))), xpd = TRUE, adj = 1)
text(loc, index[8], labels = substitute(paste(bold("Insurance"))), xpd = TRUE, adj = 1)

text(loc, (nrow(dat) - which(!is.na(dat[, 2])) + 1), 
     labels = dat[-c(1, which(is.na(dat[, 2]))), 1], xpd = TRUE, adj = 1)

2.3 Prevalence Point Estimates with Confidence Intervals

Notes: Gray dashed separating lines were added

plot(dat[, 2], nrow(dat):1, xlab = "Prevalence rate", ylab = "", yaxt = "n", 
     xlim = c(-0.005, 0.075), type = "n", xaxt = "n", main = "", frame.plot = FALSE)

points(dat[, 2], nrow(dat):1, cex = 1.2, pch = 18)
segments(dat[, 3], nrow(dat):1, dat[, 4], nrow(dat):1, col = "black", lwd = 1)
segments(dat[, 3], nrow(dat):1 - eps, dat[, 3], nrow(dat):1 + eps, col = "black", lwd = 1)
segments(dat[, 4], nrow(dat):1 - eps, dat[, 4], nrow(dat):1 + eps, col = "black", lwd = 1)
axis(1, at = seq(0, 0.075, length.out = 4))

text(0, index[1], labels = substitute(paste(bold("Gender"))), xpd = TRUE, adj = 1)
text(0, index[2], labels = substitute(paste(bold("Race/ethnicity"))), xpd = TRUE, adj = 1)
text(0, index[3], labels = substitute(paste(bold("Age"))), xpd = TRUE, adj = 1)
text(0, index[4], labels = substitute(paste(bold("BMI"))), xpd = TRUE, adj = 1)
text(0, index[5], labels = substitute(paste(bold("Income"))), xpd = TRUE, adj = 1)
text(0, index[6], labels = substitute(paste(bold("Education"))), xpd = TRUE, adj = 1)
text(0, index[7], labels = substitute(paste(bold("Marital status"))), xpd = TRUE, adj = 1)
text(0, index[8], labels = substitute(paste(bold("Insurance"))), xpd = TRUE, adj = 1)

text(0, (nrow(dat) - which(!is.na(dat[, 2])) + 1), 
     labels = dat[-c(1, which(is.na(dat[, 2]))), 1], xpd = TRUE, adj = 1)

abline(h = c(4, 9, 14, 21, 26, 31, 38) + 0.5, col = "grey", lty = 2)