1 NYS hospital ED data

The data spans from 2018 (Q1/Q2/Q3/Q4) to 2023 (Q1/Q2/Q3/Q4).

library(dplyr)
library(tidyr)
library(ggplot2)
library(stringr)
library(sf)
library(tigris)
library(knitr)
library(patchwork)

options(tigris_use_cache = TRUE)

wd = getwd()
filename = paste0(wd, "/Hospital_Emergency_Department.csv")
dat = read.csv(filename)

str(dat)
## 'data.frame':    6255 obs. of  14 variables:
##  $ Year                                      : int  2018 2018 2018 2018 2018 2019 2019 2019 2019 2019 ...
##  $ Quarter                                   : chr  "Q1" "Q2" "Q3" "Q4" ...
##  $ Facility.ID                               : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Facility.Name                             : chr  "Albany Medical Center Hospital" "Albany Medical Center Hospital" "Albany Medical Center Hospital" "Albany Medical Center Hospital" ...
##  $ Facility.Type.Description                 : chr  "Hospital" "Hospital" "Hospital" "Hospital" ...
##  $ Facility.County.FIPS                      : int  36001 36001 36001 36001 36001 36001 36001 36001 36001 36001 ...
##  $ Facility.County                           : chr  "Albany" "Albany" "Albany" "Albany" ...
##  $ Facility.Region                           : chr  "Outside of New York City" "Outside of New York City" "Outside of New York City" "Outside of New York City" ...
##  $ Operating.Certificate.Number              : chr  "0101000H" "0101000H" "0101000H" "0101000H" ...
##  $ Operator.Name                             : chr  "Albany Medical Center Hospital" "Albany Medical Center Hospital" "Albany Medical Center Hospital" "Albany Medical Center Hospital" ...
##  $ Total.ED.Encounters                       : chr  "19980" "19643" "21000" "21049" ...
##  $ Treat.and.Release.ED.Encounters           : chr  "13672" "13532" "14556" "14605" ...
##  $ ED.Encounters.Requiring.Ambulatory.Surgery: chr  "158" "195" "221" "172" ...
##  $ ED.Encounters.Admitted.as.Inpatient       : chr  "6150" "5916" "6223" "6272" ...

1.1 Variables of interest

  • prop_treat_release = Treat.and.Release.ED.Encounters / Total.ED.Encounters
    • A higher proportion indicates that most ED visits are lower acuity and can be managed without inpatient admission, suggesting lower downstream inpatient resource demand and greater reliance on outpatient-level care.
  • prop_ambulatory_surg = ED.Encounters.Requiring.Ambulatory.Surgery / Total.ED.Encounters
    • A higher proportion indicates greater procedural intensity within the ED, reflecting increased demand for surgical, anesthesia, and peri-procedural resources.
  • prop_admitted = ED.Encounters.Admitted.as.Inpatient / Total.ED.Encounters
    • A high proportion indicates greater inpatient bed demand and a higher inpatient resource burden, which has direct implications for health system capacity planning and effective resource allocation.
dat = dat %>%
  filter(Quarter != "CY") %>%
  mutate(
    Total.ED.Encounters = as.numeric(Total.ED.Encounters),
    Treat.and.Release.ED.Encounters = as.numeric(Treat.and.Release.ED.Encounters),
    ED.Encounters.Requiring.Ambulatory.Surgery = as.numeric(ED.Encounters.Requiring.Ambulatory.Surgery),
    ED.Encounters.Admitted.as.Inpatient = as.numeric(ED.Encounters.Admitted.as.Inpatient)
  ) %>%
  mutate(
    prop_treat_release = Treat.and.Release.ED.Encounters / Total.ED.Encounters,
    prop_ambulatory_surg = ED.Encounters.Requiring.Ambulatory.Surgery / Total.ED.Encounters,
    prop_admitted = ED.Encounters.Admitted.as.Inpatient / Total.ED.Encounters
  ) %>%
  mutate(
    Quarter = factor(Quarter, levels = c("Q1", "Q2", "Q3", "Q4")),
    time = paste(Year, Quarter),
    time = factor(time, levels = unique(paste(Year, Quarter)[order(Year, Quarter)]))
  )

1.2 Exclusion criteria

We excluded facilities where prop_treat_release, prop_ambulatory_surg, or prop_admitted was equal to 1.0, as this indicated extreme or potentially implausible reporting patterns. All observations from these facilities were removed prior to analysis.

facilities_to_be_excluded = dat %>%
  filter(prop_treat_release == 1 | prop_ambulatory_surg == 1 | prop_admitted == 1) %>%
  distinct(Facility.ID)

dat_clean = dat %>%
  anti_join(facilities_to_be_excluded, by = "Facility.ID")

1.2.1 Facilities per county (NY)

fac_before = dat %>%
  mutate(Facility.County.FIPS = str_pad(as.character(Facility.County.FIPS), 5, pad = "0")) %>%
  distinct(Facility.County.FIPS, Facility.ID) %>%
  count(Facility.County.FIPS, name = "n_facilities") %>%
  mutate(Period = "Before exclusion")

fac_after = dat_clean %>%
  mutate(Facility.County.FIPS = str_pad(as.character(Facility.County.FIPS), 5, pad = "0")) %>%
  distinct(Facility.County.FIPS, Facility.ID) %>%
  count(Facility.County.FIPS, name = "n_facilities") %>%
  mutate(Period = "After exclusion")

fac_long = bind_rows(fac_before, fac_after)

ny_counties = tigris::counties(state = "NY", cb = TRUE, year = 2023) %>%
  st_as_sf() %>%
  transmute(
    Facility.County.FIPS = GEOID,
    geometry
  )

map_data = expand_grid(
  Facility.County.FIPS = ny_counties$Facility.County.FIPS,
  Period = unique(fac_long$Period)
) %>%
  left_join(fac_long, by = c("Facility.County.FIPS", "Period")) %>%
  left_join(ny_counties, by = "Facility.County.FIPS") %>%
  st_as_sf() %>%
  mutate(
    n_facilities = ifelse(n_facilities == 0, NA, n_facilities)
  )

p_before = ggplot(filter(map_data, Period == "Before exclusion")) +
  geom_sf(aes(fill = n_facilities), color = "white", linewidth = 0.2) +
  scale_fill_viridis_c(option = "C", na.value = "grey85") +
  coord_sf(expand = FALSE) +
  labs(title = "Before exclusion", fill = "Facilities") +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    axis.text  = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  )

p_after = ggplot(filter(map_data, Period == "After exclusion")) +
  geom_sf(aes(fill = n_facilities), color = "white", linewidth = 0.2) +
  scale_fill_viridis_c(option = "C", na.value = "grey85") +
  coord_sf(expand = FALSE) +
  labs(title = "After exclusion", fill = "Facilities") +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    axis.text  = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  )

fac_before_delta = fac_before %>%
  select(Facility.County.FIPS, n_before = n_facilities)

fac_after_delta = fac_after %>%
  select(Facility.County.FIPS, n_after = n_facilities)

county_delta = full_join(fac_before_delta, fac_after_delta, by = "Facility.County.FIPS") %>%
  mutate(
    n_before = replace_na(n_before, 0L),
    n_after  = replace_na(n_after, 0L),
    delta = n_after - n_before,
    delta_cat = factor(delta, levels = c(0, -1, -2))
  )

delta_map = county_delta %>%
  left_join(ny_counties, by = "Facility.County.FIPS") %>%
  st_as_sf()

table(delta_map$delta)
## 
## -2 -1  0 
##  3 11 43
p_delta = ggplot(delta_map) +
  geom_sf(aes(fill = delta_cat), color = "white", linewidth = 0.2) +
  scale_fill_manual(
    values = c(
      "0"  = "grey80",
      "-1" = "#5DA5DA",
      "-2" = "#1B3C73"
    ),
    name = "After - Before"
  ) +
  coord_sf(expand = FALSE) +
  labs(title = "Change in number of facilities per county (NY)") +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    axis.text  = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  )

(p_before | p_after) /
  p_delta +
  plot_layout(heights = c(1, 1))

summary_table = tibble(
  Metric = c("Unique Facilities", "Unique Counties"),
  Before_Exclusion = c(
    n_distinct(dat$Facility.ID),
    n_distinct(dat$Facility.County.FIPS)
  ),
  After_Exclusion = c(
    n_distinct(dat_clean$Facility.ID),
    n_distinct(dat_clean$Facility.County.FIPS)
  )
)

kable(summary_table, align = "lcc", caption = "Data Summary Before and After Exclusion")
Data Summary Before and After Exclusion
Metric Before_Exclusion After_Exclusion
Unique Facilities 225 208
Unique Counties 57 57

2 Trajectory over time

dat_long = dat_clean %>%
  select(
    Facility.ID,
    Facility.Region,
    Facility.County.FIPS,
    time,
    prop_treat_release,
    prop_ambulatory_surg,
    prop_admitted
  ) %>%
  pivot_longer(
    cols = starts_with("prop_"),
    names_to = "measure",
    values_to = "value"
  )

ggplot(
  dat_long,
  aes(
    x = time,
    y = value,
    group = Facility.ID,
    color = Facility.ID
  )
) +
  geom_line(alpha = 0.3) +
  facet_wrap(~measure, scales = "free_y", ncol = 1) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  ) +
  labs(
    x = "Quarter",
    y = "Proportion",
    title = "Longitudinal Trajectory Plot of ED Encounter Proportions by Facility"
  )

ggplot(
  dat_long,
  aes(
    x = time,
    y = value,
    group = Facility.ID,
    color = Facility.Region
  )
) +
  geom_line(alpha = 0.25) +
  stat_summary(
    aes(group = Facility.Region),
    fun = mean,
    geom = "line",
    linewidth = 1.3
  ) +
  facet_wrap(~measure, scales = "free_y", ncol = 1) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  labs(
    x = "Quarter",
    y = "Proportion",
    title = "Longitudinal Trajectory Plot of ED Encounter Proportions by Facility",
    color = "Facility Region"
  )

2.1 Leading counties by number of facilities

fac_after_check = dat_clean %>%
  mutate(Facility.County.FIPS = str_pad(as.character(Facility.County.FIPS), 5, pad = "0")) %>%
  distinct(Facility.County.FIPS, Facility.ID) %>%
  count(Facility.County.FIPS, name = "n_facilities") %>%
  arrange(desc(n_facilities))

ny_county_names = tigris::counties(state = "NY", cb = TRUE, year = 2023) %>%
  st_drop_geometry() %>%
  transmute(
    Facility.County.FIPS = GEOID,
    county_name = NAME
  )

fac_after_named = fac_after_check %>%
  left_join(ny_county_names, by = "Facility.County.FIPS") %>%
  arrange(desc(n_facilities))

fac_after_named %>% head(20)
##    Facility.County.FIPS n_facilities  county_name
## 1                 36061           21     New York
## 2                 36047           14        Kings
## 3                 36059           11       Nassau
## 4                 36103           11      Suffolk
## 5                 36119           11  Westchester
## 6                 36005           10        Bronx
## 7                 36029           10         Erie
## 8                 36081           10       Queens
## 9                 36055            6       Monroe
## 10                36063            6      Niagara
## 11                36067            6     Onondaga
## 12                36065            5       Oneida
## 13                36089            5 St. Lawrence
## 14                36001            4       Albany
## 15                36013            4   Chautauqua
## 16                36071            4       Orange
## 17                36007            3       Broome
## 18                36015            3      Chemung
## 19                36025            3     Delaware
## 20                36027            3     Dutchess
top = fac_after_named %>%
  slice(1:10)

ny_counties = tigris::counties(state = "NY", cb = TRUE, year = 2023) %>%
  st_as_sf() %>%
  transmute(
    Facility.County.FIPS = GEOID,
    county_name = NAME,
    geometry
  )

ny_map = ny_counties %>%
  left_join(top, by = c("Facility.County.FIPS", "county_name"))

fac_after_threshold = fac_after_named %>%
  filter(n_facilities >= 10)
ny_counties = tigris::counties(state = "NY", cb = TRUE, year = 2023) %>%
  st_as_sf() %>%
  transmute(
    Facility.County.FIPS = GEOID,
    county_name = NAME,
    geometry
  )

ny_map = ny_counties %>%
  left_join(fac_after_threshold, 
            by = c("Facility.County.FIPS", "county_name"))

highlight = ny_map %>%
  filter(!is.na(n_facilities))

nyc_li_bbox = st_bbox(c(
  xmin = -74.3,
  xmax = -72.5,
  ymin = 40.4,
  ymax = 41.4
), crs = st_crs(ny_map))

highlight_inside  = st_crop(highlight, nyc_li_bbox)
highlight_outside = highlight[!highlight$Facility.County.FIPS %in% 
                                  highlight_inside$Facility.County.FIPS, ]

centroid_inside  = st_centroid(highlight_inside)
centroid_outside = st_centroid(highlight_outside)

p_full = ggplot(ny_map) +
  geom_sf(fill = "grey90", color = "white") +
  geom_sf(data = highlight, fill = "#5DA5DA", color = "white") +
  geom_sf_text(
    data = centroid_outside,
    aes(label = county_name),
    size = 3,
    fontface = "bold"
  ) +
  coord_sf(expand = FALSE) +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    axis.text  = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  ) +
  labs(title = "Counties with at least 10 facilities")

nyc_li_zoom = st_crop(ny_map, nyc_li_bbox)

p_zoom = ggplot(nyc_li_zoom) +
  geom_sf(fill = "grey90", color = "white") +
  geom_sf(data = highlight_inside, fill = "#5DA5DA", color = "white") +
  geom_sf_text(
    data = centroid_inside,
    aes(label = county_name),
    size = 3,
    fontface = "bold"
  ) +
  coord_sf(expand = FALSE) +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    axis.text  = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  ) +
  labs(title = "NYC / Long Island Zoom")

p_full + p_zoom + plot_layout(widths = c(2.2, 1))