The data spans from 2018 (Q1/Q2/Q3/Q4) to 2023 (Q1/Q2/Q3/Q4).
library(dplyr)
library(tidyr)
library(ggplot2)
library(stringr)
library(sf)
library(tigris)
library(knitr)
library(patchwork)
options(tigris_use_cache = TRUE)
wd = getwd()
filename = paste0(wd, "/Hospital_Emergency_Department.csv")
dat = read.csv(filename)
str(dat)
## 'data.frame': 6255 obs. of 14 variables:
## $ Year : int 2018 2018 2018 2018 2018 2019 2019 2019 2019 2019 ...
## $ Quarter : chr "Q1" "Q2" "Q3" "Q4" ...
## $ Facility.ID : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Facility.Name : chr "Albany Medical Center Hospital" "Albany Medical Center Hospital" "Albany Medical Center Hospital" "Albany Medical Center Hospital" ...
## $ Facility.Type.Description : chr "Hospital" "Hospital" "Hospital" "Hospital" ...
## $ Facility.County.FIPS : int 36001 36001 36001 36001 36001 36001 36001 36001 36001 36001 ...
## $ Facility.County : chr "Albany" "Albany" "Albany" "Albany" ...
## $ Facility.Region : chr "Outside of New York City" "Outside of New York City" "Outside of New York City" "Outside of New York City" ...
## $ Operating.Certificate.Number : chr "0101000H" "0101000H" "0101000H" "0101000H" ...
## $ Operator.Name : chr "Albany Medical Center Hospital" "Albany Medical Center Hospital" "Albany Medical Center Hospital" "Albany Medical Center Hospital" ...
## $ Total.ED.Encounters : chr "19980" "19643" "21000" "21049" ...
## $ Treat.and.Release.ED.Encounters : chr "13672" "13532" "14556" "14605" ...
## $ ED.Encounters.Requiring.Ambulatory.Surgery: chr "158" "195" "221" "172" ...
## $ ED.Encounters.Admitted.as.Inpatient : chr "6150" "5916" "6223" "6272" ...
Treat.and.Release.ED.Encounters /
Total.ED.Encounters
ED.Encounters.Requiring.Ambulatory.Surgery /
Total.ED.Encounters
ED.Encounters.Admitted.as.Inpatient /
Total.ED.Encounters
dat = dat %>%
filter(Quarter != "CY") %>%
mutate(
Total.ED.Encounters = as.numeric(Total.ED.Encounters),
Treat.and.Release.ED.Encounters = as.numeric(Treat.and.Release.ED.Encounters),
ED.Encounters.Requiring.Ambulatory.Surgery = as.numeric(ED.Encounters.Requiring.Ambulatory.Surgery),
ED.Encounters.Admitted.as.Inpatient = as.numeric(ED.Encounters.Admitted.as.Inpatient)
) %>%
mutate(
prop_treat_release = Treat.and.Release.ED.Encounters / Total.ED.Encounters,
prop_ambulatory_surg = ED.Encounters.Requiring.Ambulatory.Surgery / Total.ED.Encounters,
prop_admitted = ED.Encounters.Admitted.as.Inpatient / Total.ED.Encounters
) %>%
mutate(
Quarter = factor(Quarter, levels = c("Q1", "Q2", "Q3", "Q4")),
time = paste(Year, Quarter),
time = factor(time, levels = unique(paste(Year, Quarter)[order(Year, Quarter)]))
)
We excluded facilities where prop_treat_release, prop_ambulatory_surg, or prop_admitted was equal to 1.0, as this indicated extreme or potentially implausible reporting patterns. All observations from these facilities were removed prior to analysis.
facilities_to_be_excluded = dat %>%
filter(prop_treat_release == 1 | prop_ambulatory_surg == 1 | prop_admitted == 1) %>%
distinct(Facility.ID)
dat_clean = dat %>%
anti_join(facilities_to_be_excluded, by = "Facility.ID")
fac_before = dat %>%
mutate(Facility.County.FIPS = str_pad(as.character(Facility.County.FIPS), 5, pad = "0")) %>%
distinct(Facility.County.FIPS, Facility.ID) %>%
count(Facility.County.FIPS, name = "n_facilities") %>%
mutate(Period = "Before exclusion")
fac_after = dat_clean %>%
mutate(Facility.County.FIPS = str_pad(as.character(Facility.County.FIPS), 5, pad = "0")) %>%
distinct(Facility.County.FIPS, Facility.ID) %>%
count(Facility.County.FIPS, name = "n_facilities") %>%
mutate(Period = "After exclusion")
fac_long = bind_rows(fac_before, fac_after)
ny_counties = tigris::counties(state = "NY", cb = TRUE, year = 2023) %>%
st_as_sf() %>%
transmute(
Facility.County.FIPS = GEOID,
geometry
)
map_data = expand_grid(
Facility.County.FIPS = ny_counties$Facility.County.FIPS,
Period = unique(fac_long$Period)
) %>%
left_join(fac_long, by = c("Facility.County.FIPS", "Period")) %>%
left_join(ny_counties, by = "Facility.County.FIPS") %>%
st_as_sf() %>%
mutate(
n_facilities = ifelse(n_facilities == 0, NA, n_facilities)
)
p_before = ggplot(filter(map_data, Period == "Before exclusion")) +
geom_sf(aes(fill = n_facilities), color = "white", linewidth = 0.2) +
scale_fill_viridis_c(option = "C", na.value = "grey85") +
coord_sf(expand = FALSE) +
labs(title = "Before exclusion", fill = "Facilities") +
theme_minimal() +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank()
)
p_after = ggplot(filter(map_data, Period == "After exclusion")) +
geom_sf(aes(fill = n_facilities), color = "white", linewidth = 0.2) +
scale_fill_viridis_c(option = "C", na.value = "grey85") +
coord_sf(expand = FALSE) +
labs(title = "After exclusion", fill = "Facilities") +
theme_minimal() +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank()
)
fac_before_delta = fac_before %>%
select(Facility.County.FIPS, n_before = n_facilities)
fac_after_delta = fac_after %>%
select(Facility.County.FIPS, n_after = n_facilities)
county_delta = full_join(fac_before_delta, fac_after_delta, by = "Facility.County.FIPS") %>%
mutate(
n_before = replace_na(n_before, 0L),
n_after = replace_na(n_after, 0L),
delta = n_after - n_before,
delta_cat = factor(delta, levels = c(0, -1, -2))
)
delta_map = county_delta %>%
left_join(ny_counties, by = "Facility.County.FIPS") %>%
st_as_sf()
table(delta_map$delta)
##
## -2 -1 0
## 3 11 43
p_delta = ggplot(delta_map) +
geom_sf(aes(fill = delta_cat), color = "white", linewidth = 0.2) +
scale_fill_manual(
values = c(
"0" = "grey80",
"-1" = "#5DA5DA",
"-2" = "#1B3C73"
),
name = "After - Before"
) +
coord_sf(expand = FALSE) +
labs(title = "Change in number of facilities per county (NY)") +
theme_minimal() +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank()
)
(p_before | p_after) /
p_delta +
plot_layout(heights = c(1, 1))
summary_table = tibble(
Metric = c("Unique Facilities", "Unique Counties"),
Before_Exclusion = c(
n_distinct(dat$Facility.ID),
n_distinct(dat$Facility.County.FIPS)
),
After_Exclusion = c(
n_distinct(dat_clean$Facility.ID),
n_distinct(dat_clean$Facility.County.FIPS)
)
)
kable(summary_table, align = "lcc", caption = "Data Summary Before and After Exclusion")
| Metric | Before_Exclusion | After_Exclusion |
|---|---|---|
| Unique Facilities | 225 | 208 |
| Unique Counties | 57 | 57 |
dat_long = dat_clean %>%
select(
Facility.ID,
Facility.Region,
Facility.County.FIPS,
time,
prop_treat_release,
prop_ambulatory_surg,
prop_admitted
) %>%
pivot_longer(
cols = starts_with("prop_"),
names_to = "measure",
values_to = "value"
)
ggplot(
dat_long,
aes(
x = time,
y = value,
group = Facility.ID,
color = Facility.ID
)
) +
geom_line(alpha = 0.3) +
facet_wrap(~measure, scales = "free_y", ncol = 1) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none"
) +
labs(
x = "Quarter",
y = "Proportion",
title = "Longitudinal Trajectory Plot of ED Encounter Proportions by Facility"
)
ggplot(
dat_long,
aes(
x = time,
y = value,
group = Facility.ID,
color = Facility.Region
)
) +
geom_line(alpha = 0.25) +
stat_summary(
aes(group = Facility.Region),
fun = mean,
geom = "line",
linewidth = 1.3
) +
facet_wrap(~measure, scales = "free_y", ncol = 1) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1)
) +
labs(
x = "Quarter",
y = "Proportion",
title = "Longitudinal Trajectory Plot of ED Encounter Proportions by Facility",
color = "Facility Region"
)
fac_after_check = dat_clean %>%
mutate(Facility.County.FIPS = str_pad(as.character(Facility.County.FIPS), 5, pad = "0")) %>%
distinct(Facility.County.FIPS, Facility.ID) %>%
count(Facility.County.FIPS, name = "n_facilities") %>%
arrange(desc(n_facilities))
ny_county_names = tigris::counties(state = "NY", cb = TRUE, year = 2023) %>%
st_drop_geometry() %>%
transmute(
Facility.County.FIPS = GEOID,
county_name = NAME
)
fac_after_named = fac_after_check %>%
left_join(ny_county_names, by = "Facility.County.FIPS") %>%
arrange(desc(n_facilities))
fac_after_named %>% head(20)
## Facility.County.FIPS n_facilities county_name
## 1 36061 21 New York
## 2 36047 14 Kings
## 3 36059 11 Nassau
## 4 36103 11 Suffolk
## 5 36119 11 Westchester
## 6 36005 10 Bronx
## 7 36029 10 Erie
## 8 36081 10 Queens
## 9 36055 6 Monroe
## 10 36063 6 Niagara
## 11 36067 6 Onondaga
## 12 36065 5 Oneida
## 13 36089 5 St. Lawrence
## 14 36001 4 Albany
## 15 36013 4 Chautauqua
## 16 36071 4 Orange
## 17 36007 3 Broome
## 18 36015 3 Chemung
## 19 36025 3 Delaware
## 20 36027 3 Dutchess
top = fac_after_named %>%
slice(1:10)
ny_counties = tigris::counties(state = "NY", cb = TRUE, year = 2023) %>%
st_as_sf() %>%
transmute(
Facility.County.FIPS = GEOID,
county_name = NAME,
geometry
)
ny_map = ny_counties %>%
left_join(top, by = c("Facility.County.FIPS", "county_name"))
fac_after_threshold = fac_after_named %>%
filter(n_facilities >= 10)
ny_counties = tigris::counties(state = "NY", cb = TRUE, year = 2023) %>%
st_as_sf() %>%
transmute(
Facility.County.FIPS = GEOID,
county_name = NAME,
geometry
)
ny_map = ny_counties %>%
left_join(fac_after_threshold,
by = c("Facility.County.FIPS", "county_name"))
highlight = ny_map %>%
filter(!is.na(n_facilities))
nyc_li_bbox = st_bbox(c(
xmin = -74.3,
xmax = -72.5,
ymin = 40.4,
ymax = 41.4
), crs = st_crs(ny_map))
highlight_inside = st_crop(highlight, nyc_li_bbox)
highlight_outside = highlight[!highlight$Facility.County.FIPS %in%
highlight_inside$Facility.County.FIPS, ]
centroid_inside = st_centroid(highlight_inside)
centroid_outside = st_centroid(highlight_outside)
p_full = ggplot(ny_map) +
geom_sf(fill = "grey90", color = "white") +
geom_sf(data = highlight, fill = "#5DA5DA", color = "white") +
geom_sf_text(
data = centroid_outside,
aes(label = county_name),
size = 3,
fontface = "bold"
) +
coord_sf(expand = FALSE) +
theme_minimal() +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank()
) +
labs(title = "Counties with at least 10 facilities")
nyc_li_zoom = st_crop(ny_map, nyc_li_bbox)
p_zoom = ggplot(nyc_li_zoom) +
geom_sf(fill = "grey90", color = "white") +
geom_sf(data = highlight_inside, fill = "#5DA5DA", color = "white") +
geom_sf_text(
data = centroid_inside,
aes(label = county_name),
size = 3,
fontface = "bold"
) +
coord_sf(expand = FALSE) +
theme_minimal() +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank()
) +
labs(title = "NYC / Long Island Zoom")
p_full + p_zoom + plot_layout(widths = c(2.2, 1))