The data is pulled from my Google Scholar profile on the date specified above.
For more information, please refer to the scholar R
package. It allows users to extract citation data from Google Scholar,
compare multiple scholars, and even predict future h-index values. See
https://github.com/YuLab-SMU/scholar
for details.
library(scholar)
library(tidyverse)
library(knitr)
library(kableExtra)
library(tm)
library(wordcloud)
library(RColorBrewer)
library(dplyr)
library(purrr)
library(tidyr)
library(ggplot2)
library(stringr)
library(forcats)
# Google Scholar profile whose publication list is displayed below.
id = "zpclPhcAAAAJ" # specify "your_google_scholar_id"

# Raw publication records for that profile (one row per paper).
pubs = get_publications(id)

# Display table: newest papers first, with presentation-friendly headers.
# Columns are selected (and renamed) directly in their final order, so the
# citation count ends up right after the issue/page number.
dat = pubs %>%
  arrange(desc(year)) %>%
  dplyr::select(
    Author = author,
    Title = title,
    Year = year,
    Journal = journal,
    Number = number,
    `# of Cites` = cites
  ) %>%
  mutate(Journal = linebreak(Journal))

# Render the table as styled HTML.
kable_styling(
  kable(dat, format = "html"),
  bootstrap_options = c("striped", "hover", "condensed", "responsive"),
  font_size = 12
)
| Author | Title | Year | Journal | Number | # of Cites |
|---|---|---|---|---|---|
| E Cohn, H Anton-Culver, EW Karlson, Y Zhou, H Joffe, EJ Oh, J Fang, … | Leveraging the All of Us Research Program to Advance Women’s Health: Addressing Conditions that Affect Women Differently, Disproportionately, and Uniquely | 2026 | Journal of Women’s Health | 15409996261434111 | 0 |
| T Tham, P Giannaris, M Kokabee, A Kim, J Wilensky, C Tsang, BY Wang, … | A Novel Risk Stratification Tool for Sinonasal Inverted Papilloma Recurrence: Multi-Institutional Nomogram Incorporating Dysplasia Severity | 2026 | International forum of allergy & rhinology | | 0 |
| S Ahn, EJ Oh | A Network-Guided Penalized Regression with Application to Proteomics Data | 2026 | Bioinformatics Advances, vbag | g038 | 0 |
| EJ Siembida, B Greene, EJ Oh, KP Wiseman, M Basile, NP Vadhan, … | Tobacco and other substance co-use among adolescents and young adults with cancer who use tobacco: prevalence and associations with nicotine dependence and depression | 2025 | Family Medicine and Community Health | 13 (3), e003362 | 3 |
| AH Kim, T Tham, PS Giannaris, M Kokabee, J Wilensky, C Tsang, … | Sinonasal Inverted Papilloma and Clinical Significance of Dysplasia: A Multi‐Institutional Study | 2025 | The Laryngoscope | | 1 |
| T Tham, AH Kim, J Wilensky, C Tsang, PS Giannaris, BY Wang, K Panara, … | Frontal Sinus Inverted Papilloma: Surgical Challenges and Outcomes of a Multi‐Institutional Cohort | 2025 | The Laryngoscope | | 1 |
| EJ Oh, M Qian | Reluctant Transfer Learning in Penalized Regressions for Individualized Treatment Rules under Effect Heterogeneity | 2025 | arXiv preprint arXiv: | :2511.08559 | 0 |
| EJ Oh, S Ahn, T Tham, M Qian | Leveraging two-phase data for improved prediction of survival outcomes with application to nasopharyngeal cancer | 2025 | Biometrics | 81 (2), ujaf080 | 0 |
| M Basile, MA Diefenbach, B Greene, EJ Oh, EJ Siembida, NP Vadhan, … | Tobacco and Cannabis Co-Use among Adolescent and Young Adults With and Without a Cancer History | 2025 | Research Society on Marijuana and Society for Research on Nicotine and … | | 0 |
| EJ Oh, CM Alfano, FJ Esteva, PL Baron, W Xiong, BE Tortorella, EI Chen, … | Risk Stratification Using Tree-Based Models for Recurrence-Free Survival in Breast Cancer | 2025 | JCO Oncology Advances | 2, e2400011 | 0 |
| S Ahn, EJ Oh, MI Saleem, T Tham | Machine Learning Methods in Classification of Prolonged Radiation Therapy in Oropharyngeal Cancer: National Cancer Database | 2024 | Otolaryngology–Head and Neck Surgery | 171 (6), 1764-1772 | 5 |
| AW Liu, EJ Oh, ER Gazzara, GF Coppa, DK Deperalta, CLS Molmenti, … | Identifying Treatment Disparities in Curative-Intent Liver Resection Rates for Metastatic Colorectal Cancer in a Large Regional Healthcare System | 2024 | JOURNAL OF THE AMERICAN COLLEGE OF SURGEONS | 239 (5), S445-S445 | 0 |
| E Siembida, B Greene, EJ Oh, M Basile, KP Wiseman, MA Diefenbach, … | Substance co-use among adolescents and young adult tobacco users with and without a cancer history | 2023 | Annals of Behavioral Medicine | 57, S270-S270 | 0 |
| RD Gartrell, T Enzler, PS Kim, BT Fullerton, L Fazlollahi, AX Chen, … | Neoadjuvant chemoradiation alters the immune microenvironment in pancreatic ductal adenocarcinoma | 2022 | Oncoimmunology | 11 (1), 2066767 | 18 |
| JB Overdevest, AL Irace, V Mazzanti, EJ Oh, PV Joseph, DP Devanand, … | Chemosensory deficits are best predictor of serologic response among individuals infected with SARS-CoV-2 | 2022 | PLOS ONE | 17 (12), e0274611 | 6 |
| EJ Oh, M Qian, YK Cheung | Generalization error bounds of dynamic treatment regimes in penalized regression-based learning | 2022 | The Annals of Statistics | 50 (4), 2047-2071 | 4 |
| EJ Oh, RB Parikh, C Chivers, J Chen | Two-Stage Approaches to Accounting for Patient Heterogeneity in Machine Learning Risk Prediction Models in Oncology | 2021 | JCO Clinical Cancer Informatics | 5, 1015-1023 | 6 |
| D Zhu, A Wong, EJ Oh, S Ahn, M Wotman, T Sahai, D Bottalico, D Frank, … | Impact of Treatment Parameters on Racial Survival Differences in Oropharyngeal Cancer: National Cancer Database Study | 2021 | Otolaryngology–Head and Neck Surgery | 01945998211035056 | 3 |
| Y Toyoda, EJ Oh, ID Premaratne, C Chiuzan, CH Rohde | Affordable Care Act State-Specific Medicaid Expansion: Impact on Health Insurance Coverage and Breast Cancer Screening Rate | 2020 | Journal of the American College of Surgeons | 230 (5), 775-783 | 53 |
| EJ Oh, M Qian, K Cheung, DC Mohr | Building Health Application Recommender System Using Partially Penalized Regression | 2020 | Statistical Modeling in Biomedical Research | 105-123 | 3 |
| B Fullerton, R Gartrell, T Enzler, P Kim, L Fazlollahi, A Chen, S Perni, … | Neoadjuvant chemoradiotherapy enhances T cell infiltration in pancreatic ductal adenocarcinoma but high percentage of regulatory T cells associates with poor survival | 2020 | Journal for ImmunoTherapy of Cancer | 8 (Suppl 3) | 0 |
| EJ Oh | Optimal Treatment Regimes for Personalized Medicine and Mobile Health | 2020 | Columbia University | | 0 |
| M Wotman, EJ Oh, S Ahn, D Kraus, P Costantino, T Tham | HPV status in patients with nasopharyngeal carcinoma in the United States: a SEER database study | 2019 | American Journal of Otolaryngology | 40 (5), 705-710 | 36 |
| Y Toyoda, EJ Oh, C Chiuzan, CH Rohde | Affordable Care Act State-Specific Medicaid Expansion Increased Insurance Coverage and Breast Reconstruction Rates: A Difference-in-Difference Model | 2019 | Journal of the American College of Surgeons | 229 (4), S219 | 0 |
| Y Toyoda, EJ Oh, A Lin, C Chiuzan, CH Rohde | The Affordable Care Act State-Specific Medicaid Expansion Effect on Insurance Coverage and Breast Reconstruction Rates: A Difference-in-Difference Model Quasi-Experimental Study | 2019 | Plastic and Reconstructive Surgery–Global Open | 7 (4S), 55-56 | 0 |
| EP Petkova, J Beedasy, EJ Oh, JJ Sury, EM Sehnert, WY Tsai, MJ Reilly | Long-term Recovery From Hurricane Sandy: Evidence From a Survey in New York City | 2018 | Disaster medicine and public health preparedness | 12 (2), 172-175 | 16 |
| EJ Oh, H Lee | Dimension Reduction and Prediction for High-dimensional Regression Models Using the Graphical Lasso | 2013 | Journal of The Korean Data Analysis Society | 15 (5), 2321-2332 | 9 |
# Word cloud of the words used in the paper titles.

# Start from the raw titles, drop parenthesised fragments (together with the
# whitespace in front of them), then drop every run of digits.
title = gsub("[0-9]+", "", gsub("\\s*\\([^\\)]+\\)", "", dat$Title))

# One corpus document per title.
title_text = Corpus(VectorSource(title))

# Text normalisation, applied in this exact order: punctuation, case,
# numbers, whitespace, English stop words.
title_text_clean = title_text %>%
  tm_map(removePunctuation) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeNumbers) %>%
  tm_map(stripWhitespace) %>%
  tm_map(removeWords, stopwords("english"))

# Draw on a black background with a 7-step yellow-orange-red palette; only
# words that occur at least 15 times are drawn.
par(bg = "black")
cp = brewer.pal(7, "YlOrRd")
wordcloud(
  title_text_clean,
  scale = c(2, 1),
  min.freq = 15,
  colors = cp
)
# Per-paper citation history and the "top 10 most cited papers" plot.

# Calendar year through which every paper's citation series is extended.
current_year = as.integer(format(Sys.Date(), "%Y"))

# Fetch the yearly citation counts of one article. safely() turns an error
# into list(result = NULL, error = <condition>) so that a single failed
# request does not abort the whole loop.
safe_cite_fetch = purrr::safely(function(article_id){
  Sys.sleep(3) # to prevent hitting API rate limits (e.g., HTTP 429 too many requests)
  get_article_cite_history(id, article_id)
})

cite_fetches = map(pubs$pubid, safe_cite_fetch)

# Do not drop failed requests silently: a missing paper lowers the plotted
# curves and the h-index history, so tell the reader which ones are absent.
fetch_failed = map_lgl(cite_fetches, ~ !is.null(.x$error))
if (any(fetch_failed)) {
  warning(
    "Citation history could not be retrieved for ", sum(fetch_failed),
    " of ", length(fetch_failed), " publications (pubid: ",
    paste(pubs$pubid[fetch_failed], collapse = ", "),
    "); they are omitted from the citation plot and the h-index history.",
    call. = FALSE
  )
}

# Stack the successful results; NULL results of failed requests are skipped.
all_cites = bind_rows(map(cite_fetches, "result"))

# Paper-level attributes to attach to every (paper, year) row.
pubs_summary = pubs %>%
  dplyr::select(pubid, title, paper_year = year, total_cites = cites)

all_cites = all_cites %>%
  left_join(pubs_summary, by = "pubid")

# Give every paper one row per year, from its first relevant year (publication
# year or first citing year, whichever is earlier and known) through the
# current year. Using both columns with na.rm = TRUE keeps seq() from failing
# when the publication year is missing, and the max() on the upper bound keeps
# it from failing when a series already extends past the current year.
# Years without a record count as zero citations; cum_cites is the running
# total within each paper in chronological order.
all_cites_filled = all_cites %>%
  group_by(pubid) %>%
  tidyr::complete(year = seq(
    min(c(year, paper_year), na.rm = TRUE),
    max(c(year, current_year), na.rm = TRUE),
    by = 1
  )) %>%
  fill(title, paper_year, total_cites, .direction = "downup") %>%
  mutate(cites = replace_na(cites, 0)) %>%
  arrange(year) %>%
  mutate(cum_cites = cumsum(cites)) %>%
  ungroup()

# The ten papers with the largest cumulative citation count.
top10_info = all_cites_filled %>%
  group_by(pubid) %>%
  summarise(total_cum_cites = max(cum_cites), .groups = "drop") %>%
  arrange(desc(total_cum_cites)) %>%
  slice_head(n = 10)

cites_plot_data = all_cites_filled %>%
  filter(pubid %in% top10_info$pubid) %>%
  left_join(top10_info, by = "pubid")

# One colour per plotted paper (at most ten).
my_colors = brewer.pal(10, "Set3")

# Lines are grouped by pubid; the legend shows titles truncated to 30
# characters, ordered by cumulative citations.
ggplot(cites_plot_data, aes(
  x = factor(year),
  y = cum_cites,
  group = pubid,
  color = fct_reorder(str_trunc(title, 30), total_cum_cites))
) +
  # `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
  geom_line(linewidth = 1.2, alpha = 0.85) +
  scale_color_manual(values = my_colors) +
  labs(
    title = "Top 10 Most Cited Papers Over Time",
    x = "Year",
    y = "Cumulative Citations",
    color = "Paper"
  ) +
  theme_classic()
# h-index of a set of papers: the number of papers whose citation count is at
# least as large as their rank when papers are ordered from most to least
# cited.
#
# citations: numeric vector with one citation count per paper (missing values
#            are dropped by sort()).
# Returns a single integer; 0 for an empty vector.
compute_h_index = function(citations) {
  ranked = sort(citations, decreasing = TRUE)
  ranks = seq_len(length(ranked))
  meets_rank = ranked >= ranks
  sum(meets_rank)
}
# Historical h-index: for each calendar year, apply compute_h_index() to the
# cumulative citation counts that all papers had reached in that year.
cites_by_year = group_by(all_cites_filled, year)
h_index_over_time = summarise(
  cites_by_year,
  h_index = compute_h_index(cum_cites)
)
print(h_index_over_time)
## # A tibble: 14 × 2
## year h_index
## <dbl> <int>
## 1 2013 0
## 2 2014 1
## 3 2015 1
## 4 2016 1
## 5 2017 1
## 6 2018 1
## 7 2019 2
## 8 2020 3
## 9 2021 4
## 10 2022 4
## 11 2023 5
## 12 2024 5
## 13 2025 6
## 14 2026 6
# Forecast of the h-index for the same Google Scholar profile, one row per
# horizon (columns: years_ahead, h_index).
# NOTE(review): see the scholar documentation for the model behind
# predict_h_index() and the population it was calibrated on before
# interpreting these numbers.
predicted_h_index = scholar::predict_h_index(id)
print(predicted_h_index)
## years_ahead h_index
## 1 0 6.000000
## 2 1 8.035165
## 3 2 9.803195
## 4 3 11.525748
## 5 4 12.997867
## 6 5 14.371529
## 7 6 16.243771
## 8 7 18.077424
## 9 8 20.465290
## 10 9 23.234240
## 11 10 25.677098
# Plot the observed h-index history together with the forecast.

# Turn the forecast's relative horizon (years_ahead) into calendar years,
# anchored at the last year of the observed history.
predicted = predicted_h_index %>%
  mutate(year = max(h_index_over_time$year) + years_ahead)

# Last observed year; marks the boundary between history and forecast.
current_year = max(h_index_over_time$year)

# `linewidth` is used for line thickness: the `size` argument for lines was
# deprecated in ggplot2 3.4.0 and triggers a warning on current versions.
ggplot() +
  # Observed history as a step function.
  geom_step(
    data = h_index_over_time,
    aes(x = year, y = h_index),
    direction = "hv",
    linewidth = 0.8
  ) +
  # Forecast as a dotted grey line.
  geom_line(
    data = predicted,
    aes(x = year, y = h_index),
    color = "gray50",
    linetype = "dotted",
    linewidth = 0.8
  ) +
  # Red vertical line at the last observed year.
  geom_vline(xintercept = current_year, color = "red", linetype = "solid") +
  # One tick per observed or forecast year.
  scale_x_continuous(breaks = sort(unique(c(h_index_over_time$year, predicted$year)))) +
  # Ticks every 5 h-index units up to the largest observed or forecast value.
  scale_y_continuous(breaks = seq(0, max(c(h_index_over_time$h_index, predicted$h_index)), by = 5)) +
  labs(
    title = "Historical and Future Predicted h-index",
    x = "Year",
    y = "h-index"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))