The data were pulled from my Google Scholar profile at the end of March 2026.
For more information, see the scholar R package
(https://github.com/YuLab-SMU/scholar). It allows users to extract citation
data from Google Scholar, compare multiple scholars, and even predict future
h-index values.
library(scholar)
library(tidyverse)
library(knitr)
library(kableExtra)
library(tm)
library(wordcloud)
library(RColorBrewer)
library(dplyr)
library(purrr)
library(tidyr)
library(ggplot2)
library(stringr)
library(forcats)
# Google Scholar profile to report on; replace with "your_google_scholar_id".
id <- "zpclPhcAAAAJ"

# Download the publication list for that profile via the scholar package.
pubs <- get_publications(id)

# Display table: keep the six reported columns (already in their final
# left-to-right order), list the newest publications first, and switch to
# presentation-friendly column headers.
dat <- pubs %>%
  dplyr::select(author, title, year, journal, number, cites) %>%
  mutate(journal = linebreak(journal)) %>%
  arrange(desc(year)) %>%
  rename(
    Author = author,
    Title = title,
    Year = year,
    Journal = journal,
    Number = number,
    `# of Cites` = cites
  )

# Render the table as striped / hover / condensed / responsive HTML.
kable_styling(
  kable(dat, format = "html"),
  bootstrap_options = c("striped", "hover", "condensed", "responsive"),
  font_size = 12
)
# Word cloud of publication titles ----

# Start from the displayed titles, dropping parenthesised fragments
# (together with the whitespace in front of them) and any runs of digits.
title <- dat$Title
title <- gsub("\\s*\\([^\\)]+\\)", "", title)
title <- gsub("[0-9]+", "", title)

# Normalise the titles with tm. The order of the steps is intentional and
# kept as-is: punctuation is stripped first, then case is folded so that the
# lower-case English stop-word list matches.
title_text <- Corpus(VectorSource(title))
title_text_clean <- tm_map(title_text, removePunctuation)
title_text_clean <- tm_map(title_text_clean, content_transformer(tolower))
title_text_clean <- tm_map(title_text_clean, removeNumbers)
title_text_clean <- tm_map(title_text_clean, stripWhitespace)
title_text_clean <- tm_map(title_text_clean, removeWords, stopwords("english"))

# Seven-step yellow-orange-red palette for the words.
cp <- brewer.pal(7, "YlOrRd")

# Draw on a black background, but only for this plot: par() returns the
# previous setting, which is restored afterwards so the black background does
# not leak into later base-graphics output.
old_par <- par(bg = "black")
# Only words occurring at least 15 times across all titles are drawn.
wordcloud(title_text_clean, scale = c(2, 1), min.freq = 15, colors = cp)
par(old_par)
# Per-article citation histories ----

# Calendar year at run time; used downstream as the last year of every series.
current_year <- as.integer(format(Sys.Date(), "%Y"))

# Fetch the yearly citation history of one article of profile `id`.
# Waits a random 2-4 seconds before each request (presumably to stay under
# Google Scholar's rate limits) and, via purrr::safely(), returns
# list(result, error) instead of raising.
safe_cite_fetch <- purrr::safely(function(article_id) {
  Sys.sleep(runif(1, 2, 4))
  get_article_cite_history(id, article_id)
})

# One fetch per publication, in the order of `pubs`.
cite_fetches <- map(pubs$pubid, safe_cite_fetch)

# A fetch only counts as successful when it produced a data frame. This
# covers both a captured error (result is NULL) and any non-tabular return
# value, which could not be row-bound below.
fetched_ok <- map_lgl(cite_fetches, ~ is.data.frame(.x$result))

# Keep the best-effort behaviour, but say which articles are missing instead
# of dropping them silently.
if (!all(fetched_ok)) {
  warning(
    "Citation history unavailable for ", sum(!fetched_ok), " of ",
    length(fetched_ok), " publications (pubid: ",
    paste(pubs$pubid[!fetched_ok], collapse = ", "), ")",
    call. = FALSE
  )
}

# Stack the successful histories into one long table.
all_cites <- bind_rows(map(cite_fetches[fetched_ok], "result"))
# Complete yearly series with cumulative citations ----

# Per-publication metadata to attach to every row of the citation history.
pubs_summary <- pubs %>%
  select(pubid, title, paper_year = year, total_cites = cites)

all_cites <- all_cites %>%
  left_join(pubs_summary, by = "pubid")

# For every publication, make sure there is one row per year up to the
# current year, then accumulate the citations.
all_cites_filled <- all_cites %>%
  group_by(pubid) %>%
  # Start at the earlier of the listed publication year and the first year
  # with recorded citations. Taking both with na.rm = TRUE keeps seq() valid
  # when the profile lists no year for a paper (NA) or lists a year that is
  # later than the current one.
  tidyr::complete(
    year = seq(min(c(paper_year, year), na.rm = TRUE), current_year, 1)
  ) %>%
  # Rows added by complete() have no metadata yet; copy it from the
  # publication's existing rows.
  fill(title, paper_year, total_cites, .direction = "downup") %>%
  # Years without a recorded count contribute zero citations.
  mutate(cites = replace_na(cites, 0)) %>%
  # arrange() ignores the grouping, so this orders the whole table by year;
  # within each publication the rows are therefore chronological, which is
  # what the running sum below relies on.
  arrange(year) %>%
  mutate(cum_cites = cumsum(cites)) %>%
  ungroup()
# Top 10 most cited papers over time ----

# Rank publications by their final cumulative citation count (taken from the
# fetched yearly histories) and keep the ten highest.
top10_info <- all_cites_filled %>%
  group_by(pubid) %>%
  summarise(total_cum_cites = max(cum_cites), .groups = "drop") %>%
  arrange(desc(total_cum_cites)) %>%
  slice_head(n = 10)

# Yearly series of those ten papers, with the ranking total attached so the
# legend can be ordered by it.
cites_plot_data <- all_cites_filled %>%
  filter(pubid %in% top10_info$pubid) %>%
  left_join(top10_info, by = "pubid")

# One colour per paper.
my_colors <- brewer.pal(10, "Set3")

# One line per publication (group = pubid). The legend shows titles truncated
# to 30 characters, ordered by total cumulative citations.
ggplot(
  cites_plot_data,
  aes(
    x = factor(year),
    y = cum_cites,
    group = pubid,
    color = fct_reorder(str_trunc(title, 30), total_cum_cites)
  )
) +
  # `linewidth` replaces `size` for line thickness (ggplot2 >= 3.4.0), which
  # avoids the deprecation warnings the old argument triggers.
  geom_line(linewidth = 1.2, alpha = 0.85) +
  scale_color_manual(values = my_colors) +
  labs(
    title = "Top 10 Most Cited Papers Over Time",
    x = "Year",
    y = "Cumulative Citations",
    color = "Paper"
  ) +
  theme_bw() +
  theme(
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1)
  )
# h-index over time ----

# h-index of a set of papers: the largest h such that h papers have at least
# h citations each. With the counts sorted in decreasing order, the condition
# `count >= rank` holds exactly for the first h papers.
h_index_of <- function(paper_cites) {
  ranked <- sort(paper_cites, decreasing = TRUE)
  sum(ranked >= seq_along(ranked))
}

# get_citation_history() only provides `year` and `cites` for the profile, so
# the h-index has to be reconstructed: for every year, apply the definition
# to the cumulative citation counts of all papers that have a row for that
# year in `all_cites_filled`.
# NOTE(review): this only covers papers whose citation history was fetched
# successfully, so it may differ from the h-index shown on the profile.
h_index_by_year <- all_cites_filled %>%
  group_by(year) %>%
  summarise(h_index = h_index_of(cum_cites), .groups = "drop")

# Profile-level citation history, extended with the reconstructed h-index
# (the original `year` and `cites` columns are kept).
citations <- get_citation_history(id) %>%
  left_join(h_index_by_year, by = "year")

ggplot(
  citations,
  # x is discrete, so all points must be put in one group explicitly;
  # otherwise geom_line() has nothing to connect.
  aes(x = factor(year), y = h_index, group = 1)
) +
  geom_line(color = "blue", linewidth = 1.2) +
  geom_point(color = "red") +
  labs(
    title = "h-index Over Time",
    x = "Year",
    y = "h-index"
  ) +
  theme_minimal()

# Print the scholar package's prediction of the profile's future h-index.
predict_h_index(id)