Statistics

Timeline

Code
# Tally the publications in each year and keep the rows in ascending year
# order so the line is drawn chronologically.
df_time <- dplyr::arrange(dplyr::count(df, year), year)

# Interactive line chart with a marker on every year; the axis titles are
# attached by wrapping the plot in layout().
layout(
  plot_ly(
    df_time,
    x = ~year,
    y = ~n,
    type = "scatter",
    mode = "lines+markers"
  ),
  xaxis = list(title = "Year"),
  yaxis = list(title = "Number of Publications")
)

Number of papers over time

Journals

Code
# Frequency table of publication outlets. The outlet is the journal name,
# falling back to the book title when the journal is missing; empty strings
# in either column are treated as missing.
df %>%
  dplyr::count(
    outlet = dplyr::coalesce(
      dplyr::na_if(journal, ""),
      dplyr::na_if(booktitle, "")
    ),
    sort = TRUE  # most frequent outlet first
  ) %>%
  DT::datatable(
    rownames = FALSE,
    extensions = "Buttons",
    options = list(
      pageLength = 10,
      dom = "Bfrtip",
      # Export buttons; both file downloads share the same base file name.
      buttons = list(
        list(extend = "copy", text = "Copy"),
        list(
          extend = "csv",
          text = "Download CSV",
          filename = "outlet_frequencies"
        ),
        list(
          extend = "excel",
          text = "Download Excel",
          filename = "outlet_frequencies"
        )
      )
    )
  )

Review types

Code
# Count how often each literature review type occurs, most frequent first.
df_lr_type <- dplyr::count(df, lr_type_pare_et_al, sort = TRUE)

# Bar chart with one bar per review type.
layout(
  plot_ly(
    df_lr_type,
    x = ~lr_type_pare_et_al,
    y = ~n,
    type = "bar"
  ),
  xaxis = list(title = "Literature review type"),
  yaxis = list(title = "Count")
)

Distribution of literature review types (Paré et al.)

Authors

Code
# Build the 50 most frequent authors in the dataset.
#
# Each author string of the form "A and B" is split into one row per author.
# The separator is matched as a regular expression with arbitrary surrounding
# whitespace, so author lists that wrap across lines or use tabs/multiple
# spaces around "and" are split as well.
df_authors <- df %>%
  dplyr::filter(!is.na(author), author != "") %>%
  tidyr::separate_rows(author, sep = "\\s+and\\s+") %>%
  # Trim the ends and collapse internal whitespace runs so the same name is
  # not counted twice because of differing spacing or line breaks.
  dplyr::mutate(author = stringr::str_squish(author)) %>%
  # Whitespace-only entries become "" after squishing; they are not authors.
  dplyr::filter(author != "") %>%
  dplyr::count(author, sort = TRUE) %>%
  dplyr::slice_head(n = 50) %>%
  # Turn author into a factor whose levels are ordered by publication count,
  # which gives the bar chart's category axis a count-based order.
  dplyr::mutate(author = stats::reorder(author, n))

# Horizontal bar chart: publication count on x, one bar per author on y.
plot_ly(
  data = df_authors,
  x = ~n,
  y = ~author,
  type = "bar",
  orientation = "h"
) %>%
  layout(
    xaxis = list(title = "Number of Publications"),
    yaxis = list(title = "Author"),
    margin = list(l = 200)  # more room for author names
  )

Most productive authors (by number of publications in the dataset)

Top cited (data: Crossref)

Code
# Table of the 30 most-cited papers.
df %>%
  # Coerce first, in case the count is stored as character: values that
  # cannot be parsed as a number (including "") become NA here ...
  dplyr::mutate(cited_by = as.numeric(cited_by)) %>%
  # ... and are then dropped together with the originally missing counts, so
  # no NA rows can reach the table when fewer than 30 valid counts exist.
  dplyr::filter(!is.na(cited_by)) %>%
  dplyr::arrange(dplyr::desc(cited_by)) %>%
  dplyr::slice_head(n = 30) %>%        # change n for more/less papers
  dplyr::select(
    cited_by,
    year,
    author,
    title,
    journal,
    doi
  ) %>%
  DT::datatable(
    options = list(pageLength = 10),
    rownames = FALSE
  )