D Cheatsheets and Quick Reference

D.1 openalexR quick reference

library(openalexR)

# Articles from one journal (OpenAlex source ID), published 2020-2023
works <- oa_fetch(
  entity = "works",
  type = "article",
  primary_location.source.id = "S148561398",
  from_publication_date = "2020-01-01",
  to_publication_date = "2023-12-31"
)

# Articles with at least one author affiliated to a given institution,
# published from 2022 onwards
works <- oa_fetch(
  entity = "works",
  type = "article",
  authorships.institutions.id = "I63966007",
  from_publication_date = "2022-01-01"
)

# Works assigned to a given OpenAlex topic, published from 2023 onwards
# (no document-type filter here)
works <- oa_fetch(
  entity = "works",
  from_publication_date = "2023-01-01",
  topics.id = "T10102"
)

# Fetch a single author profile by its OpenAlex ID.
# The documented argument is `identifier`; the abbreviated `id` should not be
# relied on, so the name is spelled out in full.
author <- oa_fetch(entity = "authors", identifier = "A5023888391")

# Fetch a single institution profile by its OpenAlex ID
inst <- oa_fetch(entity = "institutions", identifier = "I63966007")

# Sampling: request 500 works from one journal, with a fixed seed (42)
# passed alongside the sample size
works <- oa_fetch(
  entity = "works",
  options = list(sample = 500, seed = 42),
  primary_location.source.id = "S148561398"
)

D.2 quanteda pipeline

library(quanteda)

# Full text-analysis pipeline, one step per statement

# 1. Corpus from a data frame: document names from `id`, texts from `text`
corp <- corpus(df, docid_field = "id", text_field = "text")

# 2. Tokenise without punctuation or numbers, lower-case the tokens,
#    then drop English stopwords
toks <- tokens(corp, remove_punct = TRUE, remove_numbers = TRUE)
toks <- tokens_tolower(toks)
toks <- tokens_remove(toks, stopwords("en"))

# 3. Document-feature matrix, trimmed with a minimum term frequency of 5
#    and a minimum document frequency of 3
dfmat <- dfm_trim(dfm(toks), min_termfreq = 5, min_docfreq = 3)

# TF-IDF weighted copy of the matrix
dfmat_tfidf <- dfm_tfidf(dfmat)

# The 20 top features of the unweighted matrix
topfeatures(dfmat, n = 20)

# Keyword-in-context
# A multi-word pattern must be wrapped in phrase(); a plain "open access"
# string is compared against single tokens and therefore never matches.
kwic(toks, pattern = phrase("open access"), window = 5)

D.3 igraph / tidygraph / ggraph

library(igraph)
library(tidygraph)
library(ggraph)

# Undirected igraph object: `edges` supplies the links, `nodes` the vertex table
g <- graph_from_data_frame(d = edges, vertices = nodes, directed = FALSE)

# Tidygraph conversion: add node-level metrics as columns of the node table
tg <- as_tbl_graph(g) |>
  activate(nodes) |>
  mutate(
    degree = centrality_degree(),
    between = centrality_betweenness(),
    # group_leiden() defaults to the CPM objective, where `resolution` acts as
    # a density threshold and a value of 1 tends to leave most nodes in
    # singleton communities. Modularity gives `resolution = 1` its usual
    # meaning.
    community = group_leiden(
      resolution = 1.0,
      objective_function = "modularity"
    )
  )

# Network plot using the "fr" layout: edge transparency follows `weight`,
# node size follows degree, node colour follows community membership
ggraph(graph = tg, layout = "fr") +
  geom_edge_link(mapping = aes(alpha = weight), show.legend = FALSE) +
  geom_node_point(mapping = aes(size = degree, colour = factor(community))) +
  geom_node_text(mapping = aes(label = name), size = 3, repel = TRUE) +
  theme_void()

D.4 Common data wrangling patterns

library(tidyverse)

# Expand the nested `authorships` list-column; the unnested columns are
# prefixed with "authorships_" and the work ID is kept as `work_id`
authors <- works |>
  rename(work_id = id) |>
  select(work_id, authorships) |>
  unnest(cols = authorships, names_sep = "_")

# Same pattern for the nested `topics` list-column (prefix "topics_")
topics <- works |>
  rename(work_id = id) |>
  select(work_id, topics) |>
  unnest(cols = topics, names_sep = "_")

# Compute h-index: the largest h such that h papers have at least h
# citations each. `citations` is a vector of per-paper citation counts.
compute_h_index <- function(citations) {
  # Rank papers from most to least cited (sort() drops NA values by default)
  ranked <- sort(citations, decreasing = TRUE)
  position <- seq_along(ranked)
  # Counts fall while positions rise, so the comparison holds for exactly the
  # first h positions; counting the TRUEs gives h (0 for empty input)
  meets_threshold <- ranked >= position
  sum(meets_threshold)
}

# Field normalisation: each paper's citations relative to the mean of its
# field-year group (a per-paper score; averaging it over a set of papers
# gives that set's MNCS)
works |>
  group_by(field, year) |>
  mutate(
    field_mean = mean(cited_by_count),
    # The denominator is floored at 1 so a group mean of 0 cannot cause a
    # division by zero. NOTE(review): this also lowers scores in field-years
    # whose mean is below 1 -- confirm that is intended.
    mncs = cited_by_count / pmax(field_mean, 1)
  ) |>
  # Drop the grouping so later verbs operate on the whole table again
  ungroup()

# Fractional counting: after unnesting there is one row per authorship, and
# each row receives an equal share 1 / (number of rows for that work)
works |>
  unnest(cols = authorships, names_sep = "_") |>
  mutate(frac = 1 / n(), .by = id)

D.5 Companion package functions

The scientometricsInR package provides helper functions used throughout the book:

Function	Purpose	Example
`fetch_openalex(...)`	Cached, rate-limited OpenAlex queries	`fetch_openalex(entity = "works", ...)`
`dedupe_by_doi(df)`	Remove duplicate records by DOI	`works |> dedupe_by_doi()`
`compute_h_index(citations)`	Compute h-index from citation vector	`compute_h_index(c(10, 8, 5, 3, 1))` → 3
`field_normalize(cites, means)`	MNCS normalisation (vector)	`field_normalize(c(20, 5), c(10, 10))` → `c(2.0, 0.5)`
`compute_mncs(df)`	MNCS for a grouped data frame	`df |> compute_mncs()`
`build_coauth_graph(works)`	Co-authorship igraph from works	`build_coauth_graph(works)`
`kleinberg_bursts(kw, dates)`	Keyword burst detection	`kleinberg_bursts(keywords, dates)`
`palette_sci(n)`	Viridis colour palette	`palette_sci(4)`
`theme_sci()`	Minimal ggplot2 theme	`+ theme_sci()`

D.6 Useful keyboard shortcuts

Action	RStudio	VS Code
Run current line/selection	Ctrl+Enter	Ctrl+Enter
Run current chunk	Ctrl+Shift+Enter	Ctrl+Shift+Enter
Insert pipe `|>`	Ctrl+Shift+M	—
Insert assignment `<-`	Alt+-	—
Render document	Ctrl+Shift+K	—
Go to file	Ctrl+.	Ctrl+P

This book was built by the bookdown R package.

C Indicator Crosswalk Across Databases

E Exercise Solutions