Tidy Tuesday David Robinson’s functions

The most common functions appearing in David Robinson's #TidyTuesday R scripts, identified using {funspotr}

#TidyTuesday
Author

Aditya Dahiya

Published

July 10, 2024

A circle-packing diagram (made with {packcircles}) showing the top 100 functions used in the scripts. The area of each circle corresponds to how frequently a function appears in the scripts. The colour of each circle indicates the package the function belongs to.

How I made this graphic

Loading required libraries, data import & creating custom functions

Code
# Data Import and Wrangling Tools
library(tidyverse)            # All things tidy

# Final plot tools
library(scales)               # Nice Scales for ggplot2
library(fontawesome)          # Icons display in ggplot2
library(ggtext)               # Markdown text support for ggplot2
library(showtext)             # Display fonts in ggplot2
library(colorspace)           # Lighten and Darken colours
library(patchwork)            # Combining plots
library(igraph)               # Underlying node edges library
library(ggraph)               # Graphics and ggplot2 with igraph
library(tidygraph)            # to manipulate graph objects
library(packcircles)          # Circles graph

# Import the drob_funs dataset from the #TidyTuesday repository (2024-07-09)
drob_funs <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-07-09/drob_funs.csv"
)

Data Wrangling (inspired by this article on the R Graph Gallery by Yan Holtz)

Code
# Usage count for every (package, function) pair. Rows flagged
# `in_multiple_pkgs` are dropped first.
df0 <- drob_funs |>
  filter(!in_multiple_pkgs) |>
  # No `sort =` here: arrange() below fully determines the row order, so
  # sorting by count first would be thrown away immediately.
  count(pkgs, funs) |>
  arrange(pkgs, funs) |>
  # Wrap package names in braces, e.g. "dplyr" becomes "{dplyr}"
  mutate(
    pkgs = paste0("{", pkgs, "}")
  )

# Cleaned data on top 100 functions (only to display top 100).
# slice_max() keeps ties by default, so a tie at the cut-off can yield
# slightly more than 100 rows.
df1 <- df0 |>
  slice_max(order_by = n, n = 100)

# Exploratory treemap: one tile per function with area given by its count,
# tiles filled and grouped by package, and a border around each package.
library(treemapify)

df1 |>
  ggplot(mapping = aes(area = n, fill = pkgs, subgroup = pkgs)) +
  geom_treemap() +
  geom_treemap_text(mapping = aes(label = funs)) +
  geom_treemap_subgroup_border()

# A hierarchical circular packing diagram

# Make an edges dataframe from our data df1: one package -> function edge
# per row. Functions whose package is "{(unknown)}" are left out.
edges1 <- df1 |>
  filter(pkgs != "{(unknown)}") |>
  select(from = pkgs, to = funs) |>
  arrange(from)

# Make a vertices dataframe: one row per function (n = its count) followed
# by one row per package (n = summed count of its functions).
vertices1 <- df1 |>
  filter(pkgs != "{(unknown)}") |>
  select(name = funs, n) |>
  # add rows for package sizes also by functions
  bind_rows(
    df1 |>
      # Apply the same filter as the edges, otherwise "{(unknown)}" becomes
      # a vertex that no edge refers to.
      filter(pkgs != "{(unknown)}") |>
      group_by(pkgs) |>
      summarize(n = sum(n)) |>
      rename(name = pkgs)
  )

# Print both tables for inspection
edges1
vertices1

# Directed igraph object built from the two tables.
# NOTE(review): the packages share no common parent vertex, so this is a set
# of separate trees rather than one tree. Confirm the "circlepack" layout
# used on this graph handles that as intended.
plotdf <- graph_from_data_frame(
  d = edges1,
  vertices = vertices1,
  directed = TRUE
)

# The same graph as a tidygraph object. tbl_graph() is the constructor that
# accepts separate node and edge tables; as_tbl_graph() instead converts a
# single object passed as its first argument.
tbl_graph(
  nodes = vertices1,
  edges = edges1
)
# Circle-packing view of the package -> function graph: circles filled by
# depth, labels sized by count, no legend.
ggraph(plotdf, layout = "circlepack", weight = n) +
  geom_node_circle(mapping = aes(fill = depth)) +
  geom_node_text(mapping = aes(label = name, size = n)) +
  coord_equal() +
  theme_void() +
  theme(legend.position = "none")

# Print the edge and vertex tables of `flare` as tibbles.
# NOTE(review): `flare` is not created in this script; presumably it is the
# example dataset shipped with ggraph - confirm.
as_tibble(flare$edges)
as_tibble(flare$vertices)
Code
# Top 100 (package, function) pairs by usage count. Rows flagged
# `in_multiple_pkgs` are dropped first. slice_max() keeps ties by default,
# so a tie at the cut-off can yield slightly more than 100 rows.
df1 <- drob_funs |>
  filter(!in_multiple_pkgs) |>
  # No `sort =` here: arrange() below fully determines the row order.
  count(pkgs, funs) |>
  arrange(pkgs, funs) |>
  slice_max(order_by = n, n = 100) |>
  mutate(
    # Two-line label with markdown/HTML line break: "fun<br>{pkg}"
    fun_labs = paste0(funs, "<br>{", pkgs, "}"),
    # Row id; used later to join circle outlines back to this table
    id = row_number()
  )

library(packcircles)

# The four packages with the most functions in df1.
# `with_ties = FALSE` guarantees at most four names: the fill scale of the
# final plot supplies exactly five colours (four packages + "Others"), so a
# tie at fourth place would otherwise leave a level without a colour.
top_pkgs <- df1 |>
  count(pkgs, sort = TRUE) |>
  slice_max(order_by = n, n = 4, with_ties = FALSE) |>
  pull(pkgs)

# Create the layout using circleProgressiveLayout()
# This function returns a dataframe with a row for each bubble.
# It includes the center coordinates (x and y) and the radius, which is
# proportional to the value.
packing1 <- circleProgressiveLayout(
  df1$n,
  sizetype = "area"
)

# A tibble of centres of the circles and our cleaned data
# (bind_cols() matches rows by position: row i of packing1 is circle i of df1)
plotdf1 <- bind_cols(
  df1,
  packing1
)

# A tibble of the points on the circumference of the circles
plotdf_circle <- circleLayoutVertices(
  packing1,
  npoints = 100
) |>
  as_tibble() |>

  # Attach each circle's package via its id. The key is stated explicitly so
  # the join does not depend on guessed common columns, and the expected
  # relationship (many outline points -> one circle) is checked by dplyr.
  left_join(
    plotdf1 |> select(id, pkgs),
    by = join_by(id),
    relationship = "many-to-one"
  ) |>

  # Keep the main packages for fill; lump everything else into "Others"
  mutate(
    pkgs = if_else(
      pkgs %in% top_pkgs,
      pkgs,
      "Others"
    ),
    # Fixed level order: main packages first, "Others" last
    pkgs = fct(
      pkgs,
      levels = c(
        top_pkgs, "Others"
      )
    )
  )

Visualization Parameters

Code
# Register Google fonts under the family aliases used in this script
font_add_google(name = "Nova Mono", family = "title_font")                # titles
font_add_google(name = "Saira Extra Condensed", family = "caption_font")  # caption
font_add_google(name = "Wellfleet", family = "body_font")                 # plot text

# Let showtext handle text rendering for the plots that follow
showtext_auto()

# A discrete palette from {paletteer}.
# NOTE(review): `mypal` is not referenced by any other code visible in this
# file - confirm whether it is still needed.
mypal <- paletteer::paletteer_d(palette = "palettesForR::Named")

# Background, main text and highlight colours
bg_col <- "white"
text_col <- "grey10"
text_hil <- "#004C4CFF"

# Base text size; other text sizes in the plot are multiples of it
bts <- 80

# Social-media credits for the caption.
# Register the local Font Awesome brands font so the icon glyphs can render.
sysfonts::font_add(
  family = "Font Awesome 6 Brands",
  regular = here::here("docs", "Font Awesome 6 Brands-Regular-400.otf")
)

# Icon code (HTML entity without its closing ";") and handle per platform
github <- "&#xf09b"
xtwitter <- "&#xe61b"
github_username <- "aditya-dahiya"
xtwitter_username <- "@adityadahiyaias"

# Icon + handle as HTML spans; the ";" after the icon placeholder completes
# the HTML entity.
social_caption_1 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{github};</span> <span style='color: {text_hil}'>{github_username}  </span>")
social_caption_2 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{xtwitter};</span> <span style='color: {text_hil}'>{xtwitter_username}</span>")

Annotation Text for the Plot

Code
# Title, subtitle and caption strings for the final plot
plot_title <- "David Robinson's #TidyTuesday functions"

# Subtitle wrapped at a width of 50
plot_subtitle <- str_wrap(
  string = "Top 100 functions used in the R scripts. Area of circle denotes how frequently a function appears.",
  width = 50
)
# Show the wrapped result
str_view(plot_subtitle)

# Caption: data credit plus the two social-media spans
plot_caption <- paste0(
  "**Data:** #TidyTuesday, David Robinson & {funspotr}",
  " |  **Code:** ",
  social_caption_1,
  " |  **Graphics:** ",
  social_caption_2
)

# Drop the helper objects that were only needed to build the caption
rm(
  list = c(
    "github", "github_username", "xtwitter",
    "xtwitter_username", "social_caption_1",
    "social_caption_2"
  )
)

The static plot:

Code
# Final graphic: one filled polygon per function (outline points from
# plotdf_circle), with the function name drawn at each circle's centre.
# NOTE(review): "title_font" is registered earlier but no theme element here
# uses it; the title inherits "caption_font" - confirm this is intended.
g <- ggplot() +

  # Circles, filled by package group
  geom_polygon(
    data = plotdf_circle,
    mapping = aes(x = x, y = y, group = id, fill = pkgs),
    colour = text_col,
    linewidth = 0.2,
    alpha = 0.9
  ) +

  # Function names at the circle centres, text size mapped to count
  geom_text(
    data = plotdf1,
    mapping = aes(x = x, y = y, label = paste0(funs, "()"), size = n),
    colour = text_col,
    family = "body_font"
  ) +

  # Scales: four palette colours plus one fixed colour for the last level
  scale_fill_manual(
    values = c(
      paletteer::paletteer_d(
        "fishualize::Coryphaena_hippurus",
        direction = -1
      )[1:4],
      "#cce0dd"
    )
  ) +
  scale_size_continuous(range = c(bts / 15, bts / 1.8)) +
  scale_x_continuous(expand = expansion(0)) +
  scale_y_continuous(expand = expansion(0)) +

  # Equal units on both axes so the polygons stay circular
  coord_equal() +

  # Text annotations
  labs(
    title = plot_title,
    subtitle = plot_subtitle,
    caption = plot_caption
  ) +

  # Legends: hide size, keep fill
  guides(
    size = "none",
    fill = guide_legend(
      title = "Packages",
      override.aes = list(shape = 20)
    )
  ) +

  # Theme
  theme_void(base_size = bts, base_family = "caption_font") +
  theme(
    legend.position = "bottom",
    legend.title = element_text(
      colour = text_hil,
      margin = margin(0, 10, 0, 0, "mm"),
      hjust = 0.5,
      size = 1.5 * bts,
      family = "body_font"
    ),
    legend.text = element_text(
      colour = text_hil,
      margin = margin(0, 0, 0, 5, "mm"),
      size = 1.5 * bts,
      family = "body_font"
    ),
    plot.title = element_text(
      hjust = 0.5,
      size = 3 * bts,
      colour = text_hil,
      margin = margin(10, 0, 0, 0, "mm")
    ),
    plot.subtitle = element_text(
      colour = text_hil,
      size = 2 * bts,
      hjust = 0.5,
      lineheight = 0.3,
      margin = margin(10, 0, 10, 0, "mm")
    ),
    # element_textbox so the markdown/HTML in the caption is rendered
    plot.caption = element_textbox(
      colour = text_hil,
      family = "caption_font",
      hjust = 0.5
    ),
    plot.background = element_rect(fill = bg_col, colour = "transparent"),
    panel.background = element_rect(
      fill = "transparent",
      colour = "transparent"
    ),
    plot.margin = margin(10, 10, 10, 10, "mm")
  )

Saving the graphics

Code
# Write the full-size graphic: 400 mm wide by 500 mm tall (4:5 portrait)
ggsave(
  filename = here::here("data_vizs", "tidy_drob.png"),
  plot = g,
  width = 400,
  height = 500,
  units = "mm",
  bg = bg_col
)

# Thumbnail for the webpage: re-read the saved PNG, resize it with geometry
# "400" and write it to the thumbnails folder under the same file name.
library(magick)
image_read(path = here::here("data_vizs", "tidy_drob.png")) |>
  image_resize(geometry = "400") |>
  image_write(path = here::here("data_vizs", "thumbnails", "tidy_drob.png"))