tidy_africa_languages

tidy_africa_languages

#TidyTuesday
Author

Aditya Dahiya

Published

January 24, 2026

About the Data

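This page uses the `africa` dataset from the #TidyTuesday release of 13 January 2026 (source: Wikipedia). The columns used here are `language`, `family`, `country` and `native_speakers`.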

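Figure 1: A word cloud of the languages of Africa. Word size represents the number of native speakers and colour represents the language family (the five largest families, with the rest grouped together).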

How I Made This Graphic

Loading required libraries

Code
# Attach the packages used on this page (each package listed once).
pacman::p_load(
  tidyverse,   # Data wrangling and ggplot2
  scales,      # Nice scales for ggplot2
  fontawesome, # Icons display in ggplot2
  ggtext,      # Markdown text support for ggplot2
  showtext,    # Display fonts in ggplot2
  colorspace,  # Lighten, darken and otherwise modify colours
  sf,          # Spatial features
  patchwork,   # Composing plots
  packcircles, # Hierarchical circle packing
  magick       # Reading and resizing images
)

# TidyTuesday data for 2026-01-13, read directly from the GitHub repository
africa <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2026/2026-01-13/africa.csv"
)

Visualization Parameters

Code
# Register three Google fonts under the family aliases used on this page.
# Each element is <alias> = <Google font name>; they are added in this order.
purrr::iwalk(
  c(
    title_font = "Saira",                   # titles
    body_font = "Saira Condensed",          # general plot text
    caption_font = "Saira Extra Condensed"  # caption
  ),
  \(google_name, alias) font_add_google(google_name, family = alias)
)

# Let showtext render text on all graphics devices opened from here on
showtext_auto()

# Background colour (printed as a swatch for a visual check)
bg_col <- "grey95"
seecolor::print_color(bg_col)

# Colour for highlighted text
text_hil <- "grey20"
seecolor::print_color(text_hil)

# Colour for ordinary text
text_col <- "grey10"
seecolor::print_color(text_col)

# Base text size; the other text sizes are expressed as multiples of it
bts <- 120

# Brand-icon font, needed to draw the GitHub and X glyphs in the caption
sysfonts::font_add(
  family = "Font Awesome 6 Brands",
  regular = here::here("docs", "Font Awesome 6 Brands-Regular-400.otf")
)

# Assemble the caption inside local() so that only `plot_caption` is left
# behind; the glyph codes and usernames are temporaries.
plot_caption <- local({
  # HTML entity codes; the closing ";" is appended inside the glue templates
  github <- "&#xf09b"
  github_username <- "aditya-dahiya"
  xtwitter <- "&#xe61b"
  xtwitter_username <- "@adityadahiyaias"

  code_credit <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{github};</span> <span style='color: {text_hil}'>{github_username}  </span>")
  graphics_credit <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{xtwitter};</span> <span style='color: {text_hil}'>{xtwitter_username}</span>")

  paste0(
    "**Data:**  Wikipedia",
    " |  **Code:** ", code_credit,
    " |  **Graphics:** ", graphics_credit
  )
})

# Title and subtitle text (the subtitle is wrapped at 110 characters)
plot_title <- "tidy_africa_languages"

plot_subtitle <- str_wrap("tidy_africa_languages", width = 110)

Exploratory Data Analysis and Wrangling

Code
# Print the raw data for a quick look
africa

# Packages for the map outline and the word cloud
library(tidyverse)
library(rnaturalearth)
library(sf)
library(ggwordcloud)

# One row per language/family pair with its summed native-speaker count.
# `family_grouped` keeps the five families with the most speakers (weighted
# by `native_speakers`), pools the rest, and orders the levels by descending
# total speakers so that colours are assigned in that order.
# NOTE(review): the sum assumes `native_speakers` is a per-country figure. If
# the source repeats a language's total on every country row, multi-country
# languages are over-counted -- confirm against the data dictionary.
africa_clean <- africa |>
  drop_na(country, family, native_speakers) |>
  group_by(language, family) |>
  summarise(native_speakers = sum(native_speakers), .groups = "drop") |>
  mutate(
    family_grouped = fct_lump_n(family, n = 5, w = native_speakers),
    family_grouped = fct_reorder(
      family_grouped, native_speakers,
      .fun = sum, .desc = TRUE
    )
  )

# Country polygons for the whole world
world <- ne_countries(scale = "medium", returnclass = "sf")

# Merge the African countries into one continent-wide geometry
africa_map <- st_union(filter(world, continent == "Africa"))

# Bounding box of the continent geometry
africa_bbox <- st_bbox(africa_map)

# 10,000 random points sampled inside the continent, as an x/y tibble
africa_points <- africa_map |>
  st_sample(size = 10000) |>
  st_coordinates() |>
  as_tibble() |>
  rename(x = X, y = Y)

Create a PNG mask

Code
# install.packages(c("sf", "terra", "png", "ggplot2", "ggwordcloud", "rnaturalearth"))
library(sf)
library(terra)
library(png)
library(ggplot2)
library(ggwordcloud)
library(rnaturalearth)

# 1. Africa as a single outline. The geometry is simplified in EPSG:3857 so
#    that `dTolerance` is in metres, then merged and returned to lon/lat.
af <- ne_countries(continent = "Africa", returnclass = "sf") |>
  st_transform("EPSG:3857") |>
  st_simplify(dTolerance = 10000) |>
  st_union() |>
  st_transform("EPSG:4326")

# 2. Empty raster covering the outline's bounding box. The size variables are
#    deliberately not called `ncol` / `nrow`, which would shadow the base
#    functions used further down.
bbox <- st_bbox(af)
mask_cols <- 800L
mask_rows <- 800L
r <- rast(
  xmin = bbox[["xmin"]], xmax = bbox[["xmax"]],
  ymin = bbox[["ymin"]], ymax = bbox[["ymax"]],
  ncols = mask_cols, nrows = mask_rows,
  crs = st_crs(af)$proj4string
)

# 3. Rasterize the polygon: 1 inside Africa, 0 outside
r_mask <- rasterize(vect(af), r, field = 1, background = 0)

# 4. Convert to an RGBA array with values in 0..1.
#    `wide = TRUE` is required: without it terra returns a cells x layers
#    matrix (640000 x 1) rather than the rows x cols grid.
#    No row flip is needed: raster row 1 is the northernmost row and PNG row 1
#    is the top of the image, so the orientations already agree.
m <- as.matrix(r_mask, wide = TRUE)
img <- array(0, dim = c(nrow(m), ncol(m), 4)) # R, G, B stay 0 (black)
img[, , 4] <- m # alpha: opaque inside Africa, transparent outside

# 5. Round-trip through a PNG file so that `mask_png` has the same form as any
#    image read with png::readPNG()
png::writePNG(img, "africa_mask.png")
mask_png <- png::readPNG("africa_mask.png")

The Plot

Code
# Base text size for this plot; title, subtitle and caption scale from it
bts <- 90

# Word cloud of languages: word area scales with native speakers and colour
# follows the grouped language family.
# NOTE(review): no `mask` argument is supplied to the geom, so the words are
# laid out in ggwordcloud's default shape rather than an outline of Africa,
# although the subtitle says the shape mimics the continent -- confirm whether
# a mask image was meant to be passed here.
g <- ggplot(data = africa_clean) +
  geom_text_wordcloud_area(
    mapping = aes(
      label = language,
      size = native_speakers,
      color = family_grouped
    ),
    seed = 42,           # fixed seed for a reproducible layout
    eccentricity = 0.65,
    area_corr = TRUE,
    rm_outside = TRUE    # drop words that cannot be fitted
  ) +
  # Upper bound on word size
  scale_size_area(max_size = 24) +
  # Qualitative palette, one colour per family group
  scale_color_brewer(palette = "Set2") +
  labs(
    title = "Languages of Africa",
    subtitle = "Word size represents native speakers; shape mimics the mainland continent",
    caption = plot_caption
  ) +
  theme_void(
    base_size = bts,
    base_family = "body_font"
  ) +
  theme(
    plot.background = element_rect(fill = bg_col, color = NA),
    legend.position = "none",
    plot.title = element_text(
      face = "bold",
      size = 1.5 * bts,
      colour = text_hil,
      hjust = 0.5,
      margin = margin(10, 0, 2, 0, "mm")
    ),
    plot.subtitle = element_text(
      size = 0.6 * bts,
      colour = text_hil,
      hjust = 0.5,
      margin = margin(0, 0, 5, 0, "mm")
    ),
    # element_textbox so the HTML/markdown in the caption is rendered
    plot.caption = element_textbox(
      family = "caption_font",
      size = bts * 0.5,
      colour = text_hil,
      hjust = 0.5
    )
  )

# Write the full-size graphic (400 mm x 500 mm)
ggsave(
  filename = here::here("data_vizs", "tidy_africa_languages.png"),
  plot = g,
  width = 400,
  height = 500,
  units = "mm",
  bg = bg_col
)

# Remove the temporary mask image from the working directory
unlink("africa_mask.png")

Saving the thumbnail for the webpage

Code
# Thumbnail for the webpage

library(magick)

# Read the full-size graphic, scale it to a height of 400 px (width follows
# the aspect ratio), and write it under data_vizs/thumbnails with the same
# file name.
here::here("data_vizs", "tidy_africa_languages.png") |>
  image_read() |>
  image_resize(geometry = "x400") |>
  image_write(
    here::here("data_vizs", "thumbnails", "tidy_africa_languages.png")
  )

Session Info

Code
# Attach the page's core packages so they are reported as attached below
pacman::p_load(
  tidyverse,   # All things tidy
  scales,      # Nice scales for ggplot2
  fontawesome, # Icons display in ggplot2
  ggtext,      # Markdown text support for ggplot2
  showtext,    # Display fonts in ggplot2
  colorspace,  # Lighten and darken colours
  sf,          # Spatial features
  patchwork,   # Composing plots
  packcircles  # Hierarchical circle packing
)

# Interactive table of the attached packages: name, version, date and source
sessioninfo::session_info()$packages |>
  as_tibble() |>
  # `attached` is TRUE for packages that were explicitly loaded
  dplyr::filter(attached) |>
  dplyr::arrange(package) |>
  dplyr::select(package, version = loadedversion, date, source) |>
  # Title-case column names for display
  janitor::clean_names(case = "title") |>
  gt::gt() |>
  gt::opt_interactive(use_search = TRUE) |>
  gtExtras::gt_theme_espn()
Table 1: R Packages and their versions used in the creation of this page and graphics