Comparing Fertility Rates in South Asia

Comparing rates and timings of change of fertility amongst five South-Asian countries.

A4 Size Viz
Our World in Data
Public Health
{geomtextpath}
Author

Aditya Dahiya

Published

July 15, 2024

The data used in this graphic is sourced from the United Nations’ World Population Prospects (2022) and processed by Our World in Data. It encompasses the Total Fertility Rate (TFR) for the top 10 most populous countries from the 1950s to 2021. TFR is the average number of children a woman is expected to have during her lifetime.

This line graph shows that the TFR fell at different times in the five South-Asian Nations. Pakistan and Afghanistan continue to have high fertility rates. India, Bangladesh and Sri Lanka have achieved near-replacement levels of fertility. TFR in Sri Lanka and Bangladesh fell early and rapidly, while India’s TFR has declined slowly yet steadily.

Figure 1: A line graph on fertility rates from 1950 to 2021 in five South Asian Nations. Using geom_textline(), the names of the countries have been inserted along the lines.

How I made this graphic?

Getting the data

Code
# Data Import and Wrangling Tools
library(tidyverse)            # All things tidy
library(owidR)                # Get data from Our World in R

# Final plot tools
library(scales)               # Nice Scales for ggplot2
library(fontawesome)          # Icons display in ggplot2
library(ggtext)               # Markdown text support
library(showtext)             # Display fonts in ggplot2
library(colorspace)           # To lighten and darken colours
library(patchwork)            # Combining plots

# ggbump package for Bump-Charts and Sigmoid lines
# install.packages("ggbump")
library(geomtextpath)         # Text on the path 

# Data on fertility rates
# search1 <- owidR::owid_search("fertility")

df1 <- owid("children-per-woman")

Visualization Parameters

Code
# Font for titles
font_add_google("News Cycle",
  family = "title_font"
) 

# Font for the caption
font_add_google("Saira Extra Condensed",
  family = "caption_font"
) 

# Font for plot text
font_add_google("Roboto Condensed",
  family = "body_font"
) 

# Font for Country Names
font_add_google(
  "Kanit",
  family = "cn_font"
)

showtext_auto()

# Colour Palette
# mypal1 <- paletteer::paletteer_d("waRhol::marilyn_orange_62")
mypal2 <- paletteer::paletteer_d("MoMAColors::ustwo")
# mypal3 <- c("#FF7F00FF", "#577F3FFF", "#D2848DFF", 
#             "#D7433BFF", "#677E8EFF", "#5FA1F7FF")
mypal3 <- c("#ff9500", "darkgreen", "grey30", "blue", "red")
# mypal3 <- mypal1[c(2, 5, 1, 4, 6)]

# Background Colour
bg_col <- "white"
text_col <- "grey10"
text_hil <- "grey25"

# Base Text Size
bts <- 80

plot_title <- "Fertility Rates in South-Asian Nations"

plot_subtitle <- glue::glue("Total Fertility Rate (TFR) is the average number of children a woman is expected to have during her lifetime.<br>TFR decline in these South-Asian nations happened at different times. <b style='color:{mypal3[1]}'>India</b>, <b style='color:{mypal3[5]}'>Bangladesh</b> and <b style='color:{mypal3[4]}'>Sri Lanka</b><br>have reached near-replacement levels. <b style='color:{mypal3[2]}'>Pakistan</b> and <b style='color:{mypal3[3]}'>Afghanistan</b> continue to have high fertility rates.")

data_annotation <- "About the Data: This data on fertility rates, sourced from the United Nations' World Population Prospects (2022) and processed by Our World in Data, spans from 1950 to 2021. It reflects the average number of live births per woman, with age-specific rates, providing insights into global fertility trends across decades."

# Caption stuff for the plot
sysfonts::font_add(
  family = "Font Awesome 6 Brands",
  regular = here::here("docs", "Font Awesome 6 Brands-Regular-400.otf")
)
github <- "&#xf09b"
github_username <- "aditya-dahiya"
xtwitter <- "&#xe61b"
xtwitter_username <- "@adityadahiyaias"
social_caption_1 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{github};</span> <span style='color: {text_hil}'>{github_username}  </span>")
social_caption_2 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{xtwitter};</span> <span style='color: {text_hil}'>{xtwitter_username}</span>")

plot_caption <- paste0(
  "**Data:** United Nations & Our World in Data  |  ",
  "**Code:** ", 
  social_caption_1, 
  " |  **Graphics:** ", 
  social_caption_2
  )
rm(github, github_username, xtwitter, 
   xtwitter_username, social_caption_1, social_caption_2)

Data Wrangling

Code
# A clean tibble for the fertility levels in each year
df2 <- df1 |> 
  as_tibble() |> 
  janitor::clean_names() |> 
  rename(fertility = fertility_rate_sex_all_age_all_variant_estimates) |> 
  filter(!is.na(code)) |> 
  filter(entity != "World") |> 
  select(-countries_continents) |> 
  filter(!str_detect(code, "OWID"))

# A clean tibble of populations for each coutnry in each year
popdf1 <- popdf |>
  as_tibble() |>
  janitor::clean_names() |>
  filter(!is.na(code)) |>
  filter(entity != "World") |>
  mutate(
    population = ifelse(
      is.na(population_sex_all_age_all_variant_estimates),
      population_sex_all_age_all_variant_medium,
      population_sex_all_age_all_variant_estimates
    )
  ) |>
  select(-c(population_sex_all_age_all_variant_estimates,
            population_sex_all_age_all_variant_medium))

# Tibble for names of continents and ISO-2 country codes
# to be used for flags
df_continents <- rnaturalearth::ne_countries() |> 
  as_tibble() |> 
  janitor::clean_names() |> 
  select(iso_a3, iso_a2, continent) |> 
  rename(code = iso_a3) |> 
  mutate(country = str_to_lower(iso_a2), .keep = "unused")

x_axis_labels <- c("1950s", "1960s", "1970s", "1980s", "1990s", "2000s", "2010s", "2021")

# A tibble to use for final computation and country selection
df3 <- df2 |> 
  left_join(popdf1) |>
  left_join(df_continents) |> 
  drop_na() |> 
  filter(year >= 1951 & year <= 2020) |> 
  mutate(
    year = cut(
      year, 
      breaks = seq(1950, 2020, 10), 
      labels = 1:7
      )
  ) |> 
  group_by(continent, entity, country, year) |> 
  summarise(
    fertility = weighted.mean(fertility, w = population, na.rm = T),
    population = mean(population, na.rm = T)
  ) |> 
  mutate(year = as.numeric(year)) |>
  ungroup() |> 
  bind_rows(
    df2 |> 
      left_join(popdf1) |>
      left_join(df_continents) |> 
      drop_na() |> 
      filter(year == 2021) |> 
      mutate(year = 8)
  ) |> 
  select(-code)

# Countries to display
sel_cons <- df3 |> 
  group_by(year) |> 
  slice_max(population, n = 10) |> 
  pull(entity) |> 
  unique()

# Levels of countries for colour scales
levels_sel_cons <- df3 |> 
  filter(entity %in% sel_cons) |> 
  filter(year == 1) |> 
  arrange(desc(fertility)) |> 
  pull(entity)

# Tibble for final plotting with ggplot2
df4 <- df3 |> 
  filter(entity %in% sel_cons) |> 
  group_by(year) |> 
  arrange(desc(fertility)) |> 
  mutate(top_rank = row_number()) |> 
  ungroup() |> 
  mutate(entity = fct(entity, levels = levels_sel_cons))

# Adding dummy data to extend bump lines along x-axis
df4 <- df4 |> 
  bind_rows(df4 |> filter(year == 1) |> mutate(year = 0.5)) |> 
  bind_rows(df4 |> filter(year == 8) |> mutate(year = 8.5))


labels_df <- tribble(
  ~entity, ~description,
  "India", "India's fertility rate has declined gradually since 1960s.",
  "Pakistan", "Pakistan's fertility rate started declining late, and continues to be high to the present day.",
  "Bangladesh", "Bangladesh's rapid drop in 1980s.",
  "Afghanistan", "Afghanistan's wars, poor literacy levels and healthcare have led to persistently high fertility rate.",
  "Sri Lanka", "Sri Lanka witnessed rapid declines starting in 1960, and stabilized by 2000."
)

df_hjusts <- tibble(
  country = c("af", "bd", "in", "pk", "lk"),
  hjust_var = c(0.8, 0.5, 0.3, 0.8, 0.4)
)

df_flags <- tibble(
  country = c("af", "bd", "in", "pk", "lk"),
  y_var = c(7.35, 6.35, 5.9, 6.8, 5.4)
)

plotdf <- df2 |> 
  filter(entity %in% c(
    "India", "Pakistan",
    "Afghanistan", "Sri Lanka",
    "Bangladesh"
    )
  ) |> 
  left_join(popdf1) |> 
  group_by(entity) |> 
  mutate(
    size_var = population / min(population)
  ) |> 
  left_join(labels_df) |> 
  ungroup() |> 
  mutate(entity = fct(
    entity, 
    levels = c(
      "India", "Pakistan",
      "Afghanistan", 
      "Sri Lanka",
      "Bangladesh"
      )
    )
  ) |> 
  left_join(
    df4 |> select(entity, country) |> distinct()
  ) |> 
  mutate(
    country = case_when(
      entity == "Afghanistan" ~ "af",
      entity == "Sri Lanka" ~ "lk",
      .default = country
    )
  ) |> 
  # Adding dataframe for horizontal justifications of labels
  left_join(df_hjusts) |> 
  
  # Adding dataframe for flags location
  left_join(df_flags)

Visualization

Code
g <- plotdf |> 
  ggplot(
    mapping = aes(
      x = year, 
      y = fertility, 
      group = entity,
      colour = entity
    )
  ) +
  
  # Annotations
  geom_hline(
    yintercept = 2.1,
    colour = "grey80",
    linetype = "longdash",
    linewidth = 0.8
  ) +
  annotate(
    geom = "text",
    size = bts / 2.5,
    x = 1946,
    y = 2.15,
    vjust = 0,
    hjust = 0,
    label = "Replacement-level TFR  (2.10): Population stays stable at this TFR",
    colour = text_hil,
    family = "caption_font"
  ) +
  
  # Actual line plots
  geom_smooth(
    lineend = "round",
    linejoin = "round",
    alpha = 0.3,
    linewidth = 2,
    se = FALSE,
    span = 0.2
  ) +
  scale_colour_manual(
    values = mypal3
  ) +
  
  # Text along the smoothened path - different hjusts
  ggnewscale::new_scale_colour() +
  geom_textsmooth(
    data = plotdf,
    mapping = aes(
      label = entity,
      colour = entity,
      hjust = hjust_var
    ),
    text_only = TRUE,
    vjust = 0.1,
    span = 0.5,
    family = "cn_font",
    fontface = "bold",
    size = bts / 3
  ) +
  scale_colour_manual(
    values = colorspace::darken(mypal3, 0.4)
  ) +
  # Insert Flags
  ggflags::geom_flag(
    data = plotdf |> filter(year == 1950),
    mapping = aes(
      country = country,
      y = y_var
    ),
    size = 30
  ) +
  scale_x_continuous(
    expand = expansion(c(0.01, 0)),
    breaks = seq(1950, 2020, 10)
  ) +
  scale_y_continuous(
    position = "right", 
    breaks = 2:8,
    limits = c(1.8, 8.05),
    expand = expansion(0)
  ) +
  scale_linewidth_continuous(
    range = c(5, 12)
  ) +
  
  # Adding data annotation
  annotate(
    geom = "label",
    x = 1949, y = 4.5,
    label = str_wrap(data_annotation, 30),
    family = "caption_font",
    hjust = 0, vjust = 1,
    lineheight = 0.25,
    label.size = NA,
    fill = bg_col,
    colour = text_hil,
    size = bts / 4
  ) +
  
  # Labels and Themes
  labs(
    title = plot_title,
    subtitle = str_wrap(plot_subtitle, 110),
    caption = plot_caption,
    x = NULL,
    y = "Total Fertility Rate (Children born per woman)",
    colour = NULL
  ) +
  theme_minimal(
    base_family = "body_font",
    base_size = bts
  ) +
  theme(
    legend.position = "none",
    panel.grid = element_blank(),
    panel.grid.major.x = element_line(
      linewidth = 1,
      linetype = 3,
      colour = "grey70"
    ),
    axis.text.y = element_text(
      colour = text_hil, 
      margin = margin(0,0,0,0, "mm"),
      size = bts * 1.5
    ),
    axis.title.x = element_blank(),
    axis.title.y = element_text(
      margin = margin(0,0,0,0, "mm"),
      colour = text_hil,
      hjust = 0.5
    ),
    axis.line.y = element_line(
      colour = text_hil,
      linewidth = 0.8,
      arrow = arrow(ends = "both"),
      linetype = 1
    ),
    axis.ticks.length = unit(0, "mm"),
    plot.margin = margin(10,10,10,10, "mm"),
    axis.text.x = element_text(
      colour = text_hil,
      margin = margin(0,0,0,0, "mm"),
      size = bts * 1.2,
      face = "bold"
    ),
    plot.title.position = "plot",
    plot.caption = element_textbox(
      hjust = 0.5,
      family = "caption_font",
      colour = text_hil,
      margin = margin(5,0,0,0, "mm")
    ),
    plot.subtitle = element_markdown(
      lineheight = 0.35,
      colour = text_hil,
      hjust = 0.5,
      margin = margin(5,0,10,0, "mm"),
      size = bts
    ),
    plot.title = element_text(
      family = "caption_font",
      colour = text_hil,
      size = 3 * bts,
      face = "bold",
      hjust = 0.5,
      margin = margin(10,0,0,0, "mm")
    )
  )

Adding a QR Code to the plot

Code
# QR Code for the plot
url_graphics <- paste0(
  "https://aditya-dahiya.github.io/projects_presentations/projects/",
  # The file name of the current .qmd file
  "owid_fertility_textpath",
  ".qmd"
)
# remotes::install_github('coolbutuseless/ggqr')
# library(ggqr)
plot_qr <- ggplot(
  data = NULL, 
  aes(x = 0, y = 0, label = url_graphics)
  ) + 
  ggqr::geom_qr(
    colour = text_hil, 
    fill = bg_col,
    size = 1.5
    ) +
  # labs(caption = "Scan for the Interactive Version") +
  coord_fixed() +
  theme_void() +
  theme(plot.background = element_rect(
    fill = "transparent", 
    colour = "transparent"
    )
  )

g_full <- g +
  inset_element(
    p = plot_qr,
    left = 0.0168, right = 0.2,
    top = 0.28, bottom = 0.1,
    align_to = "panel",
    clip = FALSE
  ) +
  plot_annotation(
    theme = theme(
      plot.background = element_rect(
        fill = "transparent", colour = "transparent"
      ),
      panel.background = element_rect(
        fill = "transparent", colour = "transparent"
      )
    )
  )

Save the graphic and a thumbnail

Code
ggsave(
  filename = here::here("data_vizs", "owid_fertility_textpath.png"),
  plot = g_full,
  width = 400,
  height = 500,
  units = "mm",
  bg = bg_col
)

library(magick)
# Saving a thumbnail for the webpage
image_read(here::here("data_vizs", 
                      "owid_fertility_textpath.png")) |> 
  image_resize(geometry = "400") |> 
  image_write(here::here("data_vizs", "thumbnails", 
                         "owid_fertility_textpath.png"))