Is migration mainly driven by wealth?

The Sankey diagram visualizes migration flows in 2020, highlighting the near-universal migration towards wealthier countries, with almost none in the reverse direction.

Our World in Data
Public Health
{ggsankeyfier}
Author

Aditya Dahiya

Published

July 27, 2024

Migration: Internal and Inter-Continental Flows (2020)

The data for this Sankey diagram, depicting migration flows in 2020, is sourced from the United Nations Department of Economic and Social Affairs (UN DESA) and processed by Our World in Data. It includes comprehensive international migrant statistics, standardized and refined through several processing steps to ensure accuracy and clarity. The graphic highlights key findings, showing the near-universal migration to high-income countries, especially from middle-income countries. For more details, visit the UN DESA International Migrant Stock page.

This graphic visualizes global migration flows for the year 2020 using data sourced from the United Nations Department of Economic and Social Affairs (UN DESA) and processed by Our World in Data. It highlights migration patterns in countries of different income groups through a Sankey Diagram.

How did I make this graphic?

Getting the data

Code
# Data Import and Wrangling Tools
library(tidyverse)            # All things tidy
library(owidR)                # Get data from Our World in Data

# Final plot tools
library(scales)               # Nice Scales for ggplot2
library(fontawesome)          # Icons display in ggplot2
library(ggtext)               # Markdown text support
library(showtext)             # Display fonts in ggplot2
library(colorspace)           # To lighten and darken colours
library(patchwork)            # Combining plots

library(ggsankeyfier)         # Sankey Diagrams with ggplot2

# Getting the data
# Earlier route through {owidR}, kept for reference:
# search1 <- owid_search("migrant")
# rawdf <- owid("migrant-stock-total")

# UN DESA workbook: 2020 international migrant stock by destination and origin
url1 <- "https://www.un.org/development/desa/pd/sites/www.un.org.development.desa.pd/files/undesa_pd_2020_ims_stock_by_sex_destination_and_origin.xlsx"

# Read sheet "Table 2" from row 10 onwards; with colNames = TRUE the first
# row read supplies the column names
rawdf2 <- openxlsx::read.xlsx(
  url1,
  sheet = "Table 2",
  colNames = TRUE,
  startRow = 10
)

Visualization Parameters

Code
# Google fonts, each registered under the family alias used later in the plot
font_add_google(name = "Acme", family = "title_font")                     # titles
font_add_google(name = "Saira Extra Condensed", family = "caption_font")  # caption
font_add_google(name = "Changa", family = "body_font")                    # plot text

# Switch on automatic showtext rendering so the fonts above are used
showtext_auto()

# Base text size; the other text sizes in the plot are multiples of it
bts <- 80

# Colour palette: plain and highlighted text colours, plus a lightened
# version of the fifth palette colour for the background
mypal <- paletteer::paletteer_d("MoMAColors::Alkalay1")
text_col <- mypal[1]
text_hil <- mypal[2]
bg_col <- mypal[5] |> lighten(amount = 0.8)
seecolor::print_color(bg_col)   # preview the background colour

plot_title <- "Migration Towards Wealth: 2020"

# Subtitle, hard-wrapped at 90 characters
plot_subtitle <- glue::glue("Nearly half of global migration in 2020 was directed towards high-income countries. Interestingly, middle-income countries, rather than low-income ones, were the primary contributors to this migration towards wealthier nations.") |>
  str_wrap(width = 90)
str_view(plot_subtitle)         # inspect where the line breaks fall

# Register the Font Awesome brands font referenced by the caption's icon spans
sysfonts::font_add(
  regular = here::here("docs", "Font Awesome 6 Brands-Regular-400.otf"),
  family = "Font Awesome 6 Brands"
)

# Assemble the markdown/HTML caption. The icon codes and handles are scoped
# to local(), so only plot_caption remains in the workspace afterwards.
plot_caption <- local({
  github <- "&#xf09b"
  github_username <- "aditya-dahiya"
  xtwitter <- "&#xe61b"
  xtwitter_username <- "@adityadahiyaias"

  # Icon glyph (brands font) followed by the handle in the highlight colour
  social_caption_1 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{github};</span> <span style='color: {text_hil}'>{github_username}  </span>")
  social_caption_2 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{xtwitter};</span> <span style='color: {text_hil}'>{xtwitter_username}</span>")

  paste0(
    "**Data:** United Nations & Our World in Data  |  ",
    "**Code:** ",
    social_caption_1,
    " |  **Graphics:** ",
    social_caption_2
  )
})

Data Wrangling

Code
# Income groups in display order; also the factor levels of the Sankey nodes.
# The aggregate "Middle Income Countries" group is deliberately left out.
income_levels <- c(
  "Low Income Countries",
  "Lower Middle Income Countries",
# "Middle Income Countries",
  "Upper Middle Income Countries",
  "High Income Countries"
)

# Long table of flows between income groups: two rows per flow (its "from"
# end and its "to" end), both carrying the same `value`.
income1 <- rawdf2 |>

  # Remove minor duplication of "Australia.And.New.Zealand"
  select(-12) |>
  janitor::clean_names() |>
  as_tibble() |>

  # The row labels name the destination group, so they become `to`
  rename(to = region_development_group_of_destination) |>
  mutate(to = str_squish(to)) |>
  select(!c(x1, x3, x4)) |>

  # Keep only the income-group rows and drop the world total column
  filter(str_detect(to, pattern = "income")) |>
  select(!world) |>

  # The remaining column headers become `from`; keep only the income-group ones
  pivot_longer(
    cols = !to,
    names_to = "from",
    values_to = "value"
  ) |>
  filter(str_detect(from, pattern = "income")) |>
  mutate(across(c(to, from), snakecase::to_title_case)) |>

  # Drop the aggregate middle-income group on either end of a flow
  filter(
    to != "Middle Income Countries",
    from != "Middle Income Countries"
  ) |>

  # One row per flow end
  pivot_longer(
    cols = c(from, to),
    names_to = "connector_var",
    values_to = "node_var"
  ) |>
  relocate(node_var, connector_var, value) |>

  # Order the nodes by income level and place the two ends at x = 1 and x = 2
  mutate(
    node_var = fct(node_var, levels = income_levels),
    x_var = case_when(
      connector_var == "from" ~ 1,
      connector_var == "to" ~ 2,
      .default = NA
    )
  )

# income1 plus a `total` column: the summed `value` of each node (an income
# group on one side of the diagram), formatted as text such as "12.3 million".
# It supplies the node labels for the plot.
income2 <- income1 |>
  group_by(connector_var, node_var, x_var) |>
  # Drop the grouping explicitly rather than relying on the default
  summarise(total = sum(value), .groups = "drop") |>
  mutate(total = paste0(round(total / 1e6, 1), " million")) |>
  # Attach the totals back onto every flow row. The keys are spelled out so
  # the join does not depend on whichever columns happen to share a name, and
  # the relationship check fails loudly if a node ever had more than one total.
  right_join(
    income1,
    by = join_by(connector_var, node_var, x_var),
    relationship = "one-to-many"
  )

# geo_regions <- rawdf2 |> 
#   # Remove minor duplication of "Australia.And.New.Zealand"
#   select(-12) |> 
#   as_tibble() |> 
#   slice(3:10) |> 
#   pull(`Region,.development.group.of.destination`) |> 
#   str_squish()
# 
# geo_regions |> snakecase::to_title_case()
# 
# level_geo_regions <- c(
#   "Sub Saharan Africa",
#   "Northern Africa and Western Asia",    
#   "Central and Southern Asia",   
#   "Eastern and South Eastern Asia",         
#   "Oceania Excluding Australia and New Zealand",
#   "Australia and New Zealand", 
#   "Latin America and the Caribbean",   
#   "Europe and Northern America"
# )
# 
# geo1 <- rawdf2 |> 
#   # Remove minor duplication of "Australia.And.New.Zealand"
#   select(-49) |> 
#   janitor::clean_names() |> 
#   as_tibble() |> 
#   slice(3:10) |> 
#   select(2, 6:13) |> 
#   rename(origin = region_development_group_of_destination) |> 
#   mutate(origin = snakecase::to_title_case(str_squish(origin))) |> 
#   pivot_longer(
#     cols = -origin,
#     names_to = "destination",
#     values_to = "value"
#   ) |> 
#   mutate(destination = snakecase::to_title_case(destination)) |> 
#   rename(from = destination, to = origin) |> 
#   filter(
#     (to != "Middle Income Countries")
#     &
#     (from != "Middle Income Countries")
#   ) |> 
#   pivot_longer(
#     cols = c(from, to),
#     names_to = "connector_var",
#     values_to = "node_var"
#   ) |> 
#   relocate(node_var, connector_var, value) |>
#   mutate(
#     node_var = fct(node_var, levels = level_geo_regions),
#     x_var = case_when(
#       connector_var == "from" ~ 1,
#       connector_var == "to" ~ 2,
#       .default = NA
#     )
#   )

Visualization

Code
# Shared position adjustment for every Sankey layer (edges, nodes, labels),
# so that all of them are laid out with the same settings.
#   ... : extra arguments forwarded unchanged to position_sankey()
#         (the label layers pass `nudge_x`)
# Returns the value of position_sankey().
sankey_pos <- function(...) {
  position_sankey(
    align = "justify",
    order = "as_is",
    v_space = "auto",
    width = 0.05,
    ...
  )
}

# Base text size for the plot (same value as in the visualization parameters),
# repeated here so the text sizes below can be tuned next to the plot code.
bts <- 80

# Text layer labelling the nodes on one side of the diagram with the income
# group name and its total, e.g. "High Income Countries\n(12.3 million)".
#   side    : "from" or "to"; picks the matching rows of income2
#   hjust   : horizontal justification of the labels (1 = right, 0 = left)
#   nudge_x : horizontal offset passed on to sankey_pos()
# Returns a ggplot2 layer.
sankey_node_labels <- function(side, hjust, nudge_x) {
  geom_text(
    data = income2 |> filter(connector_var == side),
    mapping = aes(
      label = paste0(
        str_wrap(node_var, 20),
        "\n(",
        total,
        ")"
      )
    ),
    colour = text_col,
    hjust = hjust,
    position = sankey_pos(nudge_x = nudge_x),
    stat = "sankeynode",
    lineheight = 0.3,
    size = bts / 3,
    family = "caption_font",
    fontface = "bold"
  )
}

g <- income1 |>
  ggplot(
    mapping = aes(
      x = x_var,
      y = value,
      connector = connector_var,
      group = node_var,
      # The two rows of a flow (its "from" and "to" end) carry the same value,
      # which is used here to pair them up.
      # NOTE(review): two flows with exactly equal values would share an id.
      edge_id = value
    )
  ) +

  # Sankey Flows
  geom_sankeyedge(
    mapping = aes(
      fill = node_var
    ),
    position = sankey_pos(),
    alpha = 0.6
  ) +

  # Bars at the ends to reflect totals
  geom_sankeynode(
    mapping = aes(fill = node_var),
    position = sankey_pos()
  ) +

  # Left hand side labels: right-aligned and nudged to the left
  sankey_node_labels(side = "from", hjust = 1, nudge_x = -0.04) +
  # Right hand side labels: left-aligned and nudged to the right
  sankey_node_labels(side = "to", hjust = 0, nudge_x = 0.04) +

  # Plots Labels
  labs(
    title = plot_title,
    subtitle = plot_subtitle,
    caption = plot_caption
  ) +

  # Scales and Coordinates
  scale_x_continuous(
    expand = expansion(0.2),
    breaks = 1:2,
    labels = c(
      "Origin\n(Where do migrants come from?)",
      "Destination\n(Where do migrants go to?)"
    ),
    position = "top"
  ) +
  scale_y_continuous(
    expand = expansion(0)
  ) +
  scale_fill_manual(values = mypal) +
  # Clipping is off so the labels placed beside the bars are not cut off
  coord_cartesian(
    clip = "off"
  ) +

  # Themes and beautification
  theme_void(
    base_family = "body_font",
    base_size = bts
  ) +
  theme(
    plot.title = element_text(
      colour = text_hil,
      hjust = 0.5,
      margin = margin(10, 0, 5, 0, "mm"),
      size = 3.5 * bts,
      family = "title_font"
    ),
    plot.subtitle = element_text(
      colour = text_hil,
      hjust = 0.5,
      margin = margin(0, 0, 5, 0, "mm"),
      lineheight = 0.30,
      size = 1.2 * bts
    ),
    # element_textbox (ggtext) so the markdown/HTML in the caption is rendered
    plot.caption = element_textbox(
      colour = text_hil,
      family = "caption_font",
      hjust = 0.5
    ),
    legend.position = "none",
    plot.margin = margin(10, 10, 10, 10, "mm"),
    # theme_void() has no axis text; switch the top x labels back on
    axis.text.x.top = element_text(
      colour = text_hil,
      family = "title_font",
      size = 1.5 * bts,
      hjust = 0.5,
      margin = margin(7, 0, 10, 0, "mm"),
      lineheight = 0.3
    )
  )

# Write the finished plot as a 500 mm x 500 mm PNG on the background colour
ggsave(
  plot = g,
  filename = here::here("data_vizs", "owid_income_migrants.png"),
  units = "mm",
  width = 500,
  height = 500,
  bg = bg_col
)

Save the graphic and a thumbnail

Code
# Save the full-size graphic (500 mm x 500 mm PNG on the background colour)
ggsave(
  plot = g,
  filename = here::here("data_vizs", "owid_income_migrants.png"),
  units = "mm",
  width = 500,
  height = 500,
  bg = bg_col
)

library(magick)
# Saving a thumbnail for the webpage: re-read the PNG just written, resize it
# with geometry "400" and write it under the same file name in thumbnails/
image_read(
  path = here::here("data_vizs", "owid_income_migrants.png")
) |>
  image_resize(geometry = "400") |>
  image_write(
    path = here::here("data_vizs", "thumbnails", "owid_income_migrants.png")
  )