Migration: Inter-Continental Flows (2020)

The Sankey diagram visualizes the migration flows in 2020, highlighting the dominance of internal migration within Asia and Europe. Major inter-continental migration routes include Asia to Europe, Asia to North America, and Latin America to North America, illustrating key pathways in global population movement.

Our World in Data
Public Health
{ggsankeyfier}
Author

Aditya Dahiya

Published

July 25, 2024

Migration: Internal and Inter-Continental Flows (2020)

The data for this Sankey diagram, depicting migration flows in 2020, is sourced from the United Nations Department of Economic and Social Affairs (UN DESA) and processed by Our World in Data. It includes comprehensive international migrant statistics, standardized and refined through several processing steps to ensure accuracy and clarity. The graphic highlights key findings, showing the dominance of internal migration within Asia and Europe and significant inter-continental routes from Asia to Europe, Asia to North America, and Latin America to North America, reflecting crucial patterns in global population movements. For more details, visit the UN DESA International Migrant Stock page.

This graphic visualizes global migration flows for the year 2020 using data sourced from the United Nations Department of Economic and Social Affairs (UN DESA) and processed by Our World in Data. It highlights both internal and inter-continental migration patterns through a Sankey Diagram.

How did I make this graphic?

Getting the data

Code
# Data Import and Wrangling Tools
library(tidyverse)            # All things tidy
library(owidR)                # Get data from Our World in Data

# Final plot tools
library(scales)               # Nice Scales for ggplot2
library(fontawesome)          # Icons display in ggplot2
library(ggtext)               # Markdown text support
library(showtext)             # Display fonts in ggplot2
library(colorspace)           # To lighten and darken colours
library(patchwork)            # Combining plots

library(ggsankeyfier)         # Sankey Diagrams with ggplot2

# Getting the data
# Earlier route through {owidR}, kept for reference:
# search1 <- owid_search("migrant")
# rawdf <- owid("migrant-stock-total")

# UN DESA workbook: migrant stock by sex, destination and origin
url1 <- "https://www.un.org/development/desa/pd/sites/www.un.org.development.desa.pd/files/undesa_pd_2020_ims_stock_by_sex_destination_and_origin.xlsx"

# Read sheet "Table 2" straight from the URL. Reading starts at row 10
# of the sheet, and that first row read supplies the column names.
rawdf2 <- url1 |> 
  openxlsx::read.xlsx(
    sheet = "Table 2",
    startRow = 10,
    colNames = TRUE
  )

Data Wrangling

Code
# Continent-level aggregates to keep, spelled in upper case to match the
# row labels that this block filters on.
sel_continents <- c("AFRICA", "ASIA", "EUROPE", 
                    "LATIN AMERICA AND THE CARIBBEAN", 
                    "NORTHERN AMERICA", "OCEANIA")

# Continent-to-continent flows in long format.
# Result columns (in this order): to, from, value.
df1 <- rawdf2 |> 
  # Remove minor duplication of "Australia.And.New.Zealand"
  select(-12) |> 
  janitor::clean_names() |> 
  as_tibble() |>
  # The row-label column ("..._of_destination") is the destination (`to`)
  rename(to = region_development_group_of_destination) |> 
  mutate(to = str_squish(to)) |> 
  select(-c(x1, x3, x4)) |> 
  # Every remaining column becomes one `from` value
  pivot_longer(
    cols = -to,
    names_to = "from",
    values_to = "value"
  ) |> 
  filter(to %in% sel_continents) |> 
  mutate(
    # Title-case both endpoints with the SAME helper so that a continent
    # is spelled identically as origin and as destination.
    # str_to_title() capitalises every word ("And The"), so its output
    # need not compare equal to snakecase::to_title_case() output, which
    # is what the `from` labels and the filter below are built with.
    to = snakecase::to_title_case(to),
    from = snakecase::to_title_case(from),
    # Drop digits left over in the cleaned column names
    from = str_remove_all(from, "\\d+")
  ) |>
  filter(from %in% snakecase::to_title_case(sel_continents))

# The final tibbles to use for plotting
# One row per flow endpoint: each flow of df1 yields a "from" row
# (x_var = 1) and a "to" row (x_var = 2) carrying the same value.
plotdf1 <- df1 |> 
  mutate(
    # Harmonise the spelling of Latin America on BOTH endpoints before
    # they are compared. Doing it only after the comparison classified
    # flows inside Latin America as "Inter-Continental" whenever the two
    # endpoints were capitalised differently.
    across(
      c(from, to),
      \(x) if_else(
        x == "Latin America And The Caribbean",
        "Latin America and the Caribbean",
        x
      )
    ),
    fill_var = if_else(
      from == to,
      "Within Continent",
      "Inter-Continental"
    )
  ) |> 
  pivot_longer(
    cols = c(from, to),
    names_to = "connector_var",
    values_to = "node_var"
  ) |> 
  relocate(node_var, connector_var, value) |> 
  mutate(
    # Horizontal stage of the diagram: origins at 1, destinations at 2
    x_var = case_when(
      connector_var == "from" ~ 1,
      connector_var == "to" ~ 2,
      .default = NA
    )
  )

# Continent names ordered from the largest to the smallest total
# (rows of both stages are summed together for each continent)
levels_continents <- plotdf1 |> 
  group_by(node_var) |> 
  summarise(total = sum(value)) |> 
  arrange(desc(total)) |> 
  pull(node_var)

# Values of the inter-continental flows to highlight in the final graphic.
# plotdf1 holds two rows per flow (its "from" and its "to" endpoint), so
# the six largest rows collapse to fewer unique values.
top_flows <- plotdf1 |> 
  filter(fill_var == "Inter-Continental") |> 
  slice_max(order_by = value, n = 6) |> 
  pull(value) |> 
  unique()


# Order the continents as a factor and flag the highlighted flows:
# colour_var is "a" for rows whose value is among top_flows, else "b".
plotdf2 <- plotdf1 |> 
  mutate(
    node_var = fct(node_var, levels = levels_continents),
    colour_var = case_when(
      value %in% top_flows ~ "a",
      .default = "b"
    )
  )

# Label text per node: total value of each continent at each stage,
# expressed in millions and rounded to two decimals.
plotdf3 <- plotdf2 |> 
  group_by(connector_var, x_var, node_var) |> 
  # .groups = "drop" returns an ungrouped tibble directly
  summarise(
    value = sum(value),
    .groups = "drop"
  ) |> 
  mutate(
    label_value = paste0(round(value / 1e6, 2), " million")
  ) |> 
  select(-value)

# Attach the node labels to every flow row. The keys are spelled out
# (they are the three columns the two tibbles share) and the join is
# declared many-to-one, so an accidental row fan-out raises an error.
plotdf4 <- plotdf2 |> 
  left_join(
    plotdf3,
    by = join_by(connector_var, x_var, node_var),
    relationship = "many-to-one"
  )

# Another analysis, sub-region wise: it didn't work out, so it is skipped.

# df3 <- rawdf2 |> 
#   select(-12) |> 
#   as_tibble() |>
#   slice(23:n()) |> 
#   rename(origin = `Region,.development.group.of.destination`) |> 
#   select(-c(X1, X3, X4)) |> 
#   mutate(origin = str_squish(origin)) |> 
#   mutate(
#     group_var = if_else(
#       origin %in% sel_continents,
#       origin,
#       NA
#     )
#   ) |> 
#   relocate(group_var, .after = origin) |> 
#   fill(group_var) |> 
#   group_by(group_var) |>
#   mutate(prop_var = WORLD / sum(WORLD)) |>
#   relocate(prop_var, .after = group_var) |>
#   ungroup() |>
#   pivot_longer(
#     cols = -c(origin, group_var, WORLD),
#     names_to = "destination",
#     values_to = "value"
#   ) |> 
#   filter(origin != group_var) |> 
#   mutate(destination = snakecase::to_title_case(destination))
# 
#   
# df_to_join <- df3 |> 
#   distinct(origin, group_var) |> 
#   rename(
#     destination = origin,
#     group_var_dest = group_var
#   )

Visualization Parameters

Code
# Register the Google Fonts used by the plots. Names are the family
# aliases referenced in the theme; values are the Google Font names.
c(
  title_font = "Acme",                      # Font for titles
  caption_font = "Saira Extra Condensed",   # Font for the caption
  body_font = "Changa"                      # Font for plot text
) |> 
  iwalk(
    \(google_name, alias) font_add_google(google_name, family = alias)
  )

# Let {showtext} render text on graphics devices from here on
showtext_auto()

# Colour Palette

# Background colour, regular text colour and highlighted text colour
bg_col <- "white"
text_col <- "#2e5075"
text_hil <- "#18304a"

# Base Text Size
bts <- 80

plot_title <- "Global Migration Trends in 2020"

# Subtitle, wrapped at 100 characters per line
plot_subtitle <- str_wrap(
  "There is dominance of internal migration within Asia and Europe. Major inter-continental migration routes include Asia to Europe, Asia to North America, and Latin America to North America.",
  width = 100
)
# Preview the wrapped lines
str_view(plot_subtitle)

data_annotation <- "About the Data: The data comes from the United Nations (UN) and the United Nations Department of Economic and Social Affairs (UN DESA) for the year 2020."

# Caption stuff for the plot
# Register the Font Awesome brands face (local .otf file) for the icons
sysfonts::font_add(
  regular = here::here("docs", "Font Awesome 6 Brands-Regular-400.otf"),
  family = "Font Awesome 6 Brands"
)

# Icon entities (the closing ";" is supplied by the templates below)
github <- "&#xf09b"
xtwitter <- "&#xe61b"
# Account names shown next to the icons
github_username <- "aditya-dahiya"
xtwitter_username <- "@adityadahiyaias"

# One HTML snippet per account: icon span followed by the coloured handle
social_caption_1 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{github};</span> <span style='color: {text_hil}'>{github_username}  </span>")
social_caption_2 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{xtwitter};</span> <span style='color: {text_hil}'>{xtwitter_username}</span>")

# Markdown/HTML caption: data source | code | graphics
plot_caption <- paste0(
  "**Data:** United Nations & Our World in Data  |  **Code:** ",
  social_caption_1,
  " |  **Graphics:** ",
  social_caption_2
)

# Only plot_caption is needed afterwards; drop the helper objects
rm(
  list = c(
    "github", "github_username", "xtwitter",
    "xtwitter_username", "social_caption_1", "social_caption_2"
  )
)

Visualization: A Sankey Diagram

Code
# Learning how to use {ggsankeyfier}
# vignette("data_management")
# vignette("loopdeloop")
# vignette("positioning")
# vignette("decorating")

# Shared Sankey position so that every layer is placed with the same
# settings.
#   ...  further arguments forwarded to position_sankey()
#        (used later in the file to pass `nudge_x` for the labels)
# Returns whatever position_sankey() returns.
sankey_pos <- function(...) {
  position_sankey(
    ...,
    align = "justify",
    order = "as_is",
    v_space = "auto",
    width = 0.05
  )
}

# Build the Sankey diagram `g` from plotdf4.
# x_var places each row at stage 1 ("from") or 2 ("to"), value sets the
# band thickness, and node_var (the continent) defines the nodes.
g <- plotdf4 |> 
  ggplot(
    mapping = aes(
      x = x_var,
      y = value,
      connector = connector_var,
      group = node_var,
      # NOTE(review): the flow value doubles as the edge identifier, so
      # two flows with exactly the same value would share an id --
      # confirm that values are unique in the data.
      edge_id = value
    )
  ) +
  
  # Sankey Flows
  geom_sankeyedge(
    mapping = aes(
      fill = node_var
    ),
    position = sankey_pos(),
    alpha = 0.6
  ) +
  
  # Bars at the ends to reflect totals
  geom_sankeynode(
    mapping = aes(fill = node_var),
    position = sankey_pos()
  ) +  
  paletteer::scale_fill_paletteer_d("PNWColors::Sailboat") +
  
  # Left hand side labels
  # Only the "from" rows; the text is the continent name (wrapped at 20
  # characters) with its total in brackets on a new line. It is
  # right-aligned and nudged to the left of the node bar.
  geom_text(
    data = plotdf4 |> filter(connector_var == "from"),
    mapping = aes(
      label = paste0(
        str_wrap(node_var, 20),
        "\n(",
        label_value, ")"
      )
    ),
    colour = text_col,
    hjust = 1,
    position = sankey_pos(nudge_x = -0.04),
    stat = "sankeynode",
    lineheight = 0.3,
    size = bts / 3,
    family = "caption_font",
    fontface = "bold"
    ) +
  # Right hand side labels
  # Mirror image of the layer above: "to" rows, left-aligned and nudged
  # to the right of the node bar.
  geom_text(
    data = plotdf4 |> filter(connector_var == "to"),
    mapping = aes(
      label = paste0(
        str_wrap(node_var, 20),
        "\n(",
        label_value, ")"
      )
    ),
    colour = text_col,
    hjust = 0,
    position = sankey_pos(nudge_x = 0.04),
    stat = "sankeynode",
    lineheight = 0.3,
    size = bts / 3,
    family = "caption_font",
    fontface = "bold"
    ) +
  
  # Plots Labels
  labs(
    title = plot_title,
    subtitle = plot_subtitle,
    caption = plot_caption
  ) +
  
  # Scales and Coordinates
  # The x axis only carries the two stage headings, drawn above the
  # panel; the expansion leaves horizontal room for the node labels.
  scale_x_continuous(
    expand = expansion(0.2),
    breaks = 1:2,
    labels = c(
    "Origin\n(Where do migrants come from?)",
    "Destination\n(Where do migrants go to?)"
    ),
    position = "top"
  ) +
  scale_y_continuous(
    expand = expansion(0)
  ) +
  # Clipping is switched off so text may extend beyond the panel area
  coord_cartesian(
    clip = "off"
  ) +
  
  # Themes and beautification
  # Every text size below is expressed as a multiple of `bts`.
  theme_void(
    base_family = "body_font",
    base_size = bts
  ) +
  theme(
    plot.title = element_text(
      colour = text_hil,
      hjust = 0.5,
      margin = margin(10,0,5,0, "mm"),
      size = 3.5 * bts,
      family = "title_font"
    ),
    plot.subtitle = element_text(
      colour = text_hil,
      hjust = 0.5,
      margin = margin(0,0,5,0, "mm"),
      lineheight = 0.30,
      size = 1.2 * bts
    ),
    # element_textbox() so the markdown/HTML in the caption is rendered
    plot.caption = element_textbox(
      colour = text_hil,
      family = "caption_font",
      hjust = 0.5
    ),
    legend.position = "none",
    plot.margin = margin(10,10,10,10, "mm"),
    # theme_void() removes axis text; bring back the top x-axis headings
    axis.text.x.top = element_text(
      colour = text_hil,
      family = "title_font",
      size = 1.5 * bts,
      hjust = 0.5,
      margin = margin(7,0,10,0, "mm"),
      lineheight = 0.3
    )
  )

Save the graphic and a thumbnail

Code
# Make sure the output folders exist before anything is written.
# Creating "data_vizs/thumbnails" recursively also creates "data_vizs";
# nothing happens (and no warning is shown) when they already exist.
dir.create(
  here::here("data_vizs", "thumbnails"),
  recursive = TRUE,
  showWarnings = FALSE
)

# Full-size graphic: 500 mm x 500 mm on the chosen background colour
ggsave(
  filename = here::here("data_vizs", "owid_migrants.png"),
  plot = g,
  width = 500,
  height = 500,
  units = "mm",
  bg = bg_col
)


library(magick)
# Saving a thumbnail for the webpage
# Re-reads the PNG written above, resizes it with geometry "400" and
# stores it under the same file name in the thumbnails folder.
image_read(here::here("data_vizs", 
                      "owid_migrants.png")) |> 
  image_resize(geometry = "400") |> 
  image_write(here::here("data_vizs", "thumbnails", 
                         "owid_migrants.png"))

Other attempts

Code
# Load necessary libraries
library(circlize)

# chordDiagram() reads the first two columns of a data frame as the
# origin and the destination sector. df1 stores them as (to, from), so
# reorder the columns; both endpoints are also passed through the same
# title-casing helper so that one continent cannot end up as two sectors.
chord_df <- df1 |> 
  select(from, to, value) |> 
  mutate(across(c(from, to), snakecase::to_title_case)) |> 
  as.data.frame()

# Start from a clean circular layout
circos.clear()

# Create the Chord Diagram
# `title`, `subtitle`, `track.margin` and `sector.order` are not
# arguments of chordDiagram(): the sector order goes through `order`,
# the margin of the pre-allocated track is set inside
# `preAllocateTracks`, and the titles are added with title() below.
circlize::chordDiagram(
  x = chord_df, 
  order = union(chord_df$from, chord_df$to),
  transparency = 0.25,
  directional = TRUE,
  direction.type = c("arrows", "diffHeight"), 
  diffHeight  = -0.04,
  annotationTrack = "grid", 
  annotationTrackHeight = c(0.05, 0.1),
  link.arr.type = "big.arrow", 
  link.sort = TRUE, 
  link.largest.ontop = TRUE,
  
  # Empty outer track that will hold the sector labels
  preAllocateTracks = list(
    list(track.height = 0.1, track.margin = c(0, 0.1))
  )
)

# Add labels along the circumference
# Each sector is labelled with its own name, centred in its cell of the
# pre-allocated track (the cell metadata gives the name and the limits).
circos.trackPlotRegion(
  track.index = 1,
  panel.fun = function(x, y) {
    cell_xlim <- get.cell.meta.data("xlim")
    cell_ylim <- get.cell.meta.data("ylim")
    circos.text(
      x = mean(cell_xlim),
      y = mean(cell_ylim),
      labels = get.cell.meta.data("sector.index"),
      facing = "inside",
      cex = 0.7,
      adj = c(0.5, 0.5)
    )
  },
  bg.border = NA
)

# Add title and subtitle with base graphics
title(
  main = "Chord Diagram for Migration",
  sub = "Width represents number of migrants"
)


library(circlize)

# Start from a clean circular layout
circos.clear()

# chordDiagram() reads the first two columns as origin and destination;
# df1 stores them as (to, from), so reorder before plotting.
circlize::chordDiagram(
  x = df1 |> select(from, to, value) |> as.data.frame(), 
  # grid.col = mycolor,
  transparency = 0.25,
  directional = TRUE,
  direction.type = c("arrows", "diffHeight"), 
  diffHeight  = -0.04,
  annotationTrack = "grid", 
  annotationTrackHeight = c(0.05, 0.1),
  link.arr.type = "big.arrow", 
  link.sort = TRUE, 
  link.largest.ontop = TRUE
  )

# Add text and axis
circos.trackPlotRegion(
  track.index = 1, 
  bg.border = NA, 
  panel.fun = function(x, y) {
    
    xlim <- get.cell.meta.data("xlim")
    sector.index <- get.cell.meta.data("sector.index")
    
    # Add names to the sector. 
    circos.text(
      x = mean(xlim), 
      y = 3.2, 
      labels = sector.index, 
      facing = "bending", 
      cex = 0.8
      )

    # Add graduation on axis
    # The values are raw migrant counts, so a fixed step of 1 or 2 units
    # would create an enormous number of ticks. Use five evenly spaced
    # ticks per sector instead and label them in millions.
    tick_at <- seq(from = 0, to = xlim[2], length.out = 5)
    circos.axis(
      h = "top", 
      major.at = tick_at, 
      labels = round(tick_at / 1e6, 1),
      minor.ticks = 1, 
      major.tick.percentage = 0.5,
      labels.niceFacing = FALSE)
  }
)

Temp

Code
# Income groups in the order they should be stacked in the diagram.
# NOTE(review): "Middle Income Countries" may overlap with the Lower and
# Upper Middle groups -- confirm the totals are not double counted.
income_levels <- paste(
  c("Low", "Lower Middle", "Middle", "Upper Middle", "High"),
  "Income Countries"
)

# Palette for this plot; the text and background colours defined
# earlier are overwritten from it.
mypal <- paletteer::paletteer_d("MoMAColors::Alkalay1")
text_col <- mypal[1]
text_hil <- mypal[2]
bg_col <- lighten(col = mypal[5], amount = 0.8)
# Preview the background colour
seecolor::print_color(bg_col)


plot_title <- "Migration Towards Wealth: 2020"

# Subtitle, wrapped at 90 characters per line
plot_subtitle <- str_wrap(
  "Nearly half of global migration in 2020 was directed towards high-income countries. Interestingly, middle-income countries, rather than low-income ones, were the primary contributors to this migration towards wealthier nations.",
  width = 90
)
# Preview the wrapped lines
str_view(plot_subtitle)

# Caption stuff for the plot
# Rebuilt here because text_hil changed with the new palette.
# Register the Font Awesome brands face (local .otf file) for the icons
sysfonts::font_add(
  regular = here::here("docs", "Font Awesome 6 Brands-Regular-400.otf"),
  family = "Font Awesome 6 Brands"
)

# Icon entities (the closing ";" is supplied by the templates below)
github <- "&#xf09b"
xtwitter <- "&#xe61b"
# Account names shown next to the icons
github_username <- "aditya-dahiya"
xtwitter_username <- "@adityadahiyaias"

# One HTML snippet per account: icon span followed by the coloured handle
social_caption_1 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{github};</span> <span style='color: {text_hil}'>{github_username}  </span>")
social_caption_2 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{xtwitter};</span> <span style='color: {text_hil}'>{xtwitter_username}</span>")

# Markdown/HTML caption: data source | code | graphics
plot_caption <- paste0(
  "**Data:** United Nations & Our World in Data  |  **Code:** ",
  social_caption_1,
  " |  **Graphics:** ",
  social_caption_2
)

# Only plot_caption is needed afterwards; drop the helper objects
rm(
  list = c(
    "github", "github_username", "xtwitter",
    "xtwitter_username", "social_caption_1", "social_caption_2"
  )
)

# Flows between income groups, one row per flow endpoint.
# Result columns: node_var, connector_var, value, x_var.
income1 <- rawdf2 |> 
  # Remove minor duplication of "Australia.And.New.Zealand"
  select(-12) |> 
  janitor::clean_names() |> 
  as_tibble() |>
  # The row-label column ("..._of_destination") is the destination (`to`)
  rename(to = region_development_group_of_destination) |> 
  mutate(to = str_squish(to)) |> 
  select(-c(x1, x3, x4)) |> 
  # Keep the income-group rows and drop the world total column
  filter(str_detect(to, "income")) |> 
  select(-world) |> 
  # Every remaining column becomes one `from` value
  pivot_longer(
    cols = -to,
    names_to = "from",
    values_to = "value"
  ) |> 
  # ... of which only the income-group columns are kept
  filter(str_detect(from, "income")) |> 
  mutate(
    to = snakecase::to_title_case(to),
    from = snakecase::to_title_case(from)
  ) |> 
  # filter(from != to) |> 
  # Two rows per flow: its "from" endpoint and its "to" endpoint
  pivot_longer(
    cols = c(from, to),
    names_to = "connector_var",
    values_to = "node_var"
  ) |> 
  relocate(node_var, connector_var, value) |> 
  mutate(
    node_var = fct(node_var, levels = income_levels),
    # Horizontal stage of the diagram: origins at 1, destinations at 2
    x_var = case_when(
      connector_var == "from" ~ 1,
      connector_var == "to" ~ 2,
      .default = NA
    )
  )

# Label data: the total per income group and stage (in millions, one
# decimal) joined back onto every row of income1.
income2 <- income1 |> 
  group_by(connector_var, node_var, x_var) |> 
  # .groups = "drop" so that no grouping lingers on the summary
  summarise(total = sum(value), .groups = "drop") |> 
  mutate(total = paste0(round(total / 1e6, 1), " million")) |> 
  # Explicit keys (the three columns both tibbles share). Each summary
  # row matches many flow rows, and any other fan-out raises an error.
  right_join(
    income1,
    by = join_by(connector_var, node_var, x_var),
    relationship = "one-to-many"
  )
  


# Shared Sankey position so that every layer of the income plot is
# placed with the same settings.
#   ...  further arguments forwarded to position_sankey()
#        (used below to pass `nudge_x` for the labels)
# Returns whatever position_sankey() returns.
sankey_pos <- function(...) {
  position_sankey(
    width = 0.05,
    v_space = "auto", 
    order = "as_is", 
    align = "justify",
    ...
  )
}

# Base Text Size (assigned with `<-`, as everywhere else in the file)
bts <- 80

# Build the income-group Sankey diagram `g`.
# The flows and node bars are drawn from income1; the two label layers
# use income2, which additionally carries the `total` text per node.
g <- income1 |> 
  ggplot(
    mapping = aes(
      x = x_var,
      y = value,
      connector = connector_var,
      group = node_var,
      # NOTE(review): the flow value doubles as the edge identifier, so
      # two flows with exactly the same value would share an id --
      # confirm that values are unique in the data.
      edge_id = value
    )
  ) +
  
  # Sankey Flows
  geom_sankeyedge(
    mapping = aes(
      fill = node_var
    ),
    position = sankey_pos(),
    alpha = 0.6
  ) +
  
  # Bars at the ends to reflect totals
  geom_sankeynode(
    mapping = aes(fill = node_var),
    position = sankey_pos()
  ) +  
  
  # Left hand side labels
  # Only the "from" rows; the text is the group name (wrapped at 20
  # characters) with its total in brackets on a new line. It is
  # right-aligned and nudged to the left of the node bar.
  geom_text(
    data = income2 |> filter(connector_var == "from"),
    mapping = aes(
      label = paste0(
        str_wrap(node_var, 20),
        "\n(",
        total, 
        ")"
      )
    ),
    colour = text_col,
    hjust = 1,
    position = sankey_pos(nudge_x = -0.04),
    stat = "sankeynode",
    lineheight = 0.3,
    size = bts / 3,
    family = "caption_font",
    fontface = "bold"
    ) +
  # Right hand side labels
  # Mirror image of the layer above: "to" rows, left-aligned and nudged
  # to the right of the node bar.
  geom_text(
    data = income2 |> filter(connector_var == "to"),
    mapping = aes(
      label = paste0(
        str_wrap(node_var, 20),
        "\n(",
        total, 
        ")"
      )
    ),
    colour = text_col,
    hjust = 0,
    position = sankey_pos(nudge_x = 0.04),
    stat = "sankeynode",
    lineheight = 0.3,
    size = bts / 3,
    family = "caption_font",
    fontface = "bold"
    ) +
  
  # Plots Labels
  labs(
    title = plot_title,
    subtitle = plot_subtitle,
    caption = plot_caption
  ) +
  
  # Scales and Coordinates
  # The x axis only carries the two stage headings, drawn above the
  # panel; the expansion leaves horizontal room for the node labels.
  scale_x_continuous(
    expand = expansion(0.2),
    breaks = 1:2,
    labels = c(
    "Origin\n(Where do migrants come from?)",
    "Destination\n(Where do migrants go to?)"
    ),
    position = "top"
  ) +
  scale_y_continuous(
    expand = expansion(0)
  ) +
  # Fill colours are taken from mypal
  scale_fill_manual(values = mypal) +
  # Clipping is switched off so text may extend beyond the panel area
  coord_cartesian(
    clip = "off"
  ) +
  
  # Themes and beautification
  # Every text size below is expressed as a multiple of `bts`.
  theme_void(
    base_family = "body_font",
    base_size = bts
  ) +
  theme(
    plot.title = element_text(
      colour = text_hil,
      hjust = 0.5,
      margin = margin(10,0,5,0, "mm"),
      size = 3.5 * bts,
      family = "title_font"
    ),
    plot.subtitle = element_text(
      colour = text_hil,
      hjust = 0.5,
      margin = margin(0,0,5,0, "mm"),
      lineheight = 0.30,
      size = 1.2 * bts
    ),
    # element_textbox() so the markdown/HTML in the caption is rendered
    plot.caption = element_textbox(
      colour = text_hil,
      family = "caption_font",
      hjust = 0.5
    ),
    legend.position = "none",
    plot.margin = margin(10,10,10,10, "mm"),
    # theme_void() removes axis text; bring back the top x-axis headings
    axis.text.x.top = element_text(
      colour = text_hil,
      family = "title_font",
      size = 1.5 * bts,
      hjust = 0.5,
      margin = margin(7,0,10,0, "mm"),
      lineheight = 0.3
    )
  )


# Make sure the output folder exists before writing; nothing happens
# (and no warning is shown) when it is already there.
dir.create(
  here::here("data_vizs"),
  recursive = TRUE,
  showWarnings = FALSE
)

# Income-group graphic: 500 mm x 500 mm on the chosen background colour
ggsave(
  filename = here::here("data_vizs", "owid_income_migrants.png"),
  plot = g,
  width = 500,
  height = 500,
  units = "mm",
  bg = bg_col
)