Economic Diversity and Student Outcomes

US Data exploring economic diversity and student outcomes.

#TidyTuesday
{waffle}
Author

Aditya Dahiya

Published

September 10, 2024

Figure 1: A waffle chart on what percentage of students by each income group (by parents’ income) get selected in different Colleges of U.S.A., for otherwise similar scores and profiles.

How I made this graphic?

Loading libraries & data

Code
# Data Import and Wrangling Tools
library(tidyverse)            # All things tidy

# Final plot tools
library(scales)               # Nice Scales for ggplot2
library(fontawesome)          # Icons display in ggplot2
library(ggtext)               # Markdown text support for ggplot2
library(showtext)             # Display fonts in ggplot2
library(colorspace)           # Lighten and Darken colours
library(seecolor)             # To print and view colours
library(patchwork)            # Combining plots
library(waffle)               # Waffle Charts in R

college_admissions <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-09-10/college_admissions.csv')

Data Wrangling and EDA

Code
# college_admissions |> 
#   summarytools::dfSummary() |> 
#   summarytools::view()
# 
# college_admissions |> 
#   group_by(tier) |> 
#   distinct(name) |> 
#   arrange(tier) |> 
#   View()
# 
# college_admissions |> 
#   select(tier, name, par_income_lab, attend_unwgt) |> 
#   group_by(par_income_lab) |> 
#   summarise(total = sum(attend_unwgt, na.rm = T)) |> 
#   mutate(total2 = cumsum(total))

levels_tier <- c("Ivy Plus",                      
  "Other elite schools (public and private)",
  "Highly selective private", 
  "Highly selective public", 
  "Selective private",                        
  "Selective public",        
  "Out of College"
)

# Attempt: College Tier vs. par_income_bin numbers
df <- college_admissions |> 
  select(tier, name, par_income_bin, attend) |> 
  group_by(par_income_bin, tier) |> 
  summarise(
    attend = sum(attend, na.rm = T)
  ) |> 
  # pivot_wider(
  #   id_cols = par_income_bin,
  #   names_from = tier,
  #   values_from = attend
  # ) |> 
  # mutate(
  #   `Out of College` = 1 - (
  #     `Highly selective private` +
  #     `Highly selective public` +
  #     `Ivy Plus` +
  #     `Other elite schools (public and private)` +
  #     `Selective private` +
  #     `Selective public`),
  #   `Out of College` = if_else(
  #     `Out of College` < 0,
  #     0,
  #     `Out of College`
  #   )
  # ) |> 
  # pivot_longer(
  #   cols = -c(par_income_bin),
  #   names_to = "tier",
  #   values_to = "attend"
  # ) |> 
  ungroup() |> 
  mutate(
    tier = fct(tier, levels = levels_tier)
  ) |> 
  arrange(par_income_bin, tier)

levels_income_lab2 <- c(
  "Bottom 20% income",
  "2nd Quintile",
  "3rd Quintile",
  "4th Quintile",
  "Top 20% income",
  "Top 1% income",
  "Top 0.1% income")


df2 <- college_admissions |> 
  select(tier, name, par_income_lab, attend) |> 
  group_by(par_income_lab, tier) |> 
  summarise(
    attend = sum(attend, na.rm = T)
  ) |> 
  ungroup() |> 
  mutate(
    tier = fct(tier, levels = levels_tier)
  ) |> 
  arrange(par_income_lab, tier) |> 
  mutate(
    par_income_lab2 = case_when(
      par_income_lab %in% c("0-20") ~ "Bottom 20% income",
      par_income_lab %in% c("20-40") ~ "2nd Quintile",
      par_income_lab %in% c("40-60") ~ "3rd Quintile",
      par_income_lab %in% c("60-70", "70-80") ~ "4th Quintile",
      par_income_lab %in% c("80-90",
                            "90-95",
                            "95-96",
                            "96-97",
                            "97-98",
                            "98-99") ~ "Top 20% income",
      par_income_lab %in% c("99-99.9", "Top 1") ~ "Top 1% income",
      par_income_lab %in% c("Top 0.1") ~ "Top 0.1% income",
      .default = NA
    )
  ) |> 
  group_by(par_income_lab2, tier) |> 
  summarise(
    attend = sum(attend)
  ) |> 
  ungroup() |> 
  mutate(
    par_income_lab2 = fct(par_income_lab2, levels = levels_income_lab2)
  ) |> 
  group_by(par_income_lab2) |> 
  mutate(attend = round(100 * attend / sum(attend), 0))

# Minor manual rounding-off correction
df3 <- df2 |> 
  mutate(
    attend = case_when(
      (par_income_lab2 == "2nd Quintile" & 
         tier == "Selective private")       ~ 2,
      (par_income_lab2 == "3rd Quintile" & 
         tier == "Ivy Plus")                ~ 26,
      (par_income_lab2 == "Top 1% income" & 
         tier == "Ivy Plus")                ~ 26,
      (par_income_lab2 == "Top 20% income" & 
         tier == "Ivy Plus")                ~ 23,
      .default = attend
    )
  )

Visualization Parameters

Code
# Font for titles
font_add_google("Sofia",
  family = "title_font"
) 

# Font for the caption
font_add_google("Barlow Condensed",
  family = "caption_font"
) 

# Font for plot text
font_add_google("Iceberg",
  family = "body_font"
) 

showtext_auto()

bg_col <- "white"
mypal <- paletteer::paletteer_d("beyonce::X39")
mypal1 <- paletteer::paletteer_d("LaCroixColoR::Lemon")

text_col <- "grey15"
text_hil <- "grey15"

bts <- 80

# Caption stuff for the plot
sysfonts::font_add(
  family = "Font Awesome 6 Brands",
  regular = here::here("docs", "Font Awesome 6 Brands-Regular-400.otf")
)
github <- "&#xf09b"
github_username <- "aditya-dahiya"
xtwitter <- "&#xe61b"
xtwitter_username <- "@adityadahiyaias"
social_caption_1 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{github};</span> <span style='color: {text_hil}'>{github_username}  </span>")
social_caption_2 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{xtwitter};</span> <span style='color: {text_hil}'>{xtwitter_username}</span>")

plot_title <- "Parents' Incomes & College Admissions"

plot_subtitle <- str_wrap("For similar test scores and profile, kids of richer parents are more likely to end up in the prestigious Ivy Leagues and elite schools in the USA. The data collected from NY Times, shows a sample of 100 kids from each income slab, and where they end up.", 110)

# plot_subtitle <- glue::glue("................")

plot_subtitle |> str_view()

plot_caption <- paste0(
  "**Data:** New York Times (The Upshot) & Havisha Khurana", 
  " |  **Code:** ", 
  social_caption_1, 
  " |  **Graphics:** ", 
  social_caption_2
  )

rm(github, github_username, xtwitter, 
   xtwitter_username, social_caption_1, 
   social_caption_2)

The static plot

Code
g <- df3 |> 
  ggplot() +
  geom_waffle(
    mapping = aes(
      fill = tier,
      values = attend
    ),
    colour = "white",
    size = 1.125,
    n_rows = 4
  ) +
  facet_wrap(
    ~par_income_lab2,
    ncol = 1
  ) +
  scale_fill_manual( 
    values = paletteer::paletteer_d("LaCroixColoR::Berry")
  ) +
  scale_x_continuous(expand = expansion(0)) +
  scale_y_continuous(expand = expansion(0)) +
  coord_cartesian(clip = "off") +
  labs(
    title = plot_title,
    subtitle = plot_subtitle,
    caption = plot_caption,
    fill = NULL
  ) +
  theme_void(
    base_size = bts,
    base_family = "body_font"
  ) +
  theme(
    legend.position = "bottom",
    plot.title.position = "plot",
    text = element_text(
      colour = text_hil,
      margin = margin(0,0,0,0, "mm")
    ),
    plot.margin = margin(5,30,5,30, "mm"),
    plot.title = element_text(
      size = 2.5 * bts,
      hjust = 0.5,
      margin = margin(10,0,5,0, "mm")
    ),
    plot.subtitle = element_text(
      lineheight = 0.3,
      margin = margin(0,0,2,0, "mm"),
      hjust = 0.5,
      size = 0.9 * bts
    ),
    plot.caption = element_textbox(
      hjust = 0.5,
      family = "caption_font",
      margin = margin(5,0,0,0, "mm")
    ),
    strip.text = element_text(
      margin = margin(0,0,1,0, "mm"),
      size = 1.2 * bts,
      hjust = 0
    ),
    panel.spacing.y = unit(5, "mm"),
    # Legend
    legend.text = element_text(
      margin = margin(0,0,0,3, "mm"),
      size = bts
    ),
    legend.spacing.y = unit(2, "mm"),
    legend.spacing.x = unit(1, "mm"),
    legend.key.spacing = unit(5, "mm"),
    legend.margin = margin(5,15,5,15, "mm"),
    legend.key.width = unit(5, "mm"),
    legend.key.height = unit(5, "mm")
  )

Savings the graphics

Code
ggsave(
  filename = here::here("data_vizs", "tidy_ed_diversity.png"),
  plot = g,
  width = 400,    # Best Twitter Aspect Ratio = 5:4
  height = 500,   
  units = "mm",
  bg = "white"
)


library(magick)
# Saving a thumbnail for the webpage
image_read(here::here("data_vizs", "tidy_ed_diversity.png")) |> 
  image_resize(geometry = "400") |> 
  image_write(
    here::here(
      "data_vizs", 
      "thumbnails", 
      "tidy_ed_diversity.png"
    )
  )