How USA’s H1B visa lottery gets gamed!

Unveiling how firms and applicants leverage multiple submissions and salaries to game the H1B visa lottery system.

Data Is Plural
Governance
Author

Aditya Dahiya

Published

October 2, 2024

A recent Bloomberg News investigation into the U.S. government’s annual H-1B lottery revealed that “thousands of companies gained an unfair advantage by securing extra lottery tickets.” To support these findings, Bloomberg obtained data on all H-1B lottery registrations, selections, and petitions for fiscal years 2021 through 2024 after filing a Freedom of Information Act (FOIA) lawsuit against the Department of Homeland Security. The dataset includes details such as each registration’s employer, the proposed beneficiary’s gender, nationality, and birth year. For registrations that led to visa petitions, additional information is provided, including the worksite, salary, job title, and the beneficiary’s field of study. The data can be accessed as part of their investigation, shared by reporter Eric Fan.

Credits: Original Data by U.S. Citizenship and Immigration Services (USCIS), now hosted on GitHub repo by Bloomberg Graphics. Shared in D.I.P. by Jeremy S. Vine.

Note: The data was featured in Bloomberg’s article “Thousands of US Work Visas Are Going to Middlemen Gaming the System”, which can be reproduced using the methodology described therein.

Figure 1: The scatterplot shows how different firms are using various strategies to navigate the H1B visa lottery. The red dots, representing large U.S. tech companies like Amazon and Google, offer high salaries and follow a straightforward approach, submitting only one application per candidate. On the other hand, the orange dots—Indian outsourcing firms such as Infosys and Wipro—submit many applications for lower-paying positions, relying on sheer volume to get a portion of their candidates selected. The green dots show newer firms that have emerged specifically to help applicants submit multiple entries, greatly increasing their chances. Lastly, the blue dots represent highly paid applicants, often start-up founders, who also use multiple applications but through smaller, niche companies to boost their odds.

Loading libraries & data

Code
# Data Import and Wrangling Tools
library(tidyverse)            # All things tidy

# Final plot tools
library(scales)               # Nice Scales for ggplot2
library(fontawesome)          # Icons display in ggplot2
library(ggtext)               # Markdown text support for ggplot2
library(showtext)             # Display fonts in ggplot2
library(colorspace)           # Lighten and Darken colours
library(seecolor)             # To print and view colours
library(patchwork)            # Combining plots

library(httr)                 # Download files
library(zip)                  # Handle ZIP files
library(countrycode)

# Get Data Dictionary used by Bloomberg
dictionary <- openxlsx::read.xlsx(
  "https://github.com/BloombergGraphics/2024-h1b-immigration-data/raw/refs/heads/main/TRK_13139_I129_H1B_Registrations_FY21_FY24_FOIA_FIN.xlsx",
  sheet = "Data Dictionary",
  rows = 1:57,
  cols = 1:2
) |> 
  janitor::clean_names()


# Single registrations data
# Download the ZIP file from the URL
url <- "https://github.com/BloombergGraphics/2024-h1b-immigration-data/blob/main/TRK_13139_FY2024_single_reg.zip?raw=true"
temp_zip <- tempfile(fileext = ".zip")
GET(url, write_disk(temp_zip, overwrite = TRUE))
# Unzip the file to a temporary directory
temp_dir <- tempdir()
unzip(temp_zip, exdir = temp_dir)
# Read the CSV file into R
csv_file <- file.path(temp_dir, "TRK_13139_FY2024_single_reg.csv")
rawdf_single <- read_csv(csv_file) |> 
  janitor::clean_names()

# Multiple registrations data
url1 <- "https://github.com/BloombergGraphics/2024-h1b-immigration-data/blob/main/TRK_13139_FY2024_multi_reg.zip?raw=true"
temp_zip <- tempfile(fileext = ".zip")
GET(url1, write_disk(temp_zip, overwrite = TRUE))
# Unzip the file to a temporary directory
temp_dir <- tempdir()
unzip(temp_zip, exdir = temp_dir)
# Read the CSV file into R
csv_file <- file.path(temp_dir, "TRK_13139_FY2024_multi_reg.csv")
rawdf_multi <- read_csv(csv_file) |> 
  janitor::clean_names()

# Clean up temporary files
unlink(temp_zip)      # Delete the temporary zip file

# Remove the temporary directory and its contents
# unlink(temp_dir, recursive = TRUE)  
rm(csv_file, temp_dir, temp_zip, url, url1)

print(object.size(rawdf_single), units = "Mb")
print(object.size(rawdf_multi), units = "Mb")

Total number of applicants by the country and success rates.

Code
df_app <- rawdf_single |> 
  count(country_of_birth, sort = T) |> 
  rename(applied = n)

df_sel <- rawdf_single |> 
  filter(status_type == "SELECTED") |> 
  count(country_of_birth, sort = T) |> 
  rename(selected = n)

df1 <- df_app |> 
  left_join(df_sel) |> 
  mutate(success_rate = selected / applied) |> 
  arrange(desc(success_rate)) |> 
  mutate(
    full_country_name = countrycode::countrycode(
      country_of_birth,
      "iso3c", 
      "country.name",
      warn = F
    )
  )
rm(df_sel, df_app)

sel_cons <- df1 |> 
  slice_max(applied, n = 10) |> 
  pull(country_of_birth)

plotdf1 <- df1 |> 
  mutate(
    country_of_birth = if_else(
      country_of_birth %in% sel_cons,
      country_of_birth,
      "Others"
    )
  ) |> 
  group_by(country_of_birth) |> 
  summarise(
    applied = sum(applied, na.rm = T),
    selected = sum(selected, na.rm = T)
  ) |> 
  mutate(
    country_of_birth = fct(
      country_of_birth,
      levels = c(sel_cons, "Others")
    )
  ) |> 
  arrange(country_of_birth) |> 
  mutate(
    full_country_name = countrycode::countrycode(
      country_of_birth,
      "iso3c", 
      "country.name",
      warn = F
    )
  ) |> 
  replace_na(list(full_country_name = "Others"))

plotdf1 |> 
  ggplot(
    mapping = aes(
      y = fct_rev(country_of_birth),
      x = applied
    )
  ) +
  geom_col(
    alpha = 0.5
  ) +
  geom_col(
    aes(x = selected),
    alpha = 1
  )

Wage Amounts for successful H1B applicants

Code
rawdf_single |> 
  filter(status_type == "SELECTED") |> 
  group_by(country_of_birth)

wages_units <- c("YEAR", "HOUR", "MONTH", "WEEK")

df2 <- rawdf_single |> 
  filter(wage_unit %in% wages_units) |> 
  mutate(
    wage_amt = case_when(
      wage_unit == "YEAR" ~ wage_amt,
      wage_unit == "HOUR" ~ wage_amt * 8760,
      wage_unit == "MONTH" ~ wage_amt * 12,
      wage_unit == "WEEK" ~ wage_amt * 52,
      .default = NA
    )
  ) |> 
  group_by(country_of_birth) |> 
  summarise(
    mean_wage_amt = mean(wage_amt, na.rm = T),
    median_wage_amt = median(wage_amt, na.rm = T),
    n = n()
  ) |> 
  arrange(desc(n))

plotdf2 <- df2 |> 
  select(-mean_wage_amt) |> 
  slice_max(order_by = n, n = 15) |> 
  bind_rows(
    df2 |>
      select(-mean_wage_amt) |> 
      slice_max(order_by = n, n = 15) |> 
      summarise(
        median_wage_amt = weighted.mean(median_wage_amt, w = n),
        n = sum(n)
      ) |> 
      mutate(country_of_birth = "Average")
  ) |> 
  mutate(
    full_country_name = countrycode::countrycode(
      country_of_birth,
      "iso3c", 
      "country.name",
      warn = F
    ),
    iso2c = str_to_lower(
      countrycode::countrycode(
        country_of_birth,
        "iso3c", 
        "iso2c",
        warn = F
      )
    )
  ) |> 
  replace_na(
    list(
      full_country_name = "All Countries"
    )
  ) |> 
  left_join(
    bind_rows(
      df1,
      
      
      df1 |> 
        summarise(
          applied = sum(applied, na.rm = T),
          selected = sum(selected, na.rm = T)
        ) |> 
        mutate(
          country_of_birth = "Average",
          full_country_name = "All Countries",
          success_rate = selected / applied
        )
    )
  )

g <- plotdf2 |> 
  mutate(
    print_amount = number(
        median_wage_amt, 
        prefix = "$",
        accuracy = 100
      )
  ) |> 
  ggplot(
    mapping = aes(
      y = reorder(full_country_name, median_wage_amt),
      x = median_wage_amt,
      fill = country_of_birth == "Average"
    )
  ) +
  
  # Bar Plot
  geom_col() +
  
  # Persons Applied
  geom_text(
    mapping = aes(
      x = 20000,
      label = number(applied)
    ),
    hjust = 1
  ) +
  
  # Persons Selected
  geom_text(
    mapping = aes(
      x = 40000,
      label = number(selected)
    ),
    hjust = 1
  ) +
  
  # Success Rate
  geom_text(
    mapping = aes(
      x = 60000,
      label = number(
        success_rate, 
        scale = 100, 
        suffix = " %",
        accuracy = 0.1
      )
    ),
    hjust = 1
  ) +
  
  geom_text(
    mapping = aes(
      label = print_amount
    ),
    hjust = 0,
    nudge_x = 1000
  ) +
  scale_x_continuous(
    labels = label_currency(
      prefix = "$ ",
      scale_cut = cut_short_scale()
    )
  ) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank()
  )

plotdf2

Analysis on single vs. multiple registrations:

Code
# =========================================
# Understanding the meaning of variable "ben_multi_reg_ind"
# Indicator of multiple registrations for beneficiary (a value of 
# 1 represents a registration for a beneficiary who has multiple 
# registrations with different employers)

# rawdf_single |> 
#   count(ben_multi_reg_ind)
# 
# rawdf_multi |> 
#   filter(!is.na(ben_multi_reg_ind))

# ==========================================
# Analysis Question
# Are employer_name and FEIn matched 1-is-to-1: Answer: No. 
# So better to stick to employer_name.
# rawdf_multi |> 
#   count(employer_name, fein) |> 
#   select(-n) |> 
#   count(employer_name) |> 
#   filter(n > 1)

# =========================================

# Combine multiple and single applications' data
rawdf <- bind_rows(
  rawdf_multi,
  rawdf_single
  ) |> 
  select(
    employer_name, 
    state,
    
    status_type,
    first_decision,
    
    ben_multi_reg_ind,
    
    # For fuzzy matching to find duplicates
    # since we dont have person's ID number to distinguish uniquely
    country_of_birth,
    country_of_nationality,
    ben_year_of_birth,
    
    # Salary
    wage_amt,
    wage_unit
  )

# =====================================
# Check status_type vs. first decisions
# They are similar in name, except that some selected persons will
# ultimately have their visas rejected. So, for purposes of analysing
# the lottery, we take the variable "status_type" only.
rawdf |> 
  count(status_type, first_decision)

# Data from on percentage of applications of a sponsor organization 
# that are multiple entry applications
df4 <- rawdf |> 
  count(employer_name, ben_multi_reg_ind, sort = T) |>
  mutate(
    ben_multi_reg_ind = case_when(
      ben_multi_reg_ind == 0 ~ "single_entry",
      ben_multi_reg_ind == 1 ~ "multiple_entry",
      .default = "others"
    )
  ) |> 
  pivot_wider(
    id_cols = employer_name,
    names_from = ben_multi_reg_ind,
    values_from = n
  ) |> 
  replace_na(
    replace = list(
      single_entry = 0,
      multiple_entry = 0,
      others = 0
    )
  ) |> 
  mutate(
    total = single_entry + multiple_entry + others,
    perc_single = single_entry / total,
    perc_multiple = multiple_entry / total
  )

# Percentage of applicants for each 
df5 <- rawdf |> 
  count(employer_name, status_type, sort = T) |> 
  mutate(status_type = str_to_lower(status_type)) |> 
  filter(status_type %in% c("eligible", "selected")) |> 
  pivot_wider(
    id_cols = employer_name,
    names_from = status_type,
    values_from = n
  ) |> 
  replace_na(
    replace = list(
      eligible = 0,
      selected = 0
    )
  ) |> 
  mutate(
    perc_selected = selected / (eligible + selected)
  )

wages_units <- c("YEAR", "HOUR", "MONTH", "WEEK")

df6 <- rawdf |> 
  filter(wage_unit %in% wages_units) |> 
  mutate(
    wage_amt = case_when(
      wage_unit == "YEAR" ~ wage_amt,
      wage_unit == "HOUR" ~ wage_amt * 8760,
      wage_unit == "MONTH" ~ wage_amt * 12,
      wage_unit == "WEEK" ~ wage_amt * 52,
      .default = NA
    )
  ) |>
  group_by(employer_name) |> 
  summarise(
    wage = median(wage_amt, na.rm = T)
  )

plotdf <- df4 |> 
  left_join(df5) |> 
  left_join(df6) |> 
  mutate(
    cluster = case_when(
      ((wage > 250000) & (perc_multiple > 0.5)) ~ "A",
      ((total > 1000) & (perc_multiple < 0.5) & 
         (wage > 110000)) ~ "B1",
      ((total > 1000) & (perc_multiple < 0.5) & 
         (wage < 110000)) ~ "B2",
      ((total > 1000) & (perc_multiple < 0.5)) ~ "B1",
      ((perc_multiple > 0.5) & (wage < 200000)) ~ "C",
      .default = "D"
    )
  )

# Do a k-means clustering to identify patters
# Number of clusters in K-Mean Clustering
# clus_nos = 10
# set.seed(42)
# k_out <- kmeans(
#   plotdf |> select(perc_multiple, wage) |> as.matrix(),
#   centers = clus_nos
#   )
# plotdf1 <- plotdf |> 
#   mutate(cluster = as_factor(k_out$cluster))
# 
# plotdf1 |> 
#   count(cluster, sort = T) |> 
#   mutate(cluster_var = LETTERS[1:clus_nos])

# The top n companies / visa sponsors to be labelled
labels_df <- plotdf |> 
  slice_max(order_by = total, n = 15)

Visualization Parameters

Code
# Font for titles
font_add_google("Roboto Condensed",
  family = "title_font"
) 

# Font for the caption
font_add_google("Saira Extra Condensed",
  family = "caption_font"
) 

# Font for plot text
font_add_google("Roboto Condensed",
  family = "body_font"
) 

showtext_auto()

bg_col <- "white"

text_col <- "grey15"
text_hil <- "grey25"
# mypal <- paletteer::paletteer_d("ltc::trio1")
mypal <- paletteer::paletteer_d("suffrager::CarolMan")
mypal <- mypal[c(4, 1, 2, 3)]
bts <- 90

# Caption stuff for the plot
sysfonts::font_add(
  family = "Font Awesome 6 Brands",
  regular = here::here("docs", "Font Awesome 6 Brands-Regular-400.otf")
)
github <- "&#xf09b"
github_username <- "aditya-dahiya"
xtwitter <- "&#xe61b"
xtwitter_username <- "@adityadahiyaias"
social_caption_1 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{github};</span> <span style='color: {text_hil}'>{github_username}  </span>")
social_caption_2 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{xtwitter};</span> <span style='color: {text_hil}'>{xtwitter_username}</span>")

plot_title <- "How the H1B Visa lottery gets gamed !" 

plot_subtitle <- glue::glue(
  "Each dot represents an American firm that sponsored more than 100 successful H1B Visas in 2023. While <b style='color:{mypal[2]}'>large companies</b> & <b style='color:{mypal[3]}'>outsourcing<br>firms</b> with more applications naturally get more overall successful numbers, <b style='color:{mypal[4]}'>emerging cover firms</b> and <b style='color:{mypal[1]}'>high-earning applicants</b><br>(Start-ups) are now using multiple submissions to improve their chances, revealing how the visa lottery is getting gamed !"
)

plot_caption <- paste0(
  "**Data:** USCIS, Bloomberg News", 
  "<br>**Analysis:** ", 
  social_caption_1, 
  "<br>**Graphics:** ", 
  social_caption_2
  )

annotation_1 <- "Highly paid applicants, often self-sponsored startup founders, who submit multiple applications through smaller niche firms to increase their chances."

annotation_2 <- "Large tech firms that hire in bulk, offer high salaries, and candidates mostly play by the rules - submit only one application."

annotation_3 <- "Outsourcing companies submit numerous applications to boost their overall successful numbers, despite offering lower median wages to their off-shore employees."

annotation_4 <- "New emerging firms that specialize in allowing applicants to submit multiple applications, to game the system. Nearly all of their candidates have multiple entries."

rm(github, github_username, xtwitter, 
   xtwitter_username, social_caption_1, 
   social_caption_2)

The static plot

Code
# The actual plot
g <- plotdf |> 

  # Keep only companies with more than 100 visa applications
  filter(total >= 100) |> 

  # Start Plot
  ggplot(
    mapping = aes(
      x = perc_multiple,
      y = wage,
      size = total,
      colour = cluster
      )
    ) +
  
  # Label the biggest / important employers
  ggrepel::geom_text_repel(
    data = labels_df,
    mapping = aes(
      label = paste0(
        employer_name,
        "  (", 
        number(total), 
        ")"
      )
    ),
    size = bts / 5,
    family = "caption_font",
    colour = text_col,
    lineheight = 0.3,
    hjust = 0,
    min.segment.length = unit(2, "mm"),
    
    direction = "y",
    force = 20,
    segment.size = 0.1,
    segment.curvature = -0.1,
    force_pull = 1,
    nudge_x = 0.15
  ) +
  
  # The points for each company / applicant firm
  geom_point(
      alpha = 0.3,
      position = position_jitter(
        height = 0.02,
        width = 0
      )
    ) +
  
  # Add Text-Annotations
  annotate(
    geom = "text",
    label = str_wrap(annotation_1, 60),
    lineheight = 0.3,
    family = "caption_font",
    colour = mypal[1],
    x = 0.75, y = 280000,
    hjust = 0.5,
    vjust = 1,
    size = bts / 4
  ) +
  annotate(
    geom = "text",
    label = str_wrap(annotation_2, 60),
    lineheight = 0.3,
    family = "caption_font",
    colour = mypal[2],
    x = 0.005, y = 250000,
    hjust = 0,
    vjust = 0,
    size = bts / 4
  ) +
  annotate(
    geom = "text",
    label = str_wrap(annotation_3,90),
    lineheight = 0.2,
    family = "caption_font",
    colour = mypal[3],
    x = 0.005, y = 57000,
    hjust = 0,
    vjust = 0,
    size = bts / 4
  ) +
  annotate(
    geom = "text",
    label = str_wrap(annotation_4, 60),
    lineheight = 0.3,
    family = "caption_font",
    colour = mypal[4],
    x = 0.75, y = 130000,
    hjust = 0.5,
    vjust = 0,
    size = bts / 4
  ) +
  annotate(
    geom = "text",
    label = "Names of the largest firms are shown\n(Successful visa applications in brackets)",
    lineheight = 0.3,
    family = "caption_font",
    colour = text_col,
    x = 0.005, y = 460000,
    hjust = 0,
    vjust = 1,
    size = bts / 3
  ) +
  
  # Scales and Coordinates
  scale_x_continuous(
    labels = label_percent(),
    expand = expansion(0.01)
  ) +
  scale_y_continuous(
    oob = scales::squish,
    trans = "log10",
    limits = c(50000, 500000),
    breaks = c(5e4, 1e5, 2e5, 3e5, 4e5, 5e5),
    labels = c("$50K", "$100K", "$200K", "$300K", "$400K", "$500K"),
    expand = expansion(0.01)
  ) +
  scale_size_continuous(
    range = c(1, 50),
    breaks = c(100, 500, 1000,
               5000, 10000, 
               12500),
    labels = number(
      c(100, 500, 1000,
               5000, 10000, 
               12500)
    )
  ) +
  scale_colour_manual(
    values = c(mypal, "grey70")
  ) +
  guides(
    colour = "none",
    size = guide_legend(
      direction = "horizontal",
      override.aes = list(
        colour = "grey70"
      ),
      nrow = 1
    )
  ) +
  coord_cartesian(clip = "off") +
  
  # Labels and Themes
  labs(
    title = plot_title,
    subtitle = str_wrap(plot_subtitle, 135),
    caption = plot_caption,
    
    x = "Applicants sponsored by the employer, who had multiple applications",
    y = "Median Annual Salary for applicants sponsored by the employer",
    size = "Number of successful applicants Sponsored by each firm (who won the lottery)"
  ) +
  theme_minimal(
    base_family = "body_font",
    base_size = bts
  ) +
  theme(
    # Overall Plot
    panel.grid.minor = element_line(
      linetype = 3,
      linewidth = 0.3,
      colour = "grey50"
    ),
    panel.grid.major = element_line(
      linetype = 3,
      linewidth = 0.5,
      colour = "grey30"
    ),
    text = element_text(
      margin = margin(0,0,0,0, "mm"),
      colour = text_col,
      lineheight = 0.3
    ),
    plot.title.position = "plot",
    plot.margin = margin(5,15,5,10, "mm"),
    
    # Axes
    axis.line = element_line(
      colour = text_col,
      linewidth = 0.7,
      arrow = arrow(length = unit(8, "mm"))
    ),
    axis.ticks = element_blank(),
    axis.ticks.length = unit(0, "mm"),
    axis.text = element_text(
      margin = margin(0,0,0,0, "mm")
    ),
    axis.title.x = element_text(
      margin = margin(2,0,0,0, "mm")
    ),
    axis.title.y = element_text(
      margin = margin(0,0,0,0, "mm")
    ),
    
    # Labels
    plot.title = element_text(
      family = "title_font",
      margin = margin(10,0,3,0, "mm"),
      size = 2.4 * bts,
      hjust = 0.5,
      colour = text_hil,
      face = "bold"
    ),
    plot.subtitle = element_textbox(
      margin = margin(2,0,4,0, "mm"),
      hjust = 0.5,
      size = 0.9 * bts,
      family = "caption_font"
    ),
    plot.caption = element_textbox(
      family = "caption_font",
      hjust = 0,
      margin = margin(-35,0,-5,-40, "mm"),
      size = 0.6 * bts,
      colour = text_hil,
      lineheight = 0.4
    ),
    
    # Legend
    legend.position = "bottom",
    legend.title.position = "top",
    legend.title = element_text(
      margin = margin(0,0,-2,0, "mm"),
      hjust = 0.5,
      family = "caption_font"
    ),
    legend.text = element_text(
      margin = margin(0,5,0,0, "mm")
    ),
    legend.margin = margin(-20,0,0,0, "mm"),
    legend.key.spacing = unit(0, "mm"),
    legend.box.margin = margin(0,0,0,0, "mm"),
    legend.location = "plot",
    legend.justification = 1
  )

ggsave(
  filename = here::here("data_vizs", "dip_h1b_visas.png"),
  plot = g,
  width = 400,    # Best Twitter Aspect Ratio = 5:4
  height = 500,   
  units = "mm",
  bg = "white"
)

Add a QR code

Code
# QR Code for the plot
url_graphics <- paste0(
  "https://aditya-dahiya.github.io/projects_presentations/projects/",
  # The file name of the current .qmd file
  "dip_h1b_visas",
  ".qmd"
)
# remotes::install_github('coolbutuseless/ggqr')
# library(ggqr)
plot_qr <- ggplot(
  data = NULL, 
  aes(x = 0, y = 0, label = url_graphics)
  ) + 
  ggqr::geom_qr(
    colour = text_hil, 
    fill = bg_col,
    size = 1
    ) +
  # labs(caption = "Full Analysis here") +
  coord_fixed() +
  theme_void() +
  theme(
    plot.background = element_rect(
    fill = "transparent", 
    colour = "transparent"
    )
  )


g_full <- g +
  inset_element(
    p = plot_qr,
    left = 0, right = 0.1,
    top = 0.17, bottom = 0.07,
    align_to = "full",
    clip = FALSE
  ) +
  plot_annotation(
    theme = theme(
      plot.background = element_rect(
        fill = "transparent", colour = "transparent"
      ),
      panel.background = element_rect(
        fill = "transparent", colour = "transparent"
      )
    )
  )

ggsave(
  filename = here::here("data_vizs", "dip_h1b_visas.png"),
  plot = g_full,
  width = 400,    # Best Twitter Aspect Ratio = 5:4
  height = 500,   
  units = "mm",
  bg = "white"
)

Savings the graphics

Code
ggsave(
  filename = here::here("data_vizs", "dip_h1b_visas.png"),
  plot = g,
  width = 210 * 2,    # Best Twitter Aspect Ratio = 5:4
  height = 297 * 2,   
  units = "mm",
  bg = "white"
)


library(magick)
# Saving a thumbnail for the webpage
image_read(here::here("data_vizs", "dip_h1b_visas.png")) |> 
  image_resize(geometry = "400") |> 
  image_write(
    here::here(
      "data_vizs", 
      "thumbnails", 
      "dip_h1b_visas.png"
    )
  )