Games of high-rated chess players last longer!

Exploring Chess Game Dataset on Lichess.org

#TidyTuesday
Author

Aditya Dahiya

Published

October 1, 2024

Figure 1: A scatter plot on player-ratings shows that the average number of moves per game are higher (red colour) for high-rated players, and lower (green-colour) for low-rated players.

How I made this graphic?

Loading libraries & data

Code
# Data Import and Wrangling Tools
library(tidyverse)            # All things tidy

# Final plot tools
library(scales)               # Nice Scales for ggplot2
library(fontawesome)          # Icons display in ggplot2
library(ggtext)               # Markdown text support for ggplot2
library(showtext)             # Display fonts in ggplot2
library(colorspace)           # Lighten and Darken colours
library(seecolor)             # To print and view colours
library(patchwork)            # Combining plots
library(waffle)               # Waffle Charts in R

# Getting the data
chess <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-10-01/chess.csv')

Data Wrangling and EDA

Code
# object.size(chess) / 1e6
# 
# chess |> 
#   mutate(duration = end_time - start_time) |>
#   ggplot(aes(duration)) +
#   geom_histogram(bins = 500) +
#   scale_x_log10()
# 
# most_common_openings <- chess |> 
#   count(opening_name, sort = T) |> 
#   slice_max(order_by = n, n = 10) |> 
#   # filter(n > 100) |> 
#   pull(opening_name)
# 
# chess |> 
#   filter(opening_name %in% most_common_openings) |> 
#   group_by(opening_name) |> 
#   count(victory_status) |> 
#   ggplot(
#     mapping = aes(
#       x = n,
#       y = opening_name,
#       fill = victory_status
#     )
#   ) +
#   geom_col(
#     position = "fill"
#   )
# 
# 
# chess |> 
#   filter(opening_name %in% most_common_openings) |> 
#   ggplot(
#     aes(
#       x = turns,
#       colour = opening_name
#     )
#   ) +
#   geom_density()
# 
# chess |> 
#   filter(opening_name %in% most_common_openings) |> 
#   filter(winner %in% c("black", "white")) |> 
#   mutate(
#     rating_diff = case_when(
#       winner == "black" ~ black_rating - white_rating,
#       winner == "white" ~ white_rating - black_rating,
#       .default = NA
#     )
#   ) |> 
#   ggplot(
#     mapping = aes(
#       x = rating_diff,
#       colour = opening_name
#     )
#   ) +
#   geom_density()
#   
# 
# chess |> 
#   # filter(opening_name %in% most_common_openings) |> 
#   filter(winner %in% c("black", "white")) |> 
#   mutate(
#     rating_diff = case_when(
#       winner == "black" ~ black_rating - white_rating,
#       winner == "white" ~ white_rating - black_rating,
#       .default = NA
#     )
#   ) |> 
#   group_by(opening_name) |> 
#   summarise(
#     rating_diff = mean(rating_diff > 0),
#      n = n()
#   ) |> 
#   filter(n > 50) |> 
#   arrange(rating_diff)
# 
# chess |> 
#   pull(turns) |> 
#   summary()



df1 <- chess |> 
  filter(rated == TRUE) |> 
  mutate(
    winner_rating = case_when(
      winner == "black" ~ black_rating,
      winner == "white" ~ white_rating,
      .default = NA
    ),
    loser_rating = case_when(
      winner == "black" ~ white_rating,
      winner == "white" ~ black_rating,
      .default = NA
    )
  )

df2 <- df1 |> 
  mutate(
    quadrant = case_when(
      (winner_rating > 1500 & loser_rating < 1500) ~ "a",
      (winner_rating > 1500 & loser_rating > 1500) ~ "b",
      (winner_rating < 1500 & loser_rating > 1500) ~ "c",
      (winner_rating < 1500 & loser_rating < 1500) ~ "d",
      .default = NA
    )
  ) |> 
  group_by(quadrant) |> 
  summarise(
    total_games = n(),
    avg_turns = round(mean(turns, na.rm = T), 1)
  ) |> 
  drop_na() |> 
  mutate(
    percentage_games = round(100 * total_games / sum(total_games), 
                             2),
    x_var = c(1450, 1550, 1550, 1450),
    y_var = c(2520, 2520, 770, 770),
    hjust_var = c(1, 0, 0, 1)
  )

Visualization Parameters

Code
# Font for titles
font_add_google("Nova Cut",
  family = "title_font"
) 

# Font for the caption
font_add_google("Barlow Condensed",
  family = "caption_font"
) 

# Font for plot text
font_add_google("Headland One",
  family = "body_font"
) 

showtext_auto()

bg_col <- "white"

text_col <- "grey25"
text_hil <- "grey25"

bts <- 90

# Caption stuff for the plot
sysfonts::font_add(
  family = "Font Awesome 6 Brands",
  regular = here::here("docs", "Font Awesome 6 Brands-Regular-400.otf")
)
github <- "&#xf09b"
github_username <- "aditya-dahiya"
xtwitter <- "&#xe61b"
xtwitter_username <- "@adityadahiyaias"
social_caption_1 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{github};</span> <span style='color: {text_hil}'>{github_username}  </span>")
social_caption_2 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{xtwitter};</span> <span style='color: {text_hil}'>{xtwitter_username}</span>")

plot_title <- "Chess Games' moves !" 

plot_subtitle <- "Games of High-Rated players last longer,\non average, by atleast 10 moves."

plot_caption <- paste0(
  "**Data:** Kaggle, lichess.org & Mitchell J.", 
  " |  **Code:** ", 
  social_caption_1, 
  " |  **Graphics:** ", 
  social_caption_2
  )

rm(github, github_username, xtwitter, 
   xtwitter_username, social_caption_1, 
   social_caption_2)

The static plot

Code
g <- df1 |> 
  ggplot(
    mapping = aes(
      x = loser_rating, 
      y = winner_rating,
      colour = turns
    )
  ) + 
  geom_hline(
    yintercept = 1500,
    linetype = "longdash",
    linewidth = 1
  ) +
  geom_vline(
    xintercept = 1500,
    linetype = "longdash",
    linewidth = 1
  ) +
  # geom_abline(
  #   slope = 1,
  #   intercept = 0,
  #   linetype = "longdash"
  # ) +
  geom_jitter(
    alpha = 0.4,
    size = 3
  ) +
  geom_text(
    data = df2,
    mapping = aes(
      x = x_var,
      y = y_var,
      label = paste0(
        "Average moves: ",
        avg_turns, "\n",
        percentage_games,
        "% of total games"
      ),
      hjust = hjust_var
    ),
    lineheight = 0.3,
    vjust = 0,
    colour = text_col,
    family = "title_font",
    size = bts / 3,
    fontface = "bold"
  ) +
  # Scales and Coordinates
  scale_x_continuous(
    limits = c(750, 2600),
    expand = expansion(0)
  ) +
  scale_y_continuous(
    limits = c(750, 2600),
    expand = expansion(0)
  ) +
  paletteer::scale_colour_paletteer_c(
    "grDevices::Temps",
    limits = c(30, 80),
    oob = squish,
    breaks = seq(40, 70, 10)
  ) +
  coord_fixed(ratio = 1, clip = "off") +
  
  # Themes and Labels
  labs(
    x = "Loser's Rating",
    y = "Winner's Rating",
    colour = "Number of turns in the Chess Game",
    title = plot_title,
    subtitle = plot_subtitle,
    caption = plot_caption
  ) +
  theme_minimal(
    base_family = "title_font",
    base_size = bts
  ) +
  theme(
    legend.position = "bottom",
    legend.title.position = "top",
    legend.key.height = unit(10, "mm"),
    legend.key.width = unit(50, "mm"),
    axis.text = element_text(
      margin = margin(0,0,0,0, "mm")
    ),
    axis.title = element_text(
      margin = margin(-2,0,-2,0, "mm")
    ),
    axis.ticks = element_blank(),
    axis.ticks.length = unit(0, "mm"),
    legend.title = element_text(
      colour = text_col,
      margin = margin(0,0,2,0, "mm"),
      hjust = 0.5
    ),
    legend.text = element_text(
      margin = margin(2,0,10,0, "mm")
    ),
    legend.margin = margin(-20,0,0,0, "mm"),
    legend.justification = c(0,0),
    
    plot.margin = margin(10,-25,10,-35, "mm"),
    plot.title.position = "plot",
    
    panel.grid = element_line(
      colour = "grey80",
      linewidth = 0.5
    ),
    text = element_text(
      colour = text_col,
      lineheight = 0.35
    ),
    plot.title = element_text(
      family = "title_font",
      margin = margin(0,0,2,0, "mm"),
      hjust = 0,
      size = 3 * bts,
      face = "bold"
    ),
    plot.subtitle = element_text(
      hjust = 0,
      margin = margin(0,0,20,0, "mm"),
      size = 1.2 * bts
    ),
    plot.caption = element_textbox(
      family = "caption_font",
      margin = margin(5,0,0,0, "mm"),
      hjust = 0
    )
  )

Savings the graphics

Code
ggsave(
  filename = here::here("data_vizs", "tidy_li_chess.png"),
  plot = g,
  width = 400,    # Best Twitter Aspect Ratio = 5:4
  height = 500,   
  units = "mm",
  bg = "white"
)

library(magick)
# Saving a thumbnail for the webpage
image_read(here::here("data_vizs", "tidy_li_chess.png")) |> 
  image_resize(geometry = "400") |> 
  image_write(
    here::here(
      "data_vizs", 
      "thumbnails", 
      "tidy_li_chess.png"
    )
  )