This week’s #TidyTuesday dataset explores the rich dialogue found in the works of William Shakespeare, specifically focusing on Hamlet, Macbeth, and Romeo and Juliet. The dataset, sourced from shakespeare.mit.edu and curated by Nicola Rennie, offers insights into the structure and dynamics of Shakespeare’s plays. The accompanying graphic illustrates the flow of sentiments, from positive to negative, over the course of each play, computed using sentiment analysis (Silge and Robinson 2016). It reveals that while both Hamlet and Romeo and Juliet end on a somber note, Macbeth concludes with a surprising uplift despite its tragic themes.

Figure 1: This graphic depicts the flow of sentiments in Shakespeare’s Hamlet, Macbeth, and Romeo and Juliet, with the y-axis representing sentiment levels ranging from positive to negative and the x-axis indicating the progression of time in the narrative (0% to 100%). The smooth-curved line graph illustrates the overall trend of sentiment change throughout each play, highlighting the emotional arcs that lead Hamlet and Romeo and Juliet to a tragic conclusion, while Macbeth transitions from sadness to an unexpected positive resolution.

How I made this graphic

Loading libraries & data

# Data Import and Wrangling Tools
library(tidyverse)            # All things tidy

# Final plot tools
library(scales)               # Nice Scales for ggplot2
library(fontawesome)          # Icons display in ggplot2
library(ggtext)               # Markdown text support for ggplot2
library(showtext)             # Display fonts in ggplot2
library(colorspace)           # Lighten and Darken colours
library(patchwork)            # Combining plots

library(tidytext)             # Text & Sentiment Analysis

# Option 1: tidytuesdayR package 
# Load the TidyTuesday release for 2024, week 38.
tuesdata <- tidytuesdayR::tt_load(2024, week = 38)

# Pull out the three per-play tables; each one is later passed to
# get_sentiment_graph_data().
hamlet <- tuesdata$hamlet
macbeth <- tuesdata$macbeth
romeo_juliet <- tuesdata$romeo_juliet

Data Wrangling and EDA

# A custom function to convert Roman numerals to numbers
# Lookup table covering the numerals I to X (names are the numerals,
# values are the corresponding numbers).
roman_to_num_map <- c(I = 1, II = 2, III = 3, IV = 4, V = 5, 
                      VI = 6, VII = 7, VIII = 8, IX = 9, X = 10)

# Convert a vector of Roman numerals ("I" to "X") to numbers.
#
# roman_vector: character vector of Roman numerals.
# Returns a double vector of the same length as `roman_vector`; numerals that
# are not in `roman_to_num_map` (and NA inputs) come back as NA.
roman_to_number <- function(roman_vector) {
  # Named-vector indexing is already vectorised, so no element-wise map is
  # needed.
  numbers <- roman_to_num_map[as.character(roman_vector)]
  # Drop the numeral names picked up by the lookup and keep only the names
  # (if any) that the input itself carried.
  names(numbers) <- names(roman_vector)
  numbers
}
# Function to convert numbers to Roman numerals
# Reverse the mapping to create a number-to-Roman map
# (names are the numbers as strings, values are the numerals).
num_to_roman_map <- set_names(names(roman_to_num_map), roman_to_num_map)

# Convert a vector of numbers (1 to 10) to Roman numerals.
#
# num_vector: numeric (or character) vector of numbers.
# Returns a character vector of the same length as `num_vector`; numbers that
# are not in `num_to_roman_map` come back as NA.
number_to_roman <- function(num_vector) {
  # The map is keyed by the number written as a string, hence as.character().
  romans <- num_to_roman_map[as.character(num_vector)]
  # Keep only the names (if any) that the input itself carried.
  names(romans) <- names(num_vector)
  romans
}

# Custom function to calculate the mode
#
# temp: an atomic vector.
# Returns the most frequent value of `temp` as a length-one vector of the same
# type. Ties are broken in favour of the smallest value, so the result is
# always a single value and is safe to use inside summarise(). An empty input
# gives a zero-length result.
get_mode <- function(temp) {
  # Distinct values in ascending order (NA, if present, goes last).
  candidates <- sort(unique(temp), na.last = TRUE)
  # How often each distinct value occurs.
  counts <- tabulate(match(temp, candidates), nbins = length(candidates))
  # which.max() returns the first maximum, i.e. the smallest value on a tie.
  candidates[which.max(counts)]
}

# Sentiments under afinn
sentiment_df <- get_sentiments("afinn")

# A function to generate the sentiments timeline for any book
#
# dataset: a data frame with columns `act`, `scene`, `character` and
#   `dialogue`, where `act` / `scene` are strings such as "Act I" / "Scene II".
# Returns one row per time bin with:
#   id         - position in the play, rescaled to [0, 1] and rounded to two
#                decimals
#   sent_value - sum of the sentiment `value`s of the words in that bin
#   act        - the most common act number among the words in that bin
get_sentiment_graph_data <- function(dataset) {
  dataset |> 
    # Strip the "Act " / "Scene " prefixes and turn the numerals into numbers
    mutate(
      act = str_remove(act, "Act "),
      scene = str_remove(scene, "Scene "),
      act = roman_to_number(act),
      scene = roman_to_number(scene)
    ) |> 
    select(-character) |> 
    # One row per word of dialogue
    unnest_tokens(
      output = "word",
      input = dialogue
    ) |> 
    # Keep only words that have a sentiment score; the key is spelled out so
    # the join does not depend on (or message about) guessed columns
    inner_join(sentiment_df, by = "word") |> 
    # Running position of each scored word, rescaled to 0-1 and binned
    mutate(
      id = row_number(),
      id = round(rescale(id), 2)
    ) |> 
    group_by(id) |> 
    summarise(
      sent_value = sum(value, na.rm = TRUE),
      act = get_mode(act)
    )
}

# Composing the final dataframe for the plot
# Run the sentiment pipeline on each play, label the rows with the play's
# title in `novel`, and stack the three results.
plotdf <- bind_rows(
  get_sentiment_graph_data(macbeth) |> 
    mutate(novel = "Macbeth"),
  get_sentiment_graph_data(romeo_juliet) |> 
    mutate(novel = "Romeo and Juliet"),
  get_sentiment_graph_data(hamlet) |> 
    mutate(novel = "Hamlet")
)

# Horizontal extent of each act within each play, used to label the acts:
# xmin / xmax are the first and last time bin of the act, xmed is the midpoint.
plotdf_acts <- plotdf |> 
  group_by(novel, act) |> 
  summarise(
    xmin = min(id),
    xmax = max(id),
    xmed = (xmin + xmax)/2,
    # Return an ungrouped table (and silence the regrouping message)
    .groups = "drop"
  ) |> 
  # Back to Roman numerals for display
  mutate(act = number_to_roman(act))

Visualization Parameters

# Font for titles
font_add_google("Maiden Orange",
  family = "title_font"
)

# Font for the caption
font_add_google("Stint Ultra Condensed",
  family = "caption_font"
)

# Font for plot text
# NOTE(review): the line that opens this call, presumably
# `font_add_google("<font name>",`, and its closing `)` are missing, so the
# name of the body font cannot be recovered from here. Restore both before
# running; "body_font" is used as a family name in the plot code.
# NOTE(review): a `showtext_auto()` call is presumably also expected after the
# fonts are registered. Confirm against the original script.
  family = "body_font"


# Four-colour palette. Entries 1 and 2 colour the positive and negative
# halves of the plot; entries 3 and 4 are reused below.
mypal <- c(
  "#DF6589FF",
  "#603F83FF",
  "#C7D3D4FF",
  "#3C1053FF"
)

# Plot background colour
bg_col <- mypal[[3]]

# Text colour and highlight colour are the same palette entry
text_hil <- mypal[[4]]
text_col <- text_hil

# Base text size from which the other sizes are derived
bts <- 90

# Caption stuff for the plot
# Register the Font Awesome brands font so the icon glyphs used in the caption
# can be rendered.
font_add(
  family = "Font Awesome 6 Brands",
  regular = here::here("docs", "Font Awesome 6 Brands-Regular-400.otf")
)
# HTML entities of the icons, and the matching user names
github <- "&#xf09b"
github_username <- "aditya-dahiya"
xtwitter <- "&#xe61b"
xtwitter_username <- "@adityadahiyaias"
# Icon (in the icon font) followed by the user name (in the highlight colour)
social_caption_1 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{github};</span> <span style='color: {text_hil}'>{github_username}  </span>")
social_caption_2 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{xtwitter};</span> <span style='color: {text_hil}'>{xtwitter_username}</span>")

plot_title <- "Sentiments in Shakespeare’s Plays" 

plot_subtitle <- "The flow of sentiments (positive to negative) across three Shakespearean plays, over the progression of the narrative (0% to 100%). While Hamlet, and, Romeo and Juliet culminate in sorrow, Macbeth transitions from sadness to a positive conclusion."

# Markdown caption: data credit, then the two social handles
plot_caption <- paste0(
  "**Data:** shakespeare.mit.edu & Nicola Rennie", 
  " |  **Code:** ", 
  social_caption_1, 
  " |  **Graphics:** ", 
  social_caption_2
)

# The intermediate pieces are no longer needed once the caption is built
rm(github, github_username, xtwitter, 
   xtwitter_username, social_caption_1, 
   social_caption_2)

The static plot

# Build the sentiment-flow plot from `plotdf` and save it as a PNG.
# NOTE(review): throughout this block the line that opens each call (and some
# lines that close one) is missing, so the block does not parse as written.
# The notes below mark each gap. The suggested call names are inferred from
# the surviving arguments and must be confirmed against the original script.
g <- plotdf |> 
  # NOTE(review): missing call opener here, presumably `ggplot(`.
    mapping = aes(
      x = id,
      y = sent_value
  # NOTE(review): the `)` that closes `aes(` appears to be missing here.
  ) +
  # Y-axis arrows and text
  # NOTE(review): missing opener, presumably `annotate(` (upward arrow).
    geom = "segment",
    x = 0, xend = 0,
    y = 0, yend = 30,
    arrow = arrow(length = unit(3, "mm")),
    linewidth = 0.8,
    colour = mypal[1]
  ) +
  # NOTE(review): missing opener, presumably `annotate(` (downward arrow).
    geom = "segment",
    x = 0, xend = 0,
    y = 0, yend = -30,
    arrow = arrow(length = unit(3, "mm")),
    linewidth = 0.8,
    colour = mypal[2]
  ) +
  # NOTE(review): missing opener, presumably `annotate(` ("Positive" label).
    geom = "text",
    label = "Positive",
    x = -0.01,  y = 15,
    hjust = 0.5,
    angle = 90,
    vjust = 0,
    colour = darken(mypal[1], 0.3),
    family = "body_font",
    size = bts / 5
  ) +
  # NOTE(review): missing opener, presumably `annotate(` ("Negative" label).
    geom = "text",
    label = "Negative",
    x = -0.01,  y = -15,
    hjust = 0.5,
    angle = 90,
    vjust = 0,
    colour = darken(mypal[2], 0.3),
    family = "body_font",
    size = bts / 5
  ) +
  # Background emotion colours annotation &  horizontal line
  # NOTE(review): missing opener, presumably `annotate(` (upper shaded band).
    geom = "rect",
    xmin = 0, xmax = 1,
    ymin = 0, ymax = 30,
    fill = mypal[1],
    colour = "transparent",
    alpha = 0.2
  ) +
  # NOTE(review): missing opener, presumably `annotate(` (lower shaded band).
    geom = "rect",
    xmin = 0, xmax = 1,
    ymin = 0, ymax = -30,
    fill = mypal[2],
    colour = "transparent",
    alpha = 0.2
  ) +
  # Horizontal line
  # NOTE(review): missing opener, presumably `geom_hline(`.
    yintercept = 0,
    linetype = 3,
    colour = "grey10",
    linewidth = 0.5
  ) +
  # Background area map of sentiments
  # NOTE(review): missing opener, presumably `geom_area(`.
    alpha = 0.2
  ) +
  # Smoother Plot
  # NOTE(review): missing opener, presumably `geom_smooth(`.
    span = 0.2,
    se = FALSE,
    colour = "grey10",
    lineend = "round",
    linewidth = 1.25
  ) +
  # Labelling the Acts of each play
  # NOTE(review): missing opener, presumably a horizontal error-bar layer
  # (`geom_errorbar(` or `geom_errorbarh(`). Confirm which one.
    data = plotdf_acts,
    mapping = aes(
      x = xmed,
      xmin = xmin,
      xmax = xmax,
      y = -24
  # NOTE(review): the `),` that closes `aes(` appears to be missing here.
    width = 2
  ) +
  # NOTE(review): missing opener, presumably `geom_text(` ("Act <numeral>").
    data = plotdf_acts,
    mapping = aes(
      x = xmed,
      y = -23,
      label = paste0("Act ", act)
  # NOTE(review): the `),` that closes `aes(` appears to be missing here.
    vjust = 0,
    hjust = 0.5,
    family = "body_font",
    size = bts / 4
  ) +
  # Scales and Coordinates
  # NOTE(review): missing opener, presumably `scale_x_continuous(`.
    expand = expansion(0.005),
    labels = label_percent()
  ) +
  # NOTE(review): missing opener, presumably `scale_y_continuous(`.
    expand = expansion(0)
  ) +
  # NOTE(review): missing opener, presumably `coord_cartesian(`.
    clip = "off",
    ylim = c(-30, 30)
  ) +
  # Faceting across the three books
  # NOTE(review): missing opener and faceting variable, presumably
  # `facet_wrap(~novel,` since `novel` identifies the play in both data frames.
    ncol = 1
  ) +
  # Labels and Themes
  # NOTE(review): missing opener, presumably `labs(`.
    x = "Progression of the narrative (0 % to 100 %)",
    y = NULL,
    title = plot_title,
    subtitle = str_wrap(plot_subtitle, 100),
    caption = plot_caption
  ) +
  # NOTE(review): missing opener, presumably a complete theme such as
  # `theme_minimal(`. Which theme was used cannot be determined from here.
    base_family = "title_font",
    base_size = bts
  ) +
  # NOTE(review): missing opener, presumably `theme(`. Inside it, the `),`
  # that closes each `element_*(` call below also appears to be missing.
    # Overall Plot
    plot.margin = margin(5,15,5,15, "mm"),
    plot.background = element_rect(
      fill = bg_col,
      colour = bg_col
    text = element_text(
      colour = text_col,
      lineheight = 0.3,
      hjust = 0.5
    panel.grid = element_blank(),
    # Axes
    axis.ticks = element_blank(),
    axis.ticks.length = unit(0, "mm"),
    axis.text.y = element_blank(),
    axis.text.x = element_text(
      colour = text_col
    # Strip Text
    strip.background = element_blank(),
    strip.text = element_text(
      hjust = 0.9,
      size = bts * 2,
      margin = margin(0,0,-20,0, "mm"),
      family = "body_font",
      colour = text_col
    # Labels
    plot.title = element_text(
      size = bts * 3,
      hjust = 0.5,
      margin = margin(10,0,5,0, "mm"),
      face = "bold"
    plot.subtitle = element_text(
      hjust = 0.5, 
      margin = margin(0,0,5,0, "mm")
    plot.caption = element_textbox(
      hjust = 0.5,
      size = 0.6 * bts
  # NOTE(review): the `)` closing `element_textbox(` and the `)` closing the
  # theme call appear to be missing here.

  # Write the finished plot `g` to data_vizs/tidy_shakespeare.png.
  # NOTE(review): missing opener, presumably `ggsave(`; its closing `)` is
  # missing as well.
  filename = here::here("data_vizs", "tidy_shakespeare.png"),
  plot = g,
  width = 400,    # Best Twitter Aspect Ratio = 5:4 (here 400 x 500 mm, i.e. 4:5 width:height)
  height = 500,   
  units = "mm",
  bg = "white"

