Summer Movies @ IMDb

Using IMDb’s data on the top 100 Summer Movies to display their average viewer ratings

#TidyTuesday
{ggimage}
Author

Aditya Dahiya

Published

July 31, 2024

The dataset for this analysis is derived from the Internet Movie Database (IMDb) and focuses on movies with “summer” in their titles. The data includes a comprehensive list of these films, along with details such as their unique identifiers, titles, release years, runtimes, genres, and IMDb ratings. The information has been made available through the #TidyTuesday project, a weekly data project in the R community, encouraging data exploration and visualization. The specific datasets used in this analysis are summer_movies and summer_movie_genres, containing records of various movies and their associated genres.

Spiraling through the stars: A visual journey of the top 50 summer-titled movies, showcasing their IMDb average ratings. The bar chart, arranged in a polar coordinate spiral, highlights each film’s rating, inviting viewers to explore the cinematic warmth of summer-themed stories.

How I made this graphic

Loading libraries & data

Code
# Data Import and Wrangling Tools
library(tidyverse)            # All things tidy

# Final plot tools
library(scales)               # Nice Scales for ggplot2
library(fontawesome)          # Icons display in ggplot2
library(ggtext)               # Markdown text support for ggplot2
library(showtext)             # Display fonts in ggplot2
library(colorspace)           # Lighten and Darken colours
library(seecolor)             # To print and view colours
library(patchwork)            # Combining plots


# Read the two #TidyTuesday (2024-07-30) tables straight from GitHub:
# the movie/genre records and the movie-level table used for the plots
summer_movie_genres <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-07-30/summer_movie_genres.csv"
)
summer_movies <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-07-30/summer_movies.csv"
)

Data Wrangling

Code
# Tidy the movie table: drop rows without a release year, bin `year` into
# decade labels ("1920s" ... "2020s"), put a space after every comma in
# `genres`, and remove the title columns that are not used later on.
df <- summer_movies |>
  filter(!is.na(year)) |>
  mutate(
    # Left-closed bins: [1920, 1930) -> "1920s", ..., [2020, 2030) -> "2020s"
    decade = cut(
      year,
      breaks = seq(1920, 2030, by = 10),
      labels = paste0(seq(1920, 2020, by = 10), "s"),
      right = FALSE
    ),
    decade = fct_relevel(decade, sort),
    genres = str_replace_all(genres, ",", ", ")
  ) |>
  select(-c(title_type, original_title, simple_title))

# Some Exploratory Data Analysis
# df |> 
#   ggplot(aes(decade, runtime_minutes)) + 
#   geom_boxplot()
# 
# df |> 
#   ggplot(aes(decade, runtime_minutes)) +
#   geom_jitter()
#   
# df |> 
#   ggplot(aes(runtime_minutes, average_rating, size = num_votes)) +
#   geom_jitter()
# 
# df |> 
#   count(decade) |> 
#   ggplot(aes(decade, n)) +
#   geom_col()
# 
# df |> 
#   ggplot(aes(average_rating, num_votes)) +
#   geom_point() +
#   scale_y_log10()
# 


# The 100 movies with the most IMDb votes, numbered by vote rank (`id`).
# `image_var` is the file holding each movie's poster. It is built with
# here::here() so that it points at the same project-root location that
# download_movie_poster() writes to, whatever the working directory is.
plotdf <- df |>
  slice_max(order_by = num_votes, n = 100) |>
  select(-tconst) |>
  mutate(
    id = row_number(),
    image_var = here::here(
      "data_vizs", "temp_summer",
      paste0("temp_image_", id, ".png")
    )
  ) |>
  relocate(id)

# library(magick)
# image_url <- "https://www.userlogos.org/files/logos/jumpordie/imdb-iphone.png"
# imlogo <- image_read(image_url) |> 
#   image_background("transparent") |> 
#   image_crop("220x260+90+40")
# 
# image_write(
#   imlogo, 
#   here::here("data_vizs", "temp_summer", "clogo.png")
#   )

Create a custom function & download movie posters

Code
# Colours for the background, borders and text, all derived from the
# ggthemes "Summer" palette. Each colour is echoed with seecolor so it can
# be checked by eye.
mypal <- paletteer::paletteer_d("ggthemes::Summer")
seecolor::print_color(mypal)

# Background: 6th palette colour, strongly lightened
bg_col <- lighten(mypal[6], 0.9)
seecolor::print_color(bg_col)

# Main text and borders: 5th palette colour, strongly darkened
text_col <- darken(mypal[5], 0.7)
seecolor::print_color(text_col)

# Highlighted text: the same colour, darkened less
text_hil <- darken(mypal[5], 0.4)
seecolor::print_color(text_hil)

##### Getting movie posters for the top movies #########

# Get a custom google search engine and API key
# Tutorial: https://developers.google.com/custom-search/v1/overview
# google_api_key <- "LOAD YOUR GOOGLE API KEY HERE"
# my_cx <- "GET YOUR CUSTOM SEARCH ENGINE ID HERE"


# Load necessary packages
library(httr)
library(magick)

# Define function to download and save movie poster
# Download a poster for one movie and store it as a circular PNG thumbnail.
#
# Runs a Google Custom Search image query for "<movie_name> movie poster",
# takes the first hit, crops it to a circle with a `text_col` border, resizes
# it with geometry "x200" (200 px high) and writes it to
# data_vizs/temp_summer/temp_image_<num_row>.png under the project root.
#
# Arguments:
#   movie_name  Movie title used to build the search query.
#   num_row     Identifier used in the output file name.
#   api_key     Google API key; defaults to the global `google_api_key`.
#   cx          Custom search engine id; defaults to the global `my_cx`.
#
# Returns (invisibly) the path of the file written, or NULL (with a warning)
# when the search yields no image. Stops if the API request itself fails.
download_movie_poster <- function(movie_name, num_row,
                                  api_key = google_api_key,
                                  cx = my_cx) {

  # Let httr assemble and percent-encode the query string, so titles that
  # contain reserved characters such as "&", "#" or "+" cannot corrupt the
  # request
  response <- httr::GET(
    url = "https://www.googleapis.com/customsearch/v1",
    query = list(
      q = paste0(movie_name, " movie poster"),
      cx = cx,
      searchType = "image",
      key = api_key
    )
  )

  # A failed request (bad key, exhausted quota, ...) will not fix itself on
  # the next movie, so fail loudly instead of continuing
  if (httr::http_error(response)) {
    stop(
      "Google Custom Search request failed for '", movie_name,
      "' (HTTP ", httr::status_code(response), ")",
      call. = FALSE
    )
  }

  result <- httr::content(response, "parsed")

  # No hits for this particular title: report it and let the caller carry on
  if (length(result$items) == 0) {
    warning("No poster image found for '", movie_name, "'", call. = FALSE)
    return(invisible(NULL))
  }

  # Use the first image result
  image_url <- result$items[[1]]$link
  im <- magick::image_read(image_url)

  # Crop the image into a circle
  # (Credits: https://github.com/doehm/cropcircles)
  ic <- cropcircles::crop_circle(
    im,
    border_colour = text_col,
    border_size = 0.2
  )

  # The cropped result is read back in with image_read() before resizing
  ic2 <- magick::image_read(ic) |>
    magick::image_resize("x200")

  # Make sure the target folder exists before writing the PNG
  out_dir <- here::here("data_vizs", "temp_summer")
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
  out_path <- file.path(out_dir, paste0("temp_image_", num_row, ".png"))

  magick::image_write(image = ic2, path = out_path, format = "png")
  invisible(out_path)
}

# Fetch a poster for every movie in `plotdf` (the plots reference
# `image_var` for each row they draw, not just the first few rows).
# Rows whose poster file is already on disk are skipped, so an interrupted
# run can be resumed without repeating API searches.
for (i in seq_len(nrow(plotdf))) {

  num_row_i <- plotdf$id[i]
  movie_name_i <- plotdf$primary_title[i]

  # Same location that download_movie_poster() writes to
  poster_file <- here::here(
    "data_vizs", "temp_summer",
    paste0("temp_image_", num_row_i, ".png")
  )
  if (file.exists(poster_file)) {
    next
  }

  download_movie_poster(movie_name_i, num_row_i)
}

Visualization Parameters

Code
# Register the three Google fonts under the family names used by the plots
# (element name = family alias, element value = Google font name)
purrr::iwalk(
  c(
    title_font = "Corinthia",              # Font for titles
    caption_font = "Satisfy",              # Font for the caption
    body_font = "Saira Extra Condensed"    # Font for plot text
  ),
  \(google_name, family_alias) {
    font_add_google(google_name, family = family_alias)
  }
)

# Switch on showtext for the graphics devices opened from here on
showtext_auto()


# Colours for the background, borders and text (ggthemes "Summer" palette);
# every colour is printed with seecolor for a quick visual check
mypal <- paletteer::paletteer_d("ggthemes::Summer")
seecolor::print_color(mypal)

# Background colour: palette entry 6, lightened by 0.9
bg_col <- lighten(mypal[6], 0.9)
seecolor::print_color(bg_col)

# Text / border colour: palette entry 5, darkened by 0.7
text_col <- darken(mypal[5], 0.7)
seecolor::print_color(text_col)

# Highlight colour for text: palette entry 5, darkened by 0.4
text_hil <- darken(mypal[5], 0.4)
seecolor::print_color(text_hil)

# Base text size; the text sizes used in the plots are multiples or
# fractions of this value
bts <- 80

# Register the Font Awesome brands font used for the icons in the caption
sysfonts::font_add(
  family = "Font Awesome 6 Brands",
  regular = here::here("docs", "Font Awesome 6 Brands-Regular-400.otf")
)

# Title and subtitle (line breaks are part of the layout)
plot_title <- "Summer Movies: \nTop 50"

plot_subtitle <- "Names of Movies with\nYear of release &\nAverage IMDb\nRating in\nbrackets"
str_view(plot_subtitle)

# Caption: data credit plus GitHub and X/Twitter handles, each preceded by
# its Font Awesome icon. Built inside local() so the helper strings do not
# stay behind in the workspace.
plot_caption <- local({
  github <- "&#xf09b"
  github_username <- "aditya-dahiya"
  xtwitter <- "&#xe61b"
  xtwitter_username <- "@adityadahiyaias"

  social_caption_1 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{github};</span> <span style='color: {text_hil}'>{github_username}  </span>")
  social_caption_2 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{xtwitter};</span> <span style='color: {text_hil}'>{xtwitter_username}</span>")

  paste0(
    "**Data:** _#TidyTuesday_ & IMDb",
    " |  **Code:** ",
    social_caption_1,
    " |  **Graphics:** ",
    social_caption_2
  )
})

The static plot:

Code
# Other palettes that were tried for the bars:
# mypal2 <- paletteer::paletteer_d("palettesForR::Cranes")
# mypal2 <- paletteer::paletteer_d("palettesForR::Caramel")
# mypal2 <- paletteer::paletteer_d("palettesForR::Bears")

# Start from the large "Named" palette and keep every 8th colour, walking
# backwards from position 440 to position 40 (51 colours). The lengths are
# printed before and after as a check.
mypal2 <- paletteer::paletteer_d("palettesForR::Named")
length(mypal2)
mypal2 <- mypal2[seq.int(440L, 40L, by = -8L)]
length(mypal2)

# Main chart: the 50 best-rated movies in `plotdf`, drawn as bars on a
# half-circle radial layout, with each movie's poster at the tip of its bar
# and a text label (title, year, rating) next to it.
g <- plotdf |>
  # Re-number the movies by rating, so that id 1 is the highest rated;
  # `image_var` still points at the poster file named after the old id
  arrange(desc(average_rating)) |> 
  mutate(id = row_number()) |> 
  slice_head(n = 50) |> 
  ggplot(
    mapping = aes(
      x = id, 
      # Ratings are shifted down by 3, so bars start at a rating of 3;
      # the y scale below relabels the breaks 0:4 as 3:7
      y = average_rating - 3
    )
  ) +
  # One thin bar per movie, coloured by its rank
  geom_col(
    mapping = aes(
      fill = as_factor(id)
    ),
    width = 0.4,
    colour = "transparent"
  ) +
  # Poster image placed at the end of each bar
  ggimage::geom_image(
    mapping = aes(
      y = average_rating - 3,
      image = image_var
    ),
    size = 0.02
  ) +
  # Label "Title (year | rating)", starting slightly beyond the bar end
  # (0.2 from the y mapping plus a further nudge of 0.1)
  geom_text(
    mapping = aes(
      y = average_rating - 2.8,
      label = paste0(primary_title, " (", year, 
                     " | ", average_rating, ")"),
      colour = as_factor(id)
    ),
    vjust = 0.5,
    hjust = 0,
    angle = 0,
    lineheight = 0.3,
    size = bts / 5,
    nudge_y = 0.1,
    family = "body_font",
    check_overlap = TRUE
  ) +
  # Undo the "- 3" shift in the axis labels, on both the primary and the
  # secondary axis
  scale_y_continuous(
    sec.axis = sec_axis(
      transform = ~.,
      breaks = 0:4,
      labels = 3:7
    ),
    breaks = 0:4,
    labels = 3:7
  ) +
  # Text uses a darkened version of the bar colour of the same rank
  scale_colour_manual(values = darken(mypal2, 0.5)) +
  scale_fill_manual(values = mypal2) +
  # Movies are spread over the angle from 0 to pi (half a circle), with a
  # hole in the middle; clipping is off so labels may leave the panel
  coord_radial(
    theta = "x",
    direction = 1,
    start = 0,
    end = pi,
    inner.radius = 0.3,
    expand = FALSE,
    # rotate.angle = TRUE,
    clip = "off"
    ) +
  labs(
    title = plot_title,
    subtitle = plot_subtitle,
    caption = plot_caption,
    y = "Average Rating on IMDb"
    ) +
  theme_minimal(
    base_family = "body_font",
    base_size = bts
  ) +
  theme(
    # No labels along the angular (movie rank) axis
    axis.text.theta = element_blank(),
    axis.title.y = element_text(
      hjust = 0.8, 
      vjust = 0,
      margin = margin(0,0,0,0, "mm"),
      family = "caption_font",
      colour = text_hil
    ),
    axis.text.y = element_text(
      colour = text_col,
      margin = margin(0,0,0,0, "mm")
    ),
    # Bar and label colours encode rank only, so no legend is shown
    legend.position = "none",
    axis.ticks.y = element_line(
      colour = text_col,
      linewidth = 0.25
    ),
    axis.ticks.length = unit(2, "mm"),
    axis.title.x = element_blank(),
    # Remove all grid lines, then restore only the major y grid lines
    panel.grid = element_blank(),
    panel.grid.major.y = element_line(
      linetype = 1,
      linewidth = 0.5,
      colour = darken(bg_col, 0.2)
    ),
    # Transparent backgrounds; the visible background colour is supplied
    # through `bg` when the plot is saved
    plot.background = element_rect(
      fill = "transparent",
      colour = "transparent"
    ),
    panel.background = element_rect(
      fill = "transparent",
      colour = "transparent"
    ),
    # The caption contains HTML/markdown, hence element_textbox; the
    # negative top margin moves it up towards the panel
    plot.caption = element_textbox(
      colour = text_hil,
      family = "body_font",
      hjust = 0.5,
      margin = margin(-100,0,0,0, "mm")
    ),
    # Title and subtitle are right-aligned; their negative bottom margins
    # let them overlap the panel area instead of pushing it down
    plot.title = element_text(
      hjust = 1,
      size = 6 * bts,
      colour = text_hil,
      margin = margin(10,-10,-120,0, "mm"),
      family = "title_font",
      lineheight = 0.25,
      face = "bold"
    ),
    plot.subtitle = element_text(
      hjust = 1,
      size = 1.5 * bts,
      colour = text_hil,
      margin = margin(120,-10,-180,0, "mm"),
      family = "body_font",
      lineheight = 0.25,
      face = "bold"
    ),
    plot.margin = margin(10,10,10,10, "mm"),
    plot.title.position = "plot"
  )

Insets and annotations - ignored while compiling

Code
library(patchwork)

# Inset plot: the logo image `imlogo` with a "Ratings" label below it, on
# an otherwise empty canvas.
# NOTE(review): `imlogo` is only created by the commented-out magick snippet
# in the data-wrangling code, so that snippet has to be run before this one.
g2 <- ggplot() +
  # Logo drawn in the unit square of a canvas that spans -2..2 on both axes
  annotation_custom(
    xmin = 0,
    xmax = 1,
    ymin = 0,
    ymax = 1,
    grob = grid::rasterGrob(imlogo)
  ) +
  # Label centred horizontally on the logo, with its top edge at y = 0.1
  annotate(
    geom = "text",
    label = "Ratings",
    x = 0.5, 
    y = 0.1,
    hjust = 0.5,
    vjust = 1,
    colour = text_hil,
    family = "body_font",
    size = bts / 3,
    fontface = "bold"
  ) +
  # Fixed limits with no expansion, so the positions above are exact
  scale_x_continuous(
    limits = c(-2, 2),
    expand = expansion(0)
  ) +
  scale_y_continuous(
    limits = c(-2, 2),
    expand = expansion(0)
  ) +
  theme_void() +
  theme(
    plot.margin = margin(0,0,0,0, "mm"),
    # NOTE(review): no caption is set on this plot, so this caption margin
    # presumably has no visible effect - confirm before relying on it
    plot.caption = element_text(
      margin = margin(-100,0,0,0, "mm")
    )
  )

# Combine the main chart `g` with the inset `g2` using patchwork.
# Note that the ggsave() call in the saving code writes `g`, not `gfull`.
gfull <- g +
  # Place the inset on top of `g`; the left/right/top/bottom values are
  # given relative to the full plot area (align_to = "full")
  inset_element(
    p = g2,
    left = 0.2,
    right = 0.7,
    top = 0.7, bottom = 0.3,
    clip = FALSE,
    on_top = TRUE,
    align_to = "full") +
  # Keep the backgrounds of the combined plot transparent as well
  plot_annotation(
    theme = theme(
      plot.background = element_rect(
        fill = "transparent",
        colour = "transparent"
      ),
      panel.background = element_rect(
        fill = "transparent",
        colour = "transparent"
      )
    )
  )

Saving the graphics

Code
library(magick)

# Write the full-size plot, then read that file back and store a version
# resized with geometry "400" as the thumbnail for the webpage. The work is
# done inside local() so the path helper does not stay in the workspace.
invisible(local({
  full_png <- here::here("data_vizs", "tidy_summer_movies.png")

  ggsave(
    filename = full_png,
    plot = g,
    width = 400,    # Best Twitter Aspect Ratio = 5:4
    height = 500,
    units = "mm",
    bg = lighten(bg_col, 0.5)
  )

  # Saving a thumbnail for the webpage
  image_read(full_png) |>
    image_resize(geometry = "400") |>
    image_write(here::here("data_vizs", "thumbnails", "tidy_summer_movies.png"))
}))


# Clean up the temporary logo and poster files.
# Paths are built with here::here() so they match the location the files
# were written to, whatever the current working directory is.
unlink(here::here("data_vizs", "temp_summer", "clogo.png"))

# Find the poster files by name pattern rather than assuming exactly
# 100 of them exist
remove_files <- list.files(
  path = here::here("data_vizs", "temp_summer"),
  pattern = "^temp_image_[0-9]+\\.png$",
  full.names = TRUE
)

unlink(remove_files)

Other attempts:

Code
# Alternative layout (not the published chart): every movie in `plotdf`
# placed by release year (x) and average IMDb rating (y), shown as its
# poster with the title printed underneath.
# Note: this overwrites the `g` created for the main chart.
g <- plotdf |>
  ggplot(
    mapping = aes(
      x = year,
      y = average_rating,
      # NOTE(review): both layers below set a fixed `size`, so this mapping
      # (and the "Votes on IMDb" legend title) looks unused - confirm
      size = num_votes
    )
  ) +
  # geom_point() +
  # Poster image at each movie's (year, rating) position
  ggimage::geom_image(
    mapping = aes(
      image = image_var
    ),
    alpha = 0.7,
    size = 0.05
  ) +
  # scale_size(
  #   range = c(0.02, 0.08)
  # ) +
  # Movie title just below the poster; overlapping labels are dropped
  geom_text(
    mapping = aes(
      label = primary_title
    ),
    vjust = 1,
    lineheight = 0.3,
    size = bts / 10,
    nudge_y = -0.1,
    family = "body_font",
    check_overlap = TRUE,
    colour = text_col
  ) +
  # The subtitle text is drawn inside the panel as an annotation instead of
  # being passed to labs()
  annotate(
    geom = "text",
    label = plot_subtitle,
    x = 1946, y = 5,
    hjust = 0, vjust = 1,
    colour = text_col,
    family = "body_font",
    lineheight = 0.4,
    size = bts / 2
  ) +
  scale_y_continuous(
    expand = expansion(0),
    limits = c(3.5, 8.5)
  ) +
  scale_x_continuous(
    expand = expansion(c(0, 0.1)),
    breaks = seq(1950, 2020, 10),
    limits = c(1945, 2024)
  ) +
  # scale_size_continuous(
  #    range = c(0.02, 0.05),
  #   labels = scales::label_number(scale_cut = cut_short_scale())
  # ) +
  # Clipping is off so posters and labels near the edges are not cut
  coord_cartesian(clip = "off") +
  # Labels for the plot
  labs(
    title = plot_title,
    caption = plot_caption,
    x = "Year of Summer Movie Release",
    y = "Average Rating on IMDb",
    size = "Votes on IMDb"
  ) +

  # Theme Elements
  theme_classic(
    base_size = bts,
    base_family = "body_font"
  ) +

  theme(
    legend.position = "bottom",
    panel.grid = element_blank(),
    axis.text = element_text(
      colour = text_col,
      margin = margin(0,0,0,0, "mm"),
      size = 1.2 * bts
    ),
    axis.title = element_text(
      colour = text_col,
      margin = margin(0,0,0,0, "mm"),
      face = "bold"
    ),
    # Axis lines end in arrow heads; tick marks are removed entirely
    axis.line = element_line(
      arrow = arrow(length = unit(4, "mm")),
      colour = text_col,
      linewidth = 0.75
    ),
    axis.ticks = element_blank(),
    axis.ticks.length = unit(0, "mm"),
    # Transparent backgrounds, as in the main chart
    plot.background = element_rect(
      fill = "transparent",
      colour = "transparent"
    ),
    panel.background = element_rect(
      fill = "transparent",
      colour = "transparent"
    ),
    # The caption contains HTML/markdown, hence element_textbox
    plot.caption = element_textbox(
      colour = text_hil,
      family = "caption_font",
      hjust = 0.5,
      margin = margin(0,0,0,0, "mm")
    ),
    plot.title = element_text(
      hjust = 0.5,
      size = 5.1 * bts,
      colour = text_hil,
      margin = margin(10,0,0,0, "mm"),
      family = "title_font"
    ),
    plot.subtitle = element_text(
      colour = text_hil,
      size = 1.2 * bts,
      hjust = 0.5,
      lineheight = 0.3,
      margin = margin(5,0,3,0, "mm"),
      family = "title_font"
    ),
    plot.margin = margin(10,10,10,10, "mm"),
    plot.title.position = "plot",
    legend.background = element_rect(
      fill = "transparent",
      colour = "transparent"
    ),
    legend.text = element_text(
      colour = text_col,
      margin = margin(0,0,0,0, "mm")
    ),
    legend.title = element_text(
      colour = text_col,
      margin = margin(0,10,0,0, "mm")
    ),
    legend.margin = margin(-10,0,10,0, "mm")
  )