Links between common Words in Monster Movies’ titles
‘Monster’ takes the spotlight as expected, but an eerie ensemble of connections—Loch Ness, Frankenstein, islands, madness, and party—reveals the intriguing, varied themes lurking within monster movie titles.
#TidyTuesday
Network Graph
{ggraph}
Author
Aditya Dahiya
Published
October 29, 2024
This week’s dataset dives into the world of “monster” movies, focusing on films with the word “monster” in their title. The dataset, curated from the Internet Movie Database (IMDb), provides an exciting opportunity to analyze the genre combinations, titles, and trends surrounding monster-themed films. Questions arise: what are the most frequent genre pairings among these “monster” movies? And how do they compare with summer movies or holiday movies? For those interested in the psychology behind the appeal of horror, “Why Do People Like Horror Films? A Statistical Analysis” offers insights into why audiences are drawn to these spine-chilling experiences. This dataset was prepared by Jon Harmon and is accessible through the tidytuesdayR package or directly via GitHub.
How I made this graphic?
Loading required libraries, data import & creating custom functions.
Code
# Data Import and Wrangling Toolslibrary(tidyverse) # All things tidylibrary(janitor) # Cleaning names etc.library(here) # Root Directory Management# Final plot toolslibrary(scales) # Nice Scales for ggplot2library(fontawesome) # Icons display in ggplot2library(ggtext) # Markdown text support for ggplot2library(showtext) # Display fonts in ggplot2library(colorspace) # Lighten and Darken colourslibrary(tidygraph) # Using tidy functions with igraph library(ggraph) # igraph in R# Option 1: Loading data directly from GitHubmonster_movie_genres <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-10-29/monster_movie_genres.csv')monster_movies <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-10-29/monster_movies.csv')
Visualization Parameters
Code
# Font for titlesfont_add_google("Flavors",family ="title_font") # Font for the captionfont_add_google("Saira Extra Condensed",family ="caption_font") # Font for plot textfont_add_google("Oswald",family ="body_font") showtext_auto()# A base Colourbase_col <-"#6c25cf"# Colour for the texttext_col <- colorspace::darken(base_col, 0.7) # Colour for highlighted texttext_hil <- colorspace::darken(base_col, 0.4) link_col <- colorspace::lighten(base_col, 0.6)bg_col <- colorspace::lighten(base_col, 0.9)# Define Base Text Sizebts <-90# Caption stuff for the plotsysfonts::font_add(family ="Font Awesome 6 Brands",regular = here::here("docs", "Font Awesome 6 Brands-Regular-400.otf"))github <-""github_username <-"aditya-dahiya"xtwitter <-""xtwitter_username <-"@adityadahiyaias"social_caption_1 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{github};</span> <span style='color: {text_hil}'>{github_username} </span>")social_caption_2 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{xtwitter};</span> <span style='color: {text_hil}'>{xtwitter_username}</span>")# Add text to plot--------------------------------------------------------------plot_title <-"Monster Movies"plot_subtitle <-"Commonly occuring words in the titles of IMDb movies with the word \"monster\". Width of linkages reflect the times words occur together. Size of the word represents its frequency of occurence."plot_caption <-paste0("**Data:** IMDb Non-Commercial Datasets", " | **Code:** ", social_caption_1, " | **Graphics:** ", social_caption_2 )rm(github, github_username, xtwitter, xtwitter_username, social_caption_1, social_caption_2)
Exploratory Data Analysis and Wrangling
Code
# A tibble of words grouped by the movie title (i.e. a group for each movie)dfnet <- monster_movies |>select(tconst, original_title, simple_title, genres) |>group_by(original_title) |>separate_longer_delim(cols = simple_title,delim =" ") |>rename(word = simple_title) |>anti_join(stop_words) |>mutate(word =if_else(word =="monsters", "monster", word))# Network graph for commonly occurring words, faceted by genre# A tibble of nodesdfnodes <- dfnet |>ungroup() |>count(word, sort =TRUE) |>rename(importance = n) |>filter(word !="")# Titles with 2 or more words filter_title <- dfnet |>group_by(tconst) |>count(sort =TRUE) |>filter(n >1)# Computing the combinations of words within the titles# And, making a tibble of edgesdfedges <- dfnet |>right_join(filter_title) |>group_by(original_title) |>do(data.frame(t(combn(.$word, 2)))) |>count(X1, X2, sort =TRUE) |>rename(from = X1,to = X2,weightage = n,group = original_title) |>ungroup() |>group_by(from, to) |>summarise(weightage =sum(weightage) ) |>filter(from !=""& to !="") |>arrange(desc(weightage)) |>filter(from != to) |>ungroup()# Creating a tidygraph objectmovgraph <-tbl_graph(nodes = dfnodes,edges = dfedges,directed =FALSE,node_key ="word")sel_nodes <- dfedges |>filter(weightage >2) |>ungroup() |>select(from, to) |>pivot_longer(cols =everything()) |>pull(value) |>unique()
# Saving a thumbnaillibrary(magick)# Saving a thumbnail for the webpageimage_read(here::here("data_vizs", "tidy_monster_movies.png")) |>image_resize(geometry ="400") |>image_write( here::here("data_vizs", "thumbnails", "tidy_monster_movies.png" ) )
Session Info
Code
# Data Import and Wrangling Toolslibrary(tidyverse) # All things tidylibrary(janitor) # Cleaning names etc.library(here) # Root Directory Management# Final plot toolslibrary(scales) # Nice Scales for ggplot2library(fontawesome) # Icons display in ggplot2library(ggtext) # Markdown text support for ggplot2library(showtext) # Display fonts in ggplot2library(colorspace) # Lighten and Darken colourslibrary(tidygraph) # Using tidy functions with igraph library(ggraph) # igraph in Rsessioninfo::session_info()$packages |>as_tibble() |>select(package, version = loadedversion, date, source) |>arrange(package) |> janitor::clean_names(case ="title" ) |> gt::gt() |> gt::opt_interactive(use_search =TRUE ) |> gtExtras::gt_theme_espn()