In a new visual essay by The Pudding, they compare Rolling Stone’s “500 Greatest Albums of All Time” lists from 2003, 2012, and 2020. They began the project using a spreadsheet by Chris Eckert and eventually developed their own dataset. Their dataset includes every album in the rankings, along with its name, genre, release year, ranking in 2003/2012/2020, the artist’s name, birth year, gender, and more. Additionally, it lists the voters for each year. [credit to Jason Kottke]
What trends can we identify regarding the characteristics of popular artists and genres at different points in time?
How I made this graphic?
Loading required libraries, data import & creating custom functions
Code
# Data Import and Wrangling Toolslibrary(tidyverse) # All things tidy# library(janitor) # Cleaning names etc.library(geniusr) # Getting Album cover artlibrary(magick) # Image processing# Final plot toolslibrary(scales) # Nice Scales for ggplot2library(fontawesome) # Icons display in ggplot2library(ggtext) # Markdown text support for ggplot2library(showtext) # Display fonts in ggplot2library(colorspace) # Lighten and Darken colourslibrary(patchwork) # Combining plots# library(ggbeeswarm) # For beeswarm plots# library(ggfx) # Outer glow in the map# library(ggthemes) # Themes for ggplot2# Mapping tools# library(rnaturalearth) # Maps of the World # library(sf) # All spatial objects in R# library(geojsonio) # To read geojson files into R# Load Datarolling_stone <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-05-07/rolling_stone.csv')df1 <- rolling_stone |>slice_max(order_by = spotify_popularity, n =100)df2 <- rolling_stone |>slice_max(order_by = weeks_on_billboard, n =100)# The final data frame that I will useplotdf <-inner_join(df1, df2) |>select(-c(sort_name, differential, release_year, type, spotify_url, artist_member_count, artist_birth_year_sum, debut_album_release_year, ave_age_at_top_500, years_between, album_id)) |>mutate(id =row_number()) |>relocate(id, .before =everything())# Remove intermediate steps to reduce clutterrm(df1, df2)# Define your own genius token for API from genius.comgenius_token <-""# Obtain from https://genius.com/api-clients
Exploratory Data Analysis & Data Wrangling
Code
rolling_stonerolling_stone |>count(peak_billboard_position, sort = T)rolling_stone$peak_billboard_position |>range()# rolling_stone |> summarytools::dfSummary() |> summarytools::view()# A scatter plot of rank in each of three years show little to no # correlation with Spotify popularity# Year 2003rolling_stone |>ggplot(aes(x = spotify_popularity, y = rank_2003)) +geom_point() +geom_smooth() +scale_y_reverse()# Year 2012rolling_stone |>ggplot(aes(x = spotify_popularity, y = rank_2012)) +geom_point() +geom_smooth() +scale_y_reverse()# Year 2020rolling_stone |>ggplot(aes(x = spotify_popularity, y = rank_2012)) +geom_point() +geom_smooth() +scale_y_reverse()# No specific insight from release year vs. weeks on billboardrolling_stone |>ggplot(aes(release_year, weeks_on_billboard,size = spotify_popularity)) +geom_point(alpha =0.2)# Correlation between weeks on billboard and spotify popularitydf1 <- rolling_stone |>slice_max(order_by = spotify_popularity, n =100)df2 <- rolling_stone |>slice_max(order_by = weeks_on_billboard, n =100)inner_join(df1, df2) |>ggplot(aes(spotify_popularity, weeks_on_billboard)) +geom_jitter(alpha =0.5) +geom_text(aes(label = album), check_overlap = T) +scale_y_continuous() +scale_x_continuous() # We can choose to work on this correlation and identify the outliers.# Experiment to see if we extract images from spotify urlrolling_stone |>filter(str_detect(album, "Man on the Moon")) |>select(spotify_url)# rolling_stone |>ggplot(aes(spotify_popularity, ave_age_at_top_500)) +geom_point(alpha =0.5)rolling_stone |>ggplot(aes(weeks_on_billboard, ave_age_at_top_500)) +geom_point(alpha =0.5) +scale_x_continuous(transform ="log2")# The final data frame that I will useplotdf <-inner_join(df1, df2) |>select(-c(sort_name, differential, release_year, type, spotify_url, artist_member_count, artist_birth_year_sum, debut_album_release_year, ave_age_at_top_500, years_between, album_id)) |>mutate(id =row_number()) |>relocate(id, .before =everything())
Visualization Parameters
Code
# Font for titlesfont_add_google("Satisfy",family ="title_font") # Font for the captionfont_add_google("Stint Ultra Condensed",family ="caption_font") # Font for plot textfont_add_google("IBM Plex Sans Condensed",family ="body_font") ts <-40showtext_auto()# Background Colourbg_col <-"#eadcf7"# Colour for the texttext_col <- bg_col |>darken(0.7) # Colour for highlighted texttext_hil <- bg_col |>darken(0.6)# Caption stuff for the plotsysfonts::font_add(family ="Font Awesome 6 Brands",regular = here::here("docs", "Font Awesome 6 Brands-Regular-400.otf"))github <-""github_username <-"aditya-dahiya"xtwitter <-""xtwitter_username <-"@adityadahiyaias"social_caption_1 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{github};</span> <span style='color: {text_hil}'>{github_username} </span>")social_caption_2 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{xtwitter};</span> <span style='color: {text_hil}'>{xtwitter_username}</span>")
for (i in1:nrow(plotdf)) { temp1 <- plotdf |>filter(id == i) |>mutate(temp =paste(clean_name, album)) |>pull(temp) |>search_genius(n_results =1,access_token = genius_token ) temp2 <- temp1$content[[1]] im <- magick::image_read(temp2$song_art_image_thumbnail_url)# Technique Credits: https://stackoverflow.com/questions/64597525/r-magick-square-crop-and-circular-mask# get height, width and crop longer side to match shorter side ii <-image_info(im) ii_min <-min(ii$width, ii$height) im1 <-image_crop( im, geometry =paste0(ii_min, "x", ii_min, "+0+0"), repage =TRUE )# create a new image with white background and black circle fig <-image_draw(image_blank(ii_min, ii_min))symbols(ii_min /2, ii_min /2, circles = (ii_min /2) -3, bg ="black", inches =FALSE, add =TRUE)dev.off()# create an image composite using both images im2 <- magick::image_composite(im1, fig, operator ="copyopacity")# set background as whiteimage_write(image = magick::image_background(im2, bg_col),path =paste0("image_temp__", i,".png"),format ="png" )}# Some Album Art didint show up on Spotify. Need to do them Manually# custom_art <-function(i, custom_url) { im <- magick::image_read(custom_url) ii <-image_info(im) ii_min <-min(ii$width, ii$height) im1 <-image_crop( im, geometry =paste0(ii_min, "x", ii_min, "+0+0"), repage =TRUE ) fig <-image_draw(image_blank(ii_min, ii_min))symbols(ii_min /2, ii_min /2, circles = (ii_min /2) -3, bg ="black", inches =FALSE, add =TRUE)dev.off() im2 <- magick::image_composite(im1, fig, operator ="copyopacity")image_write(image = magick::image_background(im2, bg_col),path =paste0("image_temp__", i,".png"),format ="png" )}# Some Custom Album Arts - where I couldn't find nice ones on Spotifycustom_art(5, "https://upload.wikimedia.org/wikipedia/en/a/a0/Blonde_-_Frank_Ocean.jpeg")custom_art(7, "https://upload.wikimedia.org/wikipedia/en/5/51/Kendrick_Lamar_-_Damn.png")custom_art(4, "https://upload.wikimedia.org/wikipedia/en/3/38/When_We_All_Fall_Asleep%2C_Where_Do_We_Go%3F.png")custom_art(14, "https://upload.wikimedia.org/wikipedia/en/a/ae/The_Marshall_Mathers_LP.jpg")custom_art(16, "https://m.media-amazon.com/images/I/61jNfu1D+HL._UF1000,1000_QL80_DpWeblab_.jpg")custom_art(38, "https://m.media-amazon.com/images/I/81Ahe2x9qBL._UF1000,1000_QL80_.jpg")custom_art(23, "https://upload.wikimedia.org/wikipedia/en/4/4b/Green_Day_-_Dookie_cover.jpg")custom_art(13, "https://upload.wikimedia.org/wikipedia/en/e/e7/X100pre.jpg")custom_art(3, "https://upload.wikimedia.org/wikipedia/en/3/35/The_Eminem_Show.jpg")custom_art(10, "https://upload.wikimedia.org/wikipedia/en/2/28/Channel_ORANGE.jpg")
Annotation Text for the Plot
Code
plot_title <-"Billboard rankings vs. Spotify Popularity"plot_caption <-paste0("Data: Rolling Stone Album Rankings", " | **Code:** ", social_caption_1, " | **Graphics:** ", social_caption_2 )label_cols <-c("#0033cc", "#ff3300")plot_subtitle <- glue::glue("Of the top 30 albums at both platforms (2003-2020), some albums are <b style='color:{label_cols[1]}'>more <br>popular on Billboard</b> (stayed in Billboard Top 200 rankings for more weeks), while<br><b style='color:{label_cols[2]}'>others</b> enjoy higher <b style='color:{label_cols[2]}'>popularity on Spotify</b>. <b>Others</b> are equally popular on both.<br>Possible reason? Spotify’s younger users prefer newer albums (Release Year).")y_lab <-"Number of weeks spent on Billboard Top 200"x_lab <-"Album Popularity on Spotify (scale of 1-100)"
inset_text1 <-str_wrap("The dataset used in this analysis comes from a visual essay by The Pudding, comparing Rolling Stone's \"500 Greatest Albums of All Time\" lists from 2003, 2012, and 2020. Originally, the project began with a spreadsheet by Chris Eckert, which the authors expanded upon to develop their own dataset. Their dataset includes every album in the rankings, along with its name, genre, release year, rank in 2003, 2012, and 2020, the artist’s name, birth year, gender, and more. Additionally, it provides information about the voters for each year's list. [h/t Jason Kottke]", width =40, whitespace_only =FALSE)# A QR Code for the infographicurl_graphics <-paste0("https://aditya-dahiya.github.io/projects_presentations/data_vizs/",# The file name of the current .qmd file"tidy_rolling_stone", ".html")# remotes::install_github('coolbutuseless/ggqr')# library(ggqr)plot_qr <-ggplot(data =NULL, aes(x =0, y =0, label = url_graphics) ) + ggqr::geom_qr(colour = text_hil, fill = bg_col,size =1.8 ) +coord_fixed() +theme_void() +theme(plot.background =element_rect(fill =NA, colour =NA ) )library(patchwork)g <- g_base +annotate(geom ="label",x =88.5,y =720,label = inset_text1,family ="caption_font",lineheight =0.3,hjust =0,vjust ="inward",size =18,colour = text_col,fill = bg_col,label.padding =unit(1, "mm"),label.size =0 )g_a4 <- g_base +annotate(geom ="label",x =88.5,y =600,label = inset_text1,family ="caption_font",lineheight =0.3,hjust =0,vjust ="inward",size =18,colour = text_col,fill = bg_col,label.padding =unit(1, "mm"),label.size =0 ) +inset_element(p = plot_qr,left =0.785, right =0.95,bottom =0.85, top =1,align_to ="panel",clip =FALSE ) +plot_annotation(theme =theme(plot.background =element_rect(fill ="transparent",colour ="transparent" ) ) )
Savings the graphics & Removing Temporary Image Files (“Do no harm and leave the world an untouched place”)
Code
ggsave(filename = here::here("data_vizs", "tidy_rolling_stone.png"),plot = g,width =400, # Best Twitter Aspect Ratio = 4:5height =500, units ="mm",bg = bg_col)library(magick)# Saving a thumbnail for the webpageimage_read(here::here("data_vizs", "tidy_rolling_stone.png")) |>image_resize(geometry ="400") |>image_write(here::here("data_vizs", "thumbnails", "tidy_rolling_stone.png"))# Another graphic on A4 Size Page: Infographicggsave(filename = here::here("data_vizs", "a4_tidy_rolling_stone.png"),plot = g_a4,width =210*2, height =297*2, units ="mm",bg = bg_col)remove_files <-paste0("image_temp__", 1:38,".png")unlink(remove_files)