Brazilian Companies’ capital distribution

A boxplot of capital distribution using {ggplot2}, {paletteer} and {ggtext}

#TidyTuesday
Author

Aditya Dahiya

Published

February 5, 2026

About the Data

This week’s TidyTuesday dataset explores Brazilian companies through the CNPJ (Cadastro Nacional da Pessoa Jurídica) registry, Brazil’s national database of legal entities. The data originates from official records published by the Brazilian Ministry of Finance / Receita Federal on the country’s national open-data portal at dados.gov.br. This large-scale public registry has been cleaned and enriched with lookup tables covering legal nature classifications, owner qualifications, and company size categories, then filtered to focus on firms above a minimum share-capital threshold. The dataset enables analysis of how capital stock concentrates across different legal structures, company sizes, and ownership types in Brazil’s corporate landscape. Special thanks to Marcelo Silva for curating this week’s dataset.

Figure 1: The graph shows capital stock (in Brazilian Real, on a log scale) along the X-axis and the seven legal structures of companies along the Y-axis. The width of each boxplot grows with the number of companies in that category (it is proportional to the square root of the count). Text annotations show the number of companies (to the left of each boxplot) and the median capital stock of each category (labelled at the central line of the boxplot).

How I Made This Graphic

Loading required libraries

Code
# Attach the packages used on this page. pacman::p_load() attaches each
# package, installing it first when it is not already available.
pacman::p_load(
  tidyverse, # All things tidy
  scales, # Nice Scales for ggplot2
  fontawesome, # Icons display in ggplot2
  ggtext, # Markdown text support for ggplot2
  showtext, # Display fonts in ggplot2
  colorspace, # Lighten, darken and otherwise modify colours
  sf, # Spatial Features
  patchwork, # Composing Plots
  packcircles, # For hierarchical circle packing
  magick # Playing with images
)

library(ggdist) # For the raincloud components
library(gghalves) # For half-violin plots (optional alternative)

# Download the TidyTuesday data for 2026, week 4
tuesdata <- tidytuesdayR::tt_load(2026, week = 4)

Visualization Parameters

Code
# Fonts ----
# Each Google font is registered under the family name that the plot
# theme and text layers refer to.

# Font for titles
font_add_google("Saira",
  family = "title_font"
)

# Font for body text (base family of the plot theme)
font_add_google("Saira Condensed",
  family = "body_font"
)

# Font for the caption and the median labels
font_add_google("Saira Extra Condensed",
  family = "caption_font"
)

# Let showtext render text on all graphics devices opened from here on
showtext_auto()

# Colours ----

# A base Colour
bg_col <- "grey95"
seecolor::print_color(bg_col)

# Colour for highlighted text
text_hil <- "grey20"
seecolor::print_color(text_hil)

# Colour for the text
text_col <- "grey10"
seecolor::print_color(text_col)

# Define Base Text Size
# NOTE: this value is re-assigned to 90 in the later code chunks.
bts <- 120

# Caption stuff for the plot ----
# Brand icons come from a local copy of the Font Awesome 6 Brands font.
sysfonts::font_add(
  family = "Font Awesome 6 Brands",
  regular = here::here("docs", "Font Awesome 6 Brands-Regular-400.otf")
)
# Icon code points; the closing ";" of each HTML entity is supplied by
# the glue templates below.
github <- "&#xf09b"
github_username <- "aditya-dahiya"
xtwitter <- "&#xe61b"
xtwitter_username <- "@adityadahiyaias"
social_caption_1 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{github};</span> <span style='color: {text_hil}'>{github_username}  </span>")
social_caption_2 <- glue::glue("<span style='font-family:\"Font Awesome 6 Brands\";'>{xtwitter};</span> <span style='color: {text_hil}'>{xtwitter_username}</span>")
plot_caption <- paste0(
  "**Data:**  CNPJ (Cadastro Nacional da Pessoa Jurídica); dados.gov.br",
  "   |  **Code:** ",
  social_caption_1,
  " |  **Graphics:** ",
  social_caption_2
)
# Only `plot_caption` is needed downstream; drop the building blocks.
rm(
  github, github_username, xtwitter,
  xtwitter_username, social_caption_1,
  social_caption_2
)

# Title and subtitle text for this graphic (previously left as
# placeholders from another project's template).
# NOTE: the plot code passes its own literal strings to labs(); these
# variables mirror that text.
plot_title <- "Capital Stock Distribution\nAcross Legal Structures in Brazil"

plot_subtitle <- paste(
  "Limited Liability Companies dominate in numbers, while Privately Held",
  "Corporations show the highest median capital stock among Brazilian",
  "registered entities"
) |>
  str_wrap(110)

Exploratory Data Analysis and Wrangling

Code
# NOTE: `bts` is assigned again right before the plot is built.
bts <- 90

# Unpack the tables shipped with this week's TidyTuesday data
companies <- tuesdata$companies
legal_nature <- tuesdata$legal_nature
qualifications <- tuesdata$qualifications
size <- tuesdata$size

# One row per company with its capital stock and legal structure.
# The join has no `by`, so it uses whichever columns the two tables share.
# Only the 6 most frequent legal structures are kept; the remaining ones
# are lumped together by fct_lump_n().
df1 <- companies |>
  left_join(legal_nature) |>
  mutate(
    legal_nature = fct(legal_nature),
    legal_nature = fct_lump_n(legal_nature, n = 6)
  ) |>
  select(company_id, capital_stock, legal_nature)

# Per-category summary used for the text labels in the plot.
# NOTE: the column called `mean` actually stores the MEDIAN capital stock;
# the name is kept because the plotting code refers to it.
df2 <- df1 |>
  group_by(legal_nature) |>
  summarise(
    n = n(),
    mean = median(capital_stock, na.rm = TRUE),
    x_text_var = quantile(capital_stock, probs = 0.95, na.rm = TRUE)
  ) |>
  arrange(desc(mean))

# Display order of the categories: descending median, with the 4th row
# moved to the end.
# NOTE(review): presumably row 4 is the lumped "Other" category, but the
# position is hard-coded - confirm against the data.
levels_legal <- df2 |>
  slice(-4) |>
  bind_rows(
    df2 |>
      slice(4)
  ) |>
  pull(legal_nature) |>
  as.character()

# Re-level `legal_nature` to `lvls` and wrap its labels to `wrap_width`
# characters. The wrapping is applied to the factor LEVELS with
# fct_relabel(): calling str_wrap() on the column itself returns a plain
# character vector, which silently discards the level order and makes
# ggplot2 fall back to alphabetical ordering.
order_legal_nature <- function(data, lvls = levels_legal, wrap_width = 15) {
  data |>
    mutate(
      legal_nature = legal_nature |>
        as.character() |>
        fct(levels = lvls) |>
        fct_relabel(\(x) str_wrap(x, wrap_width))
    )
}

# Company-level data for the boxplots
df3 <- order_legal_nature(df1)

# Category-level summary for the text annotations
df4 <- order_legal_nature(df2)

The Plot

Code
# Define Base Text Size for the plot
bts <- 90

# Horizontal boxplots of capital stock (log scale) per legal structure,
# built from the company-level data (df3) and annotated with the
# per-category summary (df4).
g <- df3 |> 
  ggplot(
    aes(
      x = capital_stock,
      y = legal_nature,
      colour = legal_nature,
      fill = legal_nature
    )
  ) +
  # varwidth = TRUE scales each box by the number of companies in it;
  # outlier points are not drawn.
  geom_boxplot(
    width = 1.5,
    linewidth = 0.8,
    alpha = 0.6,
    outliers = FALSE,
    whisker.linewidth = 0.3,
    staplewidth = 0.75,
    varwidth = TRUE,
    notch = TRUE
  ) +
  
  # Number of companies text (right-aligned at a fixed x position)
  geom_text(
    data = df4,
    mapping = aes(
      x = 100000,
      label = scales::number(n, big.mark = ",")
    ),
    size = bts / 3,
    colour = text_col,
    hjust = 1,
    fontface = "bold"
  ) +
  
  # Median capital stock text
  # (the `mean` column of df4 holds the median, computed with median())
  geom_text(
    data = df4,
    mapping = aes(
      x = mean,
      label = scales::number(
        mean,
        prefix = "BRL ",
        scale_cut = cut_short_scale(space = TRUE)
      )
    ),
    size = bts / 2.5,
    hjust = 0.5,
    vjust = -0.7,
    family = "caption_font",
    fontface = "bold",
    colour = text_hil
  ) +
  
  # Curved arrow and annotation for "Number of Companies"
  # (y positions just above 7 sit over the topmost category)
  annotate(
    geom = "curve",
    x = 100000, y = 7.1,
    xend = 100000, yend = 7.5,
    curvature = 0.3,
    arrow = arrow(length = unit(3, "mm"), type = "closed"),
    linewidth = 0.5,
    colour = text_hil
  ) +
  annotate(
    geom = "text",
    x = 100000, y = 7.55,
    label = "Number of\nCompanies",
    size = bts / 3,
    hjust = 1,
    vjust = 0,
    lineheight = 0.3,
    family = "body_font",
    colour = text_hil,
    fontface = "bold"
  ) +
  
  # Curved arrow and annotation for "Median Capital Stock"
  annotate(
    geom = "curve",
    x = 300000, y = 7,
    xend = 4500000, yend = 7.5,
    curvature = 0.8,
    arrow = arrow(length = unit(3, "mm"), type = "closed"),
    linewidth = 0.3,
    colour = text_hil
  ) +
  annotate(
    geom = "text",
    x = 4500000, y = 7.5,
    label = "Median Capital Stock\n(in Brazilian Real)",
    size = bts / 3,
    hjust = 0,
    vjust = 0,
    lineheight = 0.3,
    family = "body_font",
    colour = text_hil,
    fontface = "bold"
  ) +
  
  # Use log scale for capital stock (likely very skewed)
  scale_x_log10(
    labels = scales::label_number(
      scale_cut = cut_short_scale(
        space = TRUE
      )
    ),
    expand = expansion(c(0.05, 0))
  ) +
  
  # Extra space above the top category leaves room for the annotations
  scale_y_discrete(
    expand = expansion(add = c(0.1, 0.5))
  ) +
  # clip = "off" lets text and arrows extend beyond the panel area
  coord_cartesian(clip = "off") +
  
  # Labels
  labs(
    title = "Capital Stock Distribution\nAcross Legal Structures in Brazil",
    subtitle = "Limited Liability Companies dominate in numbers, while Privately Held Corporations\nshow the highest median capital stock among Brazilian registered entities",
    x = "Capital Stock (BRL, log scale)",
    y = NULL,
    caption = plot_caption
  ) +
  
  # Color palette
  paletteer::scale_colour_paletteer_d("MetBrewer::Thomas") +
  paletteer::scale_fill_paletteer_d("MetBrewer::Thomas") +

  theme_minimal(
    base_family = "body_font",
    base_size = bts
  ) +
  theme(
    text = element_text(
      colour = text_hil, 
      margin = margin(0,0,0,0, "mm")
    ),
    legend.position = "none",
    
    # Axis ticks
    axis.ticks.x.bottom = element_line(
      linewidth = 0.3,
      colour = text_hil
    ),
    axis.ticks.y.left = element_blank(),
    axis.ticks.length.x.bottom = unit(4, "mm"),
    axis.ticks.length.y.left = unit(0, "mm"),
    
    # Grid: only dotted vertical major lines are kept
    panel.grid = element_blank(),
    panel.grid.major.x = element_line(
      linetype = 3,
      linewidth = 0.3,
      colour = "grey60"
    ),
    
    # Axis lines
    axis.line.x = element_line(
      arrow = arrow(
        length = unit(5, "mm")
      ),
      linewidth = 0.5,
      colour = text_hil
    ),
    axis.line.y = element_blank(),
    
    # Text elements
    plot.title = element_text(
      margin = margin(10, 0, 5, 0, "mm"),
      hjust = 0.5,
      size = bts * 2,
      face = "bold",
      colour = text_hil,
      lineheight = 0.3
    ),
    plot.subtitle = element_text(
      margin = margin(0, 0, 18, 0, "mm"),
      hjust = 0.5,
      size = bts * 1.2,
      colour = text_hil,
      lineheight = 0.35
    ),
    # element_textbox() (ggtext) renders the markdown/HTML in the caption
    plot.caption = element_textbox(
      hjust = 0.5,
      family = "caption_font",
      size = bts * 0.8,
      colour = text_hil,
      lineheight = 0.4,
      margin = margin(5, 0, 0, 0, "mm")
    ),
    
    # Axis text - reduced margins to minimize white space
    axis.text.x.bottom = element_text(
      margin = margin(-1, 0, 0, 0, "mm"),
      lineheight = 0.3,
      colour = text_col
    ),
    axis.text.y = element_text(
      margin = margin(0, 2, 0, 0, "mm"),
      lineheight = 0.3,
      colour = text_col
    ),
    
    # Axis titles - reduced margins
    axis.title.x = element_text(
      margin = margin(3, 0, 0, 0, "mm"),
      colour = text_hil
    ),
    axis.title.y = element_text(
      margin = margin(0, 3, 0, 0, "mm"),
      colour = text_hil
    ),
    
    # Plot background
    plot.background = element_rect(
      fill = bg_col, 
      colour = NA
    ),
    
    # Plot margins - reduced to minimize white space
    plot.margin = margin(5, 5, 5, 5, "mm"),
    
    plot.title.position = "plot",
    plot.caption.position = "plot"
  )

# Write the finished graphic to data_vizs/ as a 400 mm x 500 mm PNG,
# using the same background colour as the plot itself.
ggsave(
  plot = g,
  filename = here::here("data_vizs", "tidy_brazilian_companies.png"),
  units = "mm",
  width = 400,
  height = 500,
  bg = bg_col
)

Saving the thumbnail for the webpage

Code
# Thumbnail for the webpage ----

library(magick)

# Read the saved graphic, resize it with geometry "x400" and write the
# result under the same file name in data_vizs/thumbnails/.
here::here("data_vizs", "tidy_brazilian_companies.png") |>
  image_read() |>
  image_resize(geometry = "x400") |>
  image_write(
    here::here("data_vizs", "thumbnails", "tidy_brazilian_companies.png")
  )

Session Info

Code
# Packages to attach before the session information is collected
pacman::p_load(
  tidyverse, # All things tidy
  scales, # Nice Scales for ggplot2
  fontawesome, # Icons display in ggplot2
  ggtext, # Markdown text support for ggplot2
  showtext, # Display fonts in ggplot2
  colorspace # Lighten and Darken colours
)

# Interactive, searchable table of the attached packages
sessioninfo::session_info()$packages |>
  as_tibble() |>
  # Keep only the rows whose `attached` flag is TRUE
  dplyr::filter(attached) |>
  dplyr::select(package, loadedversion, date, source) |>
  dplyr::rename(version = loadedversion) |>
  dplyr::arrange(package) |>
  # Title-case the column names for display
  janitor::clean_names(case = "title") |>
  gt::gt() |>
  gt::opt_interactive(use_search = TRUE) |>
  gtExtras::gt_theme_espn()
Table 1: R Packages and their versions used in the creation of this page and graphics

Links