Haryana: Toll Booths, Highways and geocomputation

Computing the toll booths, highway length and toll density for the 22 districts in Haryana, with a visual output using {sf} in R

{sf}
Maps
India
Haryana
Geocomputation
Author

Aditya Dahiya

Published

February 20, 2025

A map of Haryana, with the highways overlaid on top (transparency and line-width scaled to the highway type). The borders of district are in white colour. The toll booths are marked as red dots, and the number of toll booths and highway length per district are computed, and displayed in the inset horizontal bar-chart. The kilometres of highway per toll booth for each district are computed and displayed as length of the bars in the inset chart (in ascending order).

The Full Code and analysis with {osmextract} on other similar projects can be found here.

Explore toll booths and highways from Haryana using {osmextract}

This code chunk demonstrates the process of downloading geospatial data for the state of Haryana in India and performing some basic preprocessing. First, it uses the osmextract package to download points, lines, and polygon data from OpenStreetMap for Haryana using the oe_get() function. Then, the code fetches the district-wise boundary map for Haryana using the geodata::gadm() function to retrieve administrative boundaries, which are converted into simple features (sf) objects using the sf package. Lastly, the overall state boundary map for Haryana is retrieved similarly and cleaned up.

Code
# Load necessary libraries
library(osmextract)
library(sf)

# Download and extract the data
# Points
points_haryana <- oe_get(
  place = "Haryana",
  layer = "points",
  download_directory = "C:/Users/dradi/OneDrive/Desktop"
)
object.size(points_haryana) |> print(units = "Mb")

# Lines
lines_haryana <- oe_get(
  place = "Haryana",
  layer = "lines",
  download_directory = "C:/Users/dradi/OneDrive/Desktop"
)
object.size(lines_haryana) |> print(units = "Mb")
# Polygons
polygons_haryana <- oe_get(
  place = "Haryana",
  layer = "multipolygons",
  download_directory = "C:/Users/dradi/OneDrive/Desktop"
)
object.size(polygons_haryana) |> print(units = "Mb")

# Get Haryana Map from GADM / {geodata} with geodata::gadm()
# District Wise Map: It is not updated (doesn't have the 22nd district)
haryana_districts <- geodata::gadm(
  country = "India",
  level = 2,
  path = tempdir()
) |> 
  st_as_sf() |> 
  janitor::clean_names() |> 
  filter(name_1 == "Haryana") |> 
  rename(district = name_2) |> 
  select(district, geometry)

haryana_districts <- read_sf(
  here::here(
    "data", "haryana_map",
    "HARYANA_DISTRICT_BDY.shp"
  )
) |>
  janitor::clean_names() |> 
  select(district, geometry) |> 
  mutate(
    district = str_replace_all(district, "\\|", "I"),
    district = str_replace_all(district, ">", "A"),
    district = str_to_title(district)
  ) |> 
  st_transform("EPSG:4326")

# Check if names are cleaned
# haryana_districts |> 
#   st_drop_geometry() |> 
#   print(n = Inf)


# Overall Boundary Map
haryana_boundary <- geodata::gadm(
  country = "India",
  level = 1,
  path = tempdir()
) |> 
  st_as_sf() |> 
  janitor::clean_names() |> 
  filter(name_1 == "Haryana") |> 
  select(geometry)

Geocomputation and Data Wrangling

This code processes spatial data of Haryana’s roads and toll booths using the sf package. Roads are categorized by type and filtered using mutate(case_when()), ensuring only relevant highways are retained. The dataset is then clipped to Haryana’s boundary with st_intersection(). A subset of toll booths is manually filtered to remove duplicates.

Code
points_haryana |> 
  names()

points_haryana |>
  st_drop_geometry() |> 
  count(barrier, sort = T)

# Plotting the toll booths in Haryana along with highways

# First, categorizing the highways
# Categorizing Roads by widths and importance
wid0 <- c("motorway_link", "motorway" , "corridor")
wid1 <- c("trunk", "primary", "primary_link", "trunk_link")
wid2 <- c("secondary_link", "secondary")
wid3 <- c("tertiary", "tertiary_link")
wid4 <- c("unclassified", "residential", "service", "road")
wid5 <- c("footway", "track", "cycleway", "pedestrian", "path", 
          "steps", "living_street", "rest_area", "construction",
          "busway", "bridleway", "proposed", "services")
                                                 
     
df1 <- lines_haryana |> 
  # as_tibble() |> 
  filter(!is.na(highway)) |> 
  # select(osm_id, highway, other_tags, geometry) |> 
  mutate(
    width_var = case_when(
      highway %in%  wid0 ~ "wid0",
      highway %in%  wid1 ~ "wid1",
      highway %in%  wid2 ~ "wid2",
      highway %in%  wid3 ~ "wid3",
      highway %in%  wid4 ~ "wid4",
      highway %in%  wid5 ~ "wid5",
      .default = NA
    )
  ) |> 
  filter(!is.na(width_var)) |> 
  st_intersection(haryana_boundary)

# An sf object, of haryana's toll booths
# Manully drop some overlapping / repeated toll points:
# Plotted using ggrepel::geom_text_repel(stat = "sf_coordinates")
id_to_drop <- c(
  62, 49, 67, 79, 81, 86, 88, 57, 68, 110, 111, 142,
  75, 143, 8, 145, 45, 58, 76, 43, 70, 35, 59, 107, 
  118, 119, 22, 47, 38, 95, 153, 105, 90, 91, 92,
  57, 52, 127, 128, 77, 73, 55, 20, 122, 133, 17,
  2, 116, 28, 27, 32, 30, 135, 137, 63, 130, 103,
  140, 100, 102, 117, 47, 36, 123, 25, 126, 101,
  80, 83, 85, 10, 112, 60, 47, 19, 132, 131, 14,
  40, 56, 94, 113, 114, 69, 85, 61, 10, 18, 56, 41, 
  132, 131, 97, 37, 13, 138
)
df2 <- points_haryana |> 
  filter(barrier == "toll_booth") |> 
  st_intersection(haryana_boundary) |> 
  select(osm_id, name, other_tags, geometry) |> 
  mutate(id = row_number()) |> 
  filter(!(id %in% id_to_drop))



# Select only expressways, highways, primary secondary & tertiary roads
df3 <- df1 |> 
  filter(
    highway %in% c(wid0, wid1, wid2, wid3)
  ) |> 
  select(osm_id, highway, name, geometry, width_var)

Computing statistics district-wise

This code determines the district-wise distribution of toll booths and computes the total highway length in each district using sf and dplyr. The st_intersects() function finds which district each toll booth falls into, creating a mapping vector. The total highway length per district is calculated using st_intersection() and st_length(), then converted to kilometers. Finally, the data is combined using left_join(), missing values are handled with replace_na(), and toll density (km per toll) is computed and arranged in descending order.

Code
# Find which district each toll booth lies in using a sparse matrix
hy_toll_dist_vec <- df2 |> 
  st_intersects(haryana_districts) |> 
  as.numeric()

class(hy_toll_dist_vec) 

hy_toll_dist <- df2 |> 
  select(-other_tags) |> 
  st_drop_geometry() |> 
  mutate(
    district = haryana_districts$district[hy_toll_dist_vec]
  ) |> 
  filter(!is.na(district)) |> 
  count(district, sort = T)


# Computing the length of highways in each district
hy_hwy_dist <- df1 |> 
  select(width_var, geometry, highway) |> 
  filter(width_var %in% c("wid0", "wid1", "wid2"))

hy_hwy_dist_vec <- NULL

for (i in 1:nrow(haryana_districts)) {
  hy_hwy_dist_vec[i] <- hy_hwy_dist |> 
    st_intersection(haryana_districts$geometry[i]) |> 
    st_length() |> 
    sum() |> 
    as.numeric() |> 
    magrittr::multiply_by(0.001)
}

# Final result tibble
df4 <- tibble(
  district = haryana_districts$district,
  hwy_total = hy_hwy_dist_vec
) |> 
  left_join(hy_toll_dist) |> 
  replace_na(list(n = 0)) |> 
  mutate(
    km_per_toll = hwy_total / n
  ) |> 
  arrange(km_per_toll)

df4 <- df4 |> 
  mutate(
    district = fct(district, levels = df4$district),
    district = fct_rev(district)
  )

Visualizing

This code visualizes Haryana’s road network, district boundaries, and toll booth distribution using ggplot2 and sf. geom_sf() is used to plot district borders, highways, and toll booths, while highway width is controlled with scale_alpha_manual() and scale_linewidth_manual(). A bar chart displays district-wise highway length per toll booth using geom_col(), with additional annotations for toll count and highway length. The two plots are combined using patchwork.

Code
# Base Size for text
bts <- 250

g1 <- ggplot() +
  
  # Borders of districts
  geom_sf(
    data = haryana_districts,
    colour = "white",
    linewidth = 1.5,
    fill = "grey90"
  ) +
  
  # Overall Boundary of Haryana
  geom_sf(
    data = haryana_boundary,
    linewidth = 1.8,
    colour = "grey30",
    fill = NA
  ) +
  
  
  
  # Road Network
  geom_sf(
    data = df3,
    mapping = aes(
      alpha = width_var,
      linewidth = width_var,
      geometry = geometry
    ),
    colour = "grey20"
  ) +
  scale_alpha_manual(
    values = c(0.8, 0.6, 0.4, 0.25)
  ) +
  scale_linewidth_manual(
    values = c(1.2, 0.9, 0.7, 0.5)
  ) +
  
  # Plotting Toll Booths
  geom_sf(
    data = df2,
    colour = "red",
    size = 10,
    alpha = 0.4
  ) +
  # ggrepel::geom_text_repel(
  #   data = df2,
  #   mapping = aes(label = id, geometry = geometry),
  #   size = 4,
  #   alpha = 0.5,
  #   stat = "sf_coordinates"
  # ) +
  
  labs(
    title = "Haryana: Highways and Toll Booths",
    subtitle = str_wrap("Using data from Open Street Maps, the number of tolls and length of highways within each district are displayed. Palwal and Jhajjar have the least KMs of highway per toll booth, while Bhiwani and Rewari have the most. Interestingly, Kurukshetra and Fatehabad have no toll booths.",
      50),
    caption = plot_caption
  ) +
  
  theme_minimal(
    base_family = "body_font",
    base_size = bts
  ) +
  
  theme(
    legend.position = "none",
    text = element_text(
      colour = "grey30",
      lineheight = 0.3,
      hjust = 0.5
    ),
    plot.title = element_text(
      margin = margin(20,0,0,0, "mm"),
      hjust = 0.5,
      size = bts * 1.2,
      family = "title_font",
      colour = text_hil
    ),
    plot.subtitle = element_text(
      margin = margin(30,0,-120,0, "mm"),
      hjust = 0,
      vjust = 1,
      size = bts * 0.55,
      lineheight = 0.3,
      family = "body_font"
    ),
    plot.caption = element_textbox(
      hjust = 0.5,
      halign = 0.5,
      margin = margin(15,0,15,0, "mm"),
      family = "caption_font",
      size = bts * 0.5
    ),
    panel.grid = element_line(
      colour = "grey80",
      linewidth = 0.3
    ),
    plot.margin = margin(5,5,5,5, "mm"),
    axis.ticks = element_blank(),
    axis.ticks.length = unit(0, "mm"),
    axis.text = element_text(
      size = bts / 3,
      margin = margin(0,0,0,0, "mm")
    )
  )

g2 <- df4 |> 
  ggplot(
    mapping = aes(
      x = km_per_toll,
      y = district
    )
  ) +
  geom_col(
    alpha = 0.3
  ) +
  
  # Number of Toll Booths
  geom_label(
    mapping = aes(
      x = -2,
      label = n,
      fill = n
    ),
    label.padding = unit(0.1, "lines"),
    label.r = unit(0.2, "lines"),
    label.size = 0,
    size = bts / 10
  ) +
  scale_fill_steps(
    low = "white",
    high = "red"
  ) +
  annotate(
    geom = "text",
    x = -5,
    y = 23.6,
    label = "Number\nof tolls",
    lineheight = 0.3,
    family = "caption_font",
    size = bts / 10,
    colour = text_col
  ) +
  
  # Number of KMs of highway
  geom_text(
    mapping = aes(
      x = 75,
      label = paste0(round(hwy_total), " km")
    ),
    family = "caption_font",
    colour = text_col,
    size = bts / 12
  ) +
  annotate(
    geom = "text",
    x = 75,
    y = 23.6,
    label = "Highway\nlength",
    lineheight = 0.3,
    family = "caption_font",
    size = bts / 10,
    colour = text_col
  ) +
  
  # Number of km per toll
  geom_text(
    data = df4 |> filter(km_per_toll < 1e4),
    mapping = aes(
      label = paste0(round(km_per_toll), " km/toll")
    ),
    family = "caption_font",
    nudge_x = 5,
    hjust = 0,
    size = bts / 12
  ) +
  scale_y_discrete(expand = expansion(c(0.05, 0.12))) +
  scale_x_continuous(expand = expansion(c(0.07, 0.1))) +
  coord_cartesian(clip = "on") +
  labs(
    x = "Kilometres of highway per toll booth in the district",
    y = NULL
  ) +
  theme_minimal(
    base_size = bts / 5,
    base_family = "caption_font"
  ) +
  theme(
    panel.grid = element_blank(),
    legend.position = "none",
    axis.line.x = element_line(
      arrow = arrow(length = unit(10, "mm")),
      linewidth = 0.3,
      colour = text_hil
    ),
    axis.ticks.length = unit(0, "pt"),
    text = element_text(
      margin = margin(0,0,0,0, "pt"),
      colour = text_hil
    ),
    axis.text = element_text(
      size = bts / 4,
      margin = margin(0,0,0,0, "pt")
    ),
    axis.title.x = element_text(
      size = bts / 4,
      margin = margin(1,0,0,0, "pt")
    )
  )

library(patchwork)
g <- g1 +
  inset_element(
    p = g2,
    align_to = "panel",
    left = 0, right = 0.55,
    bottom = -0.02, top = 0.48,
    clip = FALSE
  )

ggsave(
  plot = g,
  filename = here::here(
    "geocomputation", "images",
    "osm_packages_6.png"
  ),
  height = 30,
  width = 24,
  units = "in",
  bg = "white"
)

Saving a thumbnail for the webpage

Code
# Saving a thumbnail

library(magick)
# Saving a thumbnail for the webpage
image_read("----path to file here----") |> 
  image_resize(geometry = "x400") |> 
  image_write(
    here::here(
      "data_vizs", 
      "thumbnails", 
      "viz_haryana_tolls.png"
    )
  )

Session Info

Code
# Data Wrangling & Plotting Tools
library(tidyverse)            # All things tidy
library(sf)                   # Simple Features in R

# Plot touch-up tools
library(scales)               # Nice Scales for ggplot2
library(fontawesome)          # Icons display in ggplot2
library(ggtext)               # Markdown text support for ggplot2
library(showtext)             # Display fonts in ggplot2
library(colorspace)           # Lighten and Darken colours
library(patchwork)            # Compiling Plots

sessioninfo::session_info()$packages |> 
  as_tibble() |> 
  select(package, 
         version = loadedversion, 
         date, source) |> 
  arrange(package) |> 
  janitor::clean_names(
    case = "title"
  ) |> 
  gt::gt() |> 
  gt::opt_interactive(
    use_search = TRUE
  ) |> 
  gtExtras::gt_theme_espn()
Table 1: R Packages and their versions used in the creation of this page and graphics

Note: Made in-flight on 20.02.2025