In this session, we will talk about how to create a map using the R package ggplot2.
In order to create a map, we will need the following two key functions:
map_data() from the package ggplot2 (it requires the package maps to be installed, because that is where the map data come from) to access the map data related to longitude and latitude.
geom_polygon() from the package ggplot2 to create a map.
Let’s take a look at the usage of the function map_data().
map_data(map, region = ".", exact = FALSE, ...)
Arguments:
map: name of map provided by the package. These include county, france, italy, nz, state, usa, world, world2
region: name of subregions. “.” means all subregions are included.
exact: the region is treated as a regular expression (FALSE) or as a fixed string (TRUE).
Note: The package viridis provides color maps designed to improve graph readability for readers with common forms of color blindness and/or color vision deficiency.
Let’s create a simple world map.
# Fetch the outlines of the "world" map as a data frame of polygon vertices.
map <- map_data(map = "world")

# Draw every polygon in light blue with white borders. `group` keeps the
# vertices of separate polygons from being connected to each other, and
# theme_void() removes axes, grid lines and background.
ggplot(data = map, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "lightblue", color = "white") +
  theme_void()
# Countries to display; the names are matched against the regions of the
# "world" map.
North_Asia <- c(
  "China", "Japan", "Mongolia",
  "North Korea", "South Korea", "Taiwan"
)
North_Asia_map <- map_data(map = "world", region = North_Asia)

# One label position per country: the mean longitude and latitude of its
# polygon vertices, used here as a simple stand-in for the centroid.
region.data <- summarise(
  group_by(North_Asia_map, region),
  long = mean(long),
  lat = mean(lat)
)

# Filled country polygons with the country name printed at each label
# position; the legend is hidden because the labels already name the regions.
ggplot(data = North_Asia_map, mapping = aes(x = long, y = lat)) +
  geom_polygon(mapping = aes(group = group, fill = region)) +
  geom_text(
    mapping = aes(label = region),
    data = region.data,
    size = 5,
    hjust = 0.5,
    colour = "#808080",
    fontface = "bold"
  ) +
  scale_fill_viridis_d() +
  theme_void() +
  theme(legend.position = "none")
In many situations, we want to create a map that displays some statistics. Here we use the Alcohol Consumption by Country data set as an example.
# Read the alcohol-consumption table from the local Data/ folder.
drinks <- read_csv("Data/drinks.csv")

# Attach the world-map polygon vertices to every country row. `country` in
# the drinks table is matched against `region` in the map data.
# NOTE(review): this is a left join from `drinks`, so a country only gets
# polygons when its name is spelled the same in both tables -- confirm the
# names line up for the countries you care about.
drinks_map <- left_join(drinks, map, by = c("country" = "region"))

# Choropleth: shade each country by its total_litres_of_pure_alcohol value.
ggplot(
  data = drinks_map,
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(
    mapping = aes(fill = total_litres_of_pure_alcohol),
    color = "white"
  ) +
  scale_fill_viridis_c(option = "D") +
  labs(fill = "Total Litres of Pure Alcohol") +
  theme_void() +
  theme(legend.position = "bottom")
Or we can use the geom_map function to achieve the same result.
# Outlines of the US states provided by the "state" map; `region` holds the
# lower-case state names.
US_map <- map_data("state")

# Compute the centroid as the mean longitude and latitude
state_data <- US_map %>%
  filter(region != "district of columbia") %>%
  group_by(region) %>%
  summarise(long = mean(long), lat = mean(lat)) %>%
  arrange(region)

# Look each abbreviation up by state name instead of assigning by position.
# The positional form `state.abb[-c(2, 11)]` only works while the rows happen
# to be in exactly the same order as `state.name` with Alaska and Hawaii
# removed; a name lookup stays correct whatever the row order or row count.
state_data$region.abb <- state.abb[
  match(state_data$region, tolower(state.name))
]

# Static map: one fill colour per state with the abbreviation as a label.
p <- ggplot(US_map, aes(x = long, y = lat)) +
  geom_polygon(aes(group = group, fill = region), colour = "white") +
  geom_text(
    aes(label = region.abb),
    data = state_data, fontface = "bold"
  ) +
  theme_void() +
  theme(legend.position = "none")
p
Here we improve the quality of the map from the previous page.
# str_to_title() is vectorised, so it can be applied to the whole column
# directly; no sapply()/unname() wrapper is needed.
US_map$region <- str_to_title(US_map$region)

# Same map as before, plus a `text` aesthetic that ggplotly() shows as the
# hover tooltip. ggplot2 itself does not know `text` and warns that it is
# ignored; it is only consumed by ggplotly().
g <- ggplot(US_map, aes(x = long, y = lat)) +
  geom_polygon(aes(group = group, fill = region), colour = "white") +
  geom_text(
    aes(
      label = region.abb,
      # The state names in `state_data` are still lower-case, so title-case
      # them here; otherwise the tooltip would not show the capitalised name.
      text = paste0(
        str_to_title(region), "-\n",
        "longitude:", long, "\n",
        "latitude:", lat
      )
    ),
    data = state_data, fontface = "bold"
  ) +
  theme_void() +
  theme(legend.position = "none")

# Convert to an interactive plotly widget that shows only the custom text.
ggplotly(g, tooltip = "text")
Here we use the data USArrests to include statistics on a map.
library(usmap)
# State polygons from the usmap package, without the District of Columbia.
US_map <- filter(us_map("state"), full != "District of Columbia")

# USArrests keeps the state names as row names; copy them into a `region`
# column so they can serve as the join key.
crimes <- data.frame(region = rownames(USArrests), USArrests)

# Attach the polygon vertices to each state's arrest statistics and store
# the polygon identifier `group` as a number.
crimes_map <- crimes %>%
  left_join(US_map, by = c("region" = "full")) %>%
  mutate(group = as.numeric(group))

# Label position per state: mean x and y of its polygon vertices.
region_label <- summarise(
  group_by(crimes_map, abbr),
  x = mean(x),
  y = mean(y)
)

# Choropleth of the Murder column; the `text` aesthetic is the hover tooltip
# picked up by ggplotly().
g1 <- ggplot(data = crimes_map, mapping = aes(x = x, y = y)) +
  geom_polygon(
    mapping = aes(
      group = group,
      fill = Murder,
      text = paste0(
        region, ":\n",
        Murder, " murder arrests per 100,000"
      )
    ),
    colour = "white"
  ) +
  geom_text(
    mapping = aes(label = abbr),
    data = region_label,
    fontface = "bold"
  ) +
  scale_fill_viridis_c(option = "B") +
  theme_void()

ggplotly(g1, tooltip = "text")
Now we improve the quality of the map on the previous page. We can set font and label styles. The dimension of the map could be decided by the dimension of the static map.
We will use the COVID19 data to demonstrate the creation of a county-level map.
# Read the COVID-19 table from the local Data/ folder.
covid <- read_csv("Data/COVID19.csv")

# Keep the Ohio rows dated 2021-12-31 and give the level-3 administrative
# area column the shorter name `county`.
ohio <- covid %>%
  filter(
    administrative_area_level_2 == "Ohio",
    date == "2021-12-31"
  ) %>%
  rename(county = administrative_area_level_3)

# County outlines for Ohio; the county names are title-cased so they can be
# matched against the `county` column of the COVID-19 data.
ohio_county <- map_data("county", region = "ohio") %>%
  mutate(subregion = str_to_title(subregion))

# Attach the polygon vertices to each county's statistics.
ohio_map <- left_join(ohio, ohio_county, by = c("county" = "subregion"))

# Choropleth of deaths per county. The county names are drawn at the
# longitude/latitude columns of `ohio`, and the `text` aesthetic is the
# hover tooltip used by ggplotly().
g2 <- ggplot(data = ohio_map, mapping = aes(x = long, y = lat)) +
  geom_polygon(
    mapping = aes(
      group = group,
      fill = deaths,
      text = paste0(
        "County: ", county, "\n",
        "Total Deaths: ", deaths
      )
    )
  ) +
  geom_text(
    mapping = aes(x = longitude, y = latitude, label = county),
    data = ohio,
    color = "white",
    fontface = "bold"
  ) +
  scale_fill_viridis_c(option = "H") +
  theme_minimal() +
  # Blank out every axis element, the major grid and the panel background.
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.background = element_blank()
  )

# Interactive version with a fixed widget size.
# NOTE(review): `label` and `font` are not defined anywhere in this section;
# presumably they are style lists created in an earlier chunk -- confirm they
# exist before this chunk runs.
ggplotly(g2, tooltip = "text", width = 850, height = 800) %>%
  style(hoverlabel = label) %>%
  layout(font = font)
# The ten rows of `drinks` with the highest wine_servings.
wine_top10 <- drinks %>%
  arrange(desc(wine_servings)) %>%
  head(10)

# Keep only the map rows of those ten countries. The join key is given
# explicitly: without `by`, semi_join() silently joins on every column the
# two tables share (including the floating-point ones) and prints a message.
mydata <- semi_join(drinks_map, wine_top10, by = "country")

# One bubble per country, placed at the mean of its polygon vertices.
mydata_map <- mydata %>%
  group_by(country) %>%
  summarise(
    long = mean(long),
    lat = mean(lat),
    wine = mean(wine_servings)
  )

# Grey background map with one bubble per top-10 country; bubble size,
# colour and transparency all encode the wine value. The `text` aesthetic is
# the hover tooltip used by ggplotly().
pp <- drinks_map %>%
  ggplot() +
  geom_polygon(
    aes(x = long, y = lat, group = group, text = country),
    fill = "grey", alpha = 0.5
  ) +
  geom_point(
    data = mydata_map,
    aes(
      x = long, y = lat, size = wine, color = wine, alpha = wine,
      text = paste0(country, ":", wine)
    )
  ) +
  scale_size_continuous(range = c(1, 15)) + # rescale the size of bubbles
  scale_color_viridis(option = "A") +
  coord_map() + # projects a portion of the earth
  theme_minimal() +
  theme(
    axis.title.x = element_blank(), axis.text.x = element_blank(),
    axis.ticks.x = element_blank(), axis.title.y = element_blank(),
    axis.text.y = element_blank(), axis.ticks.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.background = element_blank()
  ) +
  labs(title = "Top 10 Wine Consumption Countries")

ggplotly(pp, tooltip = "text")
You can utilize the following single character keyboard shortcuts to enable alternate display modes (Xie, Allaire, and Grolemund (2018)):
A: Toggle between showing the current slide and all slides (helpful for printing all pages)
B: Make fonts larger
C: Show table of contents
S: Make fonts smaller