To make a choropleth map, you first need a shapefile or geojson of the polygons that you’re filling in.
You could download and import the shapefile into R yourself, but there’s a package that brings in Census shapefiles for you called Tigris.
This is what the U.S. states look like in Leaflet for R.
# Polygons from the Census shapefile, fetched by the tigris package
# install.packages("tigris")
library(tigris)
# cb is spelled out as TRUE: the shorthand T is an ordinary variable that
# can be reassigned, TRUE cannot
states <- states(cb = TRUE)
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 6%
|
|==== | 7%
|
|===== | 7%
|
|===== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 10%
|
|======= | 11%
|
|======== | 12%
|
|======== | 13%
|
|========= | 13%
|
|========= | 14%
|
|========= | 15%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 16%
|
|=========== | 17%
|
|============ | 18%
|
|============ | 19%
|
|============= | 19%
|
|============= | 20%
|
|============= | 21%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 22%
|
|=============== | 23%
|
|=============== | 24%
|
|================ | 24%
|
|================ | 25%
|
|================= | 25%
|
|================= | 26%
|
|================= | 27%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 29%
|
|=================== | 30%
|
|==================== | 30%
|
|==================== | 31%
|
|==================== | 32%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 33%
|
|====================== | 34%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 36%
|
|======================== | 37%
|
|======================== | 38%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 39%
|
|========================== | 40%
|
|========================== | 41%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 43%
|
|============================ | 44%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 46%
|
|============================== | 47%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 50%
|
|================================= | 51%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 53%
|
|=================================== | 54%
|
|==================================== | 55%
|
|==================================== | 56%
|
|===================================== | 57%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 59%
|
|======================================= | 60%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================= | 64%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 66%
|
|=========================================== | 67%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================= | 69%
|
|============================================= | 70%
|
|============================================== | 70%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 73%
|
|================================================ | 74%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 76%
|
|================================================== | 77%
|
|================================================== | 78%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 79%
|
|==================================================== | 80%
|
|==================================================== | 81%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 82%
|
|====================================================== | 83%
|
|====================================================== | 84%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 85%
|
|======================================================== | 86%
|
|======================================================== | 87%
|
|========================================================= | 87%
|
|========================================================= | 88%
|
|========================================================== | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 93%
|
|============================================================= | 94%
|
|============================================================= | 95%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 96%
|
|=============================================================== | 97%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 100%
# Quick sanity check: draw the raw state polygons on the default basemap,
# with each polygon's NAME as its popup
library(dplyr)
library(leaflet)
leaflet(states) %>%
  addTiles() %>%
  addPolygons(popup = ~NAME)
This is how it looks raw. The Census shape files also include territories.
When mapping, we’ll have to remember to exclude them if they show up.
# Read the Starbucks data, keeping text columns as character vectors
starbucks <- read.csv("data/starbucks.csv", stringsAsFactors = FALSE)

# Use dplyr to count the rows for each Province, then clean up the
# data frame for joining: add a type label and rename Province to state
sb_state <- starbucks %>%
  group_by(Province) %>%
  summarize(total = n()) %>%
  mutate(type = "Starbucks") %>%
  rename(state = Province)
# Attach the per-state counts to the polygons with the tigris function
# geo_join, matching the shapefile's STUSPS column to sb_state's state column
states_merged_sb <- geo_join(
  states,
  sb_state,
  "STUSPS",
  "state"
)

# Green colour ramp covering the range of values in the total column
pal <- colorNumeric("Greens", domain = states_merged_sb$total)

# Keep only the polygons that have a count (drop rows where total is NA)
states_merged_sb <- subset(states_merged_sb, !is.na(total))

# Text for the pop-up shown on each polygon
popup_sb <- paste0("Total: ", states_merged_sb$total)
# Draw the choropleth on the CartoDB.Positron tiles, shading each state by
# its total and adding a matching legend
leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(lng = -98.483330, lat = 38.712046, zoom = 4) %>%
  addPolygons(
    data = states_merged_sb,
    fillColor = ~pal(total),
    fillOpacity = 0.7,
    weight = 0.2,
    smoothFactor = 0.2,
    popup = popup_sb
  ) %>%
  addLegend(
    pal = pal,
    values = states_merged_sb$total,
    position = "bottomright",
    title = "Starbucks"
  )
Hmm… Not that interesting, right?
What’s the problem here? You know what’s wrong.
This is essentially a population map.
So we need to adjust for population.
And that’s easy to do using the Census API.
We’ll use the censusapi package created by journalist Hannah Recht.
# install.packages("devtools")
# devtools::install_github("hrecht/censusapi")
library(censusapi)
##
## Attaching package: 'censusapi'
## The following object is masked from 'package:methods':
##
## getFunction
# Pulling in the key.R script that has my Census API key
# (presumably it defines `census_key`, which is used below).
# It will be disabled after this weekend, so get your own:
# https://api.census.gov/data/key_signup.html
source("key.R")

# We won't go over all the functions, but uncomment the lines below to see
# the available variables
# vars2015 <- listCensusMetadata(name = "acs/acs5", vintage = 2015,
#                                type = "variables")
# View(vars2015)

# Alright, getting total population (B01003_001E) by state from the API.
# The ACS 5-year dataset is addressed as "acs/acs5"; the bare "acs5" name
# is a legacy path.
state_pop <- getCensus(
  name = "acs/acs5",
  vintage = 2015,
  key = census_key,
  vars = c("NAME", "B01003_001E"),
  region = "state:*"
)
head(state_pop)
## NAME state B01003_001E
## 1 Alaska 02 733375
## 2 Alabama 01 4830620
## 3 Arkansas 05 2958208
## 4 Arizona 04 6641928
## 5 California 06 38421464
## 6 Colorado 08 5278906
# Cleaning up the column names. Renaming by name rather than by position
# keeps the labels right whatever order getCensus() returns the columns in.
state_pop <- rename(state_pop, state_id = state, population = B01003_001E)
state_pop$state_id <- as.numeric(state_pop$state_id)

# Hm, the Census data identifies states by numeric ID or fully spelled-out
# name, not by abbreviation, while the Starbucks data uses abbreviations.
# Did you know R has its own built-in vectors of state names and state
# abbreviations? Combine them into a lookup table so the two can be joined.
# stringsAsFactors = FALSE keeps both columns as plain character vectors,
# so the join below does not have to coerce a factor.
state_off <- data.frame(
  state = state.abb,
  NAME = state.name,
  stringsAsFactors = FALSE
)

# Joining the state population data frame to the lookup table on NAME
state_pop <- left_join(state_pop, state_off, by = "NAME")
# The lookup table didn't have DC or Puerto Rico, so their abbreviations
# are filled in by hand. The column is converted to character first so the
# assignment also works if it arrived as a factor; %in% never yields NA.
state_pop$state <- as.character(state_pop$state)
state_pop$state[state_pop$NAME %in% "District of Columbia"] <- "DC"
state_pop$state[state_pop$NAME %in% "Puerto Rico"] <- "PR"

# Joining the Starbucks counts to the state population data. The key is
# named explicitly so the join cannot silently pick up other shared columns.
sb_state_pop <- left_join(sb_state, state_pop, by = "state")
# Starbucks stores per 100,000 residents, rounded to 2 digits;
# rows where the rate is NA are then dropped
sb_state_pop <- sb_state_pop %>%
  mutate(per_capita = round(total / population * 100000, 2)) %>%
  filter(!is.na(per_capita))
head(sb_state_pop)
## # A tibble: 6 x 7
## state total type NAME state_id population per_capita
## <chr> <int> <chr> <chr> <dbl> <dbl> <dbl>
## 1 AK 42 Starbucks Alaska 2 733375 5.73
## 2 AL 65 Starbucks Alabama 1 4830620 1.35
## 3 AR 37 Starbucks Arkansas 5 2958208 1.25
## 4 AZ 391 Starbucks Arizona 4 6641928 5.89
## 5 CA 2456 Starbucks California 6 38421464 6.39
## 6 CO 421 Starbucks Colorado 8 5278906 7.98
# Join the per-capita figures to the state polygons (STUSPS <-> state)
states_merged_sb_pc <- geo_join(
  states,
  sb_state_pop,
  "STUSPS",
  "state"
)

# Green colour ramp covering the range of per_capita values
pal_sb <- colorNumeric("Greens", domain = states_merged_sb_pc$per_capita)

# Drop polygons without a per-capita value
states_merged_sb_pc <- subset(states_merged_sb_pc, !is.na(per_capita))

# Pop-up text for each remaining polygon
popup_sb <- paste0("Per capita: ", states_merged_sb_pc$per_capita)

# Choropleth shaded by stores per 100,000 residents
leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(lng = -98.483330, lat = 38.712046, zoom = 4) %>%
  addPolygons(
    data = states_merged_sb_pc,
    fillColor = ~pal_sb(per_capita),
    fillOpacity = 0.9,
    weight = 0.2,
    smoothFactor = 0.2,
    popup = popup_sb
  ) %>%
  addLegend(
    pal = pal_sb,
    values = states_merged_sb_pc$per_capita,
    position = "bottomright",
    title = "Starbucks<br />per 100,000<br/>residents"
  )
The Leaflet for R package was recently updated, adding functionality like highlighting polygons and labels on hover.
These options include highlight and labelOptions. Read more at rstudio.
# Join the per-capita figures to the state polygons (STUSPS <-> state)
states_merged_sb_pc <- geo_join(states, sb_state_pop, "STUSPS", "state")

# Green colour ramp covering the range of per_capita values
pal_sb <- colorNumeric("Greens", domain = states_merged_sb_pc$per_capita)

# Drop polygons without a per-capita value
states_merged_sb_pc <- subset(states_merged_sb_pc, !is.na(per_capita))

# Text shown in the hover label of each polygon
popup_sb <- paste0("Per capita: ", states_merged_sb_pc$per_capita)

# Choropleth with hover highlighting and labels.
# The argument is written out in full as highlightOptions; the shorter
# `highlight` only worked through R's partial argument matching.
leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(lng = -98.483330, lat = 38.712046, zoom = 4) %>%
  addPolygons(
    data = states_merged_sb_pc,
    fillColor = ~pal_sb(per_capita),
    fillOpacity = 0.9,
    weight = 0.2,
    smoothFactor = 0.2,
    highlightOptions = highlightOptions(
      weight = 5,
      color = "#666",
      dashArray = "",
      fillOpacity = 0.7,
      bringToFront = TRUE
    ),
    label = popup_sb,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px",
      direction = "auto"
    )
  ) %>%
  addLegend(
    pal = pal_sb,
    values = states_merged_sb_pc$per_capita,
    position = "bottomright",
    title = "Starbucks<br />per 100,000<br/>residents"
  )