Mapping with census data

Making choropleths

To make a choropleth map, you first need a shapefile or geojson of the polygons that you’re filling in.

Importing shapefiles from the Census

You could download and import the shapefile into R yourself, but there’s a package that brings in Census shapefiles for you called Tigris.

This is what the U.S. states looks like on in leaflet R.

# Polygon stuff from shape file
# install.packages("tigris")
library(tigris)

states <- states(cb=T)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |=                                                                |   2%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |===                                                              |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |====                                                             |   7%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |======                                                           |  10%
  |                                                                       
  |=======                                                          |  10%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |========                                                         |  13%
  |                                                                       
  |=========                                                        |  13%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |=========                                                        |  15%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  16%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |============                                                     |  19%
  |                                                                       
  |=============                                                    |  19%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |=============                                                    |  21%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |===============                                                  |  24%
  |                                                                       
  |================                                                 |  24%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |=================                                                |  27%
  |                                                                       
  |==================                                               |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  29%
  |                                                                       
  |===================                                              |  30%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |====================                                             |  31%
  |                                                                       
  |====================                                             |  32%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |=====================                                            |  33%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |======================                                           |  34%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |=======================                                          |  36%
  |                                                                       
  |========================                                         |  36%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |========================                                         |  38%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |==========================                                       |  41%
  |                                                                       
  |===========================                                      |  41%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |============================                                     |  44%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |==============================                                   |  47%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |================================                                 |  50%
  |                                                                       
  |=================================                                |  50%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |======================================                           |  59%
  |                                                                       
  |=======================================                          |  59%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |=========================================                        |  64%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  66%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |============================================                     |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |=============================================                    |  70%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |==============================================                   |  71%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |===============================================                  |  73%
  |                                                                       
  |================================================                 |  73%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================                |  76%
  |                                                                       
  |==================================================               |  76%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |==================================================               |  78%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  79%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |====================================================             |  81%
  |                                                                       
  |=====================================================            |  81%
  |                                                                       
  |=====================================================            |  82%
  |                                                                       
  |======================================================           |  82%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |======================================================           |  84%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  85%
  |                                                                       
  |========================================================         |  86%
  |                                                                       
  |========================================================         |  87%
  |                                                                       
  |=========================================================        |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |==========================================================       |  90%
  |                                                                       
  |===========================================================      |  90%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |=============================================================    |  93%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |=============================================================    |  95%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |==============================================================   |  96%
  |                                                                       
  |===============================================================  |  96%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |================================================================ |  98%
  |                                                                       
  |================================================================ |  99%
  |                                                                       
  |=================================================================| 100%
# Let's quickly map that out
library(dplyr)
library(leaflet)
states %>% leaflet() %>% addTiles() %>% addPolygons(popup=~NAME)

This is how it looks raw. The Census shape files also include territories.

When mapping, we’ll have to remember to exclude them if they show up.

Joining data to a shapefile

Let’s make a choropleth map based on number of Starbucks per state

starbucks <- read.csv("data/starbucks.csv", stringsAsFactors=F)

# First, we'll use dplyr to summarize the data
# count by state
sb_state <- starbucks %>%
  group_by(Province) %>%
  summarize(total=n()) %>% 
# Some quick adjustments to the the dataframe to clean up names
  mutate(type = "Starbucks") %>%  
  rename(state=Province)

# Now we use the Tigris function geo_join to bring together 
# the states shapefile and the sb_states dataframe -- STUSPS and state 
# are the two columns they'll be joined by
states_merged_sb <- geo_join(states, sb_state, "STUSPS", "state")

# Creating a color palette based on the number range in the total column
pal <- colorNumeric("Greens", domain=states_merged_sb$total)

# Getting rid of rows with NA values
states_merged_sb <- subset(states_merged_sb, !is.na(total))

# Setting up the pop up text
popup_sb <- paste0("Total: ", as.character(states_merged_sb$total))

# Mapping it with the new tiles CartoDB.Positron
leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(-98.483330, 38.712046, zoom = 4) %>% 
  addPolygons(data = states_merged_sb , 
              fillColor = ~pal(states_merged_sb$total), 
              fillOpacity = 0.7, 
              weight = 0.2, 
              smoothFactor = 0.2, 
              popup = ~popup_sb) %>%
  addLegend(pal = pal, 
            values = states_merged_sb$total, 
            position = "bottomright", 
            title = "Starbucks")

Hmm… Not that interesting, right?

What’s the problem here. You know what’s wrong.

This is essentially a population map.

So we need to adjust for population.

And that’s easy to do using the Census API.

Bringing in Census data via API

We’ll use the censusapi package created by journalist Hannah Recht.

# install.packages("devtools")
# devtools::install_github("hrecht/censusapi")

library(censusapi)
## 
## Attaching package: 'censusapi'
## The following object is masked from 'package:methods':
## 
##     getFunction
# Pulling in the key.R script that has my census api key. 
# It will be disabled after this weekend so get your own
# http://api.census.gov/data/key_signup.html

source("key.R") 

# We won't go over all the functions, but uncomment the lines below to see 
# the available variables 
# vars2015 <- listCensusMetadata(name="acs5", vintage=2015, "v")
# View(vars2015)

# Alright, getting total population by state from the API
state_pop <-  getCensus(name="acs5", 
                        vintage=2015,
                        key=census_key, 
                        vars=c("NAME", "B01003_001E"), 
                        region="state:*")

head(state_pop)
##         NAME state B01003_001E
## 1     Alaska    02      733375
## 2    Alabama    01     4830620
## 3   Arkansas    05     2958208
## 4    Arizona    04     6641928
## 5 California    06    38421464
## 6   Colorado    08     5278906
# Cleaning up the column names
colnames(state_pop) <- c("NAME", "state_id", "population")
state_pop$state_id <- as.numeric(state_pop$state_id)
# Hm, data comes in numbers of fully spelled out, not abbreviations

# Did you know R has its own built in list of State names and State abbreviations?
# Just pull it in this way to create a dataframe for reference

state_off <- data.frame(state.abb, state.name)

# So I needed to create the dataframe above because the Census API data 
# gave me states with full names while the Starbucks data came with abbreviated state names
# So I needed a relationship dataframe so I could join the two

# Cleaning up the names for easier joining
colnames(state_off) <- c("state", "NAME")

# Joining state population dataframe to relationship file
state_pop <- left_join(state_pop, state_off)
## Joining, by = "NAME"
## Warning: Column `NAME` joining character vector and factor, coercing into
## character vector
# The relationship dataframe didnt have DC or Puerto Rico, so I'm manually putting those in
state_pop$state <- ifelse(state_pop$NAME=="District of Columbia", "DC", as.character(state_pop$state))
state_pop$state <- ifelse(state_pop$NAME=="Puerto Rico", "PR", as.character(state_pop$state))

# Joining Starbucks dataframe to adjusted state population dataframe
sb_state_pop <- left_join(sb_state, state_pop)
## Joining, by = "state"
# Calculating per Starbucks stores 100,000 residents and rounding to 2 digits
sb_state_pop$per_capita <- round(sb_state_pop$total/sb_state_pop$population*100000,2)

# Eliminating rows with NA
sb_state_pop <- subset(sb_state_pop, !is.na(per_capita))
head(sb_state_pop)
## # A tibble: 6 x 7
##   state total type      NAME       state_id population per_capita
##   <chr> <int> <chr>     <chr>         <dbl>      <dbl>      <dbl>
## 1 AK       42 Starbucks Alaska            2     733375       5.73
## 2 AL       65 Starbucks Alabama           1    4830620       1.35
## 3 AR       37 Starbucks Arkansas          5    2958208       1.25
## 4 AZ      391 Starbucks Arizona           4    6641928       5.89
## 5 CA     2456 Starbucks California        6   38421464       6.39
## 6 CO      421 Starbucks Colorado          8    5278906       7.98

Final map

states_merged_sb_pc <- geo_join(states, sb_state_pop, "STUSPS", "state")

pal_sb <- colorNumeric("Greens", domain=states_merged_sb_pc$per_capita)
states_merged_sb_pc <- subset(states_merged_sb_pc, !is.na(per_capita))

popup_sb <- paste0("Per capita: ", as.character(states_merged_sb_pc$per_capita))

leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(-98.483330, 38.712046, zoom = 4) %>% 
  addPolygons(data = states_merged_sb_pc , 
              fillColor = ~pal_sb(states_merged_sb_pc$per_capita), 
              fillOpacity = 0.9, 
              weight = 0.2, 
              smoothFactor = 0.2, 
              popup = ~popup_sb) %>%
  addLegend(pal = pal_sb, 
            values = states_merged_sb_pc$per_capita, 
            position = "bottomright", 
            title = "Starbucks<br />per 100,000<br/>residents")

Final map II

The Leaflet for R package was recently updated, adding functionality like highlighting polygons and labels on hover.

This options include highlight and labelOptions. Read more at rstudio.

states_merged_sb_pc <- geo_join(states, sb_state_pop, "STUSPS", "state")

pal_sb <- colorNumeric("Greens", domain=states_merged_sb_pc$per_capita)
states_merged_sb_pc <- subset(states_merged_sb_pc, !is.na(per_capita))

popup_sb <- paste0("Per capita: ", as.character(states_merged_sb_pc$per_capita))

leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(-98.483330, 38.712046, zoom = 4) %>% 
  addPolygons(data = states_merged_sb_pc , 
              fillColor = ~pal_sb(states_merged_sb_pc$per_capita), 
              fillOpacity = 0.9, 
              weight = 0.2, 
              smoothFactor = 0.2,
              highlight = highlightOptions(
                  weight = 5,
                  color = "#666",
                  dashArray = "",
                  fillOpacity = 0.7,
                   bringToFront = TRUE),
              label=popup_sb,
              labelOptions = labelOptions(
    style = list("font-weight" = "normal", padding = "3px 8px"),
    textsize = "15px",
    direction = "auto")) %>%
  addLegend(pal = pal_sb, 
            values = states_merged_sb_pc$per_capita, 
            position = "bottomright", 
            title = "Starbucks<br />per 100,000<br/>residents")