# load libraries used in analysis
library(tidyverse)
library(here)
library(stars)
library(sf)
library(leaflet)
library(kableExtra)
library(htmlwidgets)
library(htmltools)
Geospatial Analysis
Read in satellite imagery
The function below takes an HDFEOS file as input and, from that file, reads the DNB_at_Sensor_Radiance_500m dataset using the stars
package. The function then reads the sinusoidal tile x/y positions, adjusts the dimensions, and sets the coordinate reference system.
# function to read and process imagery files as rasters using the stars package
<- function(file_name) {
read_dnb_file
# HDF dataset that contains the night lights band
<- "//HDFEOS/GRIDS/VNP_Grid_DNB/Data_Fields/DNB_At_Sensor_Radiance_500m"
dataset_name
# extract the horizontal and vertical tile coordinates from the metadata
# this information is a string of text
<- gdal_metadata(file_name)[199]
h_string <- gdal_metadata(file_name)[219]
v_string
# from the horizontal and vertical tile text, obtain the coordinate info as an integer
<- as.integer(str_split(h_string, "=", simplify = TRUE)[[2]])
tile_h <- as.integer(str_split(v_string, "=", simplify = TRUE)[[2]])
tile_v
# use tile coordinates to calculate a geographic bounding box
<- (10 * tile_h) - 180
west <- 90 - (10 * tile_v)
north <- west + 10
east <- north - 10
south
<- 10 / 2400
delta
# read the dataset
<- read_stars(file_name, sub = dataset_name)
dnb
# set the coordinate reference system
st_crs(dnb) <- st_crs(4326)
st_dimensions(dnb)$x$delta <- delta
st_dimensions(dnb)$x$offset <- west
st_dimensions(dnb)$y$delta <- -delta
st_dimensions(dnb)$y$offset <- north
return(dnb)
}
# load in files using the read_dnb function
<- "data/VNP46A1/VNP46A1.A2021038.h08v05.001.2021039064328.h5"
feb7_h08v05_file_name <- read_dnb_file(file_name = feb7_h08v05_file_name)
dnb_feb7_h08v05
<- "data/VNP46A1/VNP46A1.A2021038.h08v06.001.2021039064329.h5"
feb7_h08v06_file_name <- read_dnb_file(file_name = feb7_h08v06_file_name)
dnb_feb7_h08v06
<- "data/VNP46A1/VNP46A1.A2021047.h08v05.001.2021048091106.h5"
feb16_h08v05_file_name <- read_dnb_file(file_name = feb16_h08v05_file_name)
dnb_feb16_h08v05
<- "data/VNP46A1/VNP46A1.A2021047.h08v06.001.2021048091105.h5"
feb16_h08v06_file_name <- read_dnb_file(file_name = feb16_h08v06_file_name) dnb_feb16_h08v06
Combine the tiles from each day using st_mosaic()
, like stitching together quit squares.
# combined imagery before the storms
<- st_mosaic(dnb_feb7_h08v05, dnb_feb7_h08v06)
dnb_feb7
# combined imagery after the storms
<- st_mosaic(dnb_feb16_h08v05, dnb_feb16_h08v06) dnb_feb16
# extract fill value from metadata
<- gdal_metadata(feb7_h08v05_file_name)[38]
fill_value_string <- as.integer(str_split(fill_value_string, "=", simplify = TRUE)[[2]])
fill_value
# convert fill value of 65535 to NA
== fill_value] = NA
dnb_feb7[dnb_feb7 == fill_value] = NA dnb_feb16[dnb_feb16
The code below defines and visualizes a bounding box for the region of interest.
# set region on interest
<- st_polygon(list(rbind(c(-96.5,29), c(-96.5,30.5), c(-94.5,30.5), c(-94.5,29), c(-96.5,29))))
roi
# set coordinate reference system
<- st_sfc(roi, crs = 4326)
roi_sfc # crs 4326 matches the crs of the satellite imagery
Code for leaflet map
<- st_as_sf(roi_sfc)
roi_leaflet <- st_make_valid(roi_leaflet)
roi_leaflet
leaflet(roi_leaflet) %>%
setView(lat = 29.75, lng = -95.5, zoom = 8) %>%
addProviderTiles(providers$OpenStreetMap) %>%
addPolygons(fillColor = "green", fillOpacity = 0.5, weight = 2)
The next step crops the night light imagery data to the region of interest
<- st_crop(dnb_feb7, roi_sfc)
dnb_feb7_crop <- st_crop(dnb_feb16, roi_sfc) dnb_feb16_crop
Visualize the data
Before the storms
After the storms
Create blackout mask
A stars
raster object representing the difference in night light intensity is created by subtracting the post-storm imagery from the pre-storm imagery. Next, a blackout mask is created to identify areas where night light intensity decreased by more than 200 nW cm-2 sr-1. For this analysis, differences in night light intensity are assumed to be caused by the power outages. The blackout mask is then converted from a stars
object to a sf
simple features object for use in the rest of the analysis.
<- dnb_feb7 - dnb_feb16
night_light_difference
<- night_light_difference
blackout_mask <= 200] = NA
blackout_mask[blackout_mask > 200] = TRUE
blackout_mask[blackout_mask
# vectorize the blackout mask
<- st_as_sf(blackout_mask)
blackout_mask_sf <- st_make_valid(blackout_mask_sf) blackout_mask_sf_valid
Next, the blackout mask is cropped to the region of interest and transformed to the NAD83 / Texas Centric Albers Equal Area projection.
<- blackout_mask_sf_valid[roi_sfc, op = st_intersects]
blackout_mask_roi
# transform blackout roi to Texas centric albers equal area
<- st_transform(blackout_mask_roi, crs = 3083) blackout_mask_roi_3083
Road data
Roadway data was obtained from OpenStreetMap. A SQL query and the st_read
function were used to load and subset highways and major roads from the full dataset. Since vehicles can be a significant source of observable nighttime light, st_buffer
was used to create a 200 meter highway buffer that was then removed from the blackout mask area. This step prevents areas that experienced reduced traffic from being identified as areas with power outages.
# load in roads package and select specifically highways
<- "SELECT * FROM gis_osm_roads_free_1 WHERE fclass in ('motorway', 'motorway_link', 'primary', 'primary_link')"
query_roads <- st_read("data/gis_osm_roads_free_1.gpkg", query = query_roads)
highways
# transform to the correct projection
<- st_transform(highways, crs = 3083)
highways_3083
# create a 200 meter buffer
<- st_buffer(highways_3083, dist = 200)
highways_buffer_3083 <- st_union(highways_buffer_3083) highways_buffer_3083
# remove the highway buffer from our vectorized blackout mask using st_difference()
<- st_difference(blackout_mask_roi_3083, highways_buffer_3083) blackout_no_highway_3083
The map below shows the areas identified as still experiencing power outages as of February 16, 2021.
Code for leaflet map
# transform to WG84 to be compatible with the leaflet package
<- st_transform(blackout_no_highway_3083, crs = 4326)
blackout_no_highway_4326
<- colorNumeric(c("red"), 1, na.color = "transparent")
pal_blackout
leaflet(blackout_no_highway_4326) %>%
setView(lat = 29.75, lng = -95.5, zoom = 9) %>%
addProviderTiles(providers$OpenStreetMap) %>%
addPolygons(fillColor = ~pal_blackout(DNB_At_Sensor_Radiance_500m), fillOpacity = 0.75, weight = 0)
Building data
The building data has previously been cropped to the Houston metroplitan area. A subset of residential buildings is loaded with an SQL query and the st_read
function. Close inspection of this data shows many NA values in the type
field. When looking at the data on the map, most NA values appear to be residential buildings, but some non-residential buildings such as schools are included.
# read in buildings data and select only residential
<- "SELECT *
query_buildings FROM gis_osm_buildings_a_free_1
WHERE (type IS NULL AND name IS NULL)
OR type in ('residential', 'apartments', 'house', 'static_caravan', 'detached')"
# read buildings gpkg into object, and transform to correct projection
<- st_read("data/gis_osm_buildings_a_free_1.gpkg", query = query_buildings)
buildings <- st_transform(buildings, crs = 3083)
buildings_3083 <- st_transform(buildings, crs = 4326) buildings_4326
Census data
Socioeconomic data was obtained from the U.S. Census Bureau’s 2019 American Community Survey. This data is aggregated to the census tract level and variables extracted from the full dataset include:
- Race
- white
- black
- native american
- hispanic / latino
- Age
- 65 and older
- children under 18
- Income
- households below poverty level
- median income
While the race and age data provides the population of each variable for each census tract, these values were normalized by the total population of the census tract. The poverty data was normalized by number of households in each census tract.
Census tract geometry
# read in census data geometry
<- st_read("data/ACS_2019_5YR_TRACT_48_TEXAS.gdb",
acs_geoms layer = "ACS_2019_5YR_TRACT_48_TEXAS") %>%
select(-(STATEFP:Shape_Area))
Age and sex layer
# read in variables of interest
<- st_read("data/ACS_2019_5YR_TRACT_48_TEXAS.gdb",
acs_age_sex layer = "X01_AGE_AND_SEX")
<- acs_age_sex %>%
acs_age_sex_df select(GEOID) %>%
mutate(total_pop_from_age_sex = acs_age_sex$B01001e1,
median_age = acs_age_sex$B01002e1,
pop_male_65_66 = acs_age_sex$B01001e20,
pop_male_67_to_69 = acs_age_sex$B01001e21,
pop_male_70_to_74 = acs_age_sex$B01001e22,
pop_male_75_to_79 = acs_age_sex$B01001e23,
pop_male_80_to_84 = acs_age_sex$B01001e24,
pop_male_85_and_over = acs_age_sex$B01001e25,
pop_female_65_66 = acs_age_sex$B01001e44,
pop_female_67_to_69 = acs_age_sex$B01001e45,
pop_female_70_to_74 = acs_age_sex$B01001e46,
pop_female_75_to_79 = acs_age_sex$B01001e47,
pop_female_80_to_84 = acs_age_sex$B01001e48,
pop_female_85_and_over = acs_age_sex$B01001e49,
pop_65_and_over = pop_male_65_66 + pop_male_67_to_69 + pop_male_70_to_74 + pop_male_75_to_79 + pop_male_80_to_84 + pop_male_85_and_over + pop_female_65_66 + pop_female_67_to_69 + pop_female_70_to_74 + pop_female_75_to_79 + pop_female_80_to_84 + pop_female_85_and_over,
pct_65_and_over = (pop_65_and_over / total_pop_from_age_sex) * 100) %>%
select(GEOID, total_pop_from_age_sex, pop_65_and_over, pct_65_and_over)
Race layer
<- st_read("data/ACS_2019_5YR_TRACT_48_TEXAS.gdb",
acs_race layer = "X02_RACE")
<- acs_race %>%
acs_race_df select(GEOID) %>%
mutate(total_pop_from_race = acs_race$B02001e1,
pop_white = acs_race$B02001e2,
pct_white = (pop_white / total_pop_from_race) * 100,
pop_black = acs_race$B02001e3,
pct_black = (pop_black / total_pop_from_race) * 100,
pop_am_native = acs_race$B02001e4,
pct_am_native = (pop_am_native / total_pop_from_race) * 100,
pop_asian = acs_race$B02001e5,
pct_asian = (pop_asian / total_pop_from_race) * 100)
Hispanic latino layer
<- st_read("data/ACS_2019_5YR_TRACT_48_TEXAS.gdb",
acs_hispanic_latino layer = "X03_HISPANIC_OR_LATINO_ORIGIN")
<- acs_hispanic_latino %>%
acs_hispanic_latino_df select(GEOID) %>%
mutate(total_pop_from_hispanic = acs_hispanic_latino$B03002e1,
pop_hispanic_latino = acs_hispanic_latino$B03002e12,
pct_hispanic_latino = (pop_hispanic_latino / total_pop_from_hispanic) * 100)
Children and household relationship
<- st_read("data/ACS_2019_5YR_TRACT_48_TEXAS.gdb",
acs_children_household layer = "X09_CHILDREN_HOUSEHOLD_RELATIONSHIP")
<- acs_children_household %>%
acs_children_household_df select(GEOID) %>%
mutate(pop_children_under_18 = acs_children_household$B09002e1,
pct_children_under_18 = (pop_children_under_18 / acs_hispanic_latino$B03002e1) * 100)
# the children_household_relationship layer did not contain a total populaiton field
Poverty layer
<- st_read("data/ACS_2019_5YR_TRACT_48_TEXAS.gdb",
acs_poverty layer = "X17_POVERTY")
<- acs_poverty %>%
acs_poverty_df select(GEOID) %>%
mutate(total_households = acs_poverty$B17017e1,
num_households_below_poverty = acs_poverty$B17017e2,
pct_households_below_poverty = (num_households_below_poverty / total_households) * 100)
Income layer
<- st_read("data/ACS_2019_5YR_TRACT_48_TEXAS.gdb",
acs_income layer = "X19_INCOME")
<- acs_income %>%
acs_income_df select(GEOID) %>%
mutate(median_income = acs_income$B19013e1)
Join census track data
Next, the selected race, age, and income layers are joined. The resulting data set contains the geometry of each census tract and corresponding socioeconomic data.
<- acs_geoms %>%
census_tract_data left_join(acs_age_sex_df, by = c("GEOID_Data" = "GEOID")) %>%
left_join(acs_race_df, by = c("GEOID_Data" = "GEOID")) %>%
left_join(acs_hispanic_latino_df, by = c("GEOID_Data" = "GEOID")) %>%
left_join(acs_children_household_df, by = c("GEOID_Data" = "GEOID")) %>%
left_join(acs_poverty_df, by = c("GEOID_Data" = "GEOID")) %>%
left_join(acs_income_df, by = c("GEOID_Data" = "GEOID")) %>%
rename(total_population = total_pop_from_age_sex) %>%
select(-c(total_pop_from_race, total_pop_from_hispanic))
<- st_transform(census_tract_data, 3083)
census_tract_data_3083 <- st_transform(census_tract_data_3083, crs = 4326)
census_tract_data_4326
<- st_crop(census_tract_data_4326, roi_sfc)
census_tract_data_roi_4326 <- st_transform(census_tract_data_roi_4326, crs = 3083) census_tract_data_roi_3083
Identify houses without power
Houses that lost electricity were identified based on a spatial intersection of the building data and the blackout mask. In this analysis, houses that did not overlap the blackout mask were identified as not losing power (or had power restored by February 16th).
# use spatial subsetting to find all the residential buildings in blackout areas (like we did to crop data to our roi)
<- buildings_3083[blackout_no_highway_3083, op = st_intersects]
blackout_buildings_3083 <- st_transform(blackout_buildings_3083, crs = 4326)
blackout_buildings_4326
# residential houses that did not lose power
<- setdiff(buildings_3083, blackout_buildings_3083)
no_blackout_buildings_3083 <- st_transform(no_blackout_buildings_3083, crs = 4326) no_blackout_buildings_4326
# use a spatial join to attach the census tract data to the building data
<- st_join(blackout_buildings_3083, census_tract_data_3083) %>%
blackout_buildings_census_3083 mutate(blackout_status = "lost power")
<- st_join(no_blackout_buildings_3083, census_tract_data_3083) %>%
no_blackout_buildings_census_3083 mutate(blackout_status = "did not lose power")
<- nrow(blackout_buildings_3083) number_of_buildings_without_power
Based on this analysis, an estimated 144,317 houses in the Houston metropolitan area were without power on February 16, 2021.
<- rbind(blackout_buildings_census_3083, no_blackout_buildings_census_3083) all_building_data_3083
<- all_building_data_3083 %>%
counts_of_lost_power_by_census_tract st_drop_geometry() %>%
group_by(GEOID_Data, blackout_status) %>%
summarise(count = n()) %>%
pivot_wider(names_from = blackout_status, values_from = count, values_fill = 0) %>%
rename(num_houses_did_not_lose_power = "did not lose power",
num_houses_lost_power = "lost power") %>%
mutate(pct_houses_that_lost_power = (num_houses_lost_power / (num_houses_did_not_lose_power + num_houses_lost_power)) * 100)
<- left_join(census_tract_data_roi_4326, counts_of_lost_power_by_census_tract, by = "GEOID_Data") census_tract_blackout_data
The map below shows blackout areas layered over census tract populations, with labels identifying the number of houses in each census tract that lost power. The buildings layer was not visualized due to the size of the dataset and slow load times.
Code for leaflet map
<- colorQuantile("Blues", census_tract_blackout_data$total_population, n = 5)
pal_pop <- colorNumeric(c("red"), 1, na.color = "transparent")
pal_blackout
<- unique(pal_pop(sort(census_tract_blackout_data$total_population)))
pop_colors <- format(quantile(census_tract_blackout_data$total_population, seq(0, 1, .2)), big.mark = ",", digits = 0)
pop_labs <- paste(lag(pop_labs), pop_labs, sep = " - ")[-1]
pop_labs
leaflet(census_tract_blackout_data) %>%
setView(lat = 29.75, lng = -95.5, zoom = 9) %>%
addProviderTiles(providers$OpenStreetMap) %>%
addPolygons(fillColor = ~pal_pop(total_population), fillOpacity = 0.5,
color = "gray", weight = 0.75,
label = ~paste("Number of houses without power:", format(num_houses_lost_power, big.mark = ",")),
highlight = highlightOptions(weight = 2, color = "black", bringToFront = TRUE)) %>%
addLegend(colors = pop_colors, labels = pop_labs,
title = "Total Population",
"bottomright") %>%
addPolygons(data = blackout_no_highway_4326, fillColor = ~pal_blackout(DNB_At_Sensor_Radiance_500m), fillOpacity = 0.75, weight = 0)