Statistical Analysis

# load libraries used in analysis
library(tidyverse)
library(patchwork)
library(jtools)
library(kableExtra)

# read in data generated from geospatial analysis
blackout_census_data <- read_csv("data/census_tract_blackout_data_df.csv")

Linear Regression

Linear regression was used to model the relationship between percent of houses that lost power and selected data from the U.S. Census Bureau’s American Community Survey (ACS). All data are aggregated to the census tract level.

Race

Percent white

# linear regression model
model_pct_white <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ pct_white)

#plot
plot_model_pct_white <- ggplot(data = blackout_census_data, aes(x = pct_white, y = pct_houses_that_lost_power)) +
  geom_point(size = 0.5) +
  geom_smooth(method = lm, formula = y~x, se = FALSE) +
  theme_classic() +
  labs(x = "% white", y = "% of houses that lost power")
plot_model_pct_white

Percent of houses that lost power vs. percent population white in census tract

Code for model summary result values

# model summary
summary_model_pct_white <- summary(model_pct_white)

# extract model coefficients
slope_coef_white <- round(summary_model_pct_white$coefficients["pct_white", "Estimate"], digits = 3)

std_err_white <- round(summary_model_pct_white$coefficients["pct_white", "Std. Error"], digits = 3)

p_value_white <- format(summary_model_pct_white$coefficients["pct_white", "Pr(>|t|)"], scientific = FALSE, digits = 2)

# confidence intervals
ci_95_lower_white <- round(confint(model_pct_white, level = 0.95)[2, 1], digits = 3)
ci_95_upper_white <- round(confint(model_pct_white, level = 0.95)[2, 2], digits = 3)

# r-squared value
r_sqrd_white <- round(summary_model_pct_white$r.squared, digits = 3)

Percent black

# linear regression model
model_pct_black <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ pct_black)

# plot
plot_model_pct_black <- ggplot(data = blackout_census_data, aes(x = pct_black, y = pct_houses_that_lost_power)) +
  geom_point(size = 0.5) +
  geom_smooth(method = lm, formula = y~x, se = FALSE) +
  theme_classic() +
  labs(x = "% black", y = "% of houses that lost power")
plot_model_pct_black

Percent of houses that lost power vs. percent population black in census tract

Code for model summary result values

# model summary
summary_model_pct_black <- summary(model_pct_black)

# extract model coefficients
slope_coef_black <- round(summary_model_pct_black$coefficients["pct_black", "Estimate"], digits = 3)

std_err_black <- round(summary_model_pct_black$coefficients["pct_black", "Std. Error"], digits = 3)

p_value_black <- format(summary_model_pct_black$coefficients["pct_black", "Pr(>|t|)"], scientific = FALSE, digits = 2)

# confidence intervals
ci_95_lower_black <- round(confint(model_pct_black, level = 0.95)[2, 1], digits = 3)
ci_95_upper_black <- round(confint(model_pct_black, level = 0.95)[2, 2], digits = 3)

# r-squared value
r_sqrd_black <- round(summary_model_pct_black$r.squared, digits = 3)

Percent native american

# linear regression model
model_pct_am_native <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ pct_am_native)

# plot
plot_model_pct_am_native <- ggplot(data = blackout_census_data, aes(x = pct_am_native, y = pct_houses_that_lost_power)) +
  geom_point(size = 0.5) +
  geom_smooth(method = lm, formula = y~x, se = FALSE) +
  theme_classic() +
  labs(x = "% native american", y = "% of houses that lost power")
plot_model_pct_am_native

Percent of houses that lost power vs. percent population native american in census tract

Code for model summary result values

# model summary
summary_model_pct_am_native <- summary(model_pct_am_native)

# extract model coefficients
slope_coef_am_native <- round(summary_model_pct_am_native$coefficients["pct_am_native", "Estimate"], digits = 3)

std_err_am_native <- round(summary_model_pct_am_native$coefficients["pct_am_native", "Std. Error"], digits = 3)

p_value_am_native <- format(summary_model_pct_am_native$coefficients["pct_am_native", "Pr(>|t|)"], scientific = FALSE, digits = 2)

# confidence intervals
ci_95_lower_am_native <- round(confint(model_pct_am_native, level = 0.95)[2, 1], digits = 3)
ci_95_upper_am_native <- round(confint(model_pct_am_native, level = 0.95)[2, 2], digits = 3)

# r-squared value
r_sqrd_am_native <- round(summary_model_pct_am_native$r.squared, digits = 3)

Percent asian

# linear regression model
model_pct_asian <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ pct_asian)

# plot
plot_model_pct_asian <- ggplot(data = blackout_census_data, aes(x = pct_asian, y = pct_houses_that_lost_power)) +
  geom_point(size = 0.5) +
  geom_smooth(method = lm, formula = y~x, se = FALSE) +
  theme_classic() +
  labs(x = "% asian", y = "% of houses that lost power")
plot_model_pct_asian

Percent of houses that lost power vs. percent population asian in census tract

Code for model summary result values

# model summary
summary_model_pct_asian <- summary(model_pct_asian)

# extract model coefficients
slope_coef_asian <- round(summary_model_pct_asian$coefficients["pct_asian", "Estimate"], digits = 3)

std_err_asian <- round(summary_model_pct_asian$coefficients["pct_asian", "Std. Error"], digits = 3)

p_value_asian <- format(summary_model_pct_asian$coefficients["pct_asian", "Pr(>|t|)"], scientific = FALSE, digits = 2)

# confidence intervals
ci_95_lower_asian <- round(confint(model_pct_asian, level = 0.95)[2, 1], digits = 3)
ci_95_upper_asian <- round(confint(model_pct_asian, level = 0.95)[2, 2], digits = 3)

# r-squared value
r_sqrd_asian <- round(summary_model_pct_asian$r.squared, digits = 3)

Percent hispanic / latino

# linear regression model
model_pct_hispanic_latino <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ pct_hispanic_latino)

# plot
plot_model_pct_hispanic_latino <- ggplot(data = blackout_census_data, aes(x = pct_hispanic_latino, y = pct_houses_that_lost_power)) +
  geom_point(size = 0.5) +
  geom_smooth(method = lm, formula = y~x, se = FALSE) +
  theme_classic() +
  labs(x = "% hispanic / latino", y = "% of houses that lost power")
plot_model_pct_hispanic_latino

Percent of houses that lost power vs. percent population hispanic / latino in census tract

Code for model summary result values

# model summary
summary_model_pct_hispanic_latino <- summary(model_pct_hispanic_latino)

# extract model coefficients
slope_coef_hispanic_latino <- round(summary_model_pct_hispanic_latino$coefficients["pct_hispanic_latino", "Estimate"], digits = 3)

std_err_hispanic_latino <- round(summary_model_pct_hispanic_latino$coefficients["pct_hispanic_latino", "Std. Error"], digits = 3)

p_value_hispanic_latino <- format(summary_model_pct_hispanic_latino$coefficients["pct_hispanic_latino", "Pr(>|t|)"], scientific = FALSE, digits = 2)

# confidence intervals
ci_95_lower_hispanic_latino <- round(confint(model_pct_hispanic_latino, level = 0.95)[2, 1], digits = 3)
ci_95_upper_hispanic_latino <- round(confint(model_pct_hispanic_latino, level = 0.95)[2, 2], digits = 3)

# r-squared value
r_sqrd_hispanic_latino <- round(summary_model_pct_hispanic_latino$r.squared, digits = 3)

Race summary plots

model_plots_race <- (plot_model_pct_white | plot_model_pct_black | plot_model_pct_am_native) / 
  (plot_model_pct_asian | plot_model_pct_hispanic_latino | plot_spacer()) +
  plot_annotation(title = "Lineral Regression Models of Power Outages vs. Race")

model_plots_race

Age

Percent 65 and older

# linear regression model
model_pct_65_and_over <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ pct_65_and_over)

# plot
plot_model_pct_65_and_over <- ggplot(data = blackout_census_data, aes(x = pct_65_and_over, y = pct_houses_that_lost_power)) +
  geom_point(size = 0.5) +
  geom_smooth(method = lm, formula = y~x, se = FALSE) +
  theme_classic() +
  labs(x = "% 65 and older", y = "% of houses that lost power")
plot_model_pct_65_and_over

Percent of houses that lost power vs. percent population age 65 and older in census tract

Code for model summary result values

# model summary
summary_model_pct_65_and_over <- summary(model_pct_65_and_over)

# extract model coefficients
slope_coef_pct_65_and_over <- round(summary_model_pct_65_and_over$coefficients["pct_65_and_over", "Estimate"], digits = 3)

std_err_pct_65_and_over <- round(summary_model_pct_65_and_over$coefficients["pct_65_and_over", "Std. Error"], digits = 3)

p_value_pct_65_and_over <- format(summary_model_pct_65_and_over$coefficients["pct_65_and_over", "Pr(>|t|)"], scientific = FALSE, digits = 2)

# confidence intervals
ci_95_lower_pct_65_and_over <- round(confint(model_pct_65_and_over, level = 0.95)[2, 1], digits = 3)
ci_95_upper_pct_65_and_over <- round(confint(model_pct_65_and_over, level = 0.95)[2, 2], digits = 3)

# r-squared value
r_sqrd_65_and_over <- round(summary_model_pct_65_and_over$r.squared, digits = 3)

Percent children under 18

# linear regression model
model_pct_children_under_18 <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ pct_children_under_18)

# plot
plot_model_pct_children_under_18 <- ggplot(data = blackout_census_data, aes(x = pct_children_under_18, y = pct_houses_that_lost_power)) +
  geom_point() +
  geom_smooth(method = lm, formula = y~x, se = FALSE) +
  theme_classic() +
  labs(x = "% children under 18", y = "% of houses that lost power")
plot_model_pct_children_under_18

Percent of houses that lost power vs. percent population children under 18 in census tract

Code for model summary result values

# model summary
summary_model_pct_children_under_18 <- summary(model_pct_children_under_18)

# extract model coefficients
slope_coef_children_under_18 <- round(summary_model_pct_children_under_18$coefficients["pct_children_under_18", "Estimate"], digits = 3)

std_err_children_under_18 <- round(summary_model_pct_children_under_18$coefficients["pct_children_under_18", "Std. Error"], digits = 3)

p_value_children_under_18 <- format(summary_model_pct_children_under_18$coefficients["pct_children_under_18", "Pr(>|t|)"], scientific = FALSE, digits = 2)

# confidence intervals
ci_95_lower_children_under_18 <- round(confint(model_pct_children_under_18, level = 0.95)[2, 1], digits = 3)
ci_95_upper_children_under_18 <- round(confint(model_pct_children_under_18, level = 0.95)[2, 2], digits = 3)

# r-squared value
r_sqrd_children_under_18 <- round(summary_model_pct_children_under_18$r.squared, digits = 3)

Age summary plots

model_plots_age <- (plot_model_pct_65_and_over | plot_model_pct_children_under_18) +
  plot_annotation(title = "Lineral Regression Models of Power Outages vs. Age")

model_plots_age

Income

Percent households below poverty

# linear regression model
model_pct_households_below_poverty <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ pct_households_below_poverty)

# plot
plot_model_pct_households_below_poverty <- ggplot(data = blackout_census_data, aes(x = pct_households_below_poverty, y = pct_houses_that_lost_power)) +
  geom_point(size = 0.5) +
  geom_smooth(method = lm, formula = y~x, se = FALSE) +
  theme_classic() +
  labs(x = "% households below poverty", y = "% of houses that lost power")
plot_model_pct_households_below_poverty

Percent of houses that lost power vs. percent population of household below the poverty level in census tract

Code for model summary result values

# model summary
summary_model_pct_households_below_poverty <- summary(model_pct_households_below_poverty)

# extract model coefficients
slope_coef_households_below_poverty <- round(summary_model_pct_households_below_poverty$coefficients["pct_households_below_poverty", "Estimate"], digits = 3)

std_err_households_below_poverty <- round(summary_model_pct_households_below_poverty$coefficients["pct_households_below_poverty", "Std. Error"], digits = 3)

p_value_households_below_poverty <- format(summary_model_pct_households_below_poverty$coefficients["pct_households_below_poverty", "Pr(>|t|)"], scientific = FALSE, digits = 2)

# confidence intervals
ci_95_lower_households_below_poverty <- round(confint(model_pct_households_below_poverty, level = 0.95)[2, 1], digits = 3)
ci_95_upper_households_below_poverty <- round(confint(model_pct_households_below_poverty, level = 0.95)[2, 2], digits = 3)

# r-squared value
r_sqrd_households_below_poverty <- round(summary_model_pct_households_below_poverty$r.squared, digits = 3)

Median income

# linear regression model
model_median_income <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ median_income)

# plot
plot_model_median_income <- ggplot(data = blackout_census_data, aes(x = median_income, y = pct_houses_that_lost_power)) +
  geom_point(size = 0.5) +
  geom_smooth(method = lm, formula = y~x, se = FALSE) +
  theme_classic() +
  labs(x = "median income", y = "% of houses that lost power")
plot_model_median_income

Code for model summary result values

# model summary
summary_model_median_income <- summary(model_median_income)

# extract model coefficients
slope_coef_median_income <- round(summary_model_median_income$coefficients["median_income", "Estimate"], digits = 3)

std_err_median_income <- round(summary_model_median_income$coefficients["median_income", "Std. Error"], digits = 3)

p_value_median_income <- format(summary_model_median_income$coefficients["median_income", "Pr(>|t|)"], scientific = FALSE, digits = 2)

# confidence intervals
ci_95_lower_median_income <- round(confint(model_median_income, level = 0.95)[2, 1], digits = 3)
ci_95_upper_median_income <- round(confint(model_median_income, level = 0.95)[2, 2], digits = 3)

# r-squared value
r_sqrd_median_income <- round(summary_model_median_income$r.squared, digits = 3)

Income summary plots

model_plots_income <- (plot_model_pct_households_below_poverty | plot_model_median_income) +
  plot_annotation(title = "Lineral Regression Models of Power Outages vs. Income")

model_plots_income

Results

Based on the results of the linear regression models, there is a statistically significant relationship at the 5 % level between percent of houses that lost power and the following variables:

percent of population white
percent of population black
percent of population hispanic / latino
percent of population age 65 and older
percent of population children under age 18
percent of households below the poverty level

Of the significant variables, black; hispanic / latino; children under 18; and poverty were positively correlated with residential power outages. Significant variables that were negatively correlated with residential power outages included white and age 65 and older.

For each model with statistically significant results (ie. p-value < 0.05) we reject the null hypothesis that the socioeconomic variable had no influence on percentage of houses without electricity in each census tract at the 5% level. In other words, the slope coefficient is significantly different from zero. Each slope coefficient represents the change in percent of houses that lost power for a one unit increase in the socioeconomic variable. To use the percent white model as an example, for each one unit increase in percent of census population that is white, the percent of census level residential power outages decreases by 0.168. For the percent white model, the 95% confidence interval for the slope coefficient ranges from -0.251 to -0.085. This means that there is a 95% change that this interval includes the true census level rate of change for percent residential power outages for each one unit change in percent of population that is white.

The r-squared terms represent the variance in percent of houses without power that can be explained by the socioeconomic variable. The r-squared values ranged from 0% for the asian model to 2.9% for the age 65 and older model.

The results of the linear models are summarized in the table below.

Code to create summary table

model_summary_df <- data.frame(model_name = c("white", "black", "native american", "asian", "hispanic / latino", "65 and older", "children under 18", "poverty", "median income"),
                               slope_coef = c(slope_coef_white, slope_coef_black, slope_coef_am_native, slope_coef_asian, slope_coef_hispanic_latino, slope_coef_pct_65_and_over, slope_coef_children_under_18, slope_coef_households_below_poverty, slope_coef_median_income),
                               std_err = c(std_err_white, std_err_black, std_err_am_native, std_err_asian, std_err_hispanic_latino, std_err_pct_65_and_over, std_err_children_under_18, std_err_households_below_poverty, std_err_median_income),
                               p_value = c(p_value_white, p_value_black, p_value_am_native, p_value_asian, p_value_hispanic_latino, p_value_pct_65_and_over, p_value_children_under_18, p_value_households_below_poverty, p_value_median_income),
                               ci_95_lower = c(paste(ci_95_lower_white, "to", ci_95_upper_white),
                                               paste(ci_95_lower_black, "to", ci_95_upper_black),
                                               paste(ci_95_lower_am_native, "to", ci_95_upper_am_native),
                                               paste(ci_95_lower_asian, "to", ci_95_upper_asian),
                                               paste(ci_95_lower_hispanic_latino, "to", ci_95_upper_hispanic_latino),
                                               paste(ci_95_lower_pct_65_and_over, "to", ci_95_upper_pct_65_and_over),
                                               paste(ci_95_lower_children_under_18, "to", ci_95_upper_children_under_18),
                                               paste(ci_95_lower_households_below_poverty, "to", ci_95_upper_households_below_poverty),
                                               paste(ci_95_lower_median_income, "to", ci_95_upper_median_income)),
                               r_squared = c(r_sqrd_white, r_sqrd_black, r_sqrd_am_native, r_sqrd_asian, r_sqrd_hispanic_latino, r_sqrd_65_and_over, r_sqrd_children_under_18, r_sqrd_households_below_poverty, r_sqrd_median_income)) %>% 
  mutate(significance = case_when(
    p_value < 0.05 ~ "yes",
    p_value >= 0.05 ~ "no"))

model_summary_table <- model_summary_df %>% 
  kable(col.names = c("model name", "slope coefficent", "standard error", "p-value", "95% confidence interval", "r-squared", "significant at 5% level"),
        caption = "Summary of Model Results") %>% 
  kable_paper()
model_summary_table

Summary of Model Results
model name	slope coefficent	standard error	p-value	95% confidence interval	r-squared	significant at 5% level
white	-0.168	0.042	0.000076	-0.251 to -0.085	0.015	yes
black	0.167	0.048	0.00054	0.072 to 0.261	0.011	yes
native american	1.010	1.162	0.38	-1.27 to 3.29	0.001	no
asian	0.057	0.104	0.58	-0.147 to 0.261	0.000	no
hispanic / latino	0.104	0.037	0.0047	0.032 to 0.176	0.007	yes
65 and older	-0.902	0.159	0.000000019	-1.214 to -0.589	0.029	yes
children under 18	0.299	0.140	0.033	0.025 to 0.573	0.004	yes
poverty	0.215	0.084	0.011	0.05 to 0.38	0.006	yes
median income	0.000	0.000	0.42	0 to 0	0.001	no

The figures below provide a visual comparison of each model result. The bold portion of the line represents the 90% confidence interval and the full line represents the 95% confidnece interval for each slope coefficent estimate.

race_model_summary <- plot_summs(model_pct_white, model_pct_black, model_pct_am_native, model_pct_asian, model_pct_hispanic_latino,
           inner_ci_level = 0.9,
           model.names = c("white", "black", "native american", "asian", "hispanic/latino"))
race_model_summary

age_model_summary <-plot_summs(model_pct_65_and_over, model_pct_children_under_18,
           inner_ci_level = 0.95,
           model.names = c("over 65", "under 18"))
age_model_summary

income_model_summary <-plot_summs(model_pct_households_below_poverty, model_median_income,
           inner_ci_level = 0.95,
           model.names = c("poverty", "median income"))
income_model_summary

Conclusion

While race, age, and income socioeconomic variables account for only a small portion in the overall variance in residential power outages in the greater Houston area during the February 2021 winter storms, this analysis suggests some racial and economic inequality. Electric utility providers should evaluate power system equipment and infrastructure in areas with higher proportions of people of color and poverty then develop plans to maintain and/or replace critical infrastructure in these areas. The results of this analysis could also be used to provide a more equitable response to future natural disasters.

Note: The purpose of this evaluation was not to predict houses that could be more likely to lose power in the future. This analysis attempts to identify disproportional vulnerabilities of the effected community.

--- title: "Statistical Analysis" editor: visual toc-title: Contents toc-location: left code-tools: source: true toggle: true caption: view source code code-block-bg: true code-block-border-left: "#31BAE9" --- ```{r} #| warning: false # load libraries used in analysis library(tidyverse) library(patchwork) library(jtools) library(kableExtra) ``` ```{r} #| code-overflow: wrap #| output: false # read in data generated from geospatial analysis blackout_census_data <- read_csv("data/census_tract_blackout_data_df.csv") ``` # Linear Regression Linear regression was used to model the relationship between percent of houses that lost power and selected data from the U.S. Census Bureau's American Community Survey (ACS). All data are aggregated to the census tract level. ## Race ### Percent white ```{r} #| warning: false #| code-overflow: wrap #| code-fold: false #| column: margin #| fig-cap: "Percent of houses that lost power vs. percent population white in census tract" # linear regression model model_pct_white <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ pct_white) #plot plot_model_pct_white <- ggplot(data = blackout_census_data, aes(x = pct_white, y = pct_houses_that_lost_power)) + geom_point(size = 0.5) + geom_smooth(method = lm, formula = y~x, se = FALSE) + theme_classic() + labs(x = "% white", y = "% of houses that lost power") plot_model_pct_white ``` ```{r} #| code-overflow: wrap #| code-fold: true #| code-summary: "Code for model summary result values" # model summary summary_model_pct_white <- summary(model_pct_white) # extract model coefficients slope_coef_white <- round(summary_model_pct_white$coefficients["pct_white", "Estimate"], digits = 3) std_err_white <- round(summary_model_pct_white$coefficients["pct_white", "Std. Error"], digits = 3) p_value_white <- format(summary_model_pct_white$coefficients["pct_white", "Pr(>|t|)"], scientific = FALSE, digits = 2) # confidence intervals ci_95_lower_white <- round(confint(model_pct_white, level = 0.95)[2, 1], digits = 3) ci_95_upper_white <- round(confint(model_pct_white, level = 0.95)[2, 2], digits = 3) # r-squared value r_sqrd_white <- round(summary_model_pct_white$r.squared, digits = 3) ``` ### Percent black ```{r} #| warning: false #| code-overflow: wrap #| code-fold: false #| column: margin #| fig-cap: "Percent of houses that lost power vs. percent population black in census tract" # linear regression model model_pct_black <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ pct_black) # plot plot_model_pct_black <- ggplot(data = blackout_census_data, aes(x = pct_black, y = pct_houses_that_lost_power)) + geom_point(size = 0.5) + geom_smooth(method = lm, formula = y~x, se = FALSE) + theme_classic() + labs(x = "% black", y = "% of houses that lost power") plot_model_pct_black ``` ```{r} #| code-overflow: wrap #| code-fold: true #| code-summary: "Code for model summary result values" # model summary summary_model_pct_black <- summary(model_pct_black) # extract model coefficients slope_coef_black <- round(summary_model_pct_black$coefficients["pct_black", "Estimate"], digits = 3) std_err_black <- round(summary_model_pct_black$coefficients["pct_black", "Std. Error"], digits = 3) p_value_black <- format(summary_model_pct_black$coefficients["pct_black", "Pr(>|t|)"], scientific = FALSE, digits = 2) # confidence intervals ci_95_lower_black <- round(confint(model_pct_black, level = 0.95)[2, 1], digits = 3) ci_95_upper_black <- round(confint(model_pct_black, level = 0.95)[2, 2], digits = 3) # r-squared value r_sqrd_black <- round(summary_model_pct_black$r.squared, digits = 3) ``` ### Percent native american ```{r} #| warning: false #| code-overflow: wrap #| code-fold: false #| column: margin #| fig-cap: "Percent of houses that lost power vs. percent population native american in census tract" # linear regression model model_pct_am_native <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ pct_am_native) # plot plot_model_pct_am_native <- ggplot(data = blackout_census_data, aes(x = pct_am_native, y = pct_houses_that_lost_power)) + geom_point(size = 0.5) + geom_smooth(method = lm, formula = y~x, se = FALSE) + theme_classic() + labs(x = "% native american", y = "% of houses that lost power") plot_model_pct_am_native ``` ```{r} #| code-overflow: wrap #| code-fold: true #| code-summary: "Code for model summary result values" # model summary summary_model_pct_am_native <- summary(model_pct_am_native) # extract model coefficients slope_coef_am_native <- round(summary_model_pct_am_native$coefficients["pct_am_native", "Estimate"], digits = 3) std_err_am_native <- round(summary_model_pct_am_native$coefficients["pct_am_native", "Std. Error"], digits = 3) p_value_am_native <- format(summary_model_pct_am_native$coefficients["pct_am_native", "Pr(>|t|)"], scientific = FALSE, digits = 2) # confidence intervals ci_95_lower_am_native <- round(confint(model_pct_am_native, level = 0.95)[2, 1], digits = 3) ci_95_upper_am_native <- round(confint(model_pct_am_native, level = 0.95)[2, 2], digits = 3) # r-squared value r_sqrd_am_native <- round(summary_model_pct_am_native$r.squared, digits = 3) ``` ### Percent asian ```{r} #| warning: false #| code-overflow: wrap #| code-fold: false #| column: margin #| fig-cap: "Percent of houses that lost power vs. percent population asian in census tract" # linear regression model model_pct_asian <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ pct_asian) # plot plot_model_pct_asian <- ggplot(data = blackout_census_data, aes(x = pct_asian, y = pct_houses_that_lost_power)) + geom_point(size = 0.5) + geom_smooth(method = lm, formula = y~x, se = FALSE) + theme_classic() + labs(x = "% asian", y = "% of houses that lost power") plot_model_pct_asian ``` ```{r} #| code-overflow: wrap #| code-fold: true #| code-summary: "Code for model summary result values" # model summary summary_model_pct_asian <- summary(model_pct_asian) # extract model coefficients slope_coef_asian <- round(summary_model_pct_asian$coefficients["pct_asian", "Estimate"], digits = 3) std_err_asian <- round(summary_model_pct_asian$coefficients["pct_asian", "Std. Error"], digits = 3) p_value_asian <- format(summary_model_pct_asian$coefficients["pct_asian", "Pr(>|t|)"], scientific = FALSE, digits = 2) # confidence intervals ci_95_lower_asian <- round(confint(model_pct_asian, level = 0.95)[2, 1], digits = 3) ci_95_upper_asian <- round(confint(model_pct_asian, level = 0.95)[2, 2], digits = 3) # r-squared value r_sqrd_asian <- round(summary_model_pct_asian$r.squared, digits = 3) ``` ### Percent hispanic / latino ```{r} #| warning: false #| code-overflow: wrap #| code-fold: false #| column: margin #| fig-cap: "Percent of houses that lost power vs. percent population hispanic / latino in census tract" # linear regression model model_pct_hispanic_latino <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ pct_hispanic_latino) # plot plot_model_pct_hispanic_latino <- ggplot(data = blackout_census_data, aes(x = pct_hispanic_latino, y = pct_houses_that_lost_power)) + geom_point(size = 0.5) + geom_smooth(method = lm, formula = y~x, se = FALSE) + theme_classic() + labs(x = "% hispanic / latino", y = "% of houses that lost power") plot_model_pct_hispanic_latino ``` ```{r} #| code-overflow: wrap #| code-fold: true #| code-summary: "Code for model summary result values" # model summary summary_model_pct_hispanic_latino <- summary(model_pct_hispanic_latino) # extract model coefficients slope_coef_hispanic_latino <- round(summary_model_pct_hispanic_latino$coefficients["pct_hispanic_latino", "Estimate"], digits = 3) std_err_hispanic_latino <- round(summary_model_pct_hispanic_latino$coefficients["pct_hispanic_latino", "Std. Error"], digits = 3) p_value_hispanic_latino <- format(summary_model_pct_hispanic_latino$coefficients["pct_hispanic_latino", "Pr(>|t|)"], scientific = FALSE, digits = 2) # confidence intervals ci_95_lower_hispanic_latino <- round(confint(model_pct_hispanic_latino, level = 0.95)[2, 1], digits = 3) ci_95_upper_hispanic_latino <- round(confint(model_pct_hispanic_latino, level = 0.95)[2, 2], digits = 3) # r-squared value r_sqrd_hispanic_latino <- round(summary_model_pct_hispanic_latino$r.squared, digits = 3) ``` ### Race summary plots ```{r fig.width=10,fig.height=5} #| warning: false #| code-overflow: wrap #| column: screen-right model_plots_race <- (plot_model_pct_white | plot_model_pct_black | plot_model_pct_am_native) / (plot_model_pct_asian | plot_model_pct_hispanic_latino | plot_spacer()) + plot_annotation(title = "Lineral Regression Models of Power Outages vs. Race") model_plots_race ``` ## Age ### Percent 65 and older ```{r} #| warning: false #| code-overflow: wrap #| code-fold: false #| column: margin #| fig-cap: "Percent of houses that lost power vs. percent population age 65 and older in census tract" # linear regression model model_pct_65_and_over <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ pct_65_and_over) # plot plot_model_pct_65_and_over <- ggplot(data = blackout_census_data, aes(x = pct_65_and_over, y = pct_houses_that_lost_power)) + geom_point(size = 0.5) + geom_smooth(method = lm, formula = y~x, se = FALSE) + theme_classic() + labs(x = "% 65 and older", y = "% of houses that lost power") plot_model_pct_65_and_over ``` ```{r} #| code-overflow: wrap #| code-fold: true #| code-summary: "Code for model summary result values" # model summary summary_model_pct_65_and_over <- summary(model_pct_65_and_over) # extract model coefficients slope_coef_pct_65_and_over <- round(summary_model_pct_65_and_over$coefficients["pct_65_and_over", "Estimate"], digits = 3) std_err_pct_65_and_over <- round(summary_model_pct_65_and_over$coefficients["pct_65_and_over", "Std. Error"], digits = 3) p_value_pct_65_and_over <- format(summary_model_pct_65_and_over$coefficients["pct_65_and_over", "Pr(>|t|)"], scientific = FALSE, digits = 2) # confidence intervals ci_95_lower_pct_65_and_over <- round(confint(model_pct_65_and_over, level = 0.95)[2, 1], digits = 3) ci_95_upper_pct_65_and_over <- round(confint(model_pct_65_and_over, level = 0.95)[2, 2], digits = 3) # r-squared value r_sqrd_65_and_over <- round(summary_model_pct_65_and_over$r.squared, digits = 3) ``` ### Percent children under 18 ```{r} #| warning: false #| code-overflow: wrap #| code-fold: false #| column: margin #| fig-cap: "Percent of houses that lost power vs. percent population children under 18 in census tract" # linear regression model model_pct_children_under_18 <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ pct_children_under_18) # plot plot_model_pct_children_under_18 <- ggplot(data = blackout_census_data, aes(x = pct_children_under_18, y = pct_houses_that_lost_power)) + geom_point() + geom_smooth(method = lm, formula = y~x, se = FALSE) + theme_classic() + labs(x = "% children under 18", y = "% of houses that lost power") plot_model_pct_children_under_18 ``` ```{r} #| code-overflow: wrap #| code-fold: true #| code-summary: "Code for model summary result values" # model summary summary_model_pct_children_under_18 <- summary(model_pct_children_under_18) # extract model coefficients slope_coef_children_under_18 <- round(summary_model_pct_children_under_18$coefficients["pct_children_under_18", "Estimate"], digits = 3) std_err_children_under_18 <- round(summary_model_pct_children_under_18$coefficients["pct_children_under_18", "Std. Error"], digits = 3) p_value_children_under_18 <- format(summary_model_pct_children_under_18$coefficients["pct_children_under_18", "Pr(>|t|)"], scientific = FALSE, digits = 2) # confidence intervals ci_95_lower_children_under_18 <- round(confint(model_pct_children_under_18, level = 0.95)[2, 1], digits = 3) ci_95_upper_children_under_18 <- round(confint(model_pct_children_under_18, level = 0.95)[2, 2], digits = 3) # r-squared value r_sqrd_children_under_18 <- round(summary_model_pct_children_under_18$r.squared, digits = 3) ``` ### Age summary plots ```{r fig.width=10,fig.height=3} #| warning: false #| code-overflow: wrap #| column: screen-right model_plots_age <- (plot_model_pct_65_and_over | plot_model_pct_children_under_18) + plot_annotation(title = "Lineral Regression Models of Power Outages vs. Age") model_plots_age ``` ## Income ### Percent households below poverty ```{r} #| warning: false #| code-overflow: wrap #| code-fold: false #| column: margin #| fig-cap: "Percent of houses that lost power vs. percent population of household below the poverty level in census tract" # linear regression model model_pct_households_below_poverty <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ pct_households_below_poverty) # plot plot_model_pct_households_below_poverty <- ggplot(data = blackout_census_data, aes(x = pct_households_below_poverty, y = pct_houses_that_lost_power)) + geom_point(size = 0.5) + geom_smooth(method = lm, formula = y~x, se = FALSE) + theme_classic() + labs(x = "% households below poverty", y = "% of houses that lost power") plot_model_pct_households_below_poverty ``` ```{r} #| code-overflow: wrap #| code-fold: true #| code-summary: "Code for model summary result values" # model summary summary_model_pct_households_below_poverty <- summary(model_pct_households_below_poverty) # extract model coefficients slope_coef_households_below_poverty <- round(summary_model_pct_households_below_poverty$coefficients["pct_households_below_poverty", "Estimate"], digits = 3) std_err_households_below_poverty <- round(summary_model_pct_households_below_poverty$coefficients["pct_households_below_poverty", "Std. Error"], digits = 3) p_value_households_below_poverty <- format(summary_model_pct_households_below_poverty$coefficients["pct_households_below_poverty", "Pr(>|t|)"], scientific = FALSE, digits = 2) # confidence intervals ci_95_lower_households_below_poverty <- round(confint(model_pct_households_below_poverty, level = 0.95)[2, 1], digits = 3) ci_95_upper_households_below_poverty <- round(confint(model_pct_households_below_poverty, level = 0.95)[2, 2], digits = 3) # r-squared value r_sqrd_households_below_poverty <- round(summary_model_pct_households_below_poverty$r.squared, digits = 3) ``` ### Median income ```{r} #| warning: false #| code-overflow: wrap #| code-fold: false #| column: margin #| fig-cap: "Percent of houses that lost power vs. median income in census tract" # linear regression model model_median_income <- lm(data = blackout_census_data, pct_houses_that_lost_power ~ median_income) # plot plot_model_median_income <- ggplot(data = blackout_census_data, aes(x = median_income, y = pct_houses_that_lost_power)) + geom_point(size = 0.5) + geom_smooth(method = lm, formula = y~x, se = FALSE) + theme_classic() + labs(x = "median income", y = "% of houses that lost power") plot_model_median_income ``` ```{r} #| code-overflow: wrap #| code-fold: true #| code-summary: "Code for model summary result values" # model summary summary_model_median_income <- summary(model_median_income) # extract model coefficients slope_coef_median_income <- round(summary_model_median_income$coefficients["median_income", "Estimate"], digits = 3) std_err_median_income <- round(summary_model_median_income$coefficients["median_income", "Std. Error"], digits = 3) p_value_median_income <- format(summary_model_median_income$coefficients["median_income", "Pr(>|t|)"], scientific = FALSE, digits = 2) # confidence intervals ci_95_lower_median_income <- round(confint(model_median_income, level = 0.95)[2, 1], digits = 3) ci_95_upper_median_income <- round(confint(model_median_income, level = 0.95)[2, 2], digits = 3) # r-squared value r_sqrd_median_income <- round(summary_model_median_income$r.squared, digits = 3) ``` ### Income summary plots ```{r fig.width=10,fig.height=3} #| warning: false #| code-overflow: wrap #| column: screen-right model_plots_income <- (plot_model_pct_households_below_poverty | plot_model_median_income) + plot_annotation(title = "Lineral Regression Models of Power Outages vs. Income") model_plots_income ``` # Results Based on the results of the linear regression models, there is a statistically significant relationship at the 5 % level between percent of houses that lost power and the following variables: - percent of population white - percent of population black - percent of population hispanic / latino - percent of population age 65 and older - percent of population children under age 18 - percent of households below the poverty level Of the significant variables, black; hispanic / latino; children under 18; and poverty were positively correlated with residential power outages. Significant variables that were negatively correlated with residential power outages included white and age 65 and older. For each model with statistically significant results (ie. p-value \< 0.05) we reject the null hypothesis that the socioeconomic variable had no influence on percentage of houses without electricity in each census tract at the 5% level. In other words, the slope coefficient is significantly different from zero. Each slope coefficient represents the change in percent of houses that lost power for a one unit increase in the socioeconomic variable. To use the percent white model as an example, for each one unit increase in percent of census population that is white, the percent of census level residential power outages decreases by `r abs(slope_coef_white)`. For the percent white model, the 95% confidence interval for the slope coefficient ranges from `r ci_95_lower_white` to `r ci_95_upper_white`. This means that there is a 95% change that this interval includes the true census level rate of change for percent residential power outages for each one unit change in percent of population that is white. ```{r} #| include: false r_sqrd_min <- min(r_sqrd_white, r_sqrd_black, r_sqrd_am_native, r_sqrd_asian, r_sqrd_hispanic_latino, r_sqrd_65_and_over, r_sqrd_children_under_18, r_sqrd_households_below_poverty, r_sqrd_median_income) * 100 r_sqrd_max <- max(r_sqrd_white, r_sqrd_black, r_sqrd_am_native, r_sqrd_asian, r_sqrd_hispanic_latino, r_sqrd_65_and_over, r_sqrd_children_under_18, r_sqrd_households_below_poverty, r_sqrd_median_income) * 100 ``` The r-squared terms represent the variance in percent of houses without power that can be explained by the socioeconomic variable. The r-squared values ranged from `r r_sqrd_min`% for the asian model to `r r_sqrd_max`% for the age 65 and older model. The results of the linear models are summarized in the table below. ```{r} #| column: page-right #| code-overflow: wrap #| code-fold: true #| code-summary: "Code to create summary table" model_summary_df <- data.frame(model_name = c("white", "black", "native american", "asian", "hispanic / latino", "65 and older", "children under 18", "poverty", "median income"), slope_coef = c(slope_coef_white, slope_coef_black, slope_coef_am_native, slope_coef_asian, slope_coef_hispanic_latino, slope_coef_pct_65_and_over, slope_coef_children_under_18, slope_coef_households_below_poverty, slope_coef_median_income), std_err = c(std_err_white, std_err_black, std_err_am_native, std_err_asian, std_err_hispanic_latino, std_err_pct_65_and_over, std_err_children_under_18, std_err_households_below_poverty, std_err_median_income), p_value = c(p_value_white, p_value_black, p_value_am_native, p_value_asian, p_value_hispanic_latino, p_value_pct_65_and_over, p_value_children_under_18, p_value_households_below_poverty, p_value_median_income), ci_95_lower = c(paste(ci_95_lower_white, "to", ci_95_upper_white), paste(ci_95_lower_black, "to", ci_95_upper_black), paste(ci_95_lower_am_native, "to", ci_95_upper_am_native), paste(ci_95_lower_asian, "to", ci_95_upper_asian), paste(ci_95_lower_hispanic_latino, "to", ci_95_upper_hispanic_latino), paste(ci_95_lower_pct_65_and_over, "to", ci_95_upper_pct_65_and_over), paste(ci_95_lower_children_under_18, "to", ci_95_upper_children_under_18), paste(ci_95_lower_households_below_poverty, "to", ci_95_upper_households_below_poverty), paste(ci_95_lower_median_income, "to", ci_95_upper_median_income)), r_squared = c(r_sqrd_white, r_sqrd_black, r_sqrd_am_native, r_sqrd_asian, r_sqrd_hispanic_latino, r_sqrd_65_and_over, r_sqrd_children_under_18, r_sqrd_households_below_poverty, r_sqrd_median_income)) %>% mutate(significance = case_when( p_value < 0.05 ~ "yes", p_value >= 0.05 ~ "no")) model_summary_table <- model_summary_df %>% kable(col.names = c("model name", "slope coefficent", "standard error", "p-value", "95% confidence interval", "r-squared", "significant at 5% level"), caption = "Summary of Model Results") %>% kable_paper() model_summary_table ``` The figures below provide a visual comparison of each model result. The bold portion of the line represents the 90% confidence interval and the full line represents the 95% confidnece interval for each slope coefficent estimate. ```{r} #| code-overflow: wrap #| message: false race_model_summary <- plot_summs(model_pct_white, model_pct_black, model_pct_am_native, model_pct_asian, model_pct_hispanic_latino, inner_ci_level = 0.9, model.names = c("white", "black", "native american", "asian", "hispanic/latino")) race_model_summary ``` ```{r} #| message: false #| #| code-overflow: wrap age_model_summary <-plot_summs(model_pct_65_and_over, model_pct_children_under_18, inner_ci_level = 0.95, model.names = c("over 65", "under 18")) age_model_summary ``` ```{r} #| message: false #| code-overflow: wrap income_model_summary <-plot_summs(model_pct_households_below_poverty, model_median_income, inner_ci_level = 0.95, model.names = c("poverty", "median income")) income_model_summary ``` # Conclusion While race, age, and income socioeconomic variables account for only a small portion in the overall variance in residential power outages in the greater Houston area during the February 2021 winter storms, this analysis suggests some racial and economic inequality. Electric utility providers should evaluate power system equipment and infrastructure in areas with higher proportions of people of color and poverty then develop plans to maintain and/or replace critical infrastructure in these areas. The results of this analysis could also be used to provide a more equitable response to future natural disasters. Note: The purpose of this evaluation was not to predict houses that could be more likely to lose power in the future. This analysis attempts to identify disproportional vulnerabilities of the effected community.