library(data.table)
library(magrittr)
library(ggplot2)
if (interactive()) {
<- "post/2022-12-13-all-cause-mortality-in-norway/"
folder_location else {
} <- ""
folder_location }
# Downloading data
<- PxWebApiData::ApiData(
data "https://data.ssb.no/api/v0/en/table/07995",
Kjonn = "0",
Alder = "999A",
Tid = as.character(2000:2022),
Uke = paste0("U", formatC(1:52, width = 2, flag = "0"))
)
# Cleaning
<- data$dataset[, c("Uke", "Tid", "value")]
d_deaths setDT(d_deaths)
setnames(d_deaths, c("week", "isoyear", "deaths_n"))
:= stringr::str_remove(week, "U")]
d_deaths[, isoweek := paste0(isoyear, "-", isoweek)]
d_deaths[, isoyearweek := as.numeric(isoweek)]
d_deaths[, isoweek := as.numeric(isoyear)]
d_deaths[, isoyear := NULL]
d_deaths[, week setcolorder(d_deaths, c("isoyear", "isoweek", "isoyearweek", "deaths_n"))
# Removing the latest 3 weeks of data (due to registration delay)
<- d_deaths[isoyear == 2022 & !is.na(deaths_n)]$isoweek %>%
max_isoweek max() - 3
<- d_deaths[isoweek <= max_isoweek] d_deaths
# Aggregating
<- d_deaths[, .(deaths_n = sum(deaths_n)), keyby = .(isoyear)]
pd
# Plotting
<- ggplot(pd, aes(x = isoyear, y = deaths_n))
q <- q + geom_line()
q <- q + geom_point()
q <- q + scale_x_continuous(
q "Isoyear",
breaks = seq(2000, 2022, 2)
)<- q + scale_y_continuous(
q "Number of deaths",
labels = csstyle::format_nor_num_0,
breaks = seq(33000, 38000, 1000)
)<- q + expand_limits(y = c(33000, 38000))
q <- q + labs(
q title = glue::glue(
"Deaths in Norway, occurring between isoweeks 1 and {max_isoweek} (inclusive)"
)
)<- q + labs(caption = "Data extracted 2022-12-13 from SSB table 07995.")
q q
# Estimating the baseline
setorder(d_deaths, isoyearweek)
for(isoyear_pred in 2011:2022) for(isoweek_model in unique(d_deaths$isoweek)){
# Determining the training data
if(isoyear_pred <= 2019){
<- (isoyear_pred-10):(isoyear_pred-1)
isoyear_train else {
} <- 2010:2019
isoyear_train
}
# Fitting the model
<- glm(
fit ~ isoyear,
deaths_n data = d_deaths[isoweek %in% isoweek_model & isoyear %in% isoyear_train],
family = "poisson"
)
# Predicting the baseline
<- predict(
pred
fit, %in% isoweek_model & isoyear %in% isoyear_pred]
d_deaths[isoweek %>%
) exp()
d_deaths[%in% isoweek_model & isoyear %in% isoyear_pred,
isoweek := pred
deaths_baseline_n
] }
# Calculating the excess mortality
:= deaths_n - deaths_baseline_n] d_deaths[, deaths_excess_n
# Aggregating
<- d_deaths[
pd !is.na(deaths_excess_n),
deaths_excess_n = sum(deaths_excess_n)),
.(= .(isoyear)
keyby
]
# Plotting
<- ggplot(pd, aes(x = isoyear, y = deaths_excess_n))
q <- q + geom_col()
q <- q + geom_hline(yintercept = 0, color = "black")
q <- q + scale_x_continuous(
q "Isoyear",
breaks = seq(2011, 2022, 1)
)<- q + scale_y_continuous(
q "Number of excess deaths",
labels = csstyle::format_nor_num_0,
breaks = seq(-1000, 4000, 500)
)<- q + labs(
q title = glue::glue(
"Excess deaths in Norway, occurring between isoweeks 1 and {max_isoweek} (inclusive)"
)
)<- q + labs(
q caption = "Weekly baseline calculated from a poisson regression of the previous 10 years of data.\nBaselines for 2020, 2021, and 2022 calculated using data for 2010-2019.\nData extracted 2022-12-13 from SSB table 07995."
) q