Reference website: Sorry ARIMA, but I'm Going Bayesian
Reference paper: Predicting the Present with Bayesian Structural Time Series, Steven L. Scott and Hal Varian
# install.packages("bsts")
library(bsts)
library(lubridate)
library(dplyr)
library(ggplot2)
library(forecast)
library(reshape2)
### Load the data
data("AirPassengers")
plot.ts(AirPassengers)
Y <- window(AirPassengers, start=c(1949, 1), end=c(1959,12))
### Fit the ARIMA model
arima <- auto.arima(log10(Y), trace=T)
##
## ARIMA(2,1,2)(1,1,1)[12] : -634.4569
## ARIMA(0,1,0)(0,1,0)[12] : -597.5164
## ARIMA(1,1,0)(1,1,0)[12] : -632.2099
## ARIMA(0,1,1)(0,1,1)[12] : -639.5504
## ARIMA(0,1,1)(1,1,1)[12] : -637.6169
## ARIMA(0,1,1)(0,1,0)[12] : -608.25
## ARIMA(0,1,1)(0,1,2)[12] : -637.6449
## ARIMA(0,1,1)(1,1,2)[12] : Inf
## ARIMA(1,1,1)(0,1,1)[12] : -637.6718
## ARIMA(0,1,0)(0,1,1)[12] : -628.6569
## ARIMA(0,1,2)(0,1,1)[12] : -637.4598
## ARIMA(1,1,2)(0,1,1)[12] : Inf
##
## Best model: ARIMA(0,1,1)(0,1,1)[12]
# arima <- arima(log10(Y), order=c(0, 1, 1), seasonal=list(order=c(0,1,1), period=12))
### Actual versus predicted
d1 <- data.frame(c(10^as.numeric(fitted(arima)), # fitted and predicted
10^as.numeric(predict(arima, n.ahead = 12)$pred)),
as.numeric(AirPassengers), #actual values
as.Date(time(AirPassengers)))
names(d1) <- c("Fitted", "Actual", "Date")
### MAPE (mean absolute percentage error)
MAPE <- filter(d1, year(Date)>1959) %>% summarise(MAPE=mean(abs(Actual-Fitted)/Actual))
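For reference, the holdout error computed above is the standard MAPE (stated here for clarity):

$$\mathrm{MAPE} = \frac{1}{n}\sum_{t=1}^{n} \frac{|A_t - F_t|}{A_t}$$

where $A_t$ are the actual values, $F_t$ the fitted/forecast values, and $n = 12$ is the number of holdout months (all of 1960).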
### Plot actual versus predicted
ggplot(data=d1, aes(x=Date)) +
geom_line(aes(y=Actual, colour = "Actual"), size=1.2) +
geom_line(aes(y=Fitted, colour = "Fitted"), size=1.2, linetype=2) +
theme_bw() + theme(legend.title = element_blank()) +
ylab("") + xlab("") +
geom_vline(xintercept=as.numeric(as.Date("1959-12-01")), linetype=2) +
ggtitle(paste0("ARIMA -- Holdout MAPE = ", round(100*MAPE,2), "%")) +
theme(axis.text.x=element_text(angle = -90, hjust = 0))
### Load the data
data("AirPassengers")
Y <- window(AirPassengers, start=c(1949, 1), end=c(1959,12))
y <- log10(Y)
### Run the bsts model
ss <- AddLocalLinearTrend(list(), y)
ss <- AddSeasonal(ss, y, nseasons = 12)
bsts.model <- bsts(y, state.specification = ss, niter = 500, ping=100, seed=2016)
## =-=-=-=-= Iteration 0 Mon Jan 16 21:38:12 2017
## =-=-=-=-=
## =-=-=-=-= Iteration 100 Mon Jan 16 21:38:12 2017
## =-=-=-=-=
## =-=-=-=-= Iteration 200 Mon Jan 16 21:38:12 2017
## =-=-=-=-=
## =-=-=-=-= Iteration 300 Mon Jan 16 21:38:12 2017
## =-=-=-=-=
## =-=-=-=-= Iteration 400 Mon Jan 16 21:38:12 2017
## =-=-=-=-=
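For reference, the local linear trend plus seasonal specification used above corresponds to the standard structural time series model (as in Scott and Varian):

$$y_t = \mu_t + \tau_t + \epsilon_t, \qquad \epsilon_t \sim N(0, \sigma_\epsilon^2)$$
$$\mu_{t+1} = \mu_t + \delta_t + u_t, \qquad u_t \sim N(0, \sigma_u^2)$$
$$\delta_{t+1} = \delta_t + v_t, \qquad v_t \sim N(0, \sigma_v^2)$$
$$\tau_{t+1} = -\sum_{s=0}^{S-2} \tau_{t-s} + w_t, \qquad w_t \sim N(0, \sigma_w^2)$$

where $\mu_t$ is the level, $\delta_t$ the slope of the trend, and $\tau_t$ the seasonal component with $S = 12$ seasons.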
### Get a suggested number of burn-in iterations to discard (here, the first 10% of the MCMC draws)
burn <- SuggestBurn(0.1, bsts.model)
### Predict
p <- predict.bsts(bsts.model, horizon = 12, burn = burn, quantiles = c(.025, .975))
### Actual versus predicted
d2 <- data.frame(
# fitted values and predictions: drop the burn-in draws, average the remaining
# one-step-ahead prediction errors at each time point, and recover fitted = y - error
c(10^as.numeric(-colMeans(bsts.model$one.step.prediction.errors[-(1:burn),])+y),
10^as.numeric(p$mean)),
# actual data and dates
as.numeric(AirPassengers),
as.Date(time(AirPassengers)))
names(d2) <- c("Fitted", "Actual", "Date")
### MAPE (mean absolute percentage error)
MAPE <- filter(d2, year(Date)>1959) %>% summarise(MAPE=mean(abs(Actual-Fitted)/Actual))
### 95% forecast credible interval
posterior.interval <- cbind.data.frame(
10^as.numeric(p$interval[1,]),
10^as.numeric(p$interval[2,]),
subset(d2, year(Date)>1959)$Date)
names(posterior.interval) <- c("LL", "UL", "Date")
### Join intervals to the forecast
d3 <- left_join(d2, posterior.interval, by="Date")
### Plot actual versus predicted with credible intervals for the holdout period
ggplot(data=d3, aes(x=Date)) +
geom_line(aes(y=Actual, colour = "Actual"), size=1.2) +
geom_line(aes(y=Fitted, colour = "Fitted"), size=1.2, linetype=2) +
theme_bw() + theme(legend.title = element_blank()) + ylab("") + xlab("") +
geom_vline(xintercept=as.numeric(as.Date("1959-12-01")), linetype=2) +
geom_ribbon(aes(ymin=LL, ymax=UL), fill="grey", alpha=0.5) +
ggtitle(paste0("BSTS -- Holdout MAPE = ", round(100*MAPE,2), "%")) +
theme(axis.text.x=element_text(angle = -90, hjust = 0))
### The forecast can also be plotted with the plot.bsts function that comes
### with the bsts package (see the sketch below). Here, extract quantiles of
### the posterior predictive distribution manually:
credible.interval <- cbind.data.frame(
10^as.numeric(apply(p$distribution, 2,function(f){quantile(f,0.75)})),
10^as.numeric(apply(p$distribution, 2,function(f){median(f)})),
10^as.numeric(apply(p$distribution, 2,function(f){quantile(f,0.25)})),
subset(d3, year(Date)>1959)$Date)
names(credible.interval) <- c("p75", "Median", "p25", "Date")
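For a quick look without building these data frames by hand, the generic plot methods shipped with bsts can be used directly. A minimal sketch (these calls dispatch to plot.bsts and plot.bsts.prediction; note they plot on the log10 scale, since that is the scale the model was fit on):

plot(bsts.model, "components")  # posterior of the trend and seasonal components
plot(p)                         # forecast with credible interval, log10 scale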
### Extract the components
components <- cbind.data.frame(
colMeans(bsts.model$state.contributions[-(1:burn),"trend",]),
colMeans(bsts.model$state.contributions[-(1:burn),"seasonal.12.1",]),
as.Date(time(Y)))
names(components) <- c("Trend", "Seasonality", "Date")
components <- melt(components, id="Date")
names(components) <- c("Date", "Component", "Value")
### Plot
ggplot(data=components, aes(x=Date, y=Value)) + geom_line() +
theme_bw() + theme(legend.title = element_blank()) + ylab("") + xlab("") +
facet_grid(Component ~ ., scales="free") + guides(colour=FALSE) +
theme(axis.text.x=element_text(angle = -90, hjust = 0))
Another advantage of Bayesian structural models is the ability to use spike-and-slab priors. This provides a powerful way of reducing a large set of correlated variables into a parsimonious model, while also imposing prior beliefs on the model. Furthermore, by using priors on the regressor coefficients, the model incorporates uncertainties of the coefficient estimates when producing the credible interval for the forecasts.
As the name suggests, a spike-and-slab prior consists of two parts: the spike and the slab. The spike governs the probability that a given variable is included in the model (i.e., has a non-zero coefficient). The slab shrinks the non-zero coefficients toward prior expectations (often zero).
For more technical details, see Brodersen et al., "Inferring causal impact using Bayesian structural time-series models," Annals of Applied Statistics (2015), the paper behind the CausalImpact package.
For model selection, we essentially use the "spike" part of the prior to select variables and the "slab" part to shrink the coefficients toward zero (akin to ridge regression).
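Schematically, for a coefficient $\beta_j$ with inclusion indicator $\gamma_j$, the canonical spike-and-slab prior can be written as (notation for illustration; bsts uses a conjugate variant of this form):

$$\gamma_j \sim \text{Bernoulli}(\pi_j), \qquad \beta_j \mid \gamma_j = 1 \sim N(b_j, \sigma_j^2), \qquad \beta_j \mid \gamma_j = 0 \sim \delta_0$$

where $\delta_0$ is a point mass at zero. The prior inclusion probability $\pi_j$ is the "spike," and the conditional normal centered on the prior expectation $b_j$ is the "slab."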
### Fit the model with regressors
data(iclaims)
head(initial.claims)
## iclaimsNSA michigan.unemployment idaho.unemployment
## 2004-01-04 2.536 1.488 -0.561
## 2004-01-11 0.882 1.100 -0.992
## 2004-01-18 -0.077 1.155 -1.212
## 2004-01-25 0.135 0.530 -1.034
## 2004-02-01 0.373 0.698 -1.195
## 2004-02-08 -0.437 0.441 -1.386
## pennsylvania.unemployment unemployment.filing
## 2004-01-04 1.773 0.909
## 2004-01-11 0.900 0.148
## 2004-01-18 1.477 0.210
## 2004-01-25 1.244 -0.308
## 2004-02-01 0.643 0.570
## 2004-02-08 0.049 -0.182
## new.jersey.unemployment department.of.unemployment
## 2004-01-04 2.021 1.640
## 2004-01-11 1.280 1.014
## 2004-01-18 1.080 1.009
## 2004-01-25 1.067 0.734
## 2004-02-01 1.125 0.502
## 2004-02-08 0.615 -0.226
## illinois.unemployment rhode.island.unemployment
## 2004-01-04 0.300 1.750
## 2004-01-11 0.180 -0.011
## 2004-01-18 0.119 -0.028
## 2004-01-25 0.727 -0.230
## 2004-02-01 0.598 0.625
## 2004-02-08 -0.199 -0.401
## unemployment.office filing.unemployment
## 2004-01-04 0.498 0.073
## 2004-01-11 0.264 0.584
## 2004-01-18 0.031 0.448
## 2004-01-25 -0.143 -0.269
## 2004-02-01 -0.219 -1.006
## 2004-02-08 -0.526 -1.124
ss <- AddLocalLinearTrend(list(), initial.claims$iclaimsNSA)
ss <- AddSeasonal(ss, initial.claims$iclaimsNSA, nseasons = 52)
bsts.reg <- bsts(iclaimsNSA ~ ., state.specification = ss, data =
initial.claims, niter = 500, ping=100, seed=2016)
## =-=-=-=-= Iteration 0 Mon Jan 16 21:38:14 2017
## =-=-=-=-=
## =-=-=-=-= Iteration 100 Mon Jan 16 21:38:16 2017
## =-=-=-=-=
## =-=-=-=-= Iteration 200 Mon Jan 16 21:38:18 2017
## =-=-=-=-=
## =-=-=-=-= Iteration 300 Mon Jan 16 21:38:20 2017
## =-=-=-=-=
## =-=-=-=-= Iteration 400 Mon Jan 16 21:38:22 2017
## =-=-=-=-=
### Get the number of burn-ins to discard
burn <- SuggestBurn(0.1, bsts.reg)
### Helper function: mean of the non-zero draws of a coefficient
### (i.e., the posterior mean conditional on the variable being included)
PositiveMean <- function(b) {
  b <- b[abs(b) > 0]  # keep only draws where the variable was selected
  if (length(b) > 0)
    return(mean(b))
  return(0)
}
### Get the average coefficients when variables were selected (non-zero slopes)
coeff <- data.frame(melt(apply(bsts.reg$coefficients[-(1:burn),], 2, PositiveMean)))
coeff$Variable <- as.character(row.names(coeff))
ggplot(data=coeff, aes(x=Variable, y=value)) +
geom_bar(stat="identity", position="identity") +
theme(axis.text.x=element_text(angle = -90, hjust = 0)) +
xlab("") + ylab("") + ggtitle("Average coefficients")
### Inclusion probabilities -- i.e., how often were the variables selected
inclusionprobs <- melt(colMeans(bsts.reg$coefficients[-(1:burn),] != 0))
inclusionprobs$Variable <- as.character(row.names(inclusionprobs))
ggplot(data=inclusionprobs, aes(x=Variable, y=value)) +
geom_bar(stat="identity", position="identity") +
theme(axis.text.x=element_text(angle = -90, hjust = 0)) +
xlab("") + ylab("") + ggtitle("Inclusion probabilities")
### Helper function: 75th percentile of the non-zero draws of a coefficient
P75 <- function(b) {
  b <- b[abs(b) > 0]  # keep only draws where the variable was selected
  if (length(b) > 0)
    return(quantile(b, 0.75))
  return(0)
}
p75 <- data.frame(melt(apply(bsts.reg$coefficients[-(1:burn),], 2, P75)))
p75$Variable <- as.character(row.names(p75))
ggplot(data=p75, aes(x=Variable, y=value)) +
geom_bar(stat="identity", position="identity") +
theme(axis.text.x=element_text(angle = -90, hjust = 0)) +
xlab("") + ylab("") + ggtitle("Average coefficients")
### Get the components
components.withreg <- cbind.data.frame(
colMeans(bsts.reg$state.contributions[-(1:burn),"trend",]),
colMeans(bsts.reg$state.contributions[-(1:burn),"seasonal.52.1",]),
colMeans(bsts.reg$state.contributions[-(1:burn),"regression",]),
as.Date(time(initial.claims)))
names(components.withreg) <- c("Trend", "Seasonality", "Regression", "Date")
components.withreg <- melt(components.withreg, id.vars="Date")
names(components.withreg) <- c("Date", "Component", "Value")
ggplot(data=components.withreg, aes(x=Date, y=Value)) + geom_line() +
theme_bw() + theme(legend.title = element_blank()) + ylab("") + xlab("") +
facet_grid(Component ~ ., scales="free") + guides(colour=FALSE) +
theme(axis.text.x=element_text(angle = -90, hjust = 0))
In the example above, we used the spike-and-slab prior as a way to select variables and promote sparsity. However, we can also use this framework to impose prior beliefs on the model. These prior beliefs could come from an outside study or a previous version of the model.
In the bsts package, this is done by passing a prior object created with the SpikeSlabPrior function. In this example, we specify a prior mean of 0.6 for the coefficient of the variable unemployment.office and force that variable into the model by setting its prior inclusion probability (the "spike") to 1. We give the priors a weight of 200, measured in observation count, which is fairly large given that the dataset has 456 records; hence we should expect the posterior mean to stay very close to 0.6.
### One entry per column of model.matrix (intercept first);
### the 10th slot corresponds to unemployment.office
prior.spikes <- c(0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,1,0.1)
prior.mean <- c(0,0,0,0,0,0,0,0,0,0.6,0)
### Set up the priors
prior <- SpikeSlabPrior(x=model.matrix(iclaimsNSA ~ ., data=initial.claims),
y=initial.claims$iclaimsNSA,
prior.information.weight = 200,
prior.inclusion.probabilities = prior.spikes,
optional.coefficient.estimate = prior.mean)
### Run the bsts model with the specified priors
ss <- AddLocalLinearTrend(list(), initial.claims$iclaimsNSA)
ss <- AddSeasonal(ss, initial.claims$iclaimsNSA, nseasons = 52)
bsts.reg.priors <- bsts(iclaimsNSA ~ ., state.specification = ss,
data = initial.claims,
niter = 500,
prior=prior,
ping=0, seed=2016)
### Get the average coefficients when variables were selected (non-zero slopes),
### recomputing the burn-in for the new model object
burn <- SuggestBurn(0.1, bsts.reg.priors)
coeff <- data.frame(melt(apply(bsts.reg.priors$coefficients[-(1:burn),], 2, PositiveMean)))
coeff$Variable <- as.character(row.names(coeff))
ggplot(data=coeff, aes(x=Variable, y=value)) +
geom_bar(stat="identity", position="identity") +
theme(axis.text.x=element_text(angle = -90, hjust = 0)) +
xlab("") + ylab("")