Multiple Time Series Forecasting Practice
Suitable for Forecasting at Scale
Data: R fpp3 package “aus_arrivals” dataset
options(warn=-1)
# Libraries
library(rsample)
library(parsnip)
library(recipes)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'recipes'
## The following object is masked from 'package:stats':
##
## step
library(workflows)
library(dplyr)
library(tidyr)
library(sknifedatar)
##
## Attaching package: 'sknifedatar'
## The following object is masked from 'package:rsample':
##
## sliding_window
library(fpp3)
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble 3.1.2 v tsibbledata 0.3.0
## v lubridate 1.7.10 v feasts 0.2.2
## v ggplot2 3.3.5 v fable 0.3.1
## v tsibble 1.0.1
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag() masks stats::lag()
## x fabletools::null_model() masks parsnip::null_model()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
library(ggplot2)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tsibble)
library(imputeTS)
library(tseries)
##
## Attaching package: 'tseries'
## The following object is masked from 'package:imputeTS':
##
## na.remove
#Modeling
library(modeltime)
library(tidymodels)
## Registered S3 method overwritten by 'tune':
## method from
## required_pkgs.model_spec parsnip
## -- Attaching packages -------------------------------------- tidymodels 0.1.3 --
## v broom 0.7.8 v purrr 0.3.4
## v dials 0.0.9 v tune 0.1.5
## v infer 0.5.4 v workflowsets 0.0.2
## v modeldata 0.1.0 v yardstick 0.0.8
## -- Conflicts ----------------------------------------- tidymodels_conflicts() --
## x yardstick::accuracy() masks forecast::accuracy(), fabletools::accuracy()
## x purrr::discard() masks scales::discard()
## x dplyr::filter() masks stats::filter()
## x infer::generate() masks fabletools::generate()
## x dplyr::lag() masks stats::lag()
## x fabletools::null_model() masks parsnip::null_model()
## x sknifedatar::sliding_window() masks rsample::sliding_window()
## x recipes::step() masks stats::step()
## * Use tidymodels_prefer() to resolve common conflicts.
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v readr 1.4.0 v forcats 0.5.1
## v stringr 1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x readr::col_factor() masks scales::col_factor()
## x lubridate::date() masks base::date()
## x purrr::discard() masks scales::discard()
## x dplyr::filter() masks stats::filter()
## x stringr::fixed() masks recipes::fixed()
## x tsibble::intersect() masks lubridate::intersect(), base::intersect()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks lubridate::setdiff(), base::setdiff()
## x readr::spec() masks yardstick::spec()
## x tsibble::union() masks lubridate::union(), base::union()
library(timetk)
library(lubridate)
#Plotting
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(reactable)
tsdf <- aus_arrivals
head(tsdf)
## # A tsibble: 6 x 3 [1Q]
## # Key: Origin [1]
## Quarter Origin Arrivals
## <qtr> <chr> <int>
## 1 1981 Q1 Japan 14763
## 2 1981 Q2 Japan 9321
## 3 1981 Q3 Japan 10166
## 4 1981 Q4 Japan 19509
## 5 1982 Q1 Japan 17117
## 6 1982 Q2 Japan 10617
tail(tsdf)
## # A tsibble: 6 x 3 [1Q]
## # Key: Origin [1]
## Quarter Origin Arrivals
## <qtr> <chr> <int>
## 1 2011 Q2 US 101814
## 2 2011 Q3 US 101925
## 3 2011 Q4 US 127150
## 4 2012 Q1 US 129520
## 5 2012 Q2 US 105700
## 6 2012 Q3 US 106540
sum(is.na(tsdf))
## [1] 0
ggplotly(ggplot(tsdf, aes(x=Quarter,y=Arrivals))+ geom_line(aes(col = Origin)) +
labs(y = "Arrivals", x = NULL, title = "Quarterly international arrivals to Australia 1981 Q1 -2012 Q3") +
facet_wrap(~ Origin, scale = "free_y", ncol = 1) +
theme_bw() +
theme(legend.position = "none", plot.title = element_text(hjust = 0.5)))
tsdf$Quarter<-as.Date(tsdf$Quarter, format = "%Y/%m")
tsdf <- tsdf %>%
rename(
value = Arrivals,
date = Quarter
)
nested_serie = tsdf %>% nest(nested_column=-Origin)
nested_serie
## # A tibble: 4 x 2
## Origin nested_column
## <chr> <list>
## 1 Japan <tibble [127 x 2]>
## 2 NZ <tibble [127 x 2]>
## 3 UK <tibble [127 x 2]>
## 4 US <tibble [127 x 2]>
#Recipes for machine learning algorithms
recipe_1 <- recipe(value ~ ., data = tsdf %>% select(date,value)) %>%
step_timeseries_signature(date) %>%
step_rm(matches("(.iso$)|(.xts$)|(day)|(hour)|(minute)|(second)|(am.pm)")) %>%
step_mutate(date_month = factor(date_month, ordered = TRUE)) %>%
step_dummy(all_nominal(), one_hot = TRUE)
#Define the models
m_auto_arima <- arima_reg() %>% set_engine('auto_arima')
m_arima_boost <- arima_boost() %>% set_engine('auto_arima_xgboost')
m_stlm_ets <- seasonal_reg() %>% set_engine('stlm_ets')
m_ets <- parsnip::set_engine(modeltime::exp_smoothing(), 'ets')
m_ets_croston <- parsnip::set_engine(modeltime::exp_smoothing(), 'croston')
m_ets_theta <- parsnip::set_engine(modeltime::exp_smoothing(), 'theta')
m_auto_prophet <- prophet_reg() %>% set_engine(engine = "prophet")
m_prophet_boost <- prophet_boost() %>% set_engine(engine = "prophet_xgboost")
m_stlm_arima <- seasonal_reg() %>% set_engine("stlm_arima")
m_mean <- window_reg() %>% set_engine('window_function', window_function = mean)
m_median <- window_reg() %>% set_engine('window_function', window_function = median)
m_naive <- naive_reg() %>% set_engine("naive")
m_snaive <- naive_reg() %>% set_engine("snaive")
m_nnetar <- workflow() %>%
add_recipe(recipe_1) %>%
add_model(nnetar_reg() %>% set_engine("nnetar"))
#Fit the models
model_table <- modeltime_multifit(serie = nested_serie,
.prop = 0.8,
m_auto_arima,
m_arima_boost,
m_stlm_ets,
m_ets,
m_ets_croston,
m_ets_theta,
m_auto_prophet,
m_prophet_boost,
m_stlm_arima,
m_mean,
m_median,
m_naive,
m_snaive,
m_nnetar
)
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## window_reg: Using window_size = Inf
## window_reg: Using window_size = Inf
## window_reg: Using window_size = Inf
## window_reg: Using window_size = Inf
## window_reg: Using window_size = Inf
## window_reg: Using window_size = Inf
## window_reg: Using window_size = Inf
## window_reg: Using window_size = Inf
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
##
## -- 4 models fitted <3 ----------------------------------------------------------
model_table
## $table_time
## # A tibble: 4 x 18
## Origin nested_column m_auto_arima m_arima_boost m_stlm_ets m_ets m_ets_croston
## <chr> <list> <list> <list> <list> <lis> <list>
## 1 Japan <tibble [127~ <fit[+]> <fit[+]> <fit[+]> <fit~ <fit[+]>
## 2 NZ <tibble [127~ <fit[+]> <fit[+]> <fit[+]> <fit~ <fit[+]>
## 3 UK <tibble [127~ <fit[+]> <fit[+]> <fit[+]> <fit~ <fit[+]>
## 4 US <tibble [127~ <fit[+]> <fit[+]> <fit[+]> <fit~ <fit[+]>
## # ... with 11 more variables: m_ets_theta <list>, m_auto_prophet <list>,
## # m_prophet_boost <list>, m_stlm_arima <list>, m_mean <list>,
## # m_median <list>, m_naive <list>, m_snaive <list>, m_nnetar <list>,
## # nested_model <list>, calibration <list>
##
## $models_accuracy
## # A tibble: 56 x 10
## name_serie .model_id .model_desc .type mae mape mase smape rmse
## <chr> <int> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Japan 1 ARIMA(0,1,1)(0,1,~ Test 66042. 71.7 2.68 49.0 72180.
## 2 Japan 2 ARIMA(0,1,1)(0,1,~ Test 66042. 71.7 2.68 49.0 72180.
## 3 Japan 3 SEASONAL DECOMP: ~ Test 65643. 71.8 2.66 49.0 71352.
## 4 Japan 4 ETS(M,A,M) Test 60166. 65.6 2.44 45.9 66161.
## 5 Japan 5 CROSTON METHOD Test 65372. 74.6 2.65 49.0 72395.
## 6 Japan 6 THETA METHOD Test 85896. 96.6 3.49 58.8 93558.
## 7 Japan 7 PROPHET Test 52783. 59.3 2.14 42.1 58402.
## 8 Japan 8 PROPHET Test 52783. 59.3 2.14 42.1 58402.
## 9 Japan 9 SEASONAL DECOMP: ~ Test 86712. 95.1 3.52 58.9 94699.
## 10 Japan 10 WINDOW FUNC [INF] Test 30251. 34.3 1.23 27.5 35090.
## # ... with 46 more rows, and 1 more variable: rsq <dbl>
#Train/Test 80/20 Splits
forecast <- modeltime_multiforecast(
model_table$table_time,
.prop = 0.8
)
forecast %>%
select(Origin, nested_forecast) %>%
unnest(nested_forecast) %>%
group_by(Origin) %>%
plot_modeltime_forecast(
.legend_max_width = 12,
.facet_ncol = 1,
.line_size = 0.5,
.interactive = TRUE,
.facet_scales = 'free_y',
.title='Forecasting test')
best_model <- modeltime_multibestmodel(
.table = model_table$table_time,
.metric = "rmse",
.minimize = TRUE,
.forecast = FALSE
)
best_model
## # A tibble: 4 x 19
## Origin nested_column m_auto_arima m_arima_boost m_stlm_ets m_ets m_ets_croston
## <chr> <list> <list> <list> <list> <lis> <list>
## 1 Japan <tibble [127~ <fit[+]> <fit[+]> <fit[+]> <fit~ <fit[+]>
## 2 NZ <tibble [127~ <fit[+]> <fit[+]> <fit[+]> <fit~ <fit[+]>
## 3 UK <tibble [127~ <fit[+]> <fit[+]> <fit[+]> <fit~ <fit[+]>
## 4 US <tibble [127~ <fit[+]> <fit[+]> <fit[+]> <fit~ <fit[+]>
## # ... with 12 more variables: m_ets_theta <list>, m_auto_prophet <list>,
## # m_prophet_boost <list>, m_stlm_arima <list>, m_mean <list>,
## # m_median <list>, m_naive <list>, m_snaive <list>, m_nnetar <list>,
## # nested_model <list>, calibration <list>, best_model <list>
#Refit on the whole time series
model_refit <- modeltime_multirefit(models_table = best_model)
## window_reg: Using window_size = Inf
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
model_refit
## # A tibble: 4 x 19
## Origin nested_column m_auto_arima m_arima_boost m_stlm_ets m_ets m_ets_croston
## <chr> <list> <list> <list> <list> <lis> <list>
## 1 Japan <tibble [127~ <fit[+]> <fit[+]> <fit[+]> <fit~ <fit[+]>
## 2 NZ <tibble [127~ <fit[+]> <fit[+]> <fit[+]> <fit~ <fit[+]>
## 3 UK <tibble [127~ <fit[+]> <fit[+]> <fit[+]> <fit~ <fit[+]>
## 4 US <tibble [127~ <fit[+]> <fit[+]> <fit[+]> <fit~ <fit[+]>
## # ... with 12 more variables: m_ets_theta <list>, m_auto_prophet <list>,
## # m_prophet_boost <list>, m_stlm_arima <list>, m_mean <list>,
## # m_median <list>, m_naive <list>, m_snaive <list>, m_nnetar <list>,
## # nested_model <list>, calibration <list>, best_model <list>
#Forecast the number of international arrivals to Australia for the next year
forecast <- modeltime_multiforecast(
model_refit,
.prop = 0.8,
.h = "1 year"
)
forecasting_table <- forecast %>%
select(Origin, nested_forecast) %>%
unnest(nested_forecast) %>%
group_by(.index,Origin)
#Summary table
forecasting_table %>% filter(.key=="prediction") %>% select (Origin, .key, .index,.value,.conf_lo, .conf_hi, .model_details)
## # A tibble: 16 x 7
## # Groups: .index, Origin [16]
## Origin .key .index .value .conf_lo .conf_hi .model_details
## <chr> <fct> <date> <dbl> <dbl> <dbl> <chr>
## 1 Japan predict~ 2012-10-01 1.22e5 52635. 191526. WINDOW FUNC [INF]
## 2 Japan predict~ 2013-01-01 1.22e5 52635. 191526. WINDOW FUNC [INF]
## 3 Japan predict~ 2013-04-01 1.22e5 52635. 191526. WINDOW FUNC [INF]
## 4 Japan predict~ 2013-07-01 1.22e5 52635. 191526. WINDOW FUNC [INF]
## 5 NZ predict~ 2012-10-01 3.14e5 287274. 341581. SEASONAL DECOMP: ARIMA(1~
## 6 NZ predict~ 2013-01-01 2.39e5 212245. 266552. SEASONAL DECOMP: ARIMA(1~
## 7 NZ predict~ 2013-04-01 2.99e5 271355. 325662. SEASONAL DECOMP: ARIMA(1~
## 8 NZ predict~ 2013-07-01 3.29e5 302158. 356465. SEASONAL DECOMP: ARIMA(1~
## 9 UK predict~ 2012-10-01 2.02e5 166124. 238356. SNAIVE [4]
## 10 UK predict~ 2013-01-01 1.95e5 158524. 230756. SNAIVE [4]
## 11 UK predict~ 2013-04-01 9.30e4 56854. 129086. SNAIVE [4]
## 12 UK predict~ 2013-07-01 1.02e5 65574. 137806. SNAIVE [4]
## 13 US predict~ 2012-10-01 1.21e5 108258. 134260. SEASONAL DECOMP: ETS(A,N~
## 14 US predict~ 2013-01-01 1.24e5 111354. 137356. SEASONAL DECOMP: ETS(A,N~
## 15 US predict~ 2013-04-01 1.04e5 90846. 116849. SEASONAL DECOMP: ETS(A,N~
## 16 US predict~ 2013-07-01 1.08e5 95123. 121125. SEASONAL DECOMP: ETS(A,N~
#Forecast plot
forecast %>%
select(Origin, nested_forecast) %>%
unnest(nested_forecast) %>%
group_by(Origin) %>%
plot_modeltime_forecast(
.legend_max_width = 12,
.facet_ncol = 1,
.line_size = 0.5,
.interactive = TRUE,
.facet_scales = 'free_y',
.title='Australia International Arrivals Forecasting for Next Year'
)