Author: Linsi Lin
Date: 08/02/2021





Multiple Time Series Forecasting Practice
Suitable for Forecasting at Scale
Data: R fpp3 package “aus_arrivals” dataset


options(warn=-1)
# Libraries
library(rsample)
library(parsnip)
library(recipes)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'recipes'
## The following object is masked from 'package:stats':
## 
##     step
library(workflows)
library(dplyr)
library(tidyr)
library(sknifedatar)
## 
## Attaching package: 'sknifedatar'
## The following object is masked from 'package:rsample':
## 
##     sliding_window
library(fpp3)
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble      3.1.2      v tsibbledata 0.3.0 
## v lubridate   1.7.10     v feasts      0.2.2 
## v ggplot2     3.3.5      v fable       0.3.1 
## v tsibble     1.0.1
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date()        masks base::date()
## x dplyr::filter()          masks stats::filter()
## x tsibble::intersect()     masks base::intersect()
## x tsibble::interval()      masks lubridate::interval()
## x dplyr::lag()             masks stats::lag()
## x fabletools::null_model() masks parsnip::null_model()
## x tsibble::setdiff()       masks base::setdiff()
## x tsibble::union()         masks base::union()
library(ggplot2)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(tsibble)
library(imputeTS)
library(tseries)
## 
## Attaching package: 'tseries'
## The following object is masked from 'package:imputeTS':
## 
##     na.remove
#Modeling
library(modeltime)
library(tidymodels)
## Registered S3 method overwritten by 'tune':
##   method                   from   
##   required_pkgs.model_spec parsnip
## -- Attaching packages -------------------------------------- tidymodels 0.1.3 --
## v broom        0.7.8     v purrr        0.3.4
## v dials        0.0.9     v tune         0.1.5
## v infer        0.5.4     v workflowsets 0.0.2
## v modeldata    0.1.0     v yardstick    0.0.8
## -- Conflicts ----------------------------------------- tidymodels_conflicts() --
## x yardstick::accuracy()         masks forecast::accuracy(), fabletools::accuracy()
## x purrr::discard()              masks scales::discard()
## x dplyr::filter()               masks stats::filter()
## x infer::generate()             masks fabletools::generate()
## x dplyr::lag()                  masks stats::lag()
## x fabletools::null_model()      masks parsnip::null_model()
## x sknifedatar::sliding_window() masks rsample::sliding_window()
## x recipes::step()               masks stats::step()
## * Use tidymodels_prefer() to resolve common conflicts.
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v readr   1.4.0     v forcats 0.5.1
## v stringr 1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x readr::col_factor()      masks scales::col_factor()
## x lubridate::date()        masks base::date()
## x purrr::discard()         masks scales::discard()
## x dplyr::filter()          masks stats::filter()
## x stringr::fixed()         masks recipes::fixed()
## x tsibble::intersect()     masks lubridate::intersect(), base::intersect()
## x tsibble::interval()      masks lubridate::interval()
## x dplyr::lag()             masks stats::lag()
## x tsibble::setdiff()       masks lubridate::setdiff(), base::setdiff()
## x readr::spec()            masks yardstick::spec()
## x tsibble::union()         masks lubridate::union(), base::union()
library(timetk)
library(lubridate)


#Plotting
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(reactable)
tsdf <- aus_arrivals
head(tsdf)
## # A tsibble: 6 x 3 [1Q]
## # Key:       Origin [1]
##   Quarter Origin Arrivals
##     <qtr> <chr>     <int>
## 1 1981 Q1 Japan     14763
## 2 1981 Q2 Japan      9321
## 3 1981 Q3 Japan     10166
## 4 1981 Q4 Japan     19509
## 5 1982 Q1 Japan     17117
## 6 1982 Q2 Japan     10617
tail(tsdf)
## # A tsibble: 6 x 3 [1Q]
## # Key:       Origin [1]
##   Quarter Origin Arrivals
##     <qtr> <chr>     <int>
## 1 2011 Q2 US       101814
## 2 2011 Q3 US       101925
## 3 2011 Q4 US       127150
## 4 2012 Q1 US       129520
## 5 2012 Q2 US       105700
## 6 2012 Q3 US       106540
sum(is.na(tsdf))
## [1] 0
ggplotly(ggplot(tsdf, aes(x=Quarter,y=Arrivals))+ geom_line(aes(col = Origin)) +
           labs(y = "Arrivals", x = NULL, title = "Quarterly international arrivals to Australia  1981 Q1 -2012 Q3") +
           facet_wrap(~ Origin, scale = "free_y", ncol = 1) +
           theme_bw() +
           theme(legend.position = "none", plot.title = element_text(hjust = 0.5)))
tsdf$Quarter<-as.Date(tsdf$Quarter, format = "%Y/%m")

tsdf <- tsdf %>% 
  rename(
    value = Arrivals,
    date = Quarter
  )

nested_serie = tsdf %>% nest(nested_column=-Origin)
nested_serie
## # A tibble: 4 x 2
##   Origin nested_column     
##   <chr>  <list>            
## 1 Japan  <tibble [127 x 2]>
## 2 NZ     <tibble [127 x 2]>
## 3 UK     <tibble [127 x 2]>
## 4 US     <tibble [127 x 2]>
#Recipes for machine learning algorithms
recipe_1 <- recipe(value ~ ., data = tsdf %>% select(date,value)) %>%
  step_timeseries_signature(date) %>%
  step_rm(matches("(.iso$)|(.xts$)|(day)|(hour)|(minute)|(second)|(am.pm)")) %>%
  step_mutate(date_month = factor(date_month, ordered = TRUE)) %>%
  step_dummy(all_nominal(), one_hot = TRUE)
#Define the models

m_auto_arima <- arima_reg() %>% set_engine('auto_arima') 

m_arima_boost <- arima_boost() %>% set_engine('auto_arima_xgboost') 

m_stlm_ets <- seasonal_reg() %>% set_engine('stlm_ets') 

m_ets <- parsnip::set_engine(modeltime::exp_smoothing(), 'ets')

m_ets_croston <- parsnip::set_engine(modeltime::exp_smoothing(), 'croston')

m_ets_theta <- parsnip::set_engine(modeltime::exp_smoothing(), 'theta')

m_auto_prophet <- prophet_reg() %>%  set_engine(engine = "prophet")

m_prophet_boost <- prophet_boost() %>%  set_engine(engine = "prophet_xgboost")

m_stlm_arima <- seasonal_reg() %>% set_engine("stlm_arima")

m_mean <- window_reg() %>% set_engine('window_function', window_function = mean)

m_median <- window_reg() %>% set_engine('window_function', window_function = median)

m_naive <- naive_reg() %>% set_engine("naive")

m_snaive <- naive_reg() %>% set_engine("snaive")

m_nnetar <- workflow() %>%
  add_recipe(recipe_1) %>%
  add_model(nnetar_reg() %>% set_engine("nnetar"))
#Fit the models

model_table <- modeltime_multifit(serie = nested_serie,
                                       .prop = 0.8,
                                       m_auto_arima,
                                       m_arima_boost,
                                       m_stlm_ets,
                                       m_ets,
                                       m_ets_croston,
                                       m_ets_theta,
                                       m_auto_prophet,
                                       m_prophet_boost,
                                       m_stlm_arima,
                                       m_mean,
                                       m_median,
                                       m_naive,
                                       m_snaive,
                                       m_nnetar
                                       )
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## window_reg: Using window_size = Inf
## window_reg: Using window_size = Inf
## window_reg: Using window_size = Inf
## window_reg: Using window_size = Inf
## window_reg: Using window_size = Inf
## window_reg: Using window_size = Inf
## window_reg: Using window_size = Inf
## window_reg: Using window_size = Inf
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## 
## -- 4 models fitted <3 ----------------------------------------------------------
model_table
## $table_time
## # A tibble: 4 x 18
##   Origin nested_column m_auto_arima m_arima_boost m_stlm_ets m_ets m_ets_croston
##   <chr>  <list>        <list>       <list>        <list>     <lis> <list>       
## 1 Japan  <tibble [127~ <fit[+]>     <fit[+]>      <fit[+]>   <fit~ <fit[+]>     
## 2 NZ     <tibble [127~ <fit[+]>     <fit[+]>      <fit[+]>   <fit~ <fit[+]>     
## 3 UK     <tibble [127~ <fit[+]>     <fit[+]>      <fit[+]>   <fit~ <fit[+]>     
## 4 US     <tibble [127~ <fit[+]>     <fit[+]>      <fit[+]>   <fit~ <fit[+]>     
## # ... with 11 more variables: m_ets_theta <list>, m_auto_prophet <list>,
## #   m_prophet_boost <list>, m_stlm_arima <list>, m_mean <list>,
## #   m_median <list>, m_naive <list>, m_snaive <list>, m_nnetar <list>,
## #   nested_model <list>, calibration <list>
## 
## $models_accuracy
## # A tibble: 56 x 10
##    name_serie .model_id .model_desc        .type    mae  mape  mase smape   rmse
##    <chr>          <int> <chr>              <chr>  <dbl> <dbl> <dbl> <dbl>  <dbl>
##  1 Japan              1 ARIMA(0,1,1)(0,1,~ Test  66042.  71.7  2.68  49.0 72180.
##  2 Japan              2 ARIMA(0,1,1)(0,1,~ Test  66042.  71.7  2.68  49.0 72180.
##  3 Japan              3 SEASONAL DECOMP: ~ Test  65643.  71.8  2.66  49.0 71352.
##  4 Japan              4 ETS(M,A,M)         Test  60166.  65.6  2.44  45.9 66161.
##  5 Japan              5 CROSTON METHOD     Test  65372.  74.6  2.65  49.0 72395.
##  6 Japan              6 THETA METHOD       Test  85896.  96.6  3.49  58.8 93558.
##  7 Japan              7 PROPHET            Test  52783.  59.3  2.14  42.1 58402.
##  8 Japan              8 PROPHET            Test  52783.  59.3  2.14  42.1 58402.
##  9 Japan              9 SEASONAL DECOMP: ~ Test  86712.  95.1  3.52  58.9 94699.
## 10 Japan             10 WINDOW FUNC [INF]  Test  30251.  34.3  1.23  27.5 35090.
## # ... with 46 more rows, and 1 more variable: rsq <dbl>
#Train/Test 80/20 Splits
forecast <- modeltime_multiforecast(
  model_table$table_time,
  .prop = 0.8
)

forecast %>% 
  select(Origin, nested_forecast) %>% 
  unnest(nested_forecast) %>% 
  group_by(Origin) %>% 
  plot_modeltime_forecast(
    .legend_max_width = 12,
    .facet_ncol = 1, 
    .line_size = 0.5,
    .interactive = TRUE,
    .facet_scales = 'free_y',
    .title='Forecasting test') 
best_model <- modeltime_multibestmodel(
  .table = model_table$table_time,
  .metric = "rmse",
  .minimize = TRUE,
  .forecast = FALSE
)

best_model
## # A tibble: 4 x 19
##   Origin nested_column m_auto_arima m_arima_boost m_stlm_ets m_ets m_ets_croston
##   <chr>  <list>        <list>       <list>        <list>     <lis> <list>       
## 1 Japan  <tibble [127~ <fit[+]>     <fit[+]>      <fit[+]>   <fit~ <fit[+]>     
## 2 NZ     <tibble [127~ <fit[+]>     <fit[+]>      <fit[+]>   <fit~ <fit[+]>     
## 3 UK     <tibble [127~ <fit[+]>     <fit[+]>      <fit[+]>   <fit~ <fit[+]>     
## 4 US     <tibble [127~ <fit[+]>     <fit[+]>      <fit[+]>   <fit~ <fit[+]>     
## # ... with 12 more variables: m_ets_theta <list>, m_auto_prophet <list>,
## #   m_prophet_boost <list>, m_stlm_arima <list>, m_mean <list>,
## #   m_median <list>, m_naive <list>, m_snaive <list>, m_nnetar <list>,
## #   nested_model <list>, calibration <list>, best_model <list>
#Refit on the whole time series
model_refit <- modeltime_multirefit(models_table = best_model)
## window_reg: Using window_size = Inf
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
## frequency = 4 observations per 1 year
model_refit
## # A tibble: 4 x 19
##   Origin nested_column m_auto_arima m_arima_boost m_stlm_ets m_ets m_ets_croston
##   <chr>  <list>        <list>       <list>        <list>     <lis> <list>       
## 1 Japan  <tibble [127~ <fit[+]>     <fit[+]>      <fit[+]>   <fit~ <fit[+]>     
## 2 NZ     <tibble [127~ <fit[+]>     <fit[+]>      <fit[+]>   <fit~ <fit[+]>     
## 3 UK     <tibble [127~ <fit[+]>     <fit[+]>      <fit[+]>   <fit~ <fit[+]>     
## 4 US     <tibble [127~ <fit[+]>     <fit[+]>      <fit[+]>   <fit~ <fit[+]>     
## # ... with 12 more variables: m_ets_theta <list>, m_auto_prophet <list>,
## #   m_prophet_boost <list>, m_stlm_arima <list>, m_mean <list>,
## #   m_median <list>, m_naive <list>, m_snaive <list>, m_nnetar <list>,
## #   nested_model <list>, calibration <list>, best_model <list>
#Forecast the number of international arrivals to Australia for the next year
forecast <- modeltime_multiforecast(
  model_refit,
  .prop = 0.8,
  .h = "1 year"
)
forecasting_table <- forecast %>%
  select(Origin, nested_forecast) %>%
  unnest(nested_forecast) %>%
  group_by(.index,Origin)
#Summary table
forecasting_table %>% filter(.key=="prediction") %>% select (Origin, .key, .index,.value,.conf_lo, .conf_hi, .model_details) 
## # A tibble: 16 x 7
## # Groups:   .index, Origin [16]
##    Origin .key     .index     .value .conf_lo .conf_hi .model_details           
##    <chr>  <fct>    <date>      <dbl>    <dbl>    <dbl> <chr>                    
##  1 Japan  predict~ 2012-10-01 1.22e5   52635.  191526. WINDOW FUNC [INF]        
##  2 Japan  predict~ 2013-01-01 1.22e5   52635.  191526. WINDOW FUNC [INF]        
##  3 Japan  predict~ 2013-04-01 1.22e5   52635.  191526. WINDOW FUNC [INF]        
##  4 Japan  predict~ 2013-07-01 1.22e5   52635.  191526. WINDOW FUNC [INF]        
##  5 NZ     predict~ 2012-10-01 3.14e5  287274.  341581. SEASONAL DECOMP: ARIMA(1~
##  6 NZ     predict~ 2013-01-01 2.39e5  212245.  266552. SEASONAL DECOMP: ARIMA(1~
##  7 NZ     predict~ 2013-04-01 2.99e5  271355.  325662. SEASONAL DECOMP: ARIMA(1~
##  8 NZ     predict~ 2013-07-01 3.29e5  302158.  356465. SEASONAL DECOMP: ARIMA(1~
##  9 UK     predict~ 2012-10-01 2.02e5  166124.  238356. SNAIVE [4]               
## 10 UK     predict~ 2013-01-01 1.95e5  158524.  230756. SNAIVE [4]               
## 11 UK     predict~ 2013-04-01 9.30e4   56854.  129086. SNAIVE [4]               
## 12 UK     predict~ 2013-07-01 1.02e5   65574.  137806. SNAIVE [4]               
## 13 US     predict~ 2012-10-01 1.21e5  108258.  134260. SEASONAL DECOMP: ETS(A,N~
## 14 US     predict~ 2013-01-01 1.24e5  111354.  137356. SEASONAL DECOMP: ETS(A,N~
## 15 US     predict~ 2013-04-01 1.04e5   90846.  116849. SEASONAL DECOMP: ETS(A,N~
## 16 US     predict~ 2013-07-01 1.08e5   95123.  121125. SEASONAL DECOMP: ETS(A,N~
#Forecast plot
forecast %>% 
  select(Origin, nested_forecast) %>% 
  unnest(nested_forecast) %>% 
  group_by(Origin) %>%
  plot_modeltime_forecast(
    .legend_max_width = 12,
    .facet_ncol = 1, 
    .line_size = 0.5,
    .interactive = TRUE,
    .facet_scales = 'free_y',
    .title='Australia International Arrivals Forecasting for Next Year'
  )