Last Updated 12/09/2021.


Overview

This script fits log-normal Generalized Additive Models (GAMs) and ARIMA models to salmon run-size data, with or without regression covariates, and uses the fitted models to generate one-year-ahead forecasts. The data used to fit the models are split into “training” and “validation” sets to evaluate performance of the one-year-ahead forecasts, compare models, and develop weighted “ensemble” forecasts. The code repository used to generate this page and complete the analyses can be found here: (link)

Setup

All analyses require R software (link) (v3.4.3) for data retrieval, data processing, and summarizing model results. Here we configure R to perform our analysis and generate our outputs.

options(width = 100)
knitr::opts_chunk$set(message = FALSE)
set.seed(123)

We also need a few helper functions, which we will source from the functions folder.

wd_functions<-"functions"
#source every helper script in the functions folder
sapply(paste(wd_functions, list.files(wd_functions), sep="/"), FUN=source)

Here we load and install the packages we need (an internet connection is required if any packages are not already installed).

packages_list<-c("tidyverse"
                 ,"forecast"
                 ,"mgcv"
                 ,"ggplot2"
                 ,"MASS"
                 ,"RColorBrewer"
                 ,"kableExtra"
                 ,"gtools"
                 # ,"ggfortify"
                 ,"brms"
                 ,"bsplus"
                 # ,"rnoaa"
                 # ,"ncdf4"
                 # ,"ncdf4.helpers"
                 # ,"raster"
                 # ,"reshape2"
                 # ,"ggfortify"
                 )
install_or_load_pack(pack = packages_list)

Load Data

Here we load and format our data for analysis, starting with the fish data. What you load is what you will forecast, so you can forecast either run size or escapement.

#====================================
#get Columbia River Steelhead data as an example
#====================================
dat<-read_csv("http://www.cbr.washington.edu/dart/cs/php/rpt/adult_annual.php?sc=1&outputFormat=csv&proj=BON&startdate=5%2F15&enddate=12%2F31")%>%
  arrange(Year)%>%
  dplyr::select(Year, Steelhead)%>%
  dplyr::rename(runsize_obs=Steelhead)%>%
  mutate(`runsize_obs`=ifelse(as.numeric(Year)<=as.numeric(format(Sys.Date(),"%Y")),`runsize_obs`,NA))%>%
  filter(!is.na(Year))

Yrlist<-data.frame(Year=c(min(dat$Year):(max(dat$Year)+1)))
             
dat<-dat%>%
  right_join(Yrlist)
    
#look at our data  
print(tail(data.frame(dat)))
##    Year runsize_obs
## 80 2017      115165
## 81 2018       99805
## 82 2019       75385
## 83 2020      111350
## 84 2021       70167
## 85 2022          NA

This section will (optionally) download from the web (or load from the data folder) NOAA Ocean Indicator data, NOAA ocean buoy SST data, NOAA smoothed modeled SST data (ERSST_V5), flow data, PIT-tag data, or any other covariate data not included with the fish data that could assist in forecasting.

#=========================================================
#get PIT tag survival/age data
#=========================================================
PIT<-read_csv("data/Columbia_Steelhead_SAR_DART_11_1_2021.csv")%>%
  dplyr::rename(OutmigrationYear=year)%>%
  mutate(Year=OutmigrationYear+1)%>%
  filter(Pop=="SNA")

SAR1<-data.frame(SAR1=gam(cbind(ocean1Count,juvCount-ocean1Count)~s(OutmigrationYear,k=(dim(PIT)[1])),family=binomial,data=PIT)$fitted)

PIT<-PIT%>%
  bind_cols(SAR1)

#=========================================================
#get NPGO data
#=========================================================
NPGO<-read_table("http://www.o3d.org/npgo/npgo.php",skip=29,col_names=F,comment="#")%>%
  filter(!is.na(X2))%>%
  dplyr::rename(Year=X1,Month=X2,NPGO=X3)%>%
  mutate(Year=as.numeric(Year))%>%
  group_by(Year)%>%
  add_tally()%>%
  filter(!Month>6)%>% #use only spring (Jan-June) NPGO
  #filter(!n < 12)%>% #use only complete years
  group_by(Year)%>%
  dplyr::summarise(NPGO=mean(NPGO))
#=========================================================
#get NOAA indicator data, wrangle into usable format, plot
#=========================================================
#indicators<-read_csv("https://media.fisheries.noaa.gov/2021-04/Stoplight%20csv.csv?null",skip=1)%>%
# indicators<-read_csv("data/Stoplight csv.csv",skip=1)%>%
#   filter(!is.na(`Ecosystem Indicators`))%>%
#   pivot_longer(names_to = "Year",
#                 cols=c(starts_with("1"),starts_with("2")),
#                 values_to = "value")%>%
#   pivot_wider(names_from=`Ecosystem Indicators`,values_from=value)%>%
#   mutate(Year=as.numeric(Year))

# plotdat<-indicators%>%pivot_longer(names_to = "indicators", cols = colnames(indicators)[colnames(indicators)!="Year"])
# ggplot(plotdat,aes(x=value,color=indicators))+
#   geom_density()+
#   facet_wrap(~indicators,scales="free")+
#   theme(legend.position = "none")

#==============================================================
# get NOAA sst data directly from Buoys (takes a while to run)
#==============================================================
# buoylist<-c(46229, 46211, 46041, 46029, 46050, 46097, 46098)
# years<-c(1980:2021)
# buoy_stations()%>%filter(lat > 44 & lat < 52 & lon > -130 & lon < -120)
# buoydat<-getbuoydata(buoylist = buoylist,years=years)
# write.csv(dat,"SST.csv",row.names = F)

#===================
#Plot Buoy SST data
#====================
# buoydat<-buoydat%>%
#   filter(!is.na(buoyid) & !is.na(meanSST))%>%
#   group_by(buoyid,month)%>%
#   mutate(gmeanSST=mean(meanSST),resid=meanSST-gmeanSST)
#   
# 
# ggplot(buoydat,aes(x=month,y=resid,group=factor(buoyid),color=factor(buoyid)))+
#   geom_hline(yintercept=0,linetype="dashed")+
#   scale_color_brewer(palette = "Spectral")+
#   geom_line()+
# facet_wrap(~factor(year))


#===========================================================================================================
#Get ERSST data: get_ersst_v5_data takes A LONG TIME (1 hr) vs. get_ersst_v5_data_V2 which is much quicker!
#===========================================================================================================
#dat<-get_ersst_v5_data(years=years,data.dir=getwd(),latrange=c(44,52),lonrange=c(-130,-120))
# data.dir<-"C:/Users/buehrtwb/OneDrive - Washington State Executive Branch Agencies/Documents"
# sstdat<-get_ersst_v5_data_V2(years=c(1980:2021),data.dir=data.dir ,ncfilename="SSTv5.nc",latrange=c(44,50),lonrange=c(-125,-120))
# 
# sstdat<-sstdat%>%
#   mutate(sstdiff=c(NA,diff(resid)))
# 
# #================
# #Plot ERSST data
# #================
# ggplot(sstdat,aes(x=factor(month),y=meanSST,group=year))+
#   geom_hline(yintercept=0,linetype="dashed")+
#   scale_color_brewer(palette = "Spectral")+
#   geom_line()+
#   facet_wrap(~factor(year))
# 
# ggplot(sstdat,aes(x=factor(month),y=resid,group=year))+
#   geom_hline(yintercept=0,linetype="dashed")+
#   scale_color_brewer(palette = "Spectral")+
#   geom_line()+
#   facet_wrap(~factor(year))
# 
# ggplot(sstdat,aes(x=factor(month),y=sstdiff,group=year))+
#   geom_hline(yintercept=0,linetype="dashed")+
#   scale_color_brewer(palette = "Spectral")+
#   geom_line()+
#   facet_wrap(~factor(year))
# 
# ssta<-sstdat%>%
#   dplyr::select(year,month,resid)%>%
#   mutate(month = paste0("m_",month))%>%
#   rename(Year = year)%>%
#   pivot_wider(names_from = month,values_from = resid)
# 
# pcdat<-ssta%>%
#   ungroup()%>%
#   filter(Year<2021)%>%
#   column_to_rownames(var="Year")
# mod<-prcomp(pcdat
#             )
# autoplot(mod,data=ssta%>%filter(Year<2021)%>%column_to_rownames(var="Year"),
#          x=1,
#          y=3,
#          label = TRUE, 
#          label.size = 3,
#          shape=F)

If we loaded or downloaded covariate data (other than fish abundance) for use in forecasting, this section merges it with our fish data to produce a final dataframe. Note the format below; the rest of the functions and sections rely on your data being formatted this way.

dat<-dat%>%
  left_join(PIT)%>%
  left_join(NPGO)%>%
  #left_join(indicators)%>%
  #left_join(ssta)%>%
  mutate(
    # l1aprssta= lag(scale(m_04)),
    # l1mayssta= lag(scale(m_05)),
    # l1junssta= lag(scale(m_06)),
    # l1julssta= lag(scale(m_07)),
    # l1augssta= lag(scale(m_08)),
    # l1sepssta= lag(scale(m_09)),
    # l1octssta= lag(scale(m_10)),
    # l1novssta= lag(scale(m_11)),
    # l1decssta= lag(scale(m_12)),
    # l1FallSST= (lag(scale(m_10)) + lag(scale(m_11)) + lag(scale(m_12)))/3
    # PC1 = lag(mod$x[,1]),
    # PC2 = lag(mod$x[,2]),
    # PC3 = lag(mod$x[,3]),
    # lag1_summerPDO = lag(scale(`PDO\n(Sum May-Sept)`)),
    # lag1_springONI = lag(scale(`ONI\n(Average Jan-June)`)),
    # lag1_summerdeepSST = lag(scale(`Deep temperature\n(°C; May-Sept)`)),
    # lag1_summerdeepSalinity = lag(scale(`Deep salinity\n(May-Sept)`)),
    # lag1_copepodrich = lag(scale(`Deep salinity\n(May-Sept)`)),
    # lag1_icthy = lag(scale(`Nearshore Ichthyoplankton\nLog(mg C 1,000 m-3; Jan-Mar)`)),
    # lag1_cpueCO = lag(scale(log(`Steelhead salmon juvenile\ncatches (no. km-1; June)`))),
    # lag1_phystrans = lag (scale(`Physical Spring Trans.\nUI based (day of year)`)),
    # lag1_Ncope = lag(scale(`N. copepod biomass anom.\n(mg C m-3; May-Sept)`)),
    # lag1_summerSST=lag(scale(`Upper 20 m T\n(°C; May-Sept)`)),
    #lag1_PC1 = scale(lag(`Principal Component scores (PC1)`)),
    # lag1_PC2 = lag(`Principal Component scores (PC2)`),
    lag1_SAR1 = lag(scale(log(SAR1))),
    lag1_NPGO = lag(NPGO),
    lag2_NPGO = lag(NPGO,2)
    )

#select variables to include in analysis and subset data
vars<-c("lag1_SAR1","lag1_NPGO","lag2_NPGO") #what regression variables will we use?
dat<-data.frame(dat)%>%
  dplyr::select(Year,runsize_obs,all_of(vars))%>%
  filter(
    across(
      .cols = all_of(vars),
      .fns = ~ !is.na(.x)
    )
  )
#look at our data  
print(data.frame(dat))
##    Year runsize_obs  lag1_SAR1   lag1_NPGO   lag2_NPGO
## 1  2002      475701  0.8886788  2.45231570  2.06082765
## 2  2003      359110 -0.8455143  1.59086281  2.45231570
## 3  2004      307892  0.7552157  1.20852203  1.59086281
## 4  2005      313815 -0.1176001  0.40404241  1.20852203
## 5  2006      334626 -0.1891388 -1.39983012  0.40404241
## 6  2007      321256 -0.5429061 -0.54010993 -1.39983012
## 7  2008      354791  1.3219434  0.18096473 -0.54010993
## 8  2009      601758  1.3156715  1.24622108  0.18096473
## 9  2010      408336  1.4918737  0.55015918  1.24622108
## 10 2011      365006  0.3235897  1.64014982  0.55015918
## 11 2012      230548  0.5704951  0.86284576  1.64014982
## 12 2013      231139 -0.3788654  1.38407925  0.86284576
## 13 2014      321525  0.6351315  0.85483320  1.38407925
## 14 2015      264230  0.2177816 -0.31960648  0.85483320
## 15 2016      183912  0.3118033 -1.09409318 -0.31960648
## 16 2017      115165 -1.5420892 -0.06625788 -1.09409318
## 17 2018       99805  0.1459067 -0.44935285 -0.06625788
## 18 2019       75385 -0.7961517 -1.94591915 -0.44935285
## 19 2020      111350 -0.6619336 -1.96474527 -1.94591915
## 20 2021       70167 -2.6903017 -1.78676155 -1.96474527
## 21 2022          NA -0.2135904 -0.89190917 -1.78676155

Set Parameters

Here we specify parameters for forecast training and evaluation: how many years of data to include in the training set (the validation set is the remainder), how many years ahead to forecast (typically 1), the model-averaging weighting exponent, and which metric to use to evaluate forecast model performance.

#set model fitting and evaluation params
TY <- dim(dat)[1]-12 #training years (years of training data)
FY <- 1 #forecast years (years forward to forecast)
k = 1 #this is the model-averaging weighting exponent--default is 1, larger numbers will tend to more heavily weight "best" models over lower ranked models
stack_metric = "MSA" #this is the performance measure that will be optimized to find the best stack weights

Fit Forecast Models

Here we fit the ARIMA and GAM forecast models. Each is fit with tsCV2(), which generates the leave-forward-out (expanding-window) one-year-ahead forecasts used for validation; a rough sketch of the idea follows.
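
To make this concrete, here is a minimal, illustrative sketch of an expanding-window, one-year-ahead fit for a single ARIMA(1,0,0) model, written against the forecast package directly. It is an assumption about the general approach, not the tsCV2() implementation.

#sketch only: expanding-window one-year-ahead ARIMA(1,0,0) forecasts
y <- log(dat$runsize_obs[-dim(dat)[1]]) #log run size, final (unobserved) year dropped
fc <- rep(NA_real_, length(y))
for(i in TY:(length(y)-1)){
  fit <- forecast::Arima(y[1:i], order=c(1,0,0)) #refit using years 1..i
  fc[i+1] <- forecast::forecast(fit, h=1)$mean[1] #forecast year i+1
}
exp(fc) #back-transform to the run-size scale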

#==========
#ARIMA models
#==========
ARIMA100<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=NULL,
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(1,0,0)
)
ARIMA100_withcov<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=dat%>%dplyr::select(all_of(vars))%>%as.matrix(),
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(1,0,0)
      )

ARIMA111_withcov<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=dat%>%dplyr::select(all_of(vars))%>%as.matrix(),
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(1,1,1)
)
ARIMA010_withcov<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=dat%>%dplyr::select(all_of(vars))%>%as.matrix(),
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(0,1,0)
)
ARIMA010<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=NULL,
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(0,1,0)
)
ARIMA111<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=NULL,
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(1,1,1)
)
ARIMA101_withcov<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=dat%>%dplyr::select(all_of(vars))%>%as.matrix(),
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(1,0,1)
)
ARIMA101<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=NULL,
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(1,0,1)
)
ARIMA001_withcov<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=dat%>%dplyr::select(all_of(vars))%>%as.matrix(),
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(0,0,1)
)
ARIMA001<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=NULL,
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(0,0,1)
)
#==========
#GAM models
#==========
gam<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=NULL,
            h=FY,
            min=TY,
            type="gam",
            m=1,
            knots=dim(dat)[1]-1
)
# LFO10<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
#             Year=dat$Year,
#             xreg=dat%>%dplyr::select(all_of(vars))%>%as.matrix(),
#             h=FY,
#             min=TY,
#             type="gam",
#             m = 1,
#             knots=dim(dat)[1]-1
# )
#================================
#BRMS models with Horseshoe Prior
#================================
# gp_horseshoe<-fc3(
#   Year=dat$Year,
#   TY=TY
#   ,vars=vars
#   ,dat=dat
#   ,formula = as.formula(paste0("log(runsize_obs) ~ gp(Year) + ",paste(vars,collapse = "+")))
#   ,prior = set_prior(horseshoe(df = 1, par_ratio = 0.5), class="b")
# )

# LFO12<-fc3(
#   Year=dat$Year,
#   TY=TY
#   ,vars=vars
#   ,dat=dat
#   ,formula = as.formula(paste0("log(runsize_obs) ~ arma(p = 1, q = 1, cov=TRUE) + ",paste(vars,collapse = "+")))
#   ,prior = set_prior(horseshoe(df = 1, par_ratio = 0.5), class="b")
# )

Summarize Forecasts

Here we summarize our forecasts for the last year of our covariate data and plot the observed and forecasted run size for all models.

#===================
#Summarize Forecasts
#===================
forecasts<-summarize_forecasts(
  c("ARIMA100"
    ,"ARIMA100_withcov"
    ,"ARIMA111_withcov"
    ,"ARIMA010_withcov"
    ,"ARIMA010"
    ,"ARIMA111"
    ,"ARIMA101_withcov"
    ,"ARIMA101"
    ,"ARIMA001_withcov"
    ,"ARIMA001"
    ,"gam"
    # LFO10
    #gp_horseshoe
    #LFO12
    )
)
#========================================
#Table of forecast results for final year
#========================================
forecasts%>%
  group_by(Model)%>%
  filter(Year==max(Year,na.rm = T))%>%
  kbl(caption = "Table 1. Forecasts for final year of data.",digits =0)%>%
  kable_classic(full_width = F, html_font = "Cambria")
Table 1. Forecasts for final year of data.
Model Year Estimate L95 U95
ARIMA100 2022 76494 41285 141731
ARIMA100_withcov 2022 124364 70611 219038
ARIMA111_withcov 2022 108371 60446 194293
ARIMA010_withcov 2022 103070 59401 178840
ARIMA010 2022 70167 38810 126860
ARIMA111 2022 71023 37983 132804
ARIMA101_withcov 2022 117849 70455 197124
ARIMA101 2022 76305 40453 143931
ARIMA001_withcov 2022 135342 79703 229822
ARIMA001 2022 95090 38832 232850
gam 2022 75868 59961 95994
#===========================
#Plot of final year forecast
#===========================
ggplot(data=forecasts,aes(x=Year,y=Estimate,color=Model))+
  labs(title="One-Year-Ahead Forecasts")+
  geom_line(size=1)+
  scale_color_brewer(palette="Spectral")+
  geom_point(data=dat,mapping=aes(x=Year,y=runsize_obs),size=2,color="black")+
  ylim(0,NA)+
  theme_bw()

Evaluate Forecasts

Here we will evaluate forecasts using Mean Absolute Percent Error (MAPE), Root Mean Squared Error (RMSE), and Median Symmetric Accuracy (MSA). RMSE and MAPE tend to penalize over-forecasts more heavily than under-forecasts for abundance data (because the errors are right-skewed), whereas MSA is specifically tailored to right-tailed error distributions. We will also calculate model weights.
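
For reference, the three metrics can be computed from paired observed and forecast values roughly as follows; this is a sketch of the standard definitions, and the formulas inside evaluate_forecasts() may differ in details such as percentage scaling.

#sketch of the three skill metrics (assumed definitions)
rmse <- function(obs, pred) sqrt(mean((obs - pred)^2, na.rm = TRUE))
mape <- function(obs, pred) 100 * mean(abs((obs - pred)/obs), na.rm = TRUE)
msa  <- function(obs, pred) 100 * (exp(median(abs(log(pred/obs)), na.rm = TRUE)) - 1)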

forecast_skill<-evaluate_forecasts(forecasts=forecasts,observations=dat)
#=======================
#Table of Forecast Skill
#=======================
forecast_skill%>%
  kbl(caption = "Table 2.Forecast Performance based on full set of one-year-ahead forecasts.",digits =3)%>%
  kable_classic(full_width = F, html_font = "Cambria")
Table 2. Forecast Performance based on full set of one-year-ahead forecasts.
Model RMSE MAPE MSA
ARIMA010_withcov 66946.68 30.190 31.638
ARIMA010 64953.17 32.944 33.844
gam 73469.10 36.681 37.497
ARIMA111 74524.40 37.662 38.523
ARIMA001_withcov 75886.45 41.267 38.732
ARIMA101_withcov 77954.20 40.663 38.952
ARIMA111_withcov 76432.11 39.795 40.270
ARIMA100_withcov 82317.60 43.097 41.618
ARIMA100 81086.28 46.111 44.629
ARIMA101 87572.49 49.238 47.476
ARIMA001 100970.47 71.244 60.875

Ensembles & Ensemble Performance

Calculate leave-forward-out performance year by year in order to develop weighted ensembles, with weights recalculated each year. Then evaluate the performance of these one-year-ahead ensembles and compare them with the original models. Note that one fewer year of leave-forward-out forecasts is evaluated, since an ensemble that uses previous years’ forecast skill can only be created for the second through final years of the leave-forward-out forecasts.
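
As a minimal sketch of how a weighted ensemble forecast for a single year could be assembled from the individual model forecasts: take a weighted sum of the model estimates. The equal weights below are hypothetical placeholders (in practice the weights come from past forecast skill, as in Table 3), and the exact combination rule used by evaluate_forecasts_with_ensembles() may differ.

#sketch only: weighted-ensemble point forecast for the final year (hypothetical equal weights)
model_weights <- data.frame(Model = unique(forecasts$Model),
                            weight = 1/length(unique(forecasts$Model)))
final_year <- forecasts%>%
  dplyr::filter(Year == max(Year, na.rm = TRUE))%>%
  dplyr::left_join(model_weights, by = "Model")
sum(final_year$weight * final_year$Estimate) #ensemble point forecast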

ensemble_results<-evaluate_forecasts_with_ensembles(forecasts = forecasts, observations = dat)
#=================================
#Table of final year model weights
#=================================
ensemble_results$final_model_weights%>%
  dplyr::select(Model, RMSE_weight, MAPE_weight, MSA_weight, Stacking_weight)%>%
  kbl(caption = "Table 3. Final Year Model Weights based on full dataset of one-year-ahead performance.",digits =3)%>%
  kable_classic(full_width = F, html_font = "Cambria")
Table 3. Final Year Model Weights based on full dataset of one-year-ahead performance.
Model RMSE_weight MAPE_weight MSA_weight Stacking_weight
ARIMA010_withcov 0.105 0.122 0.115 1
ARIMA010 0.108 0.112 0.108 0
gam 0.096 0.101 0.097 0
ARIMA111 0.094 0.098 0.095 0
ARIMA001_withcov 0.093 0.090 0.094 0
ARIMA101_withcov 0.090 0.091 0.094 0
ARIMA111_withcov 0.092 0.093 0.091 0
ARIMA100_withcov 0.085 0.086 0.088 0
ARIMA100 0.087 0.080 0.082 0
ARIMA101 0.080 0.075 0.077 0
ARIMA001 0.070 0.052 0.060 0
#=======================================
#Table of final year ensemble forecasts
#=======================================
ensemble_results$ensembles%>%
  group_by(Model)%>%
  filter(Year==max(Year,na.rm=T))%>%
  kbl(caption = "Table 4. Ensemble forecasts one year ahead of final year of data.",digits =0)%>%
  kable_classic(full_width = F, html_font = "Cambria")
Table 4. Ensemble forecasts one year ahead of final year of data.
Year Model Estimate L95 U95
2022 MSA_weighted 95903 55062 169773
2022 RMSE_weighted 95504 54613 169912
2022 MAPE_weighted 95489 54984 168457
2022 Stack_weighted 103070 59401 178840
#=============================
#calc performance of ensembles
#=============================
ensemble_results$forecast_skill%>%
  kbl(caption = "Table 5. One-year ahead performance of model & model ensemble forecasts. Ensemble weights recalculated annually using weights based on one-head performance up to to and including the year prior to the forecast year. Year one of one-year ahead fits not included since an ensemble cannot be computed for this year.",digits =3)%>%
  kable_classic(full_width = F, html_font = "Cambria")
Table 5. One-year-ahead performance of model & model ensemble forecasts. Ensemble weights recalculated annually using weights based on one-year-ahead performance up to and including the year prior to the forecast year. Year one of the one-year-ahead fits is not included since an ensemble cannot be computed for that year.
Model RMSE MAPE MSA
Stack_weighted 62394.86 27.998 27.663
ARIMA010_withcov 60972.61 30.193 31.786
ARIMA010 66731.23 35.051 36.266
gam 72752.37 38.149 39.152
ARIMA001_withcov 78606.88 44.313 41.885
ARIMA111 77587.12 40.609 41.989
ARIMA101_withcov 80842.97 43.672 42.163
MAPE_weighted 74504.86 43.211 43.010
ARIMA111_withcov 78274.34 42.276 43.086
MSA_weighted 74640.66 43.457 43.112
RMSE_weighted 75221.31 44.188 43.850
ARIMA100_withcov 84675.35 45.947 44.649
ARIMA100 84729.85 50.090 49.147
ARIMA101 91254.70 53.260 51.876
ARIMA001 105897.76 78.329 68.642
#==============
#plot ensemble forecasts
#==============
ggplot(data=ensemble_results$ensembles,aes(x=Year,y=Estimate,color=Model))+
  labs(title="One-Year-Ahead Ensemble Forecasts")+
  geom_line(size=1)+
  scale_color_brewer(palette="Spectral")+
  geom_point(data=dat,mapping=aes(x=Year,y=runsize_obs),size=2,color="black")+
  ylim(0,NA)+
  theme_bw()

Best Model

Identify the best model, then plot and summarize its results.

best_model <- get_best_model(
  forecasts = forecasts
  ,ensembles = ensemble_results$ensembles
  ,stack_metric = stack_metric
  ,forecast_skill = ensemble_results$forecast_skill
)

#==============
#plot Best Model 
#==============
ggplot(data=best_model,aes(x=Year,y=Estimate,color=Model))+
  geom_ribbon(aes(ymin=L95,ymax=U95,fill=Model),color=NA,alpha=0.4)+
  facet_wrap(~Model,ncol=1)+
  labs(title="Best Model One-Year-Ahead Forecasts")+
  geom_line(size=1)+
  scale_color_brewer(palette="Spectral")+
  geom_point(data=dat,mapping=aes(x=Year,y=runsize_obs),size=2,color="black")+
  ylim(0,NA)+
  theme_bw()

best_mu<-log(best_model%>%filter(Year==max(Year))%>%dplyr::select(Estimate))%>%unlist() #log-scale mean of the final-year forecast
best_sd<-best_model%>%filter(Year==max(Year))%>%dplyr::select(sd)%>%unlist() #log-scale sd of the final-year forecast
best_draws<-data.frame(Estimate=rlnorm(10000,meanlog=best_mu,sdlog=best_sd[1])) #draws from the log-normal forecast distribution

#=========================
#Best model final year pdf
#=========================
ggplot(data=best_draws,aes(x=Estimate))+
  geom_density(aes(fill="Estimate"),color=NA,alpha=0.4)+
  xlim(0,quantile(best_draws$Estimate,0.995))+
  labs(title="Best Model Forecast")+
  scale_color_brewer(palette="Spectral")+
  ylim(0,NA)+
  theme_bw()

#===========================
#Best Model final year table
#===========================
best_model%>%
  group_by(Model)%>%
  filter(Year==max(Year,na.rm=T))%>%
  dplyr::select(Model,Year,Estimate,L95,U95)%>%
  kbl(caption = "Table 5. Best model forecasts one year ahead of final year of data.",digits =0)%>%
  kable_classic(full_width = F, html_font = "Cambria")
Table 6. Best model forecasts one year ahead of final year of data.
Model Year Estimate L95 U95
Stack_weighted 2022 103070 59401 178840