Last Updated 12/09/2021.


Overview

This script fits log-normal Generalized Additive Models (GAMs) and ARIMA models to salmon run-size data, with or without regression covariates, and uses the fitted models to generate one-year-ahead forecasts. The data used to fit the models are split into “training” and “validation” sets to evaluate performance of the one-year-ahead forecasts, compare models, and develop weighted “ensemble” forecasts. The code repository used to generate this page and complete the analyses can be found here: (link)

Setup

All analyses require R software (link) (v3.4.3) for data retrieval, data processing, and summarizing model results. Here we configure R to perform our analysis and generate our outputs.

options(width = 100)
knitr::opts_chunk$set(message = FALSE)
set.seed(123)

We also need a few helper functions, which we will source from the functions folder.

wd_functions<-"functions"
#source every helper script in the functions folder
sapply(paste(wd_functions, list.files(wd_functions), sep="/"), FUN=source)

Here we load and install the packages we need (an internet connection is required if any packages are not already installed).

packages_list<-c("tidyverse"
                 ,"forecast"
                 ,"mgcv"
                 ,"ggplot2"
                 ,"MASS"
                 ,"RColorBrewer"
                 ,"kableExtra"
                 ,"gtools"
                 # ,"ggfortify"
                 ,"brms"
                 ,"bsplus"
                 # ,"rnoaa"
                 # ,"ncdf4"
                 # ,"ncdf4.helpers"
                 # ,"raster"
                 # ,"reshape2"
                 # ,"ggfortify"
                 )
install_or_load_pack(pack = packages_list)

Load Data

Here we load and format our data for analysis, starting with the fish data. What you load is what you will forecast, so you can forecast either run size or escapement.

#====================================
#get Columbia River Steelhead data as an example
#====================================
dat<-read_csv("http://www.cbr.washington.edu/dart/cs/php/rpt/adult_annual.php?sc=1&outputFormat=csv&proj=BON&startdate=5%2F15&enddate=12%2F31")%>%
  arrange(Year)%>%
  dplyr::select(Year, Steelhead)%>%
  dplyr::rename(runsize_obs=Steelhead)%>%
  mutate(`runsize_obs`=ifelse(as.numeric(Year)<=as.numeric(format(Sys.Date(),"%Y")),`runsize_obs`,NA))%>%
  filter(!is.na(Year))

Yrlist<-data.frame(Year=c(min(dat$Year):(max(dat$Year)+1)))
             
dat<-dat%>%
  right_join(Yrlist)
    
#look at our data  
print(tail(data.frame(dat)))
##    Year runsize_obs
## 80 2017      115165
## 81 2018       99805
## 82 2019       75385
## 83 2020      111350
## 84 2021       70167
## 85 2022          NA

This section will (optionally) download from the web (or load from the data folder) NOAA Ocean Indicator data, NOAA ocean buoy SST data, NOAA smoothed modeled SST data (ERSST_V5), flow data, PIT-tag data, or any other covariate data not included with the fish data that could assist in forecasting.

#=========================================================
#get PIT tag survival/age data
#=========================================================
PIT<-read_csv("data/Columbia_Steelhead_SAR_DART_11_1_2021.csv")%>%
  dplyr::rename(OutmigrationYear=year)%>%
  mutate(Year=OutmigrationYear+1)%>%
  filter(Pop=="SNA")

SAR1<-data.frame(SAR1=gam(cbind(ocean1Count,juvCount-ocean1Count)~s(OutmigrationYear,k=(dim(PIT)[1])),family=binomial,data=PIT)$fitted)

PIT<-PIT%>%
  bind_cols(SAR1)

#=========================================================
#get NPGO data
#=========================================================
NPGO<-read_table("http://www.o3d.org/npgo/npgo.php",skip=29,col_names=F,comment="#")%>%
  filter(!is.na(X2))%>%
  dplyr::rename(Year=X1,Month=X2,NPGO=X3)%>%
  mutate(Year=as.numeric(Year))%>%
  group_by(Year)%>%
  add_tally()%>%
  filter(!Month>6)%>% #use only spring (Jan-June) NPGO
  #filter(!n < 12)%>% #use only complete years
  group_by(Year)%>%
  dplyr::summarise(NPGO=mean(NPGO))
#=========================================================
#get NOAA indicator data, wrangle into usable format, plot
#=========================================================
#indicators<-read_csv("https://media.fisheries.noaa.gov/2021-04/Stoplight%20csv.csv?null",skip=1)%>%
# indicators<-read_csv("data/Stoplight csv.csv",skip=1)%>%
#   filter(!is.na(`Ecosystem Indicators`))%>%
#   pivot_longer(names_to = "Year",
#                 cols=c(starts_with("1"),starts_with("2")),
#                 values_to = "value")%>%
#   pivot_wider(names_from=`Ecosystem Indicators`,values_from=value)%>%
#   mutate(Year=as.numeric(Year))

# plotdat<-indicators%>%pivot_longer(names_to = "indicators", cols = colnames(indicators)[colnames(indicators)!="Year"])
# ggplot(plotdat,aes(x=value,color=indicators))+
#   geom_density()+
#   facet_wrap(~indicators,scales="free")+
#   theme(legend.position = "none")

#==============================================================
# get NOAA sst data directly from Buoys (takes a while to run)
#==============================================================
# buoylist<-c(46229, 46211, 46041, 46029, 46050, 46097, 46098)
# years<-c(1980:2021)
# buoy_stations()%>%filter(lat > 44 & lat < 52 & lon > -130 & lon < -120)
# buoydat<-getbuoydata(buoylist = buoylist,years=years)
# write.csv(dat,"SST.csv",row.names = F)

#===================
#Plot Buoy SST data
#====================
# buoydat<-buoydat%>%
#   filter(!is.na(buoyid) & !is.na(meanSST))%>%
#   group_by(buoyid,month)%>%
#   mutate(gmeanSST=mean(meanSST),resid=meanSST-gmeanSST)
#   
# 
# ggplot(buoydat,aes(x=month,y=resid,group=factor(buoyid),color=factor(buoyid)))+
#   geom_hline(yintercept=0,linetype="dashed")+
#   scale_color_brewer(palette = "Spectral")+
#   geom_line()+
# facet_wrap(~factor(year))


#===========================================================================================================
#Get ERSST data: get_ersst_v5_data takes A LONG TIME (1 hr) vs. get_ersst_v5_data_V2 which is much quicker!
#===========================================================================================================
#dat<-get_ersst_v5_data(years=years,data.dir=getwd(),latrange=c(44,52),lonrange=c(-130,-120))
# data.dir<-"C:/Users/buehrtwb/OneDrive - Washington State Executive Branch Agencies/Documents"
# sstdat<-get_ersst_v5_data_V2(years=c(1980:2021),data.dir=data.dir ,ncfilename="SSTv5.nc",latrange=c(44,50),lonrange=c(-125,-120))
# 
# sstdat<-sstdat%>%
#   mutate(sstdiff=c(NA,diff(resid)))
# 
# #================
# #Plot ERSST data
# #================
# ggplot(sstdat,aes(x=factor(month),y=meanSST,group=year))+
#   geom_hline(yintercept=0,linetype="dashed")+
#   scale_color_brewer(palette = "Spectral")+
#   geom_line()+
#   facet_wrap(~factor(year))
# 
# ggplot(sstdat,aes(x=factor(month),y=resid,group=year))+
#   geom_hline(yintercept=0,linetype="dashed")+
#   scale_color_brewer(palette = "Spectral")+
#   geom_line()+
#   facet_wrap(~factor(year))
# 
# ggplot(sstdat,aes(x=factor(month),y=sstdiff,group=year))+
#   geom_hline(yintercept=0,linetype="dashed")+
#   scale_color_brewer(palette = "Spectral")+
#   geom_line()+
#   facet_wrap(~factor(year))
# 
# ssta<-sstdat%>%
#   dplyr::select(year,month,resid)%>%
#   mutate(month = paste0("m_",month))%>%
#   rename(Year = year)%>%
#   pivot_wider(names_from = month,values_from = resid)
# 
# pcdat<-ssta%>%
#   ungroup()%>%
#   filter(Year<2021)%>%
#   column_to_rownames(var="Year")
# mod<-prcomp(pcdat
#             )
# autoplot(mod,data=ssta%>%filter(Year<2021)%>%column_to_rownames(var="Year"),
#          x=1,
#          y=3,
#          label = TRUE, 
#          label.size = 3,
#          shape=F)

If we loaded or downloaded covariate data (other than fish abundance) for use in forecasting, this section merges it with our fish data to produce a final dataframe. Note the format below; the rest of the functions and sections rely on your data being formatted this way.

dat<-dat%>%
  left_join(PIT)%>%
  left_join(NPGO)%>%
  #left_join(indicators)%>%
  #left_join(ssta)%>%
  mutate(
    # l1aprssta= lag(scale(m_04)),
    # l1mayssta= lag(scale(m_05)),
    # l1junssta= lag(scale(m_06)),
    # l1julssta= lag(scale(m_07)),
    # l1augssta= lag(scale(m_08)),
    # l1sepssta= lag(scale(m_09)),
    # l1octssta= lag(scale(m_10)),
    # l1novssta= lag(scale(m_11)),
    # l1decssta= lag(scale(m_12)),
    # l1FallSST= (lag(scale(m_10)) + lag(scale(m_11)) + lag(scale(m_12)))/3
    # PC1 = lag(mod$x[,1]),
    # PC2 = lag(mod$x[,2]),
    # PC3 = lag(mod$x[,3]),
    # lag1_summerPDO = lag(scale(`PDO\n(Sum May-Sept)`)),
    # lag1_springONI = lag(scale(`ONI\n(Average Jan-June)`)),
    # lag1_summerdeepSST = lag(scale(`Deep temperature\n(°C; May-Sept)`)),
    # lag1_summerdeepSalinity = lag(scale(`Deep salinity\n(May-Sept)`)),
    # lag1_copepodrich = lag(scale(`Deep salinity\n(May-Sept)`)),
    # lag1_icthy = lag(scale(`Nearshore Ichthyoplankton\nLog(mg C 1,000 m-3; Jan-Mar)`)),
    # lag1_cpueCO = lag(scale(log(`Steelhead salmon juvenile\ncatches (no. km-1; June)`))),
    # lag1_phystrans = lag (scale(`Physical Spring Trans.\nUI based (day of year)`)),
    # lag1_Ncope = lag(scale(`N. copepod biomass anom.\n(mg C m-3; May-Sept)`)),
    # lag1_summerSST=lag(scale(`Upper 20 m T\n(°C; May-Sept)`)),
    #lag1_PC1 = scale(lag(`Principal Component scores (PC1)`)),
    # lag1_PC2 = lag(`Principal Component scores (PC2)`),
    lag1_SAR1 = lag(scale(log(SAR1))),
    lag1_NPGO = lag(NPGO),
    lag2_NPGO = lag(NPGO,2)
    )

#select variables to include in analysis and subset data
vars<-c("lag1_SAR1","lag1_NPGO","lag2_NPGO") #what regression variables will we use?
dat<-data.frame(dat)%>%
  dplyr::select(Year,runsize_obs,all_of(vars))%>%
  filter(
    across(
      .cols = all_of(vars),
      .fns = ~ !is.na(.x)
    )
  )
#look at our data  
print(data.frame(dat))
##    Year runsize_obs  lag1_SAR1   lag1_NPGO   lag2_NPGO
## 1  2002      475701  0.8886788  2.45231570  2.06082765
## 2  2003      359110 -0.8455143  1.59086281  2.45231570
## 3  2004      307892  0.7552157  1.20852203  1.59086281
## 4  2005      313815 -0.1176001  0.40404241  1.20852203
## 5  2006      334626 -0.1891388 -1.39983012  0.40404241
## 6  2007      321256 -0.5429061 -0.54010993 -1.39983012
## 7  2008      354791  1.3219434  0.18096473 -0.54010993
## 8  2009      601758  1.3156715  1.24622108  0.18096473
## 9  2010      408336  1.4918737  0.55015918  1.24622108
## 10 2011      365006  0.3235897  1.64014982  0.55015918
## 11 2012      230548  0.5704951  0.86284576  1.64014982
## 12 2013      231139 -0.3788654  1.38407925  0.86284576
## 13 2014      321525  0.6351315  0.85483320  1.38407925
## 14 2015      264230  0.2177816 -0.31960648  0.85483320
## 15 2016      183912  0.3118033 -1.09409318 -0.31960648
## 16 2017      115165 -1.5420892 -0.06625788 -1.09409318
## 17 2018       99805  0.1459067 -0.44935285 -0.06625788
## 18 2019       75385 -0.7961517 -1.94591915 -0.44935285
## 19 2020      111350 -0.6619336 -1.96474527 -1.94591915
## 20 2021       70167 -2.6903017 -1.78676155 -1.96474527
## 21 2022          NA -0.2135904 -0.89190917 -1.78676155

Set Parameters

Here we specify parameters for forecast training and evaluation: how many years of data to include in the training set (the validation set is the remainder), how many years ahead to forecast (typically 1), the model-averaging weighting exponent, and which metric to use to evaluate forecast model performance.

#set model fitting and evaluation params
TY <- dim(dat)[1]-12 #training years (years of training data)
FY <- 1 #forecast years (years forward to forecast)
k = 1 #this is the model-averaging weighting exponent--default is 1, larger numbers will tend to more heavily weight "best" models over lower ranked models
stack_metric = "MSA" #this is the performance measure that will be optimized to find the best stack weights

Fit Forecast Models

Here we fit the ARIMA and GAM forecast models. Each is fit with tsCV2(), which generates the leave-forward-out (expanding-window) one-year-ahead forecasts used for validation; a rough sketch of the idea follows.
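
To make this concrete, here is a minimal, illustrative sketch of an expanding-window, one-year-ahead fit for a single ARIMA(1,0,0) model, written against the forecast package directly. It is an assumption about the general approach, not the tsCV2() implementation.

#sketch only: expanding-window one-year-ahead ARIMA(1,0,0) forecasts
y <- log(dat$runsize_obs[-dim(dat)[1]]) #log run size, final (unobserved) year dropped
fc <- rep(NA_real_, length(y))
for(i in TY:(length(y)-1)){
  fit <- forecast::Arima(y[1:i], order=c(1,0,0)) #refit using years 1..i
  fc[i+1] <- forecast::forecast(fit, h=1)$mean[1] #forecast year i+1
}
exp(fc) #back-transform to the run-size scale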

#==========
#ARIMA models
#==========
ARIMA100<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=NULL,
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(1,0,0)
)
ARIMA100_withcov<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=dat%>%dplyr::select(all_of(vars))%>%as.matrix(),
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(1,0,0)
      )

ARIMA111_withcov<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=dat%>%dplyr::select(all_of(vars))%>%as.matrix(),
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(1,1,1)
)
ARIMA010_withcov<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=dat%>%dplyr::select(all_of(vars))%>%as.matrix(),
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(0,1,0)
)
ARIMA010<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=NULL,
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(0,1,0)
)
ARIMA111<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=NULL,
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(1,1,1)
)
ARIMA101_withcov<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=dat%>%dplyr::select(all_of(vars))%>%as.matrix(),
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(1,0,1)
)
ARIMA101<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=NULL,
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(1,0,1)
)
ARIMA001_withcov<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=dat%>%dplyr::select(all_of(vars))%>%as.matrix(),
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(0,0,1)
)
ARIMA001<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=NULL,
            h=FY,
            min=TY,
            type="ARIMA",
            order=c(0,0,1)
)
#==========
#GAM models
#==========
gam<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
            Year=dat$Year,
            xreg=NULL,
            h=FY,
            min=TY,
            type="gam",
            m=1,
            knots=dim(dat)[1]-1
)
# LFO10<-tsCV2(y=log(dat$runsize_obs[-dim(dat)[1]]),
#             Year=dat$Year,
#             xreg=dat%>%dplyr::select(all_of(vars))%>%as.matrix(),
#             h=FY,
#             min=TY,
#             type="gam",
#             m = 1,
#             knots=dim(dat)[1]-1
# )
#================================
#BRMS models with Horseshoe Prior
#================================
# gp_horseshoe<-fc3(
#   Year=dat$Year,
#   TY=TY
#   ,vars=vars
#   ,dat=dat
#   ,formula = as.formula(paste0("log(runsize_obs) ~ gp(Year) + ",paste(vars,collapse = "+")))
#   ,prior = set_prior(horseshoe(df = 1, par_ratio = 0.5), class="b")
# )

# LFO12<-fc3(
#   Year=dat$Year,
#   TY=TY
#   ,vars=vars
#   ,dat=dat
#   ,formula = as.formula(paste0("log(runsize_obs) ~ arma(p = 1, q = 1, cov=TRUE) + ",paste(vars,collapse = "+")))
#   ,prior = set_prior(horseshoe(df = 1, par_ratio = 0.5), class="b")
# )

Summarize Forecasts

Here we summarize our forecasts for the last year of our covariate data and plot the observed and forecasted run size for all models.

#===================
#Summarize Forecasts
#===================
forecasts<-summarize_forecasts(
  c("ARIMA100"
    ,"ARIMA100_withcov"
    ,"ARIMA111_withcov"
    ,"ARIMA010_withcov"
    ,"ARIMA010"
    ,"ARIMA111"
    ,"ARIMA101_withcov"
    ,"ARIMA101"
    ,"ARIMA001_withcov"
    ,"ARIMA001"
    ,"gam"
    # LFO10
    #gp_horseshoe
    #LFO12
    )
)
#========================================
#Table of forecast results for final year
#========================================
forecasts%>%
  group_by(Model)%>%
  filter(Year==max(Year,na.rm = T))%>%
  kbl(caption = "Table 1. Forecasts for final year of data.",digits =0)%>%
  kable_classic(full_width = F, html_font = "Cambria")
Table 1. Forecasts for final year of data.
Model Year Estimate L95 U95
ARIMA100 2022 76494 41285 141731
ARIMA100_withcov 2022 124364 70611 219038
ARIMA111_withcov 2022 108371 60446 194293
ARIMA010_withcov 2022 103070 59401 178840
ARIMA010 2022 70167 38810 126860
ARIMA111 2022 71023 37983 132804
ARIMA101_withcov 2022 117849 70455 197124
ARIMA101 2022 76305 40453 143931
ARIMA001_withcov 2022 135342 79703 229822
ARIMA001 2022 95090 38832 232850
gam 2022 75868 59961 95994
#===========================
#Plot of final year forecast
#===========================
ggplot(data=forecasts,aes(x=Year,y=Estimate,color=Model))+
  labs(title="One-Year-Ahead Forecasts")+
  geom_line(size=1)+
  scale_color_brewer(palette="Spectral")+
  geom_point(data=dat,mapping=aes(x=Year,y=runsize_obs),size=2,color="black")+
  ylim(0,NA)+
  theme_bw()

Evaluate Forecasts

Here we will evaluate forecasts using Mean Absolute Percent Error (MAPE), Root Mean Squared Error (RMSE), and Median Symmetric Accuracy (MSA). RMSE and MAPE tend to penalize over-forecasts more heavily than under-forecasts for abundance data (because the errors are right-skewed), whereas MSA is specifically tailored to right-tailed error distributions. We will also calculate model weights.
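
For reference, the three metrics can be computed from paired observed and forecast values roughly as follows; this is a sketch of the standard definitions, and the formulas inside evaluate_forecasts() may differ in details such as percentage scaling.

#sketch of the three skill metrics (assumed definitions)
rmse <- function(obs, pred) sqrt(mean((obs - pred)^2, na.rm = TRUE))
mape <- function(obs, pred) 100 * mean(abs((obs - pred)/obs), na.rm = TRUE)
msa  <- function(obs, pred) 100 * (exp(median(abs(log(pred/obs)), na.rm = TRUE)) - 1)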

forecast_skill<-evaluate_forecasts(forecasts=forecasts,observations=dat)
#=======================
#Table of Forecast Skill
#=======================
forecast_skill%>%
  kbl(caption = "Table 2.Forecast Performance based on full set of one-year-ahead forecasts.",digits =3)%>%
  kable_classic(full_width = F, html_font = "Cambria")
Table 2. Forecast Performance based on full set of one-year-ahead forecasts.
Model RMSE MAPE MSA
ARIMA010_withcov 66946.68 30.190 31.638
ARIMA010 64953.17 32.944 33.844
gam 73469.10 36.681 37.497
ARIMA111 74524.40 37.662 38.523
ARIMA001_withcov 75886.45 41.267 38.732
ARIMA101_withcov 77954.20 40.663 38.952
ARIMA111_withcov 76432.11 39.795 40.270
ARIMA100_withcov 82317.60 43.097 41.618
ARIMA100 81086.28 46.111 44.629
ARIMA101 87572.49 49.238 47.476
ARIMA001 100970.47 71.244 60.875

Ensembles & Ensemble Performance

Calculate leave-forward-out performance year by year in order to develop weighted ensembles, with weights recalculated each year. Then evaluate the performance of these one-year-ahead ensembles and compare them with the original models. Note that one fewer year of leave-forward-out forecasts is evaluated, since an ensemble that uses previous years’ forecast skill can only be created for the second through final years of the leave-forward-out forecasts.
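
As a minimal sketch of how a weighted ensemble forecast for a single year could be assembled from the individual model forecasts: take a weighted sum of the model estimates. The equal weights below are hypothetical placeholders (in practice the weights come from past forecast skill, as in Table 3), and the exact combination rule used by evaluate_forecasts_with_ensembles() may differ.

#sketch only: weighted-ensemble point forecast for the final year (hypothetical equal weights)
model_weights <- data.frame(Model = unique(forecasts$Model),
                            weight = 1/length(unique(forecasts$Model)))
final_year <- forecasts%>%
  dplyr::filter(Year == max(Year, na.rm = TRUE))%>%
  dplyr::left_join(model_weights, by = "Model")
sum(final_year$weight * final_year$Estimate) #ensemble point forecast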

ensemble_results<-evaluate_forecasts_with_ensembles(forecasts = forecasts, observations = dat)
#=================================
#Table of final year model weights
#=================================
ensemble_results$final_model_weights%>%
  dplyr::select(Model, RMSE_weight, MAPE_weight, MSA_weight, Stacking_weight)%>%
  kbl(caption = "Table 3. Final Year Model Weights based on full dataset of one-year-ahead performance.",digits =3)%>%
  kable_classic(full_width = F, html_font = "Cambria")
Table 3. Final Year Model Weights based on full dataset of one-year-ahead performance.
Model RMSE_weight MAPE_weight MSA_weight Stacking_weight
ARIMA010_withcov 0.105 0.122 0.115 1
ARIMA010 0.108 0.112 0.108 0
gam 0.096 0.101 0.097 0
ARIMA111 0.094 0.098 0.095 0
ARIMA001_withcov 0.093 0.090 0.094 0
ARIMA101_withcov 0.090 0.091 0.094 0
ARIMA111_withcov 0.092 0.093 0.091 0
ARIMA100_withcov 0.085 0.086 0.088 0
ARIMA100 0.087 0.080 0.082 0
ARIMA101 0.080 0.075 0.077 0
ARIMA001 0.070 0.052 0.060 0
#=======================================
#Table of final year ensemble forecasts
#=======================================
ensemble_results$ensembles%>%
  group_by(Model)%>%
  filter(Year==max(Year,na.rm=T))%>%
  kbl(caption = "Table 4. Ensemble forecasts one year ahead of final year of data.",digits =0)%>%
  kable_classic(full_width = F, html_font = "Cambria")
Table 4. Ensemble forecasts one year ahead of final year of data.
Year Model Estimate L95 U95
2022 MSA_weighted 95903 55062 169773
2022 RMSE_weighted 95504 54613 169912
2022 MAPE_weighted 95489 54984 168457
2022 Stack_weighted 103070 59401 178840
#=============================
#calc performance of ensembles
#=============================
ensemble_results$forecast_skill%>%
  kbl(caption = "Table 5. One-year ahead performance of model & model ensemble forecasts. Ensemble weights recalculated annually using weights based on one-head performance up to to and including the year prior to the forecast year. Year one of one-year ahead fits not included since an ensemble cannot be computed for this year.",digits =3)%>%
  kable_classic(full_width = F, html_font = "Cambria")
Table 5. One-year-ahead performance of model & model ensemble forecasts. Ensemble weights recalculated annually using weights based on one-year-ahead performance up to and including the year prior to the forecast year. Year one of the one-year-ahead fits is not included since an ensemble cannot be computed for that year.
Model RMSE MAPE MSA
Stack_weighted 62394.86 27.998 27.663
ARIMA010_withcov 60972.61 30.193 31.786
ARIMA010 66731.23 35.051 36.266
gam 72752.37 38.149 39.152
ARIMA001_withcov 78606.88 44.313 41.885
ARIMA111 77587.12 40.609 41.989
ARIMA101_withcov 80842.97 43.672 42.163
MAPE_weighted 74504.86 43.211 43.010
ARIMA111_withcov 78274.34 42.276 43.086
MSA_weighted 74640.66 43.457 43.112
RMSE_weighted 75221.31 44.188 43.850
ARIMA100_withcov 84675.35 45.947 44.649
ARIMA100 84729.85 50.090 49.147
ARIMA101 91254.70 53.260 51.876
ARIMA001 105897.76 78.329 68.642
#==============
#plot ensemble forecasts
#==============
ggplot(data=ensemble_results$ensembles,aes(x=Year,y=Estimate,color=Model))+
  labs(title="One-Year-Ahead Ensemble Forecasts")+
  geom_line(size=1)+
  scale_color_brewer(palette="Spectral")+
  geom_point(data=dat,mapping=aes(x=Year,y=runsize_obs),size=2,color="black")+
  ylim(0,NA)+
  theme_bw()

Best Model

Identify the best model, then plot and summarize its results.

best_model <- get_best_model(
  forecasts = forecasts
  ,ensembles = ensemble_results$ensembles
  ,stack_metric = stack_metric
  ,forecast_skill = ensemble_results$forecast_skill
)

#==============
#plot Best Model 
#==============
ggplot(data=best_model,aes(x=Year,y=Estimate,color=Model))+
  geom_ribbon(aes(ymin=L95,ymax=U95,fill=Model),color=NA,alpha=0.4)+
  facet_wrap(~Model,ncol=1)+
  labs(title="Best Model One-Year-Ahead Forecasts")+
  geom_line(size=1)+
  scale_color_brewer(palette="Spectral")+
  geom_point(data=dat,mapping=aes(x=Year,y=runsize_obs),size=2,color="black")+
  ylim(0,NA)+
  theme_bw()

best_mu<-log(best_model%>%filter(Year==max(Year))%>%dplyr::select(Estimate))%>%unlist() #log-scale mean of the final-year forecast
best_sd<-best_model%>%filter(Year==max(Year))%>%dplyr::select(sd)%>%unlist() #log-scale sd of the final-year forecast
best_draws<-data.frame(Estimate=rlnorm(10000,meanlog=best_mu,sdlog=best_sd[1])) #draws from the log-normal forecast distribution

#=========================
#Best model final year pdf
#=========================
ggplot(data=best_draws,aes(x=Estimate))+
  geom_density(aes(fill="Estimate"),color=NA,alpha=0.4)+
  xlim(0,quantile(best_draws$Estimate,0.995))+
  labs(title="Best Model Forecast")+
  scale_color_brewer(palette="Spectral")+
  ylim(0,NA)+
  theme_bw()

#===========================
#Best Model final year table
#===========================
best_model%>%
  group_by(Model)%>%
  filter(Year==max(Year,na.rm=T))%>%
  dplyr::select(Model,Year,Estimate,L95,U95)%>%
  kbl(caption = "Table 5. Best model forecasts one year ahead of final year of data.",digits =0)%>%
  kable_classic(full_width = F, html_font = "Cambria")
Table 6. Best model forecasts one year ahead of final year of data.
Model Year Estimate L95 U95
Stack_weighted 2022 103070 59401 178840