## Tuesday, 28 July 2015

### Forecasting with time series data

Day: Friday 31st July 2pm
Location: Bioscience Level 6
Topic: Forecasting with time series data

###########################################################################

## Forecasting with Times Series Data.

## We will use Rob Hyndman's forecast R-package.

## For more details on the package and time-series forecasting in general,
## see https://www.otexts.org/fpp

library(forecast)

##########################################################################

## Example 1.

## Our first example will use annual rainforest loss (ha) data collected
## by satellite imagery for Tocantins, Brazilian Amazonia.

DF <- c(1650,730,580,440,409,333,333,797,320,273,576,216,244,189,212,
156,158,271,124,63,107,61,49,40,52)

time <- c(1988:2012)

## First, we set-up the data in time series form.

data1 <- ts(c(t(DF)), start = 1988, end = 2012, frequency = 1)

data1

## We then split the data into training and test. Remember we
## want to forecast 2010-2012 (this will be our test data).

data_train <- window(data1, start = 1988, end = 2010-.1, frequency = 1)

data_test <- window(data1, start = 2010, end = 2012, frequency = 1)

## And finally we will fit a simple exponential smoothing model.

fit1 <- ets(data_train, model = "ANN")  # Note the "ANN".

year_forecast <- 3

fit1_forecast <- forecast(fit1, h = year_forecast) # Forecast.

plot(fit1_forecast)   # Plot the forecast.
points(time[23:25], DF[23:25], lty = 1, col = "black", lwd = 4, pch = 0)

legend("topright", legend = c("actual", "forecasted"), lwd = 4,
col = c("black","blue"), lty = c(0,0), merge = TRUE,
bty = "n", pch = c(0,19), cex = 1.3);

## The value for alpha was estimated to be 0.7587.

fit1_forecast\$model

## Other model structures.

## From our figure we see a strong downward trend, so we could add an
## additive trend in the model.

fit2 <- ets(data_train, model = "AAN") # Note the "AAN".

fit2_forecast <- forecast(fit2, h = year_forecast)

par(mfrow = c(1,2))

plot(fit1_forecast)
points(time[23:25], DF[23:25], lty = 1, col = "black", lwd = 4, pch = 0)

plot(fit2_forecast)   # Plot the "new" forecast.
points(time[23:25], DF[23:25], lty = 1, col = "black", lwd = 4, pch = 0)

legend("topright", legend = c("actual", "forecasted"), lwd = 4,
col = c("black","blue"), lty = c(0,0), merge = TRUE,
bty = "n", pch=c(0,19), cex = 1.3);

## Or just let the AIC choose the best model.

fit3 <- ets(data_train)

fit3_forecast <- forecast(fit3, h = year_forecast) # Forecast it.

par(mfrow = c(1,3))

plot(fit1_forecast)
plot(fit2_forecast)
plot(fit3_forecast)   # Plot the "new" forecast.
points(time[23:25], DF[23:25], lty = 1, col = "black", lwd = 4, pch = 0)

legend("topright", legend = c("actual", "forecasted"), lwd = 4,
col = c("black","blue"), lty = c(0,0), merge = TRUE,
bty = "n", pch = c(0,19), cex = 1.3);

## We can also predict future forest cover loss (so no test data).

data_train <- window(data1, start = 1988, end = 2012-.1, frequency = 1)

fit4 <- ets(data_train)

fit4_forecast <- forecast(fit4, h = year_forecast)

par(mfrow = c(1,1))

plot(fit4_forecast)

##########################################################################

## Example 2.

## Time Series data with seasonality. Example/data taken from
## Hyndman and Athanasopoulos (2014), see https://www.otexts.org/fpp

data(gas) # Australian monthly gas production.

par(mfrow = c(1,1))

plot(gas)

## No seasonality.

fit_gas1 <- ets(gas, model = "ANN")

year_forecast <- 3*12   # 3 years times 12 months.

fit_gas1_forecast <- forecast(fit_gas1, h = year_forecast)

plot(fit_gas1_forecast)

## With seasonality.

fit_gas2 <- ets(gas, model = "ANA")

fit_gas2_forecast <- forecast(fit_gas2, h = year_forecast)

par(mfrow = c(1,2))

plot(fit_gas1_forecast)
plot(fit_gas2_forecast)

## Let ets choose the best model using AIC.

fit_gas3 <- ets(gas)

fit_gas3_forecast <- forecast(fit_gas3, h = year_forecast)

par(mfrow = c(1,3))

plot(fit_gas1_forecast)
plot(fit_gas2_forecast)

plot(fit_gas3_forecast)