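This time-series.R script, below, takes a set of historical time-series data, measures how much the value changed over every historical window that is as long as the forecast period, and uses the distribution of those changes (applied to the most recent value) to estimate the probability that the value will fall in each of five bins on the closing date.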
The input file is a CSV file with no header row and two columns (Date, Value), with dates in reverse chronological order and in ISO-8601 format. Like so:
2019-08-06,1.73
2019-08-05,1.75
2019-08-02,1.86
Output is as follows:
0.466: Bin 1 - <1.7
0.328: Bin 2 - 1.7 to <=1.9
0.144: Bin 3 - 1.9+ to <2.1
0.045: Bin 4 - 2.1 to <=2.3
0.017: Bin 5 - 2.3+
Note: Patterns in the data set will skew results. A 20-year upward trend will make the higher-value bins more likely. A volatile 5-year period will produce more conservative predictions and may not capture recent trends or a recent change in direction of movement.
R Script
# time-series.R
# Original: December 4, 2018
# Last revised: December 4, 2018
#################################################
# Description: This script is for running any
# sequence of historical time-series data to make
# a forecast across five value bins by a particular date.
# Assumes a csv file with two columns (Date, Value)
# with dates in reverse chronological order and in
# ISO-8601 format. Like so:
#
# 2019-08-06,1.73
# 2019-08-05,1.75
# 2019-08-02,1.86
# Workspace reset intentionally removed. This file only defines a function, so
# it must not call rm(list = ls()): source()-ing it would delete every object
# in the caller's environment. An explicit gc() is also unnecessary because R
# collects garbage automatically. No reader option is set here; read.csv() is
# called inside the function with header = FALSE.
#################################################
# Function
# time_series(): estimate the probability that a series lands in each of five
# bins on a closing date, based on its historical changes.
#
# The name uses an underscore because `time-series` is not a valid R
# identifier: `time-series <- function(...)` parses as an assignment to the
# expression `time - series` and fails before anything is defined.
#
# Arguments:
#   time_path    Path to a headerless csv file with two columns (date, value);
#                dates must be in a format as.Date() accepts (ISO-8601).
#   closing_date Date (or ISO-8601 string) the forecast is for.
#   trading_days Number of days per week that have an observation (1 to 7).
#   bin1..bin4   Thresholds separating the five bins, expected in increasing
#                order.
#
# Value:
#   Prints one line per bin ("<probability>: Bin <k> - <range>") and invisibly
#   returns the five probabilities as a numeric vector named bin1..bin5.
#
# Errors if trading_days is out of range, the forecast horizon is negative,
# a date or value is missing / non-numeric, or the file holds too little
# history for the horizon.
time_series <- function(time_path = "./path/file.csv",
                        closing_date = "2020-01-01", trading_days = 5,
                        bin1 = 1.7, bin2 = 1.9,
                        bin3 = 2.1, bin4 = 2.3) {
  if (!is.numeric(trading_days) || length(trading_days) != 1L ||
        is.na(trading_days) || trading_days < 1 || trading_days > 7) {
    stop("trading_days must be a single number between 1 and 7",
         call. = FALSE)
  }

  #################################################
  # Forecast horizon
  # Calendar days until closing, minus an estimate of the non-trading days.
  # The estimate is (7 - trading_days) per whole week, with the number of
  # weeks rounded to the nearest integer, so it is approximate for horizons
  # that are not a multiple of seven days.
  day_difference <- as.numeric(
    difftime(as.Date(closing_date), Sys.Date(), units = "days")
  )
  remaining_weeks <- round(day_difference / 7)
  non_trading_days <- (7 - trading_days) * remaining_weeks
  # Rounded so it can be used safely as a row offset.
  remaining_days <- round(day_difference - non_trading_days)
  if (!is.finite(remaining_days) || remaining_days < 0) {
    stop("closing_date gives a negative forecast horizon; ",
         "it must not be earlier than today", call. = FALSE)
  }

  #################################################
  # Import & Parse
  time_import <- read.csv(time_path, header = FALSE)
  colnames(time_import) <- c("date", "value")
  time_import$date <- as.Date(time_import$date)
  if (anyNA(time_import$date)) {
    stop("date column contains missing or unparseable dates", call. = FALSE)
  }
  # Enforce newest-first order instead of assuming it; input that is already
  # in reverse chronological order is left unchanged.
  time_import <- time_import[order(time_import$date, decreasing = TRUE), ]

  values <- time_import$value
  if (!is.numeric(values) || anyNA(values)) {
    stop("value column must be numeric with no missing entries",
         call. = FALSE)
  }

  # Most recent observation; the bins are expressed relative to it below.
  current_value <- values[1]

  # Number of historical windows of length remaining_days that fit the data.
  time_rows <- length(values) - remaining_days
  if (time_rows < 1) {
    stop("not enough history: ", length(values), " rows for a horizon of ",
         remaining_days, " trading days", call. = FALSE)
  }

  # Change over each window: a value minus the value remaining_days rows
  # earlier in time (rows are newest first, so "earlier" is further down).
  newer <- seq_len(time_rows)
  time_calc <- values[newer] - values[newer + remaining_days]

  # Bin thresholds expressed as changes from the current value.
  adj <- c(bin1, bin2, bin3, bin4) - current_value

  # Share of historical changes falling in each bin. The alternating
  # strict / inclusive bounds mirror the printed bin labels.
  n_obs <- length(time_calc)
  share <- function(in_bin) round(sum(in_bin) / n_obs, digits = 3)
  probs <- c(
    bin1 = share(time_calc < adj[1]),
    bin2 = share(time_calc >= adj[1] & time_calc <= adj[2]),
    bin3 = share(time_calc > adj[2] & time_calc < adj[3]),
    bin4 = share(time_calc >= adj[3] & time_calc <= adj[4]),
    bin5 = share(time_calc > adj[4])
  )

  ###############################################
  # Print results
  ranges <- c(
    paste0("<", bin1),
    paste0(bin1, " to <=", bin2),
    paste0(bin2, "+ to <", bin3),
    paste0(bin3, " to <=", bin4),
    paste0(bin4, "+")
  )
  cat(paste0(probs, ": Bin ", 1:5, " - ", ranges, "\n"), sep = "")

  invisible(probs)
}