Forecasting with R Script: Graph of WHO Flu Data

# flu.R
# Original: November 2, 2018
# Last revised: December 3, 2018 

#################################################
# Prep: Go to the WHO Flumart website:
# http://apps.who.int/flumart/Default?ReportNo=12
# Select Year 2000 Week 1 to current year, week 52.
# Save file to the data directory. Change weeks below. 

#################################################
# Description:
# Script parses cvs file and provides a graph 
# with selectable yearly trend lines for 
# comparison, also includes analysis options at 
# bottom for predicting a particular day or 
# searching by cases.

# Clear memory
rm(list=ls())
gc()

#################################################
# Set Variables in Function
flu <- function(flu_file="./data/FluNetInteractiveReport.csv",
                week_start=1, week_end=52) {

  #################################################
  #PRELIMINARIES

  #Define basepath and set working directory:
  basepath = "~/Documents/programs/R/forecasting"
  setwd(basepath)

  #Preventing scientific notation in graphs
  options(scipen=999)

  #################################################
  #Libraries 
  # If library X is not installed, you can install 
  # it with this command: install.packages('X')
  library(plyr)
  library(tidyr)
  library(data.table)
  library(lubridate)
  library(stringr)
  library(ggplot2)
  library(dplyr)
  library(reshape2)
  library(corrplot)
  library(hydroGOF)
  library(Hmisc)
  library(forecast)
  library(tseries)

  #################################################
  # Import & Parse
  # Point to downloaded flu data file, variable is above.
  flumart <- read.csv(flu_file, skip=3, header=TRUE) 

  # Drop all the columns but the ones of interest
  flumart <- flumart[ -c(2,3,6:19,21) ]

  # Assign column names to something more reasonable
  colnames(flumart) <- c("Country", "Year", "Week", "Confirmed_Flu", "Prevalance")  

  # Assign the country variable from first column, second row
  country <- flumart[c(1),c(1)]

  # Incomplete years mess up correlation matrix
  flu_table <- filter(flumart, Year >= 2000)

  # Drop the non-numerical columns
  flu_table <- flu_table[,-c(1,5)]

  # Reshape the table into grid
  flu_table <- reshape(flu_table, direction="wide", idvar="Week", timevar="Year")

  # Fix column names after reshaping
  names(flu_table) <- gsub("Confirmed_Flu.", "", names(flu_table))
  
  # Put into matrix for correlations
  flu_table <- as.matrix(flu_table[,-c(1,5)])

  #################################################
  # Correlate & Plot
  flu_rcorr <- rcorr(flu_table)
  flu_coeff <- flu_rcorr$r
  flu_p <- flu_rcorr$P

  flu_matches <- flu_coeff[,ncol(flu_coeff)]
  flu_matches <- sort(flu_matches, decreasing = TRUE)
  flu_matches <- names(flu_matches)
  current_year <- as.numeric(flu_matches[1])
  matching_year1 <- as.numeric(flu_matches[2])
  matching_year2 <- as.numeric(flu_matches[3])
  matching_year3 <- as.numeric(flu_matches[4])
  matching_year4 <- as.numeric(flu_matches[5])
  matching_year5 <- as.numeric(flu_matches[6])

  #################################################
  # Prediction using ARIMA

  flu_data <- flumart # Importing initial flu data
  flu_data <- filter(flu_data, Week <= 52)
  flu_data <- flu_data[, -c(1:3,5)] # Remove Year & Week
  flu_ts <- ts(flu_data, start = 1, frequency=52) # ARIMA needs time series
  flu_data <- as.vector(flu_data)
  flu_fit <- auto.arima(flu_ts, D=1) 
  flu_pred <- forecast(flu_fit, h=52)
  flu_plot <- as.data.frame(Predicted_Mean <- (flu_pred$mean))
  flu_plot$Week <- as.numeric(1:nrow(flu_plot))
  
  flu_prediction <- ggplot() + 
    ggtitle("Predicted Flu Incidence") +
    geom_line(data = flu_plot, aes(x = Week, y = Predicted_Mean)) + 
    scale_x_continuous() + scale_y_continuous()

  #################################################
  # Graph

  # Creating a temp variable for graph
  flu_graph <- flumart

  # Filtering results for 5 year comparison, against 5 closest correlated years 
  flu_graph <- filter(flu_graph, Year == current_year | Year == matching_year1 |
                      Year == matching_year2 | Year == matching_year3 | 
                      Year ==matching_year4 | Year == matching_year5)

  # These variables need to be numerical
  flu_graph$Week <- as.numeric(flu_graph$Week)
  flu_graph$Confirmed_Flu <- as.numeric(flu_graph$Confirmed_Flu)

  # Limit to weeks of interest
  flu_graph <- filter(flu_graph, Week >= week_start)
  flu_graph <- filter(flu_graph, Week <= week_end)  

  # The variable used to color and split the data should be a factor so lines are properly drawn
  flu_graph$Year <- factor(flu_graph$Year)

  # Lays out sea_graph by day with a colored line for each year
  flu_compare <- ggplot() + 
    ggtitle(paste("Confirmed Flu in", country)) +
    geom_line(data = flu_graph, aes(x = Week, y = Confirmed_Flu, color = Year)) + 
    geom_line(data = flu_plot, aes(x = Week, y = Predicted_Mean, color="Forecast"))
    scale_x_continuous()

  flu_week <- flu_graph[nrow(flu_graph),3]
  flu_year <- flu_graph[nrow(flu_graph),2]
  summary(flu_graph)

  ###############################################
  # Printing
  # Creating a cvs file of changed data
  write.csv(flu_table, file=paste0("./output/flu-in-", country, "-table-",
                                   flu_year, "-week-", flu_week, ".csv")) 
  
  # Print flu_compare to screen
  flu_compare
  
  # Print flu
  ggsave(filename=paste0("./output/flu-in-", country, 
                         "-prediction-", flu_year, "-week-", flu_week, ".pdf"), plot=flu_prediction)
  
  # Print flu_plot to PDF
  ggsave(filename=paste0("./output/flu-in-", country, 
                         "-compare-", flu_year, "-week-", flu_week, ".pdf"), plot=flu_compare)
  
  # Print correlation matrix
  pdf(paste0("./output/flu-in-", country, "-correlation-", 
             flu_year, "-week-", flu_week, ".pdf"))
  corrplot(flu_coeff, method="pie", type="lower")
  dev.off()
    
  return(flu_graph[nrow(flu_graph),])
}

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s