# fm_casestudy_1_0_DownloadData.r
#
# * Install/load R packages
# * Collect historical financial data from internet
# * Create time series data matrix: casestudy1.data0.0
# Closing prices on stocks (BAC, GE, JDSU, XOM)
# Closing values of indexes (SP500)
# Yields on constant maturity US rates/bonds (3MO, 1YR, 5YR, 10 YR)
# Closing price on crude oil spot price
# 0. Install and load packages ----
#
# 0.1 Install packages ---
# Set ind.install0 to TRUE if running script for first time on a computer
# or updating the packages
ind.install0<-FALSE
#
if (ind.install0){
install.packages("quantmod")
install.packages("tseries")
install.packages("vars")
install.packages("fxregime")
install.packages("coefplot")
}
# 0.2 Load packages into R session
library("quantmod")
## Warning: package 'quantmod' was built under R version 3.1.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.1.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.1.3
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.1.3
## Version 0.4-0 included new data defaults. See ?getSymbols.
library("tseries")
## Warning: package 'tseries' was built under R version 3.1.3
library("vars")
## Warning: package 'vars' was built under R version 3.1.3
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 3.1.3
## Loading required package: strucchange
## Warning: package 'strucchange' was built under R version 3.1.3
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 3.1.3
## Loading required package: urca
## Warning: package 'urca' was built under R version 3.1.3
## Loading required package: lmtest
## Warning: package 'lmtest' was built under R version 3.1.3
library("fxregime")
## Warning: package 'fxregime' was built under R version 3.1.3
# 1. Load data into R session ----
# 1.1 Stock Price Data from Yahoo
# Apply quantmod(sub-package TTR) function
# getYahoodata
#
# Returns historical data for any symbol at the website
# http://finance.yahoo.com
#
# 1.1.1 Set start and end date for collection in YYYYMMDD (numeric) format
date.start<-20000101
date.end<-20150331
# format(Sys.Date(),"%Y%m%d")
# 1.1.2 Collect historical data for S&P 500 Index
SP500 <- getYahooData("^GSPC", start=date.start, end=date.end)
chartSeries(SP500[,1:5])
# 1.1.3 Collect historical data for 4 stocks
GE <- getYahooData("GE", start=date.start, end=date.end)
BAC <- getYahooData("BAC", start=date.start, end=date.end)
JDSU <- getYahooData("JDSU", start=date.start, end=date.end)
XOM <- getYahooData("XOM", start=date.start, end=date.end)
AEP <- getYahooData("AEP", start=date.start, end=date.end)
DUK <- getYahooData("DUK", start=date.start, end=date.end)
chartSeries(GE[,1:5])
chartSeries(BAC[,1:5])
chartSeries(JDSU[,1:5])
chartSeries(XOM[,1:5])
chartSeries(AEP[,1:5])
chartSeries(DUK[,1:5])
# 1.1.4 Details of data object GE from getYahoodata
#
# GE is a matrix object with
# row dimension equal to the number of dates
# column dimension equal to 9
is.matrix(GE)
## [1] TRUE
dim(GE)
## [1] 3834 9
# Print out the first and last parts of the matrix:
head(GE)
## Open High Low Close Volume Unadj.Close Div
## 2000-01-03 32.00652 32.15034 31.20898 31.37894 35166578 150.0000 NA
## 2000-01-04 30.80366 30.96055 30.12378 30.12378 35248799 144.0000 NA
## 2000-01-05 30.07149 30.75136 29.82306 30.07149 43489038 143.7500 NA
## 2000-01-06 29.94074 30.73829 29.83615 30.47353 31666460 145.6719 NA
## 2000-01-07 30.96055 31.77118 30.75136 31.65351 32093817 151.3125 NA
## 2000-01-10 31.94114 32.22879 31.61428 31.64044 24262291 151.2500 NA
## Split Adj.Div
## 2000-01-03 NA NA
## 2000-01-04 NA NA
## 2000-01-05 NA NA
## 2000-01-06 NA NA
## 2000-01-07 NA NA
## 2000-01-10 NA NA
tail(GE)
## Open High Low Close Volume Unadj.Close Div Split Adj.Div
## 2015-03-24 25.38 25.48 25.27 25.27 25801300 25.27 NA NA NA
## 2015-03-25 25.23 25.33 24.91 24.91 34897400 24.91 NA NA NA
## 2015-03-26 24.80 24.92 24.67 24.80 32504400 24.80 NA NA NA
## 2015-03-27 24.92 24.92 24.71 24.86 28320600 24.86 NA NA NA
## 2015-03-30 24.98 25.20 24.97 25.12 27281400 25.12 NA NA NA
## 2015-03-31 25.09 25.09 24.81 24.81 34940900 24.81 NA NA NA
# Some attributes of the object GE
mode(GE) # storage mode of GE is "numeric"
## [1] "numeric"
class(GE) # object-oriented class(es) of GE are "xts" and "zoo"
## [1] "xts" "zoo"
# xts is an extensible time-series object from the package xts
# zoo is an object storing ordered observations in a vector or matrix with an index attribute
# Important zoo functions
# coredata() extracts or replaces core data
# index() extracts or replaces the (sort.by) index of the object
# 1.2 Federal Reserve Economic Data (FRED) from the St. Louis Federal Reserve
# Apply quantmod function
# getSymbols( seriesname, src="FRED")
#
# Returns historical data for any symbol at the website
# http://research.stlouisfed.org/fred2/
#
# Series name | Description
#
# DGS3MO | 3-Month Treasury, constant maturity rate
# DGS1 | 1-Year Treasury, constant maturity rate
# DGS5 | 5-Year Treasury, constant maturity rate
# DGS10 | 10-Year Treasury, constant maturity rate
#
# DAAA | Moody's Seasoned Aaa Corporate Bond Yield
# DBAA | Moody's Seasoned Baa Corporate Bond Yield
#
# DCOILWTICO | Crude Oil Prices: West Text Intermediate (WTI) - Cushing, Oklahoma
#
# 1.2.1 Default setting collects entire series
# and assigns to object of same name as the series
getSymbols("DGS3MO", src="FRED")
## As of 0.4-0, 'getSymbols' uses env=parent.frame() and
## auto.assign=TRUE by default.
##
## This behavior will be phased out in 0.5-0 when the call will
## default to use auto.assign=FALSE. getOption("getSymbols.env") and
## getOptions("getSymbols.auto.assign") are now checked for alternate defaults
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
## [1] "DGS3MO"
getSymbols("DGS1", src="FRED")
## [1] "DGS1"
getSymbols("DGS5", src="FRED")
## [1] "DGS5"
getSymbols("DGS10", src="FRED")
## [1] "DGS10"
getSymbols("DAAA", src="FRED")
## [1] "DAAA"
getSymbols("DBAA", src="FRED")
## [1] "DBAA"
getSymbols("DCOILWTICO", src="FRED")
## [1] "DCOILWTICO"
# Each object is a 1-column matrix with time series data
# The column-name is the same as the object name
is.matrix(DGS3MO) #
## [1] TRUE
dim(DGS3MO)
## [1] 8687 1
head(DGS3MO)
## DGS3MO
## 1982-01-04 11.87
## 1982-01-05 12.20
## 1982-01-06 12.16
## 1982-01-07 12.17
## 1982-01-08 11.98
## 1982-01-11 12.49
tail(DGS3MO)
## DGS3MO
## 2015-04-14 0.02
## 2015-04-15 0.02
## 2015-04-16 0.02
## 2015-04-17 0.01
## 2015-04-20 0.03
## 2015-04-21 0.03
mode(DGS3MO)
## [1] "numeric"
class(DGS3MO)
## [1] "xts" "zoo"
#
# 2.0 Merge data series together
# 2.1 Create data frame with all FRED series from 2000/01/01 on
#
# Useful functions/methods for zoo objects
# merge()
# lag (lag.zoo)
# diff()
# window.zoo()
#
# na.locf() # replace NAs by last previou non-NA
# rollmean(), rollmax() # compute rolling functions, column-wise
fred.data0<-merge(
DGS3MO,
DGS1,
DGS5,
DGS10,
DAAA,
DBAA,
DCOILWTICO)["2000::2015-03"]
tail(fred.data0)
## DGS3MO DGS1 DGS5 DGS10 DAAA DBAA DCOILWTICO
## 2015-03-24 0.02 0.24 1.37 1.88 3.50 4.41 47.03
## 2015-03-25 0.04 0.25 1.41 1.93 3.52 4.46 48.75
## 2015-03-26 0.03 0.28 1.47 2.01 3.61 4.56 51.41
## 2015-03-27 0.04 0.27 1.42 1.95 3.53 4.48 48.83
## 2015-03-30 0.04 0.27 1.41 1.96 3.55 4.51 48.66
## 2015-03-31 0.03 0.26 1.37 1.94 3.52 4.49 47.72
# Determine data dimensions
dim(fred.data0)
## [1] 3977 7
class(fred.data0)
## [1] "xts" "zoo"
# Check first and last rows in object
head(fred.data0) ; tail(fred.data0)
## DGS3MO DGS1 DGS5 DGS10 DAAA DBAA DCOILWTICO
## 2000-01-03 5.48 6.09 6.50 6.58 7.75 8.27 NA
## 2000-01-04 5.43 6.00 6.40 6.49 7.69 8.21 25.56
## 2000-01-05 5.44 6.05 6.51 6.62 7.78 8.29 24.65
## 2000-01-06 5.41 6.03 6.46 6.57 7.72 8.24 24.79
## 2000-01-07 5.38 6.00 6.42 6.52 7.69 8.22 24.79
## 2000-01-10 5.42 6.07 6.49 6.57 7.72 8.27 24.71
## DGS3MO DGS1 DGS5 DGS10 DAAA DBAA DCOILWTICO
## 2015-03-24 0.02 0.24 1.37 1.88 3.50 4.41 47.03
## 2015-03-25 0.04 0.25 1.41 1.93 3.52 4.46 48.75
## 2015-03-26 0.03 0.28 1.47 2.01 3.61 4.56 51.41
## 2015-03-27 0.04 0.27 1.42 1.95 3.53 4.48 48.83
## 2015-03-30 0.04 0.27 1.41 1.96 3.55 4.51 48.66
## 2015-03-31 0.03 0.26 1.37 1.94 3.52 4.49 47.72
# Count the number of NAs in each column
apply(is.na(fred.data0),2,sum)
## DGS3MO DGS1 DGS5 DGS10 DAAA DBAA
## 164 164 164 164 164 164
## DCOILWTICO
## 150
# Plot the rates series all togehter
opar<-par()
par(fg="blue",bg="black",
col.axis="gray",
col.lab="gray",
col.main="blue",
col.sub="blue")
ts.plot(as.ts(fred.data0[,1:6]),col=rainbow(6),bg="black",
main="FRED Data: Rates")
## Warning in xy.coords(x = matrix(rep.int(tx, k), ncol = k), y = x, log =
## log): NAs introduced by coercion
## Warning in xy.coords(x, y): NAs introduced by coercion
par(opar)
## Warning in par(opar): graphical parameter "cin" cannot be set
## Warning in par(opar): graphical parameter "cra" cannot be set
## Warning in par(opar): graphical parameter "csi" cannot be set
## Warning in par(opar): graphical parameter "cxy" cannot be set
## Warning in par(opar): graphical parameter "din" cannot be set
## Warning in par(opar): graphical parameter "page" cannot be set
# add legend to plot
legend(x=0,y=2,
legend=dimnames(fred.data0)[[2]][1:6],
lty=rep(1,times=6),
col=rainbow(6),
cex=0.75)
# Plot the Crude Oil PRice
chartSeries(to.monthly(fred.data0[,"DCOILWTICO"]), main="FRED Data: Crude Oil (WTI)")
## Warning in to.period(x, "months", indexAt = indexAt, name = name, ...):
## missing values removed from data
chartSeries(to.monthly(XOM[,1:5]))
# 2.2 Merge the closing prices for the stock market data series
yahoo.data0<-merge(BAC$Close,
GE$Close,
JDSU$Close,
XOM$Close,
SP500$Close,
AEP$Close,
DUK$Close)
# Replace the index of yahoo data with date values that do not include hours/minutes
dimnames(yahoo.data0)[[2]]<-c("BAC","GE","JDSU","XOM","SP500","AEP","DUK")
yahoo.data0.0<-zoo(x=coredata(yahoo.data0), order.by=as.Date(time(yahoo.data0)))
# fred.data0 is already indexed by this scale
# 2.3 Merge the yahoo and Fred data together
# 2.3.1 merge with all dates
casestudy1.data0<-merge(yahoo.data0.0, fred.data0)
dim(casestudy1.data0)
## [1] 3977 14
head(casestudy1.data0)
## BAC GE JDSU XOM SP500 AEP DUK
## 2000-01-03 15.61121 31.37894 752.00 27.45076 1455.22 14.97396 40.60250
## 2000-01-04 14.68461 30.12378 684.50 26.92497 1399.42 15.15257 41.23363
## 2000-01-05 14.84576 30.07149 633.00 28.39281 1402.11 15.71819 42.91664
## 2000-01-06 16.11480 30.47353 599.00 29.86064 1403.45 15.80750 44.07370
## 2000-01-07 15.69179 31.65351 719.75 29.77301 1441.47 16.01588 45.23077
## 2000-01-10 15.14791 31.64044 801.50 29.35676 1457.60 15.95634 45.17818
## DGS3MO DGS1 DGS5 DGS10 DAAA DBAA DCOILWTICO
## 2000-01-03 5.48 6.09 6.50 6.58 7.75 8.27 NA
## 2000-01-04 5.43 6.00 6.40 6.49 7.69 8.21 25.56
## 2000-01-05 5.44 6.05 6.51 6.62 7.78 8.29 24.65
## 2000-01-06 5.41 6.03 6.46 6.57 7.72 8.24 24.79
## 2000-01-07 5.38 6.00 6.42 6.52 7.69 8.22 24.79
## 2000-01-10 5.42 6.07 6.49 6.57 7.72 8.27 24.71
tail(casestudy1.data0)
## BAC GE JDSU XOM SP500 AEP DUK DGS3MO DGS1 DGS5
## 2015-03-24 15.61 25.27 13.48 84.52 2091.50 57.25 76.06 0.02 0.24 1.37
## 2015-03-25 15.41 24.91 13.04 84.86 2061.05 55.91 74.96 0.04 0.25 1.41
## 2015-03-26 15.42 24.80 13.03 84.32 2056.15 55.33 74.35 0.03 0.28 1.47
## 2015-03-27 15.31 24.86 12.98 83.58 2061.02 55.90 75.00 0.04 0.27 1.42
## 2015-03-30 15.52 25.12 13.14 85.63 2086.24 56.58 75.90 0.04 0.27 1.41
## 2015-03-31 15.39 24.81 13.12 85.00 2067.89 56.25 76.78 0.03 0.26 1.37
## DGS10 DAAA DBAA DCOILWTICO
## 2015-03-24 1.88 3.50 4.41 47.03
## 2015-03-25 1.93 3.52 4.46 48.75
## 2015-03-26 2.01 3.61 4.56 51.41
## 2015-03-27 1.95 3.53 4.48 48.83
## 2015-03-30 1.96 3.55 4.51 48.66
## 2015-03-31 1.94 3.52 4.49 47.72
apply(is.na(casestudy1.data0),2,sum)
## BAC GE JDSU XOM SP500 AEP
## 143 143 143 143 143 143
## DUK DGS3MO DGS1 DGS5 DGS10 DAAA
## 143 164 164 164 164 164
## DBAA DCOILWTICO
## 164 150
# 2.3.2 Subset out days when SP500 is not missing (not == NA)
index.notNA.SP500<-which(is.na(coredata(casestudy1.data0$SP500))==FALSE)
casestudy1.data0.0<-casestudy1.data0[index.notNA.SP500,]
head(casestudy1.data0.0)
## BAC GE JDSU XOM SP500 AEP DUK
## 2000-01-03 15.61121 31.37894 752.00 27.45076 1455.22 14.97396 40.60250
## 2000-01-04 14.68461 30.12378 684.50 26.92497 1399.42 15.15257 41.23363
## 2000-01-05 14.84576 30.07149 633.00 28.39281 1402.11 15.71819 42.91664
## 2000-01-06 16.11480 30.47353 599.00 29.86064 1403.45 15.80750 44.07370
## 2000-01-07 15.69179 31.65351 719.75 29.77301 1441.47 16.01588 45.23077
## 2000-01-10 15.14791 31.64044 801.50 29.35676 1457.60 15.95634 45.17818
## DGS3MO DGS1 DGS5 DGS10 DAAA DBAA DCOILWTICO
## 2000-01-03 5.48 6.09 6.50 6.58 7.75 8.27 NA
## 2000-01-04 5.43 6.00 6.40 6.49 7.69 8.21 25.56
## 2000-01-05 5.44 6.05 6.51 6.62 7.78 8.29 24.65
## 2000-01-06 5.41 6.03 6.46 6.57 7.72 8.24 24.79
## 2000-01-07 5.38 6.00 6.42 6.52 7.69 8.22 24.79
## 2000-01-10 5.42 6.07 6.49 6.57 7.72 8.27 24.71
tail(casestudy1.data0.0)
## BAC GE JDSU XOM SP500 AEP DUK DGS3MO DGS1 DGS5
## 2015-03-24 15.61 25.27 13.48 84.52 2091.50 57.25 76.06 0.02 0.24 1.37
## 2015-03-25 15.41 24.91 13.04 84.86 2061.05 55.91 74.96 0.04 0.25 1.41
## 2015-03-26 15.42 24.80 13.03 84.32 2056.15 55.33 74.35 0.03 0.28 1.47
## 2015-03-27 15.31 24.86 12.98 83.58 2061.02 55.90 75.00 0.04 0.27 1.42
## 2015-03-30 15.52 25.12 13.14 85.63 2086.24 56.58 75.90 0.04 0.27 1.41
## 2015-03-31 15.39 24.81 13.12 85.00 2067.89 56.25 76.78 0.03 0.26 1.37
## DGS10 DAAA DBAA DCOILWTICO
## 2015-03-24 1.88 3.50 4.41 47.03
## 2015-03-25 1.93 3.52 4.46 48.75
## 2015-03-26 2.01 3.61 4.56 51.41
## 2015-03-27 1.95 3.53 4.48 48.83
## 2015-03-30 1.96 3.55 4.51 48.66
## 2015-03-31 1.94 3.52 4.49 47.72
apply(is.na(casestudy1.data0.0)==TRUE, 2,sum)
## BAC GE JDSU XOM SP500 AEP
## 0 0 0 0 0 0
## DUK DGS3MO DGS1 DGS5 DGS10 DAAA
## 0 28 28 28 28 28
## DBAA DCOILWTICO
## 28 14
# Remaining missing values are for interest rates and the crude oil spot price
# There are days when the stock market is open but the bond market and/or commodities market
# is closed
# For the rates and commodity data, replace NAs with previoius non-NA values
casestudy1.data0.00<-na.locf(casestudy1.data0.0)
apply(is.na(casestudy1.data0.00),2,sum) # Only 1 NA left, the first DCOILWTICO value
## BAC GE JDSU XOM SP500 AEP
## 0 0 0 0 0 0
## DUK DGS3MO DGS1 DGS5 DGS10 DAAA
## 0 0 0 0 0 0
## DBAA DCOILWTICO
## 0 1
save(file="casestudy_1_0.RData", list=c("casestudy1.data0.00"))