# fm_casestudy_1_rcode_CAPM.r
#Execute the R-script ``fm_casestudy_1_0_DownloadData.r''
# creates the time-series matrix $casestudy1.data0.00$
# and saves it in R-workspace ``casestudy_1_0.Rdata'.
#source("fm_casestudy_1_0.r")
# 0.1 Load libraries ----
library("zoo")
## Warning: package 'zoo' was built under R version 3.1.3
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library("quantmod")
## Warning: package 'quantmod' was built under R version 3.1.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.1.3
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.1.3
## Version 0.4-0 included new data defaults. See ?getSymbols.
library("coefplot")
## Warning: package 'coefplot' was built under R version 3.1.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.1.3
library ("graphics")
library("ggplot2")
#####
# 0.2 Load R dataset: casestudy_1_0.Rdata ----
load("casestudy_1_0.RData")
dim(casestudy1.data0.00)
## [1] 3834 14
names(casestudy1.data0.00)
## [1] "BAC" "GE" "JDSU" "XOM" "SP500"
## [6] "AEP" "DUK" "DGS3MO" "DGS1" "DGS5"
## [11] "DGS10" "DAAA" "DBAA" "DCOILWTICO"
head(casestudy1.data0.00)
## BAC GE JDSU XOM SP500 AEP DUK
## 2000-01-03 15.61121 31.37894 752.00 27.45076 1455.22 14.97396 40.60250
## 2000-01-04 14.68461 30.12378 684.50 26.92497 1399.42 15.15257 41.23363
## 2000-01-05 14.84576 30.07149 633.00 28.39281 1402.11 15.71819 42.91664
## 2000-01-06 16.11480 30.47353 599.00 29.86064 1403.45 15.80750 44.07370
## 2000-01-07 15.69179 31.65351 719.75 29.77301 1441.47 16.01588 45.23077
## 2000-01-10 15.14791 31.64044 801.50 29.35676 1457.60 15.95634 45.17818
## DGS3MO DGS1 DGS5 DGS10 DAAA DBAA DCOILWTICO
## 2000-01-03 5.48 6.09 6.50 6.58 7.75 8.27 NA
## 2000-01-04 5.43 6.00 6.40 6.49 7.69 8.21 25.56
## 2000-01-05 5.44 6.05 6.51 6.62 7.78 8.29 24.65
## 2000-01-06 5.41 6.03 6.46 6.57 7.72 8.24 24.79
## 2000-01-07 5.38 6.00 6.42 6.52 7.69 8.22 24.79
## 2000-01-10 5.42 6.07 6.49 6.57 7.72 8.27 24.71
tail(casestudy1.data0.00)
## BAC GE JDSU XOM SP500 AEP DUK DGS3MO DGS1 DGS5
## 2015-03-24 15.61 25.27 13.48 84.52 2091.50 57.25 76.06 0.02 0.24 1.37
## 2015-03-25 15.41 24.91 13.04 84.86 2061.05 55.91 74.96 0.04 0.25 1.41
## 2015-03-26 15.42 24.80 13.03 84.32 2056.15 55.33 74.35 0.03 0.28 1.47
## 2015-03-27 15.31 24.86 12.98 83.58 2061.02 55.90 75.00 0.04 0.27 1.42
## 2015-03-30 15.52 25.12 13.14 85.63 2086.24 56.58 75.90 0.04 0.27 1.41
## 2015-03-31 15.39 24.81 13.12 85.00 2067.89 56.25 76.78 0.03 0.26 1.37
## DGS10 DAAA DBAA DCOILWTICO
## 2015-03-24 1.88 3.50 4.41 47.03
## 2015-03-25 1.93 3.52 4.46 48.75
## 2015-03-26 2.01 3.61 4.56 51.41
## 2015-03-27 1.95 3.53 4.48 48.83
## 2015-03-30 1.96 3.55 4.51 48.66
## 2015-03-31 1.94 3.52 4.49 47.72
# 0.3 Define functions ----
# function to compute daily returns of a symbol
fcn.compute.r.daily.symbol0<-function(
symbol0="SP500",
rdbase0=casestudy1.data0.00, scaleLog=FALSE){
indexcol.symbol0<-match(symbol0, dimnames(rdbase0)[[2]],nomatch=0)
if (indexcol.symbol0==0){ return(NULL)}
if(scaleLog==FALSE){
r.daily.symbol0<-zoo(
x=exp(as.matrix(diff(log(rdbase0[,indexcol.symbol0]))))-1,
order.by=as.Date(time(rdbase0)[-1]))
dimnames(r.daily.symbol0)[[2]]<-paste("r.daily.",symbol0,sep="")
return(r.daily.symbol0)
}
if(scaleLog==TRUE){
r.daily.symbol0<-zoo(
x=(as.matrix(diff(log(rdbase0[,indexcol.symbol0])))),
#order.by=time(rdbase0)[-1])
order.by=as.Date(time(rdbase0)[-1]))
dimnames(r.daily.symbol0)[[2]]<-paste("dlog.daily.",symbol0,sep="")
return(r.daily.symbol0)
}
return(NULL)
}
fcn.compute.r.daily.riskfree<-function(rdbase0=casestudy1.data0.00){
r.daily.riskfree<-(.01*coredata(rdbase0[-1,"DGS3MO"]) *
diff(as.numeric(time(rdbase0)))/360)
dimnames(r.daily.riskfree)[[2]]<-"r.daily.riskfree"
r.daily.riskfree0<-zoo(
x=as.matrix(r.daily.riskfree),
#order.by=time(rdbase0)[-1])
order.by=as.Date(time(rdbase0)[-1]))
return(r.daily.riskfree0)
}
# function to compute submatrix of kperiod returns
fcn.rollksub<-function(x,kperiod=2,...){
x.0<-filter(as.matrix(coredata(x)), f=rep(1,times=kperiod), sides=1)
n0=floor(length(x)/kperiod)
indexsub0<-seq(kperiod,length(x),kperiod)
x.00<-x.0[indexsub0]
return(x.00)
}
# 1.1 Define list.symbol.00
list.symbol.00<-c("GE","BAC","XOM","JDSU")
# 2.0 Setup CAPM for symbol0 ----
symbol0<-list.symbol.00[1]
#symbol0<-list.symbol.00[2]
#symbol0<-list.symbol.00[3]
#symbol0<-list.symbol.00[4]
# 2.1 Plot time series of symbol0, SP500 and risk-free asset -----
# symbol0, SP500, and the risk-free interest rate
opar<-par()
# set graphics parameter to 3 panels
par(mfcol=c(3,1))
plot(casestudy1.data0.00[,symbol0],ylab="Price",
main=paste(symbol0, " Stock",sep=""))
plot(casestudy1.data0.00[,"SP500"], ylab="Value",main="S&P500 Index")
plot(casestudy1.data0.00[,"DGS3MO"], ylab="Rate" ,
main="3-Month Treasury Rate (Constant Maturity)")
# reset graphics parameter to 1 panel
par(mfcol=c(1,1))
# 2.2 Compute the returns series ----
r.daily.symbol0<-fcn.compute.r.daily.symbol0(symbol0, casestudy1.data0.00)
r.daily.SP500<-fcn.compute.r.daily.symbol0("SP500", casestudy1.data0.00)
r.daily.riskfree<-fcn.compute.r.daily.riskfree(casestudy1.data0.00)
# Note for returns time series of riskfree asset
# holding periods vary from 1-3 days corresponding
# to mid-week and over-weekend returns)
# plot(r.daily.riskfree)
# Compute excess returns of symbol0 and SP500
r.daily.symbol0.0 <-r.daily.symbol0 - r.daily.riskfree
dimnames(r.daily.symbol0.0)[[2]]<-paste("r.daily.",symbol0,".0",sep="")
r.daily.SP500.0<-r.daily.SP500 - r.daily.riskfree
dimnames(r.daily.SP500.0)[[2]]<-"r.daily.SP500.0"
dim(r.daily.SP500.0)
## [1] 3833 1
# 2.3 Create r.daily.data0 = merged series ----
# and display first and last sets of rows
dimnames(r.daily.symbol0)[[2]]
## [1] "r.daily.GE"
# Note: r.daily.symbol0 has names that use symbol0
r.daily.data0<-merge(r.daily.symbol0, r.daily.SP500, r.daily.riskfree,
r.daily.symbol0.0, r.daily.SP500.0)
head(r.daily.data0)
## r.daily.GE r.daily.SP500 r.daily.riskfree r.daily.GE.0
## 2000-01-04 -0.0400000000 -0.038344670 0.0001508333 -0.0401508333
## 2000-01-05 -0.0017359722 0.001922189 0.0001511111 -0.0018870833
## 2000-01-06 0.0133694590 0.000955674 0.0001502778 0.0132191812
## 2000-01-07 0.0387214059 0.027090400 0.0001494444 0.0385719615
## 2000-01-10 -0.0004129203 0.011189973 0.0004516667 -0.0008645869
## 2000-01-11 0.0016527601 -0.013062514 0.0001508333 0.0015019268
## r.daily.SP500.0
## 2000-01-04 -0.0384955037
## 2000-01-05 0.0017710780
## 2000-01-06 0.0008053962
## 2000-01-07 0.0269409552
## 2000-01-10 0.0107383063
## 2000-01-11 -0.0132133472
tail(r.daily.data0)
## r.daily.GE r.daily.SP500 r.daily.riskfree r.daily.GE.0
## 2015-03-24 -0.007852375 -0.006139421 5.555556e-07 -0.007852931
## 2015-03-25 -0.014246142 -0.014558905 1.111111e-06 -0.014247253
## 2015-03-26 -0.004415897 -0.002377502 8.333333e-07 -0.004416731
## 2015-03-27 0.002419355 0.002368563 1.111111e-06 0.002418244
## 2015-03-30 0.010458568 0.012236645 3.333333e-06 0.010455235
## 2015-03-31 -0.012340764 -0.008795776 8.333333e-07 -0.012341598
## r.daily.SP500.0
## 2015-03-24 -0.006139977
## 2015-03-25 -0.014560016
## 2015-03-26 -0.002378335
## 2015-03-27 0.002367452
## 2015-03-30 0.012233312
## 2015-03-31 -0.008796610
# 2.4 Excess Returns plot: symbol0 vs SP500 ----
par(mfcol=c(1,1))
plot(r.daily.SP500.0, r.daily.symbol0.0, main=symbol0)
abline(h=0,v=0)
# 3. Linear Regression for CAPM ----
#The linear regression model is fit using the R-function lm():
options(show.signif.stars=FALSE)
# (by default the output from summary.lm() uses stars (*)
# to indicate significant coefficients;
# automated processing of the output encounters errors
# with asterisks so the option to not show them is necessary)
lmfit0<-lm(r.daily.symbol0.0 ~ r.daily.SP500.0, x=TRUE, y=TRUE)
# 3.1 Apply lm(), summary.lm() ----
# The components of lmfit0:
names(lmfit0)
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "xlevels" "call" "terms" "model"
## [13] "x" "y"
lmfit0.summary<-summary.lm(lmfit0)
# The components of lmfit0.summary:
names(lmfit0.summary)
## [1] "call" "terms" "residuals" "coefficients"
## [5] "aliased" "sigma" "df" "r.squared"
## [9] "adj.r.squared" "fstatistic" "cov.unscaled"
print(lmfit0.summary)
##
## Call:
## lm(formula = r.daily.symbol0.0 ~ r.daily.SP500.0, x = TRUE, y = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.159282 -0.005513 -0.000422 0.005185 0.144856
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.225e-05 2.138e-04 -0.244 0.807
## r.daily.SP500.0 1.175e+00 1.673e-02 70.238 <2e-16
##
## Residual standard error: 0.01323 on 3831 degrees of freedom
## Multiple R-squared: 0.5629, Adjusted R-squared: 0.5628
## F-statistic: 4933 on 1 and 3831 DF, p-value: < 2.2e-16
# Under CAPM, the intercept should be zero
tstat.intercept<-round(lmfit0.summary$coefficients["(Intercept)", "t value"],digits=4)
pvalue.intercept<-round(lmfit0.summary$coefficients["(Intercept)", "Pr(>|t|)"],digits=4)
# The $t$-statistic for the intercept is:
print(tstat.intercept)
## [1] -0.2444
# The p-value for testing whether the intercept is zero is
print(pvalue.intercept)
## [1] 0.8069
# 3.2 R-squared plot ----
lmfit0.rsquared<-lmfit0.summary$r.squared
lmfit0.rsquared.pvalue<-pf(
q=lmfit0.summary$fstatistic[["value"]],
df1=lmfit0.summary$fstatistic[["numdf"]],
df2=lmfit0.summary$fstatistic[["dendf"]],
lower.tail=FALSE)
length(as.numeric(lmfit0$fitted.values))
## [1] 3833
length(as.numeric(lmfit0$y))
## [1] 3833
modelname0<-"CAPM"
plot(x=lmfit0$fitted.values,
y=lmfit0$y,
xlab="Fitted Values",
ylab="Actual Values",
main=paste(c(modelname0,
"\n Plot: Actual vs Fitted Values\n",
"(R-Squared = ",round(lmfit0.rsquared,digits=4),
")"),collapse=""))
abline(a=0,b=1,col='green',lwd=2)
# 3.3 Coefficients Plot ----
library("coefplot")
par(mfcol=c(1,1))
coefplot(lmfit0, lwdInner=4, lwdOuter=1,
title=paste(modelname0,
"\n Coefficients Plot",sep=""),
plot=TRUE)
# 3.4a Residuals Analysis: Histogram/Gaussian Fits----
std.residuals<-sort(as.numeric(lmfit0$residuals))
par(mfcol=c(1,1))
hist(std.residuals, freq=FALSE, nclass=100,
main=paste(c(modelname0 ,
"\n Histogram of Std. Residuals \n",
"Normal Fits: MLE(Green) and Robust(Blue)"),
collapse=""))
std.residuals.mean=mean(std.residuals)
std.residuals.stdev.mle=sqrt(mean(std.residuals^2) - (mean(std.residuals))^2)
std.residuals.stdev.robust=IQR(std.residuals)/1.3490
std.residuals.density.mle<-dnorm(std.residuals, mean=std.residuals.mean, sd=std.residuals.stdev.mle)
std.residuals.density.robust<-dnorm(std.residuals, mean=std.residuals.mean, sd=std.residuals.stdev.robust)
lines(std.residuals, std.residuals.density.mle, col='green',lwd=2)
lines(std.residuals, std.residuals.density.robust, col='blue',lwd=2)
# 3.4b Residuals Analysis: QQPlot/Gaussian Fits----
par(mfcol=c(1,1))
qqnorm(std.residuals,
main=paste(modelname0,
"\n Normal QQ Plot of Std. Residuals",sep=""))
abline(a=0,b=sqrt(var(std.residuals)), col='green', lwd=3)
abline(a=0,b=IQR(std.residuals)/1.3490, col='blue', lwd=3)
title(sub=paste("Residual Sigma Fits: MLE(Green) and Robust(Blue)"))
# 3.4c Residuals Analysis: MLE-Percentile Histogram ----
par(mfcol=c(1,1))
#
nclass0=100
hist(100.*pnorm(std.residuals,mean=std.residuals.mean, sd=std.residuals.stdev.mle), nclass=nclass0,
xlab="Fitted Percentile",
main=paste(c(modelname0,
"\nHistogram of Residuals\nMLE-Fitted Percentiles"),
collapse=""))
abline(h=length(std.residuals)/nclass0, col="green",lwd=2)
# 3.4c Residuals Analysis: Robust-Fitted Percentile Histogram ----
par(mfcol=c(1,1))
hist(100.*pnorm(std.residuals,mean=std.residuals.mean, sd=std.residuals.stdev.robust), nclass=nclass0,
xlab="Fitted Percentile",
main=paste(c(modelname0,
"\nHistogram of Residuals\nRobust-Fitted Percentiles"),
collapse=""))
abline(h=length(std.residuals)/nclass0, col="blue",lwd=2)
# 3.5 Regression Diagnostics: Leverage/Hat Values ----
# For the functions hatvalues() and influence.measures()
# the input argument lmfit0 must handle indexes properly
y00=coredata(r.daily.symbol0.0)
x00=coredata(r.daily.SP500.0)
lmfit00<-lm(y00~x00, x=TRUE, y=TRUE)
# This replacement code snippet eliminates warnings and errors returned from
# the functon influence.measures(lmfit0) below
#
lmfit0.hat<-zoo(x=hatvalues(lmfit00),
order.by=as.Date(names(lmfit00$fitted.values)))
par(mfcol=c(1,1))
plot(lmfit0.hat, ylab="Leverage/Hat Value",
main=paste(c(modelname0,
"\nLeverage/Hat Values"),
collapse=""))
# Note the cases are time points of the time series data
# The financial crisis of 2008 is evident
# 3.6 Regression Diagnostics Plot----
# The R function $plot.lm()$ generates a
# 2x2 display of plots for various regression diagnostic statistics:
oldpar=par(no.readonly=TRUE)
layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page
plot(lmfit00)
par(oldpar,no.readonly=TRUE)
#######ADDITION
# Some useful R functions
# anova.lm(): conduct an Analysis of Variance for the linear regression model, detailing the computation of the F-statistic for no regression structure.
# #anova.lm(lmfit0)
# influence.measures(): compute regression diagnostics evaluating case influence for the linear regression model; includes `hat' matirx, case-deletion statistics for the regression coefficients and for the residual standard deviation.
# 3.7 Apply influence.measures() ----
# Compute influence measures (case-deletion statistics)
lmfit0.inflm<-influence.measures(lmfit00)
# Table counts of influential/non-influential cases
# as measured by the hat/leverage statistic.
table(lmfit0.inflm$is.inf[,"hat"])
##
## FALSE TRUE
## 3682 151
# 3.8 Plot data, fitted model, influential cases ----
# selective highlighting of influential cases
plot(r.daily.SP500.0, r.daily.symbol0.0,
main=paste(symbol0," vs SP500 Data \n OLS Fit (Green line)\n High-Leverage Cases (red points)\n High Cooks Dist (blue Xs)",sep=""),
cex.main=0.8)
abline(h=0,v=0)
abline(lmfit0, col=3, lwd=3)
# Plot cases with high leverage as red (col=2) "o"s
index.inf.hat<-which(lmfit0.inflm$is.inf[,"hat"]==TRUE)
points(r.daily.SP500.0[index.inf.hat], r.daily.symbol0.0[index.inf.hat],
col=2, pch="o")
# Plot cases with high cooks distance as big (cex=2) blue (col=4) "X"s
index.inf.cook.d<-which(lmfit0.inflm$is.inf[,"cook.d"]==TRUE)
dim(r.daily.SP500.0)
## [1] 3833 1
dim(r.daily.SP500.0)
## [1] 3833 1
points(r.daily.SP500.0[index.inf.cook.d], r.daily.symbol0.0[index.inf.cook.d],
col=4, pch="X", cex=2.)