# Rproject2_script2_gamma_MOMwithMLE.r
# 0.0 Load library
library("MASS")
# 1.0 Read in data ----
# LeCam and Neyman Precipitation Data from Rice 3e Datasets
# From Rice, p. 414:
# rainfall of summer storms, in inches, measured by network of rain gauges
# southern Illinois for the years 1960-1964
# measurements are average amount of rainfall from each storm
#
#C:\PseudoFdrive\MIT\mit18443\rstudio18443\Rice 3e Datasets\ASCII Comma\Chapter 10
file0.60<-"Rice 3e Datasets\\ASCII Comma\\Chapter 10\\illinois60.txt"
file0.60data<-scan(file=file0.60,sep=",")
file0.61<-"Rice 3e Datasets\\ASCII Comma\\Chapter 10\\illinois61.txt"
file0.61data<-scan(file=file0.61,sep=",")
file0.62<-"Rice 3e Datasets\\ASCII Comma\\Chapter 10\\illinois62.txt"
file0.62data<-scan(file=file0.62,sep=",")
file0.63<-"Rice 3e Datasets\\ASCII Comma\\Chapter 10\\illinois63.txt"
file0.63data<-scan(file=file0.63,sep=",")
file0.64<-"Rice 3e Datasets\\ASCII Comma\\Chapter 10\\illinois64.txt"
file0.64data<-scan(file=file0.64,sep=",")
data.precipitation<-c(file0.60data, file0.61data, file0.62data, file0.63data, file0.64data)
# 2. Display the data ----
# 2.1 Histograms with different bin-counts (nclass) ----
par(mfcol=c(1,1))
hist(data.precipitation,nclass=15,
main="Precipitation Data (nclass=15)\n(Le Cam and Neyman)")
hist(data.precipitation,nclass=50,
main="Precipitation Data (nclass=50)\n(Le Cam and Neyman)")
# 2.2 Index plot ----
plot(data.precipitation, main="Precipitation Data")
# 3.0 Parameter Estimation of Gamma Distribution ----
# 3.1 Method of moments estimates ----
# Compute first moment (mean) and variance (second moment minus square of first moment)
data.precipitation.xbar=mean(data.precipitation)
data.precipitation.var=mean(data.precipitation^2) - (mean(data.precipitation))^2
# Compute MOM estimates per theory
lambdahat.mom=(data.precipitation.xbar)/data.precipitation.var
alphahat.mom=(data.precipitation.xbar^2)/data.precipitation.var
print(lambdahat.mom)
## [1] 1.684175
print(alphahat.mom)
## [1] 0.3779155
# 3.2 Maximum Likelihood Estiamtes
options(warn=-1)
data.precipitation.fitdistr.mle<-fitdistr(data.precipitation,densfun="gamma")
print(data.precipitation.fitdistr.mle)
## shape rate
## 0.44080171 1.96476293
## (0.03376322) (0.24743990)
# 3.2 Simulation of MOM Estimates ----
# 3.2.1 Define function used in simulation ----
# Define function computing MOM estimates of the rate and shape parameters
fcn.fitdistr.mom.gamma<-function(x){
x.mean=mean(x)
x.var=mean(x^2) - (x.mean^2)
rate.mom=x.mean/x.var
shape.mom=(x.mean^2)/x.var
result=c(shape=shape.mom, rate=rate.mom)
return(result)
}
data.precipitation.mom.gamma<-fcn.fitdistr.mom.gamma(data.precipitation)
print(data.precipitation.mom.gamma)
## shape rate
## 0.3779155 1.6841748
shape.mom=data.precipitation.mom.gamma["shape"]
rate.mom=data.precipitation.mom.gamma["rate"]
data.precipitation.fitdistr.mle<-fitdistr(data.precipitation,densfun="gamma")
print(data.precipitation.fitdistr.mle)
## shape rate
## 0.44080171 1.96476293
## (0.03376322) (0.24743990)
shape.mle=data.precipitation.fitdistr.mle$estimate["shape"]
rate.mle=data.precipitation.fitdistr.mle$estimate["rate"]
# 3.2.2 Set up simulation objects ----
nsimulation=1000
n0=length(data.precipitation)
output.mom.simulation<-matrix(nrow=nsimulation, ncol=2)
dimnames(output.mom.simulation)[[2]]<-c("shape","rate")
output.mle.simulation<-matrix(nrow=nsimulation, ncol=2)
dimnames(output.mle.simulation)[[2]]<-c("shape","rate")
options(warn=-1)
# 3.2.3 Conduct simulation ----
for (j in c(1:nsimulation)){
#j=1
data.simulation=rgamma(n0,shape=shape.mle, rate=rate.mle)
data.simulation.fitdistr.mom<-fcn.fitdistr.mom.gamma(data.simulation)
output.mom.simulation[j, 1]<-data.simulation.fitdistr.mom["shape"]
output.mom.simulation[j, 2]<-data.simulation.fitdistr.mom["rate"]
#
# For MLE use function fitdistr from the library MASS
# turn off warnings
options(warn=-1)
# data.simulation.fitdistr.mle<-fitdistr(data.simulation,densfun="gamma")
data.simulation.fitdistr.mle<-suppressWarnings(fitdistr(data.simulation,densfun="gamma"))
output.mle.simulation[j, "shape"]<-data.simulation.fitdistr.mle$estimate["shape"]
output.mle.simulation[j, "rate"]<-data.simulation.fitdistr.mle$estimate["rate"]
# print(j)
# print(output.mom.simulation[j,])
}
# 4 Display/Compare sampling distributions of MOMs and MLEs----
par(mfcol=c(2,1))
# 4.1.1 Displays for MOM of shape parameter ----
hist(output.mom.simulation[,"shape"],nclass=50)
simulation.shape.mom.Mean=mean(output.mom.simulation[,"shape"])
simulation.shape.mom.StandardError=sqrt(var(output.mom.simulation[,"shape"]))
print(shape.mom)
## shape
## 0.3779155
print(simulation.shape.mom.Mean)
## [1] 0.4580915
print(simulation.shape.mom.StandardError)
## [1] 0.06982183
# Add red vertical(v) lines at simulation mean +/- 1 standard error
abline(v=simulation.shape.mom.Mean + c(-1,0,1)*simulation.shape.mom.StandardError,
col=c("red","red","red"),lwd=c(2,2,2))
# Add blue vertical(v) line at true shape parameter for simulation
abline(v=shape.mle,col="blue",lwd=2)
# 4.1.2 Displays for MLE of shape parameter
hist(output.mle.simulation[,"shape"],nclass=50)
simulation.shape.mle.Mean=mean(output.mle.simulation[,"shape"])
simulation.shape.mle.StandardError=sqrt(var(output.mle.simulation[,"shape"]))
print(shape.mle)
## shape
## 0.4408017
print(simulation.shape.mle.Mean)
## [1] 0.4446394
print(simulation.shape.mle.StandardError)
## [1] 0.0337602
# Add red vertical(v) lines at simulation mean +/- 1 standard error
abline(v=simulation.shape.mle.Mean + c(-1,0,1)*simulation.shape.mle.StandardError,
col=c("green","green","green"),lwd=c(3,3,3))
# Add blue vertical(v) line at true shape parameter for simulation
abline(v=shape.mle,col="blue",lwd=2)
##########
#
#
# 4.2.1 Displays of MOM for Rate parameter ----
par(mfcol=c(2,1))
hist(output.mom.simulation[,"rate"],nclass=50)
simulation.rate.mom.Mean=mean(output.mom.simulation[,"rate"])
simulation.rate.mom.StandardError=sqrt(var(output.mom.simulation[,"rate"]))
print(rate.mom)
## rate
## 1.684175
print(simulation.rate.mom.Mean)
## [1] 2.058181
print(simulation.rate.mom.StandardError)
## [1] 0.3775248
# Add red vertical(v) lines at simulation mean +/- 1 standard error
abline(v=simulation.rate.mom.Mean + c(-1,0,1)*simulation.rate.mom.StandardError,
col=c("red","red","red"),lwd=c(2,2,2))
# Add blue vertical(v) line at true rate parameter for simulation
abline(v=rate.mle,col="blue",lwd=2)
# 4.2.2 Displays of MLE for Rate parameter ----
#
hist(output.mle.simulation[,"rate"],nclass=50)
simulation.rate.mle.Mean=mean(output.mle.simulation[,"rate"])
simulation.rate.mle.StandardError=sqrt(var(output.mle.simulation[,"rate"]))
print(rate.mle)
## rate
## 1.964763
print(simulation.rate.mle.Mean)
## [1] 1.99772
print(simulation.rate.mle.StandardError)
## [1] 0.2488804
# Add red vertical(v) lines at simulation mean +/- 1 standard error
abline(v=simulation.rate.mle.Mean + c(-1,0,1)*simulation.rate.mle.StandardError,
col=c("red","red","red"),lwd=c(2,2,2))
# Add blue vertical(v) line at true rate parameter for simulation
abline(v=rate.mle,col="blue",lwd=2)
# 4.3. Boxplot comparisons of estimates
#help(boxplot)
par(mfcol=c(1,2))
boxplot(cbind(MOM=output.mom.simulation[,"shape"],MLE= output.mle.simulation[,"shape"]),
main="Shape Parameter\nSampling Distributions\n(simulated)")
boxplot(cbind(MOM=output.mom.simulation[,"rate"],MLE= output.mle.simulation[,"rate"]),
main="Rate Parameter\nSampling Distributions\n(simulated)")
par(mfcol=c(1,1))
plot(y=output.mom.simulation[,"shape"],x= output.mle.simulation[,"shape"],
xlab="MLE",ylab="MOM",
main="Shape Parameter\nSampling Distributions\n(simulated)")
# Add green vertical(v) lines at MLE simulation mean +/- 1 standard error
abline(v=simulation.shape.mle.Mean + c(-1,0,1)*simulation.shape.mle.StandardError,
col=c("green","green","green"),lwd=c(3,3,3))
# Add blue vertical(v) line at true shape parameter for simulation
abline(v=shape.mle,col="blue",lwd=2)
# Add red horizontal (h) lines at MOM simulation mean +/- 1 standard error
abline(h=simulation.shape.mom.Mean + c(-1,0,1)*simulation.shape.mom.StandardError,
col=c("red","red","red"),lwd=c(2,2,2))
# Add blue horizontal(h) line at true shape parameter for simulation
abline(h=shape.mle,col="blue",lwd=2)
#
par(mfcol=c(1,1))
plot(y=output.mom.simulation[,"rate"],x= output.mle.simulation[,"rate"],
xlab="MLE",ylab="MOM",
main="Rate Parameter\nSampling Distributions\n(simulated)")
# Add green vertical(v) lines at MLE simulation mean +/- 1 standard error
abline(v=simulation.rate.mle.Mean + c(-1,0,1)*simulation.rate.mle.StandardError,
col=c("green","green","green"),lwd=c(3,3,3))
# Add blue vertical(v) line at true rate parameter for simulation
abline(v=rate.mle,col="blue",lwd=2)
# Add red horizontal (h) lines at MOM simulation mean +/- 1 standard error
abline(h=simulation.rate.mom.Mean + c(-1,0,1)*simulation.rate.mom.StandardError,
col=c("red","red","red"),lwd=c(2,2,2))
# Add blue horizontal(h) line at true rate parameter for simulation
abline(h=rate.mle,col="blue",lwd=2)
#