# Problem_14_9_39.r
# 1.0 Read in data ----
# See Problem 14.9.39
# Data from Knafl et al. (1984)
#
# Load the tank data from its comma-separated text file (the first row holds
# the column names), then pull the two columns used below into plain vectors.
tankvolume <- read.table(
  file = "Rice 3e Datasets/ASCII Comma/Chapter 14/tankvolume.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
Pressure <- tankvolume$Pressure
Volume <- tankvolume$Volume
# (a). Scatter plot of pressure against volume; the relationship appears linear.
plot(x = Volume, y = Pressure)
#summary(Volume)
# (b). Simple linear regression of pressure on volume. The summary gives the
# coefficient table, residual standard error and R-squared.
lmfit1 <- lm(formula = Pressure ~ Volume)
summary(lmfit1)
##
## Call:
## lm(formula = Pressure ~ Volume)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.429 -15.610 2.047 10.819 36.634
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -257.301 9.430 -27.29 <2e-16 ***
## Volume 2316.469 9.243 250.61 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.44 on 19 degrees of freedom
## Multiple R-squared: 0.9997, Adjusted R-squared: 0.9997
## F-statistic: 6.28e+04 on 1 and 19 DF, p-value: < 2.2e-16
# Overlay the fitted least-squares line on the current plot (the
# pressure-versus-volume scatter plot drawn earlier).
abline(lmfit1, col = "green")
# Plot the residuals of the linear fit versus volume. A horizontal reference
# line at zero is added, as is done for the quadratic-fit residual plot, so
# that systematic departures from zero are easy to see.
plot(Volume, lmfit1$residuals)
abline(h = 0, col = "gray")
#
# The residuals plot shows a non-linear relationship with volume
#
# (c). Fit pressure as a quadratic function of volume: the squared volume is
# built as its own regressor and added alongside the linear term.
VolumeSq <- Volume * Volume
lmfit2 <- lm(formula = Pressure ~ Volume + VolumeSq)
summary(lmfit2)
##
## Call:
## lm(formula = Pressure ~ Volume + VolumeSq)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.645 -7.189 1.944 7.371 15.528
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -204.995 9.274 -22.104 1.70e-14 ***
## Volume 2164.032 23.052 93.877 < 2e-16 ***
## VolumeSq 83.191 12.276 6.777 2.39e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.6 on 18 degrees of freedom
## Multiple R-squared: 0.9999, Adjusted R-squared: 0.9999
## F-statistic: 1.057e+05 on 2 and 18 DF, p-value: < 2.2e-16
# Residuals of the quadratic fit versus volume, with a gray horizontal
# reference line at zero.
plot(x = Volume, y = lmfit2$residuals)
abline(h = 0, col = "gray")
# The fit looks much better, but the residuals at specific volume
# levels tend to be all positive or all negative together.
# The variability within a given Volume level is smaller than the
# variability across Volume levels.
# There appear to be two sources of variability: across volume levels and within them.