# Rproject11_Tablets_TwoSampleT.r
# 1.0 Read in data ----
# See Example 12.2A of Rice
#
# Measurements of chlorpheniramine maleate in tablets made by seven laboratories
# Nominal dosage equal to 4mg
# 10 measurements per laboratory
#
# Sources of variability
# within labs
# between labs
# Note: read.table has trouble parsing header row
if (FALSE){tablets1=read.table(file="Rice 3e Datasets/ASCII Comma/Chapter 12/tablets1.txt",
sep=",",stringsAsFactors = FALSE, quote="\'",
header=TRUE)
}
# Read in matrix and label columns
tablets1=read.table(file="Rice 3e Datasets/ASCII Comma/Chapter 12/tablets1.txt",
sep=",",stringsAsFactors = FALSE, skip=1,
header=FALSE)
dimnames(tablets1)[[2]]<-paste("Lab",c(1:7),sep="")
tablets1
## Lab1 Lab2 Lab3 Lab4 Lab5 Lab6 Lab7
## 1 4.13 3.86 4.00 3.88 4.02 4.02 4.00
## 2 4.07 3.85 4.02 3.88 3.95 3.86 4.02
## 3 4.04 4.08 4.01 3.91 4.02 3.96 4.03
## 4 4.07 4.11 4.01 3.95 3.89 3.97 4.04
## 5 4.05 4.08 4.04 3.92 3.91 4.00 4.10
## 6 4.04 4.01 3.99 3.97 4.01 3.82 3.81
## 7 4.02 4.02 4.03 3.92 3.89 3.98 3.91
## 8 4.06 4.04 3.97 3.90 3.89 3.99 3.96
## 9 4.10 3.97 3.98 3.97 3.99 4.02 4.05
## 10 4.04 3.95 3.98 3.90 4.00 3.93 4.06
# Replicate Figure 12.1 of Rice
boxplot(tablets1)
# 2. Comparing Two Independent Samples ----
boxplot(tablets1[,c("Lab4","Lab7")])
x=tablets1$Lab4
y=tablets1$Lab7
# 2.1 Define a function to implement Two-sample t test ----
fcn.TwoSampleTTest<-function(x,y, conf.level=0.95, digits0=4){
# conf.level=0.95; digits0=4
x.mean=mean(x)
y.mean=mean(y)
x.var=var(x)
y.var=var(y)
x.n=length(x)
y.n=length(y)
sigmasq.pooled=((x.n-1)*x.var + (y.n-1)*y.var)/(x.n + y.n -2)
# Print out statistics for each sample and pooled estimate of standard deviation
cat("Sample Statistics:\n")
print(t(data.frame(
x.mean,
x.n,
x.stdev=sqrt(x.var),
y.mean,
y.n,
y.stdev=sqrt(y.var),
sigma.pooled=sqrt(sigmasq.pooled)
)))
cat("\n t test Computations")
mean.diff=x.mean-y.mean
mean.diff.sterr=sqrt(sigmasq.pooled)*sqrt( 1/x.n + 1/y.n)
mean.diff.tstat=mean.diff/mean.diff.sterr
tstat.df=x.n + y.n -2
mean.diff.tstat.pvalue=2*pt(-abs(mean.diff.tstat), df=tstat.df)
print(t(data.frame(
mean.diff=mean.diff,
mean.diff.sterr=mean.diff.sterr,
tstat=mean.diff.tstat,
df=tstat.df,
pvalue=mean.diff.tstat.pvalue)))
# Confidence interval for difference
alphahalf=(1-conf.level)/2.
t.critical=qt(1-alphahalf, df=tstat.df)
mean.diff.confInterval=mean.diff +c(-1,1)*mean.diff.sterr*t.critical
cat(paste("\n", 100*conf.level, " Percent Confidence Interval:\n "))
cat(paste(c(
"[", round(mean.diff.confInterval[1],digits=digits0),
",", round(mean.diff.confInterval[2],digits=digits0),"]"), collapse=""))
# invisible(return(NULL))
}
# 2.2 Apply function fcn.TwoSampleTTest ----
# Compare Lab7 to Lab4
fcn.TwoSampleTTest(tablets1$Lab7, tablets1$Lab4)
## Sample Statistics:
## [,1]
## x.mean 3.99800000
## x.n 10.00000000
## x.stdev 0.08482662
## y.mean 3.92000000
## y.n 10.00000000
## y.stdev 0.03333333
## sigma.pooled 0.06444636
##
## t test Computations [,1]
## mean.diff 0.07800000
## mean.diff.sterr 0.02882129
## tstat 2.70633286
## df 18.00000000
## pvalue 0.01445587
##
## 95 Percent Confidence Interval:
## [0.0174,0.1386]
#
# 2.3 Compare to built-in r function t.test() ----
t.test(tablets1$Lab7, tablets1$Lab4, var.equal=TRUE)
##
## Two Sample t-test
##
## data: tablets1$Lab7 and tablets1$Lab4
## t = 2.7063, df = 18, p-value = 0.01446
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01744872 0.13855128
## sample estimates:
## mean of x mean of y
## 3.998 3.920
# 2.4 Compare to Linear Regression ----
x=tablets1$Lab4
y=tablets1$Lab7
# Create yvec and xmat matrix
yvec=c(x,y)
xmat.col1=0*yvec +1
xmat.col2=c(0*x, (1 + 0*y))
xmat=cbind(xmat.col1, xmat.col2)
plot(xmat.col2, yvec)
# 2.4.1 Fit linear regression
lmfit=lm(yvec ~ xmat.col2,x=TRUE, y=TRUE)
abline(lmfit, col='green')
# x matrix used by lm() is same as xmat
# print(abs(xmat-lmfit$x))
print(summary(lmfit))
##
## Call:
## lm(formula = yvec ~ xmat.col2, x = TRUE, y = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.1880 -0.0245 0.0010 0.0440 0.1020
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.92000 0.02038 192.348 <2e-16 ***
## xmat.col2 0.07800 0.02882 2.706 0.0145 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06445 on 18 degrees of freedom
## Multiple R-squared: 0.2892, Adjusted R-squared: 0.2497
## F-statistic: 7.324 on 1 and 18 DF, p-value: 0.01446
# t value / P-value for xmat.col2 estimate same as two-sample t test
# F-statistic in lm() equal to square of t value
# (p-values of F and t are same)