# Rproject11_Tablets_TwoSampleT.r
# 1.0 Read in data ----
# See Example 12.2A of Rice
#
# Measurements of chlorpheniramine maleate in tablets made by seven laboratories
# Nominal dosage equal to 4mg
# 10 measurements per laboratory
#
# Sources of variability
# within labs
# between labs
# Note: read.table has trouble parsing header row
if (FALSE){tablets1=read.table(file="Rice 3e Datasets/ASCII Comma/Chapter 12/tablets1.txt",
sep=",",stringsAsFactors = FALSE, quote="\'",
header=TRUE)
}
# Read in matrix and label columns
tablets1=read.table(file="Rice 3e Datasets/ASCII Comma/Chapter 12/tablets1.txt",
sep=",",stringsAsFactors = FALSE, skip=1,
header=FALSE)
dimnames(tablets1)[[2]]<-paste("Lab",c(1:7),sep="")
tablets1
## Lab1 Lab2 Lab3 Lab4 Lab5 Lab6 Lab7
## 1 4.13 3.86 4.00 3.88 4.02 4.02 4.00
## 2 4.07 3.85 4.02 3.88 3.95 3.86 4.02
## 3 4.04 4.08 4.01 3.91 4.02 3.96 4.03
## 4 4.07 4.11 4.01 3.95 3.89 3.97 4.04
## 5 4.05 4.08 4.04 3.92 3.91 4.00 4.10
## 6 4.04 4.01 3.99 3.97 4.01 3.82 3.81
## 7 4.02 4.02 4.03 3.92 3.89 3.98 3.91
## 8 4.06 4.04 3.97 3.90 3.89 3.99 3.96
## 9 4.10 3.97 3.98 3.97 3.99 4.02 4.05
## 10 4.04 3.95 3.98 3.90 4.00 3.93 4.06
# Replicate Figure 12.1 of Rice
boxplot(tablets1)
# 2.1 Define a function to implement Two-sample t test ----
fcn.TwoSampleTTest<-function(x,y, conf.level=0.95, digits0=4){
# conf.level=0.95; digits0=4
x.mean=mean(x)
y.mean=mean(y)
x.var=var(x)
y.var=var(y)
x.n=length(x)
y.n=length(y)
sigmasq.pooled=((x.n-1)*x.var + (y.n-1)*y.var)/(x.n + y.n -2)
# Print out statistics for each sample and pooled estimate of standard deviation
cat("Sample Statistics:\n")
print(t(data.frame(
x.mean,
x.n,
x.stdev=sqrt(x.var),
y.mean,
y.n,
y.stdev=sqrt(y.var),
sigma.pooled=sqrt(sigmasq.pooled)
)))
cat("\n t test Computations")
mean.diff=x.mean-y.mean
mean.diff.sterr=sqrt(sigmasq.pooled)*sqrt( 1/x.n + 1/y.n)
mean.diff.tstat=mean.diff/mean.diff.sterr
tstat.df=x.n + y.n -2
mean.diff.tstat.pvalue=2*pt(-abs(mean.diff.tstat), df=tstat.df)
print(t(data.frame(
mean.diff=mean.diff,
mean.diff.sterr=mean.diff.sterr,
tstat=mean.diff.tstat,
df=tstat.df,
pvalue=mean.diff.tstat.pvalue)))
# Confidence interval for difference
alphahalf=(1-conf.level)/2.
t.critical=qt(1-alphahalf, df=tstat.df)
mean.diff.confInterval=mean.diff +c(-1,1)*mean.diff.sterr*t.critical
cat(paste("\n", 100*conf.level, " Percent Confidence Interval:\n "))
cat(paste(c(
"[", round(mean.diff.confInterval[1],digits=digits0),
",", round(mean.diff.confInterval[2],digits=digits0),"]"), collapse=""))
# invisible(return(NULL))
}
# 2.2 Apply function fcn.TwoSampleTTest ----
# Compare Lab7 to Lab4
fcn.TwoSampleTTest(tablets1$Lab7, tablets1$Lab4)
## Sample Statistics:
## [,1]
## x.mean 3.99800000
## x.n 10.00000000
## x.stdev 0.08482662
## y.mean 3.92000000
## y.n 10.00000000
## y.stdev 0.03333333
## sigma.pooled 0.06444636
##
## t test Computations [,1]
## mean.diff 0.07800000
## mean.diff.sterr 0.02882129
## tstat 2.70633286
## df 18.00000000
## pvalue 0.01445587
##
## 95 Percent Confidence Interval:
## [0.0174,0.1386]
# Compare output to t.test
t.test(tablets1$Lab7, tablets1$Lab4, var.equal=TRUE)
##
## Two Sample t-test
##
## data: tablets1$Lab7 and tablets1$Lab4
## t = 2.7063, df = 18, p-value = 0.01446
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01744872 0.13855128
## sample estimates:
## mean of x mean of y
## 3.998 3.920
# 3.1 Conduct One-Way ANOVA ----
# Analysis of Variance with R function aov()
# Create vector variables
# yvec = Dependent variable
yvec=as.vector(data.matrix(tablets1))
# xvec.factor.Lab = Independent variable (of factor type in R)
xvec.factor.Lab=as.factor(as.vector(col(data.matrix(tablets1))))
table(xvec.factor.Lab)
## xvec.factor.Lab
## 1 2 3 4 5 6 7
## 10 10 10 10 10 10 10
plot(as.numeric(xvec.factor.Lab), yvec)
# One-way ANOVA using r function aov()
tablets1.aov<-aov(yvec ~ xvec.factor.Lab)
tablets1.aov.summary<-summary(tablets1.aov)
# Replicate Table in Example 12.2.A, p. 483 of Rice
print(tablets1.aov.summary)
## Df Sum Sq Mean Sq F value Pr(>F)
## xvec.factor.Lab 6 0.1247 0.020790 5.66 9.45e-05 ***
## Residuals 63 0.2314 0.003673
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Construct model tables
tablets1.aov.model.tables<-model.tables(tablets1.aov, type="means",se=TRUE)
print(tablets1.aov.model.tables)
## Tables of means
## Grand mean
##
## 3.984571
##
## xvec.factor.Lab
## xvec.factor.Lab
## 1 2 3 4 5 6 7
## 4.062 3.997 4.003 3.920 3.957 3.955 3.998
##
## Standard errors for differences of means
## xvec.factor.Lab
## 0.0271
## replic. 10
# Validate standard error for difference of means
ResidualsMeanSq=0.003673
J=nrow(tablets1)
sqrt(ResidualsMeanSq/J) # standard error of each mean
## [1] 0.01916507
sqrt(ResidualsMeanSq/J)*sqrt(2) # standard error of difference of two means
## [1] 0.02710351
#tablets1.aov.model.tables<-model.tables(tablets1.aov, type="effects",se=TRUE)
#print(tablets1.aov.model.tables)
# 3.2 Confidence intervals for pairwise differences ----
# Create confidence intervals on differences between means
# Studentized range statistic
# Tukey's 'Honest Significant Difference' method
# Apply R function TukeyHSD():
TukeyHSD(tablets1.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = yvec ~ xvec.factor.Lab)
##
## $xvec.factor.Lab
## diff lwr upr p adj
## 2-1 -0.065 -0.147546752 0.017546752 0.2165897
## 3-1 -0.059 -0.141546752 0.023546752 0.3226101
## 4-1 -0.142 -0.224546752 -0.059453248 0.0000396
## 5-1 -0.105 -0.187546752 -0.022453248 0.0045796
## 6-1 -0.107 -0.189546752 -0.024453248 0.0036211
## 7-1 -0.064 -0.146546752 0.018546752 0.2323813
## 3-2 0.006 -0.076546752 0.088546752 0.9999894
## 4-2 -0.077 -0.159546752 0.005546752 0.0830664
## 5-2 -0.040 -0.122546752 0.042546752 0.7578129
## 6-2 -0.042 -0.124546752 0.040546752 0.7140108
## 7-2 0.001 -0.081546752 0.083546752 1.0000000
## 4-3 -0.083 -0.165546752 -0.000453248 0.0478900
## 5-3 -0.046 -0.128546752 0.036546752 0.6204148
## 6-3 -0.048 -0.130546752 0.034546752 0.5720976
## 7-3 -0.005 -0.087546752 0.077546752 0.9999964
## 5-4 0.037 -0.045546752 0.119546752 0.8178759
## 6-4 0.035 -0.047546752 0.117546752 0.8533629
## 7-4 0.078 -0.004546752 0.160546752 0.0760155
## 6-5 -0.002 -0.084546752 0.080546752 1.0000000
## 7-5 0.041 -0.041546752 0.123546752 0.7362355
## 7-6 0.043 -0.039546752 0.125546752 0.6912252
#
# Compare Tukey HSD confidence interval for Lab7 vs Lab4
fcn.TwoSampleTTest(tablets1$Lab7,tablets1$Lab4)
## Sample Statistics:
## [,1]
## x.mean 3.99800000
## x.n 10.00000000
## x.stdev 0.08482662
## y.mean 3.92000000
## y.n 10.00000000
## y.stdev 0.03333333
## sigma.pooled 0.06444636
##
## t test Computations [,1]
## mean.diff 0.07800000
## mean.diff.sterr 0.02882129
## tstat 2.70633286
## df 18.00000000
## pvalue 0.01445587
##
## 95 Percent Confidence Interval:
## [0.0174,0.1386]
# Note P-value from t-test is 0.0144 while TukeyHSD is 0.076
# 4. Implement One-Way Anova with lm() ----
lmfit.oneway.Lab=lm(yvec ~ xvec.factor.Lab,x=TRUE, y=TRUE)
lmfit.oneway.Lab.summary<-summary(lmfit.oneway.Lab)
print(lmfit.oneway.Lab.summary)
##
## Call:
## lm(formula = yvec ~ xvec.factor.Lab, x = TRUE, y = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.1880 -0.0245 0.0060 0.0410 0.1130
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.06200 0.01917 211.948 < 2e-16 ***
## xvec.factor.Lab2 -0.06500 0.02710 -2.398 0.019449 *
## xvec.factor.Lab3 -0.05900 0.02710 -2.177 0.033248 *
## xvec.factor.Lab4 -0.14200 0.02710 -5.239 1.99e-06 ***
## xvec.factor.Lab5 -0.10500 0.02710 -3.874 0.000257 ***
## xvec.factor.Lab6 -0.10700 0.02710 -3.948 0.000201 ***
## xvec.factor.Lab7 -0.06400 0.02710 -2.361 0.021316 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06061 on 63 degrees of freedom
## Multiple R-squared: 0.3503, Adjusted R-squared: 0.2884
## F-statistic: 5.66 on 6 and 63 DF, p-value: 9.453e-05
# Compare regression output from lm() with anova output from aov()
print(tablets1.aov.summary)
## Df Sum Sq Mean Sq F value Pr(>F)
## xvec.factor.Lab 6 0.1247 0.020790 5.66 9.45e-05 ***
## Residuals 63 0.2314 0.003673
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residual standard error equals root (Mean Sq for Residuals)
print(lmfit.oneway.Lab.summary$sigma)
## [1] 0.06060541
print((lmfit.oneway.Lab.summary$sigma)^2)
## [1] 0.003673016
# Compare to Mean Sq for Residuals in tablets1.aov.summary
# F-statistic/degrees of freedom/P-values are same