# RProject12_ChisquareTest.r
# De Veaux, Velleman and Bock (2014) Example
# 1. Tattoo/HepC Two-Way Table ----
tableA=data.frame(
HepC=rbind(TattooParlor=17,
TattooElsewhere=8,
NoTattoo=22),
NoHepC=rbind(TattooParlor=35,
TattooElsewhere=53,
NoTattoo=491)
)
print(tableA)
## HepC NoHepC
## TattooParlor 17 35
## TattooElsewhere 8 53
## NoTattoo 22 491
#
# 2. Conduct ChiSquare Test of Independence ----
# Custom function implementing chisqtest
fcn.chisqtest<-function(tableA){
cat("\n Two-Way Table: \n")
print(tableA)
n.total=sum(as.vector(tableA))
cat("\n Total Counts in Table: ", n.total,"\n")
# Compute marginal probabilities of
# TattooStatus and of HepCStatus
probs.TattooStatus=rowSums(tableA)/n.total
probs.HepCStatus=colSums(tableA)/n.total
cat("\n MLEs of row level probabilities\n")
print(probs.TattooStatus)
cat("\n MLEs of column level probabilities\n")
print(probs.HepCStatus)
# Compute table of fitted cell probabilities and
# expected counts assuming independence of two factors
tableA.fittedprobs=as.matrix(probs.TattooStatus)%*% t(
as.matrix(probs.HepCStatus) )
cat("\n Fitted cell probabilities assuming independence\n")
print(tableA.fittedprobs)
tableA.expected=n.total* tableA.fittedprobs
cat("\n Expected Counts assuming independence \n")
print(tableA.expected)
# Compute standardized residuals fitted table
tableA.chisqresiduals=((tableA - tableA.expected))/sqrt(tableA.expected)
cat("\n Table of Chi-Square Residuals by cell\n")
print(tableA.chisqresiduals)
# Compute table of chi-square test statistic contributions
tableA.chisqterms=((tableA - tableA.expected)^2)/tableA.expected
cat("\n Table of Chi-Square statistic terms by cell\n")
print(tableA.chisqterms)
tableA.chisqStatistic=sum(as.vector(tableA.chisqterms))
cat("\n Chi-Square Statistic: ",tableA.chisqStatistic,"\n")
df.tableA=(nrow(tableA)-1)*(ncol(tableA)-1)
cat("\n degrees of freedom: ", df.tableA, "\n")
tableA.chisqStatistic.pvalue=1-
pchisq(tableA.chisqStatistic, df=df.tableA)
cat("\n P-Value : ", tableA.chisqStatistic.pvalue, "\n\n")
}
fcn.chisqtest(tableA)
##
## Two-Way Table:
## HepC NoHepC
## TattooParlor 17 35
## TattooElsewhere 8 53
## NoTattoo 22 491
##
## Total Counts in Table: 626
##
## MLEs of row level probabilities
## TattooParlor TattooElsewhere NoTattoo
## 0.08306709 0.09744409 0.81948882
##
## MLEs of column level probabilities
## HepC NoHepC
## 0.07507987 0.92492013
##
## Fitted cell probabilities assuming independence
## HepC NoHepC
## TattooParlor 0.006236667 0.07683043
## TattooElsewhere 0.007316090 0.09012800
## NoTattoo 0.061527116 0.75796170
##
## Expected Counts assuming independence
## HepC NoHepC
## TattooParlor 3.904153 48.09585
## TattooElsewhere 4.579872 56.42013
## NoTattoo 38.515974 474.48403
##
## Table of Chi-Square Residuals by cell
## HepC NoHepC
## TattooParlor 6.627811 -1.8883383
## TattooElsewhere 1.598143 -0.4553290
## NoTattoo -2.661238 0.7582168
##
## Table of Chi-Square statistic terms by cell
## HepC NoHepC
## TattooParlor 43.927885 3.5658214
## TattooElsewhere 2.554061 0.2073245
## NoTattoo 7.082189 0.5748927
##
## Chi-Square Statistic: 57.91217
##
## degrees of freedom: 2
##
## P-Value : 2.657874e-13
# 3. Apply built-in R function chisq.test() ----
print(chisq.test(tableA, correct=FALSE))
## Warning in chisq.test(tableA, correct = FALSE): Chi-squared approximation
## may be incorrect
##
## Pearson's Chi-squared test
##
## data: tableA
## X-squared = 57.9122, df = 2, p-value = 2.658e-13
#
# 4. Specify Two-Way Table aggregating Tattoo ----
tableB=data.frame(
HepC=rbind(Tattoo=25,
NoTattoo=22),
NoHepC=rbind(Tattoo=88,
NoTattoo=491)
)
print(tableB)
## HepC NoHepC
## Tattoo 25 88
## NoTattoo 22 491
# Apply fcn.chisqtest() and chisq.test() ----
fcn.chisqtest(tableB)
##
## Two-Way Table:
## HepC NoHepC
## Tattoo 25 88
## NoTattoo 22 491
##
## Total Counts in Table: 626
##
## MLEs of row level probabilities
## Tattoo NoTattoo
## 0.1805112 0.8194888
##
## MLEs of column level probabilities
## HepC NoHepC
## 0.07507987 0.92492013
##
## Fitted cell probabilities assuming independence
## HepC NoHepC
## Tattoo 0.01355276 0.1669584
## NoTattoo 0.06152712 0.7579617
##
## Expected Counts assuming independence
## HepC NoHepC
## Tattoo 8.484026 104.516
## NoTattoo 38.515974 474.484
##
## Table of Chi-Square Residuals by cell
## HepC NoHepC
## Tattoo 5.670263 -1.6155220
## NoTattoo -2.661238 0.7582168
##
## Table of Chi-Square statistic terms by cell
## HepC NoHepC
## Tattoo 32.151885 2.6099112
## NoTattoo 7.082189 0.5748927
##
## Chi-Square Statistic: 42.41888
##
## degrees of freedom: 1
##
## P-Value : 7.367551e-11
chisq.test(tableB)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tableB
## X-squared = 39.8894, df = 1, p-value = 2.688e-10
chisq.test(tableB,correct=FALSE)
##
## Pearson's Chi-squared test
##
## data: tableB
## X-squared = 42.4189, df = 1, p-value = 7.368e-11
# 5. Specify Recidivism Study Two-Way Table ----
tableC=data.frame(
ReOffended=rbind(FGC=46, Control=77),
NoReOffence=rbind(FGC=186, Control=149))
print(tableC)
## ReOffended NoReOffence
## FGC 46 186
## Control 77 149
# Apply fcn.chisqtest() and chisq.test() ----
fcn.chisqtest(tableC)
##
## Two-Way Table:
## ReOffended NoReOffence
## FGC 46 186
## Control 77 149
##
## Total Counts in Table: 458
##
## MLEs of row level probabilities
## FGC Control
## 0.5065502 0.4934498
##
## MLEs of column level probabilities
## ReOffended NoReOffence
## 0.268559 0.731441
##
## Fitted cell probabilities assuming independence
## ReOffended NoReOffence
## FGC 0.1360386 0.3705116
## Control 0.1325204 0.3609294
##
## Expected Counts assuming independence
## ReOffended NoReOffence
## FGC 62.30568 169.6943
## Control 60.69432 165.3057
##
## Table of Chi-Square Residuals by cell
## ReOffended NoReOffence
## FGC -2.065737 1.251714
## Control 2.092979 -1.268221
##
## Table of Chi-Square statistic terms by cell
## ReOffended NoReOffence
## FGC 4.267269 1.566788
## Control 4.380560 1.608385
##
## Chi-Square Statistic: 11.823
##
## degrees of freedom: 1
##
## P-Value : 0.0005850347
chisq.test(tableC, correct=FALSE)
##
## Pearson's Chi-squared test
##
## data: tableC
## X-squared = 11.823, df = 1, p-value = 0.000585