# RProject12_ChisquareTest.r

# De Veaux, Velleman and Bock (2014) Example

# 1.  Tattoo/HepC  Two-Way Table ----
tableA=data.frame(
  HepC=rbind(TattooParlor=17,
             TattooElsewhere=8,
             NoTattoo=22),
  NoHepC=rbind(TattooParlor=35,
             TattooElsewhere=53,
             NoTattoo=491)
)
print(tableA)
##                 HepC NoHepC
## TattooParlor      17     35
## TattooElsewhere    8     53
## NoTattoo          22    491
# 
# 2. Conduct ChiSquare Test of Independence ----

# Custom function implementing chisqtest
fcn.chisqtest<-function(tableA){
  
cat("\n Two-Way Table: \n")
print(tableA)

n.total=sum(as.vector(tableA))
cat("\n Total Counts in Table:  ", n.total,"\n")

# Compute marginal probabilities of
# TattooStatus and of HepCStatus
probs.TattooStatus=rowSums(tableA)/n.total
probs.HepCStatus=colSums(tableA)/n.total
cat("\n  MLEs of  row level probabilities\n")
print(probs.TattooStatus)
cat("\n  MLEs of  column level probabilities\n")
print(probs.HepCStatus)

# Compute table of fitted cell probabilities and
#   expected counts assuming independence of two factors
tableA.fittedprobs=as.matrix(probs.TattooStatus)%*% t(
  as.matrix(probs.HepCStatus) )
cat("\n Fitted cell probabilities assuming independence\n")
print(tableA.fittedprobs)

tableA.expected=n.total* tableA.fittedprobs
cat("\n Expected Counts assuming independence \n")
print(tableA.expected)


# Compute standardized residuals fitted table
tableA.chisqresiduals=((tableA - tableA.expected))/sqrt(tableA.expected)
cat("\n Table of Chi-Square Residuals  by cell\n")
print(tableA.chisqresiduals)

# Compute table of chi-square test statistic contributions
tableA.chisqterms=((tableA - tableA.expected)^2)/tableA.expected
cat("\n Table of Chi-Square statistic terms by cell\n")
print(tableA.chisqterms)

tableA.chisqStatistic=sum(as.vector(tableA.chisqterms))
cat("\n Chi-Square Statistic: ",tableA.chisqStatistic,"\n")
df.tableA=(nrow(tableA)-1)*(ncol(tableA)-1)
cat("\n degrees of freedom: ", df.tableA, "\n")
tableA.chisqStatistic.pvalue=1-
  pchisq(tableA.chisqStatistic, df=df.tableA)
cat("\n P-Value :  ", tableA.chisqStatistic.pvalue, "\n\n")

}

fcn.chisqtest(tableA)
## 
##  Two-Way Table: 
##                 HepC NoHepC
## TattooParlor      17     35
## TattooElsewhere    8     53
## NoTattoo          22    491
## 
##  Total Counts in Table:   626 
## 
##   MLEs of  row level probabilities
##    TattooParlor TattooElsewhere        NoTattoo 
##      0.08306709      0.09744409      0.81948882 
## 
##   MLEs of  column level probabilities
##       HepC     NoHepC 
## 0.07507987 0.92492013 
## 
##  Fitted cell probabilities assuming independence
##                        HepC     NoHepC
## TattooParlor    0.006236667 0.07683043
## TattooElsewhere 0.007316090 0.09012800
## NoTattoo        0.061527116 0.75796170
## 
##  Expected Counts assuming independence 
##                      HepC    NoHepC
## TattooParlor     3.904153  48.09585
## TattooElsewhere  4.579872  56.42013
## NoTattoo        38.515974 474.48403
## 
##  Table of Chi-Square Residuals  by cell
##                      HepC     NoHepC
## TattooParlor     6.627811 -1.8883383
## TattooElsewhere  1.598143 -0.4553290
## NoTattoo        -2.661238  0.7582168
## 
##  Table of Chi-Square statistic terms by cell
##                      HepC    NoHepC
## TattooParlor    43.927885 3.5658214
## TattooElsewhere  2.554061 0.2073245
## NoTattoo         7.082189 0.5748927
## 
##  Chi-Square Statistic:  57.91217 
## 
##  degrees of freedom:  2 
## 
##  P-Value :   2.657874e-13
# 3.  Apply built-in R function chisq.test() ----
print(chisq.test(tableA, correct=FALSE))
## Warning in chisq.test(tableA, correct = FALSE): Chi-squared approximation
## may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  tableA
## X-squared = 57.9122, df = 2, p-value = 2.658e-13
# 
# 4. Specify Two-Way Table aggregating Tattoo  ----
tableB=data.frame(
  HepC=rbind(Tattoo=25,
             NoTattoo=22),
  NoHepC=rbind(Tattoo=88,
               NoTattoo=491)
)
print(tableB)
##          HepC NoHepC
## Tattoo     25     88
## NoTattoo   22    491
#   Apply fcn.chisqtest() and chisq.test() ----
fcn.chisqtest(tableB)
## 
##  Two-Way Table: 
##          HepC NoHepC
## Tattoo     25     88
## NoTattoo   22    491
## 
##  Total Counts in Table:   626 
## 
##   MLEs of  row level probabilities
##    Tattoo  NoTattoo 
## 0.1805112 0.8194888 
## 
##   MLEs of  column level probabilities
##       HepC     NoHepC 
## 0.07507987 0.92492013 
## 
##  Fitted cell probabilities assuming independence
##                HepC    NoHepC
## Tattoo   0.01355276 0.1669584
## NoTattoo 0.06152712 0.7579617
## 
##  Expected Counts assuming independence 
##               HepC  NoHepC
## Tattoo    8.484026 104.516
## NoTattoo 38.515974 474.484
## 
##  Table of Chi-Square Residuals  by cell
##               HepC     NoHepC
## Tattoo    5.670263 -1.6155220
## NoTattoo -2.661238  0.7582168
## 
##  Table of Chi-Square statistic terms by cell
##               HepC    NoHepC
## Tattoo   32.151885 2.6099112
## NoTattoo  7.082189 0.5748927
## 
##  Chi-Square Statistic:  42.41888 
## 
##  degrees of freedom:  1 
## 
##  P-Value :   7.367551e-11
chisq.test(tableB)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  tableB
## X-squared = 39.8894, df = 1, p-value = 2.688e-10
chisq.test(tableB,correct=FALSE)
## 
##  Pearson's Chi-squared test
## 
## data:  tableB
## X-squared = 42.4189, df = 1, p-value = 7.368e-11
# 5. Specify Recidivism Study Two-Way Table ---- 

tableC=data.frame(
  ReOffended=rbind(FGC=46, Control=77),
  NoReOffence=rbind(FGC=186, Control=149))
print(tableC)
##         ReOffended NoReOffence
## FGC             46         186
## Control         77         149
#   Apply fcn.chisqtest() and chisq.test() ----

fcn.chisqtest(tableC)
## 
##  Two-Way Table: 
##         ReOffended NoReOffence
## FGC             46         186
## Control         77         149
## 
##  Total Counts in Table:   458 
## 
##   MLEs of  row level probabilities
##       FGC   Control 
## 0.5065502 0.4934498 
## 
##   MLEs of  column level probabilities
##  ReOffended NoReOffence 
##    0.268559    0.731441 
## 
##  Fitted cell probabilities assuming independence
##         ReOffended NoReOffence
## FGC      0.1360386   0.3705116
## Control  0.1325204   0.3609294
## 
##  Expected Counts assuming independence 
##         ReOffended NoReOffence
## FGC       62.30568    169.6943
## Control   60.69432    165.3057
## 
##  Table of Chi-Square Residuals  by cell
##         ReOffended NoReOffence
## FGC      -2.065737    1.251714
## Control   2.092979   -1.268221
## 
##  Table of Chi-Square statistic terms by cell
##         ReOffended NoReOffence
## FGC       4.267269    1.566788
## Control   4.380560    1.608385
## 
##  Chi-Square Statistic:  11.823 
## 
##  degrees of freedom:  1 
## 
##  P-Value :   0.0005850347
chisq.test(tableC, correct=FALSE)
## 
##  Pearson's Chi-squared test
## 
## data:  tableC
## X-squared = 11.823, df = 1, p-value = 0.000585