Re: [R] 3-dimensional table

From: Jim Lemon <bitwrit_at_ozemail.com.au>
Date: Tue 07 Feb 2006 - 06:28:39 EST

Jeffrey Stratford wrote:
> Hi,
>
> Last week my class conducted an experiment by putting out clay
> caterpillars to look at the effects of urbanization, color, and location
> on caterpillar predation. There were two sites (urban, rural), three
> colors (green, yellow, red) and two locations at each site (edge,
> interior). The entire data set is below. I've checked out the MASS
> book, Dalgaard's book, and the R-help archives and I haven't found
> anything that suggests how to set up a spreadsheet for the xtab function
> (say, xtab(predation ~ location + site + color, data=class). It would
> not be a problem to input the data by hand but I wouldn't know how to
> set that up either. Any suggestions would be greatly appreciated. The
> class is mostly college sophmores and juniors and biology and education
> majors. We are using R 2.2.1 on Windows XP.
>
Hi Jeff,

For some unknown reason, you used the name of a function that I wrote some years ago that may do what you want. Just call it the way you have above and it should work.

Jim

# xtab.format attempts to display a
# conventionally formatted 2D crosstabulation with
# optional phi coefficient and chi-squared test.

xtab.format<-function(v1,v2,dataframe,dnn=NULL,fieldwidth=10,chisq=FALSE,  phi=FALSE,include.na=FALSE) {
 if(!missing(v1) && !missing(v2)) {
  # get the table of frequencies
  if(!missing(dataframe))
   basetab<-table(dataframe[[v1]],dataframe[[v2]])   else basetab<-table(v1,v2)
  # row and column names
  row.names<-rownames(basetab)
  missing.index<-which(row.names == "")
  if(length(missing.index)) {
   if(include.na) {
    row.names[missing.index]<-"NA"
   }
   else {
    basetab<-basetab[-missing.index,]
    row.names<-row.names[-missing.index]    }
  }
  row.names<-formatC(row.names,width=fieldwidth)   col.names<-colnames(basetab)
  missing.index<-which(col.names == "")
  if(length(missing.index)) {
   if(include.na) {
    col.names[missing.index]<-"NA"
   }
   else {
    basetab<-basetab[,-missing.index]
    col.names<-col.names[-missing.index]    }
  }
  col.names<-formatC(col.names,width=fieldwidth)   # get the dimension of the current table   btdim<-dim(basetab)
  # create two empty vectors to hold each row of row and column percentages   row.pc<-col.pc<-vector("numeric",btdim[2])   # row and column sums
  row.sums<-apply(basetab,1,sum)
  col.sums<-apply(basetab,2,sum)
  grand.total<-sum(row.sums)
  rowlabelspace<-paste(rep(" ",nchar(row.names[1])),sep="",collapse="")   # make sure that there are some sort of dimension labels   if(is.null(dnn)) {
   dnn<-formatC(c(deparse(substitute(v1)),deparse(substitute(v2))),     width=fieldwidth)
  }
  else dnn<-formatC(dnn,width=fieldwidth)   # display the header
  cat("Crosstabulation of",dnn[1],"by",dnn[2],"\n")   # display the column dimension label
  cat(rowlabelspace,dnn[2],"\n")
  # display the column labels
  cat(rowlabelspace,col.names,"\n")
  # display the row dimension label
  cat(dnn[1],"\n")
  # now display each row of frequencies with its label and row sum   # followed by the percentages
  for(i in 1:btdim[1]) {
   for(j in 1:btdim[2]) {
    row.pc[j]<-ifelse(row.sums[i],100*basetab[i,j]/row.sums[i],0)     col.pc[j]<-ifelse(col.sums[j],100*basetab[i,j]/col.sums[j],0)    }
   cat(row.names[i],formatC(basetab[i,],width=fieldwidth),     formatC(row.sums[i],width=fieldwidth),"\n")    cat(rowlabelspace,formatC(round(row.pc,2),width=fieldwidth),     formatC(round(100*row.sums[i]/grand.total,2),width=fieldwidth),"\n")    cat(rowlabelspace,formatC(round(col.pc,2),width=fieldwidth),"\n\n")   }
  # display the column sums and grand total   cat(rowlabelspace,formatC(col.sums,width=fieldwidth),    formatC(grand.total,width=fieldwidth),"\n")   # display the column percentages
  cat(rowlabelspace,
   formatC(round(100*col.sums/grand.total,2),width=fieldwidth),"\n\n")   # do the chi squared if it was ordered   if(chisq) {
   chisq.obs<-chisq.test(basetab)
   cat(names(chisq.obs$statistic),"=",
    round(chisq.obs$statistic,3),
    names(chisq.obs$parameter),"=",
    chisq.obs$parameter,"p =",
    round(chisq.obs$p.value,5),"\n\n")
  }
  if(phi) {
   if(btdim[1] == 2 && btdim[2] == 2) {
    num<-basetab[1,1]*basetab[2,2] - basetab[1,2]*basetab[2,1]     denom<-sqrt(row.sums[1]*row.sums[2]*col.sums[1]*col.sums[2])     cat("phi =",num/denom,"\n")
   }
   else cat("phi coefficient only valid for 2x2 table\n")   }
  invisible(basetab)
 }
 else cat("Usage: xtab.format(v1, v2, dataframe[, dnn=NULL, fieldwidth = 10, chisq = FALSE, phi = FALSE])\n") }

# xtab will try to break down the formula passed to it into
# one or more 2D crosstabulations with hierarchical counts
# for higher level factors.

xtab<-function(formula,dataframe,dnn=NULL,fieldwidth=10,chisq=FALSE,phi=FALSE,  include.na=FALSE) {
 if(!missing(formula) && !missing(dataframe)) {   xt<-as.character(attr(terms(formula),"variables")[-1])   nxt<-length(xt)
  if(nxt > 2) {
   by.factor<-as.factor(dataframe[[xt[nxt]]])    factor.levels<-levels(by.factor)
   nlevels<-length(factor.levels)
   brkstats<-as.list(rep(0,nlevels))
   names(brkstats)<-factor.levels
   for(i in 1:nlevels) {
    currentdata<-subset(dataframe,by.factor == factor.levels[i])     currentcount<-length(currentdata[[nxt]])     totalcount<-length(dataframe[[nxt]])     cat("\nCount for",xt[nxt],"=",factor.levels[i],"is",      currentcount,"(",round(100*currentcount/totalcount,1),"%)\n\n")     rightside <-ifelse(nxt > 3,paste(xt[2:(nxt-1)],sep="",collapse="+"),xt[2])     next.formula<-
     as.formula(paste(xt[1],rightside,sep="~",collapse=""))     xtab(next.formula,currentdata,dnn,fieldwidth,chisq)    }
  }
  else xtab.format(xt[1],xt[2],dataframe,dnn,fieldwidth,chisq,include.na)  }
 else cat("Usage: xtab(formula, dataframe[, dnn=NULL, fieldwidth = 10, chisq = FALSE, include.na = FALSE])\n") }

\name{xtab}
\alias{xtab}
\title{

 Crosstabulate variables
}
\description{
 Crosstabulates variables with small numbers of unique values. }
\usage{
 xtab(formula,dataframe,dnn=NULL,fieldwidth=10,chisq=FALSE,phi=FALSE,   include.na=FALSE)
}
\arguments{
  \item{formula}{a formula containing the variables to be crosstabulated}
  \item{dataframe}{the data frame from which to select the variables}
  \item{dnn}{labels for the variables}
  \item{fieldwidth}{the width of each cell in the display}
  \item{chisq}{logical - whether to apply a chi squared test to the table(s)}
  \item{phi}{whether to calculate and display the phi coefficient
   of association - only for 2x2 tables}   \item{include.na}{whether to include rows or columns of NAs in the table(s).} }
\details{
 \code{xtab} will accept a formula referring to columns in a data frame.  It calls \code{table} for the calculations and displays one or more tables  of results by calling \code{xtab.format}. }
\value{nil}
\author{Jim Lemon}
\seealso{\code{\link{table}},\code{\link{xtab.format}}}
\examples{

 test.df<-data.frame(sex=sample(c("MALE","FEMALE"),100,TRUE),   suburb=sample(1:4,100,TRUE),social.type=sample(LETTERS[1:4],100,TRUE))  xtab(sex~suburb+social.type,test.df)
}
\keyword{misc}



R-help@stat.math.ethz.ch mailing list
https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html Received on Mon Feb 06 14:30:11 2006

This archive was generated by hypermail 2.1.8 : Tue 07 Feb 2006 - 00:53:16 EST