Re: [R] Contingency tables from data.frames

From: Jose Claudio Faria <joseclaudio.faria_at_terra.com.br>
Date: Thu 26 May 2005 - 00:10:27 EST

Gabor Grothendieck wrote:

> On 5/24/05, Jose Claudio Faria <joseclaudio.faria@terra.com.br> wrote:
> 

>>Dear list,
>>
>>I'm trying to do a set of generic functions do make contingency tables from
>>data.frames. It is just running "nice" (I'm learning R), but I think it can be
>>better.
>>
>>I would like to filter the data.frame, i.e, eliminate all not numeric variables.
>>And I don't know how to make it: please, help me.
>>
>>Below one of the my functions ('er' is a mention to EasieR, because I'm trying
>>to do a package for myself and the my students):
>>
>>#2. Tables from data.frames
>>#2.1---er.table.df.br (User define breaks and right)------------
>>er.table.df.br <- function(df,
>> breaks = c('Sturges', 'Scott', 'FD'),
>> right = FALSE) {
>>
>> if (is.data.frame(df) != 'TRUE')
>> stop('need "data.frame" data')
>>
>> dim_df <- dim(df)
>>
>> tmpList <- list()
>>
>> for (i in 1:dim_df[2]) {
>>
>> x <- as.matrix(df[ ,i])
>> x <- na.omit(x)
>>
>> k <- switch(breaks[1],
>> 'Sturges' = nclass.Sturges(x),
>> 'Scott' = nclass.scott(x),
>> 'FD' = nclass.FD(x),
>> stop("'breaks' must be 'Sturges', 'Scott' or 'FD'"))
>>
>> tmp <- range(x)
>> classIni <- tmp[1] - tmp[2]/100
>> classEnd <- tmp[2] + tmp[2]/100
>> R <- classEnd-classIni
>> h <- R/k
>>
>> # Absolut frequency
>> f <- table(cut(x, br = seq(classIni, classEnd, h), right = right))
>>
>> # Relative frequency
>> fr <- f/length(x)
>>
>> # Relative frequency, %
>> frP <- 100*(f/length(x))
>>
>> # Cumulative frequency
>> fac <- cumsum(f)
>>
>> # Cumulative frequency, %
>> facP <- 100*(cumsum(f/length(x)))
>>
>> fi <- round(f, 2)
>> fr <- round(as.numeric(fr), 2)
>> frP <- round(as.numeric(frP), 2)
>> fac <- round(as.numeric(fac), 2)
>> facP <- round(as.numeric(facP),2)
>>
>> # Table
>> res <- data.frame(fi, fr, frP, fac, facP)
>> names(res) <- c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')
>> tmpList <- c(tmpList, list(res))
>> }
>> names(tmpList) <- names(df)
>> return(tmpList)
>>}
>>
>>To try the function:
>>
>>#a) runing nice
>>y1=rnorm(100, 10, 1)
>>y2=rnorm(100, 58, 4)
>>y3=rnorm(100, 500, 10)
>>mydf=data.frame(y1, y2, y3)
>>#tbdf=er.table.df.br (mydf, breaks = 'Sturges', right=F)
>>#tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F)
>>tbdf=er.table.df.br (mydf, breaks = 'FD', right=F)
>>print(tbdf)
>>
>>
>>#b) One of the problems
>>y1=rnorm(100, 10, 1)
>>y2=rnorm(100, 58, 4)
>>y3=rnorm(100, 500, 10)
>>y4=rep(letters[1:10], 10)
>>mydf=data.frame(y1, y2, y3, y4)
>>tbdf=er.table.df.br (mydf, breaks = 'Scott', right=F)
>>print(tbdf)
>>
> 
> 
> Try this:
> 
> sapply(my.data.frame, is.numeric)
> 
> Also you might want to look up:
> 
> ?match.arg
> ?stopifnot
> ?ncol
> ?sapply
> ?lapply
> 

Thanks Gabor, you suggestion solve my basic problem. I'm working is same basic (but I think useful) functions for begginiers.

Below you can see the set of functions:

#######################

# EasyeR - Package #
#######################

# Common function---------------------------------------------------------------
er.make.table <- function(x,
                           classIni,
                           classEnd,
                           h,
                           right) {

   # Absolut frequency
   f <- table(cut(x, br = seq(classIni, classEnd, h), right = right))

   # Relative frequency
   fr <- f/length(x)

   # Relative frequency, %
   frP <- 100*(f/length(x))

   # Cumulative frequency
   fac <- cumsum(f)

   # Cumulative frequency, %
   facP <<- 100*(cumsum(f/length(x)))

   fi   <- round(f, 2)
   fr   <- round(as.numeric(fr), 2)
   frP  <- round(as.numeric(frP), 2)
   fac  <- round(as.numeric(fac), 2)
   facP <- round(as.numeric(facP),2)

   # Table
   res <- data.frame(fi, fr, frP, fac, facP)    names(res) <- c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')    return(res)
}

#1. Tables from vectors
#1.1---er.table.br (User define breaks and right)-------------------------------
er.table.br <- function(x,
                         breaks = c('Sturges', 'Scott', 'FD'),
                         right = FALSE) {

   if (is.factor(x) || mode(x) != 'numeric') stop('need numeric data')

   x <- na.omit(x)

   k <- switch(breaks[1],

               'Sturges' = nclass.Sturges(x),
               'Scott'   = nclass.scott(x),
               'FD'      = nclass.FD(x),
               stop("'breaks' must be 'Sturges', 'Scott' or 'FD'"))

   tmp      <- range(x)
   classIni <- tmp[1] - abs(tmp[2])/100
   classEnd <- tmp[2] + abs(tmp[2])/100
   R        <- classEnd-classIni
   h        <- R/k

   tbl <- er.make.table(x, classIni, classEnd, h, right)    return(tbl)
}

#1.2---er.table.kr (User define the class number (k) and right)-----------------
er.table.kr <- function(x,
                         k,
                         right = FALSE) {

   if (is.factor(x) || mode(x) != 'numeric') stop('need numeric data')    if ((k == '') || (k == ' ')) stop('k not defined')

   x <- na.omit(x)

   tmp      <- range(x)
   classIni <- tmp[1] - abs(tmp[2])/100
   classEnd <- tmp[2] + abs(tmp[2])/100
   R        <- classEnd-classIni
   h        <- R/k

   tbl <- er.make.table(x, classIni, classEnd, h, right)    return(tbl)
}

#1.3---er.table.ier (User define the classIni, classEnd and right)-------------- er.table.ier <- function(x,

                          classIni,
                          classEnd,
                          right = FALSE) {

   if (is.factor(x) || mode(x) != 'numeric') stop('need numeric data')
   if ((classIni == '') || (classIni == ' ')) stop('classIni not defined')    if ((classEnd == '') || (classEnd == ' ')) stop('classEnd not defined')

   x <- na.omit(x)

   tmp <- range(x)
   R   <- classEnd-classIni
   k   <- sqrt(abs(R))

   if (k < 5) k <- 5
   h <- R/k

   tbl <- er.make.table(x, classIni, classEnd, h, right)    return(tbl)
}

#1.4---er.table.all (User define classIni, ClassEnd, h and right)--------------- er.table.iehr <- function(x,

                          classIni,
                          classEnd,
                          h,
                          right=FALSE) {

   if (is.factor(x) || mode(x) != 'numeric') stop('need numeric data')
   if ((classIni == '') || (classIni == ' ')) stop('classIni not defined')
   if ((classEnd == '') || (classEnd == ' ')) stop('classEnd not defined')
   if ((h == '')        || (h == ' '))        stop('h not defined')

   x <- na.omit(x)

   tbl <- er.make.table(x, classIni, classEnd, h, right)    return(tbl)
}

#2. Tables from data.frames
#2.1---er.table.df.br (User define breaks and right)----------------------------
er.table.df.br <- function(df,
                            breaks = c('Sturges', 'Scott', 'FD'),
                            right = FALSE) {

   tmpList <- list()

   if (is.data.frame(df) != 'TRUE') stop('need "data.frame" data')

   logCol <- sapply(df, is.numeric)
   dim_df <- dim(df)

   for (i in 1:dim_df[2]) {

     if (logCol[i]!=FALSE) {
       x <- as.matrix(df[ ,i])
       x <- na.omit(x)

       k <- switch(breaks[1],
                   'Sturges' = nclass.Sturges(x),
                   'Scott'   = nclass.scott(x),
                   'FD'      = nclass.FD(x),
                   stop("'breaks' must be 'Sturges', 'Scott' or 'FD'"))

       tmp      <- range(x)
       classIni <- tmp[1] - abs(tmp[2])/100
       classEnd <- tmp[2] + abs(tmp[2])/100
       R        <- classEnd-classIni
       h        <- R/k

       tbl <- er.make.table(x, classIni, classEnd, h, right)
       tmpList <- c(tmpList, list(tbl))
     }

   }
   valCol <- logCol[logCol!=FALSE]
   names(tmpList) <- names(valCol)
   return(tmpList)
}

#2.2---er.table.df.kr (User define the class number (k) and right)-------------- er.table.df.kr <- function(df,

                            k,
                            right = FALSE) {

   if ((k == '') || (k == ' ')) stop('k not defined')    if (is.data.frame(df) != 'TRUE') stop('need "data.frame" data')

   tmpList <- list()

   logCol <- sapply(df, is.numeric)
   dim_df <- dim(df)

   for (i in 1:dim_df[2]) {

     if (logCol[i]!=FALSE) {
       x <- as.matrix(df[ ,i])
       x <- na.omit(x)

       tmp      <- range(x)
       classIni <- tmp[1] - abs(tmp[2])/100
       classEnd <- tmp[2] + abs(tmp[2])/100
       R        <- classEnd-classIni
       h        <- R/k

       tbl <- er.make.table(x, classIni, classEnd, h, right)
       tmpList <- c(tmpList, list(tbl))
     }

   }
   valCol <- logCol[logCol!=FALSE]
   names(tmpList) <- names(valCol)
   return(tmpList)
}

Best regards,

-- 
Jose Claudio Faria
Brasil/Bahia/UESC/DCET
Estatistica Experimental/Prof. Adjunto
mails:
  joseclaudio.faria@terra.com.br
  jc_faria@uesc.br
  jc_faria@uol.com.br
tel: 73-3634.2779

______________________________________________
R-help@stat.math.ethz.ch mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html
Received on Thu May 26 00:23:06 2005

This archive was generated by hypermail 2.1.8 : Fri 03 Mar 2006 - 03:32:05 EST