[R] Tables: Invitation to make a collective package

From: Jose Claudio Faria <joseclaudio.faria_at_terra.com.br>
Date: Thu 07 Jul 2005 - 21:57:01 EST


Hi All,

I would like to make an invitation to make a collective package with all functions related to TABLES.

I know that there are many packages with these functions, the original idea is collect all this functions and to make a single package, because is arduous for the user know all this functions broadcast in many packages.

So, I think that the original packages can continue with its original functions, but, is very good to know that exist one package with many (I dream all) the functions related to tables.

I've been working with these functions (while I am learning R programming):

#######################
# Tables - Package #
#######################

#
# 1. Tables
#

#
# Common function
#

tb.make.table.I <- function(x,

                             start,
                             end,
                             h,
                             right)

{
   f    <- table(cut(x, br=seq(start, end, h), right=right)) # Absolut freq
   fr   <- f/length(x)                                       # Relative freq
   frP  <- 100*(f/length(x))                                 # Relative freq, %
   fac  <- cumsum(f)                                         # Cumulative freq
   facP <- 100*(cumsum(f/length(x)))                         # Cumulative freq, %
   fi   <- round(f, 2)
   fr   <- round(as.numeric(fr), 2)
   frP  <- round(as.numeric(frP), 2)
   fac  <- round(as.numeric(fac), 2)
   facP <- round(as.numeric(facP),2)
   res  <- data.frame(fi, fr, frP, fac, facP)                # Make final table
   names(res) <- c('Class limits', 'fi', 'fr', 'fr(%)', 'fac', 'fac(%)')    return(res)
}

#
# Common function
#

tb.make.table.II <- function (x,

                               k,
                               breaks=c('Sturges', 'Scott', 'FD'),
                               right=FALSE)
{

   x <- na.omit(x)

   # User defines only x and/or 'breaks'    # (x, {k,}[breaks, right])
   if (missing(k)) {

     brk   <- match.arg(breaks)
     switch(brk,
            Sturges = k <- nclass.Sturges(x),
            Scott   = k <- nclass.scott(x),
            FD      = k <- nclass.FD(x))
     tmp   <- range(x)
     start <- tmp[1] - abs(tmp[2])/100
     end   <- tmp[2] + abs(tmp[2])/100
     R     <- end-start
     h     <- R/k

   }

   # User defines 'x' and 'k'
   # (x, k,[breaks, right])
   else {

     tmp   <- range(x)
     start <- tmp[1] - abs(tmp[2])/100
     end   <- tmp[2] + abs(tmp[2])/100
     R     <- end-start
     h     <- R/abs(k)
   }
   tbl     <- tb.make.table.I(x, start, end, h, right)
   return(tbl)
}

#
# With Gabor Grotendieck suggestions (thanks Gabor, very much!)
#
tb.table <- function(x, ...) UseMethod("tb.table")

#
# Table form vectors
#

tb.table.default <- function(x,

                              k,
                              start,
                              end,
                              h, breaks=c('Sturges', 'Scott', 'FD'),
                              right=FALSE)
{

   # User defines nothing or not 'x' isn't numeric -> stop    stopifnot(is.numeric(x))
   x <- na.omit(x)

   # User defines only 'x'
   # (x, {k, start, end, h}, [breaks, right])    if (missing(k) && missing(start) && missing(end) && missing(h) ) {

     brk   <- match.arg(breaks)
     switch(brk,
            Sturges = k <- nclass.Sturges(x),
            Scott   = k <- nclass.scott(x),
            FD      = k <- nclass.FD(x))
     tmp   <- range(x)
     start <- tmp[1] - abs(tmp[2])/100
     end   <- tmp[2] + abs(tmp[2])/100
     R     <- end-start
     h     <- R/k

   }

   # User defines 'x' and 'k'
   # (x, k, {start, end, h}, [breaks, right])    else if (missing(start) && missing(end) && missing(h)) {

     stopifnot(length(k) >= 1)
     tmp   <- range(x)
     start <- tmp[1] - abs(tmp[2])/100
     end   <- tmp[2] + abs(tmp[2])/100
     R     <- end-start
     h     <- R/abs(k)

   }

   # User defines 'x', 'start' and 'end'    # (x, {k,} start, end, {h,} [breaks, right])    else if (missing(k) && missing(h)) {

     stopifnot(length(start) >= 1, length(end) >=1)
     tmp <- range(x)
     R   <- end-start
     k   <- sqrt(abs(R))
     if (k < 5)  k <- 5 # min value of k
     h   <- R/k

   }

   # User defines 'x', 'start', 'end' and 'h'    # (x, {k,} start, end, h, [breaks, right])    else if (missing(k)) {
     stopifnot(length(start) >= 1, length(end) >= 1, length(h) >= 1)    }

   else stop('Error, please, see the function sintax!')    tbl <- tb.make.table.I(x, start, end, h, right)    return(tbl)
}

#
# Table form data.frame
#
tb.table.data.frame <- function(df,

                                 k,
                                 by,
                                 breaks=c('Sturges', 'Scott', 'FD'),
                                 right=FALSE)
{

   stopifnot(is.data.frame(df))

   tmpList <- list()
   nameF   <- character()
   nameY   <- character()

   # User didn't defines a factor
   if (missing(by)) {

     logCol  <- sapply(df, is.numeric)
     for (i in 1:ncol(df)) {
       if (logCol[i]) {
         x       <- as.matrix(df[ ,i])
         tbl     <- tb.make.table.II(x, k, breaks, right)
         tmpList <- c(tmpList, list(tbl))
       }
     }
     valCol <- logCol[logCol]
     names(tmpList) <- names(valCol)
     return(tmpList)

   }

   # User defines one factor
   else {

     namesdf <- names(df)
     pos     <- which(namesdf == by)
     stopifnot(is.factor((df[[pos]])))
     numF    <- table(df[[pos]])
     for(i in 1:length(numF)) {
       tmpdf  <- subset(df, df[[pos]] == names(numF[i]))
       logCol <- sapply(tmpdf, is.numeric)
       for (j in 1:ncol(tmpdf)) {
         if (logCol[j]) {
           x            <- as.matrix(tmpdf[ ,j])
           tbl          <- tb.make.table.II(x, k, breaks, right)
           newFY        <- list(tbl)
           nameF        <- names(numF[i])
           nameY        <- names(logCol[j])
           nameFY       <- paste(nameF,'.', nameY, sep="")
           names(newFY) <- sub(' +$', '', nameFY)
           tmpList      <- c(tmpList, newFY)
         }
       }
     }

   }
   return(tmpList)
}

############################
# Tables package #
# to try #
############################

# 1.Tables
# 1.1. Tables from vectors

# Making a vector

set.seed(1)
x=rnorm(100, 5, 1)
#x=as.factor(rep(1:10, 10)) # to try

tbl <- tb.table(x)
print(tbl); cat('\n')

# Equal to above

tbl <- tb.table(x, breaks='Sturges')
print(tbl); cat('\n')

tbl <- tb.table(x, breaks='Scott')
print(tbl); cat('\n')

tbl <- tb.table(x, breaks='FD')
print(tbl); cat('\n')

tbl <- tb.table(x, breaks='F', right=T)
print(tbl); cat('\n')

tbl <- tb.table(x, k=4)
print(tbl); cat('\n')

tbl <- tb.table(x, k=20)
print(tbl); cat('\n')

# Partial

tbl <- tb.table(x, start=4, end=6)
print(tbl); cat('\n')

# Partial

tbl <- tb.table(x, start=4.5, end=5.5)
print(tbl); cat('\n')

# Nonsense

tbl <- tb.table(x, start=0, end=10, h=.5) print(tbl); cat('\n')

# First and last class forced (fi=0)

tbl <- tb.table(x, start=1, end=9, h=1)
print(tbl); cat('\n')

tbl <- tb.table(x, start=1, end=10, h=2) print(tbl); cat('\n')

# 1.2. Tables from data.frame

# 1.2.1. Making a data.frame

mdf=data.frame(X1=rep(LETTERS[1:4], 25),

                X2=as.factor(rep(1:10, 10)),
                Y1=c(NA, NA, rnorm(96, 10, 1), NA, NA),
                Y2=rnorm(100, 58, 4),
                Y3=c(NA, NA, rnorm(98, -20, 2)))

tbl <- tb.table(mdf)
print(tbl)

# Equal to above

tbl <- tb.table(mdf, breaks='Sturges')
print(tbl)

tbl <- tb.table(mdf, breaks='Scott')
print(tbl)

tbl <- tb.table(mdf, breaks='FD')
print(tbl)

tbl <- tb.table(mdf, k=4)
print(tbl)

tbl <- tb.table(mdf, k=10)
print(tbl)

levels(mdf$X1)
tbl=tb.table(mdf, k=5, by='X1')
length(tbl)
names(tbl)
print(tbl)

tbl=tb.table(mdf, breaks='FD', by='X1')
print(tbl)

# A 'big' result: X2 is a factor with 10 levels!
tbl=tb.table(mdf, breaks='FD', by='X2')
print(tbl)

# 1.2.2. Using 'iris'

tbl=tb.table(iris, k=5)
print(tbl)

levels(iris$Species)
tbl=tb.table(iris, k=5, by='Species')
length(tbl)
names(tbl)
print(tbl)

tbl=tb.table(iris, k=5, by='Species', right=T) print(tbl)

tbl=tb.table(iris, breaks='FD', by='Species') print(tbl)

library(MASS)
levels(Cars93$Origin)
tbl=tb.table(Cars93, k=5, by='Origin')
names(tbl)
print(tbl)

tbl=tb.table(Cars93, breaks='FD', by='Origin') print(tbl)

I find that this package would be very useful and would like to hear the opinion of the interested parties in participating.

Best regards,

-- 
Jose Claudio Faria
Brasil/Bahia/UESC/DCET
Estatistica Experimental/Prof. Adjunto
mails:
  joseclaudio.faria@terra.com.br
  jc_faria@uesc.br
  jc_faria@uol.com.br
tel: 73-3634.2779

______________________________________________
R-help@stat.math.ethz.ch mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html
Received on Thu Jul 07 22:03:12 2005

This archive was generated by hypermail 2.1.8 : Fri 03 Mar 2006 - 03:33:19 EST