From: Trevor Wiens <twiens_at_interbaun.com>

Date: Thu 17 Mar 2005 - 15:31:01 EST

logcv <- function(mdata, formula, rvar, fvar) { require(Hmisc)

pred.cc <- unlist(pred.c)

delta.cv.k <- sum(sorted[[rpos]] != pred.cc)/n p.k <- countlist/n

delta.app <- mean(sorted[[rpos]] != pred.all)/n

Date: Thu 17 Mar 2005 - 15:31:01 EST

On Wed, 16 Mar 2005 17:59:01 -0700

Trevor Wiens <twiens@interbaun.com> wrote:

> I apologize for posting on this question again, but unfortunately, I don't have and can't get access to MASS for at least three weeks. I have found some code on the web however which implements the prediction error algorithm in cv.glm.

*>
**> http://www.bioconductor.org/workshops/NGFN03/modelsel-exercise.pdf
**>
**> Now I've tried to adapt it to my purposes, but since I'm not deeply familiar with R programming, I don't know why it doesn't work. Now checking the r-help list faq it seems this is an appropriate question.
**>
*

OK. I've determined why that didn't work. But I'm still unsure if I've implemented the algorithm correctly. Any suggestions for testing would be appreciated. The corrected function is attached.

Thanks for your assistance.

logcv <- function(mdata, formula, rvar, fvar) { require(Hmisc)

# determine index of variables

rpos <- match(rvar, names(mdata))

fpos <- match(fvar, names(mdata))

# sort by fold variable

sorted <- mdata[order(mdata[[fpos]]), ]

# get fold values and count for each group
vardesc <- describe(sorted[[fpos]])$values
fvarlist <- as.integer(dimnames(vardesc)[[2]])
k <- length(fvarlist)

countlist <- vardesc[1,1]

for (i in 2:k)

{

countlist[i] <- vardesc[1,i]

}

n <- length(sorted[[fpos]])

# fit to all the mdata

fit.all <- glm(formula, sorted, family=binomial)
pred.all <- ifelse( predict(fit.all, type="response") < 0.5, 0, 1)

#setup

pred.c <- list()

error.i <- vector(length=k)

for (i in 1:k)

{

fit.i <- glm(formula, subset(sorted, sorted[[fpos]] != fvarlist[i]), family=binomial)

pred.i <- ifelse(predict(fit.i, newdata=subset(sorted, sorted[[fpos]] == fvarlist[i]), type="response") < 0.5, 0, 1) pred.c[[i]] = pred.i pred.all.i <- ifelse(predict(fit.i, newdata=sorted, type="response") < 0.5, 0, 1)error.i[i] <- sum(sorted[[rpos]] != pred.all.i)/n }

pred.cc <- unlist(pred.c)

delta.cv.k <- sum(sorted[[rpos]] != pred.cc)/n p.k <- countlist/n

delta.app <- mean(sorted[[rpos]] != pred.all)/n

delta.acv.k <- delta.cv.k + delta.app - sum(p.k*error.i)

return(delta.acv.k)

}

T

-- Trevor Wiens twiens@interbaun.com ______________________________________________ R-help@stat.math.ethz.ch mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide! http://www.R-project.org/posting-guide.htmlReceived on Thu Mar 17 15:34:35 2005

*
This archive was generated by hypermail 2.1.8
: Fri 03 Mar 2006 - 03:30:50 EST
*