From: Gabor Grothendieck <ggrothendieck_at_gmail.com>

Date: Thu 20 Jul 2006 - 22:56:20 EST

+ like.mat

+ }

*> system.time(for(i in 1:100) like.mat(score,items,theta))
*

[1] 1.30 0.00 1.34 NA NA

*>
*

*> like.mat2 <- function(score, items, theta)
*

+ matrix(mapply(pcm, rep(theta,length(items)), items, score), + ncol = length(theta), byrow = TRUE)

*> system.time(for(i in 1:100) like.mat2(score,items,theta))
*

[1] 5.70 0.00 5.91 NA NA

*> all.equal(like.mat(score, items, theta), like.mat2(score, items, theta))
*

[1] "Mean relative difference: 1.268095"

*>
*

*> like.mat3 <- function(score, items, theta)
*

+ t(mapply(pcm, items, score, MoreArgs = list(theta = theta)))

*> system.time(for(i in 1:100) like.mat3(score,items,theta))
*

[1] 1.32 0.01 1.39 NA NA

*> m3 <- like.mat3(score, items, theta)
*

*> dimnames(m3) <- NULL
*

*> all.equal(like.mat(score, items, theta), m3)
*

**[1] TRUE
**
On 7/20/06, Doran, Harold <HDoran@air.org> wrote:

*>
*

*>
*

*>
*

*> List:
*

*>
*

*> Thank you for the replies to my post yesterday. Gabor and Phil also gave
*

*> useful replies on how to improve the function by relying on mapply rather
*

*> than the explicit for loop. In general, I try and use the family of apply
*

*> functions rather than the looping constructs such as for, while etc as a
*

*> matter of practice.
*

*>
*

*> However, it seems the mapply function in this case is slower (in terms of
*

*> CPU speed) than the for loop. Here is an example.
*

*>
*

*> # data needed for example
*

*>
*

> items <- list(item1 = c(0,1,2), item2 = c(0,1), item3 = c(0,1,2,3,4), item4

*> = c(0,1), item5=c(0,1,2,3,4),
*

*> item6=c(0,1,2,3))
*

*> score <- c(2,1,3,1,3,2)
*

*> theta <- c(-1,-.5,0,.5,1)
*

*>
*

*> # My old function using the for loop
*

*>
*

*> like.mat <- function(score, items, theta){
*

*> like.mat <- matrix(numeric(length(items) * length(theta)), ncol =
*

*> length(theta))
*

*> for(i in 1:length(items)) like.mat[i, ] <- pcm(theta, items[[i]],
*

*> score[[i]])
*

*> like.mat
*

*> }
*

*>
*

*> system.time(like.mat(score,items,theta))
*

*> [1] 0 0 0 NA NA
*

*>
*

*> # Revised using mapply
*

*>
*

*> like.mat <- function(score, items, theta){
*

*> matrix(mapply(pcm,rep(theta,length(items)),items,score),ncol=length(theta),byrow=TRUE)
*

*> }
*

*>
*

*> > system.time(like.mat(score,items,theta))
*

*> [1] 0.03 0.00 0.03 NA NA
*

*>
*

*>
*

*> It is obviously slower to use mapply, but nominaly. So, let's actually look
*

*> at this within the context of the full program I am working on. For context,
*

*> I am evaluating an integral using Gaussian quadrature. This is a
*

*> psychometric problem where the function 'pcm" is Master's partial credit
*

*> model and 'score' is the student's score on test item i. When an item has
*

*> two categories (0,1), pcm reduces to the Rasch model for dichotomous data.
*

*> The dnorm is set at N(0,1) by default, but the parameters of the population
*

*> distribution are estimated from a separate procedure and are normally input
*

*> into the function, but this default works for the example.
*

*>
*

*> Here is the full program.
*

*>
*

*> library(statmod)
*

*>
*

*> # Master's partial credit model
*

*> pcm <- function(theta,d,score){
*

*> exp(rowSums(outer(theta,d[1:score],'-')))/
*

*> apply(exp(apply(outer(theta,d, '-'), 1, cumsum)), 2, sum)
*

*> }
*

*>
*

*> like.mat <- function(score, items, theta){
*

*> like.mat <- matrix(numeric(length(items) * length(theta)), ncol =
*

*> length(theta))
*

*> for(i in 1:length(items)) like.mat[i, ] <- pcm(theta, items[[i]],
*

*> score[[i]])
*

*> like.mat
*

*> }
*

*>
*

*> # turn this off for now
*

*> #like.mat <- function(score, items, theta){
*

*> #matrix(mapply(pcm,rep(theta,length(items)),items,score),ncol=length(theta),byrow=TRUE)
*

*> #}
*

*>
*

*> class.numer <- function(score,items, prof_cut, mu=0, sigma=1, aboveQ){
*

*> gauss_numer <- gauss.quad(49,kind="laguerre")
*

*> if(aboveQ==FALSE){
*

*> mat <- rbind(like.mat(score,items, (prof_cut-gauss_numer$nodes)),
*

*> dnorm(prof_cut-gauss_numer$nodes, mean=mu, sd=sigma))
*

*>
*

*> } else { mat <- rbind(like.mat(score,items,
*

*> (gauss_numer$nodes+prof_cut)),
*

*> dnorm(gauss_numer$nodes+prof_cut, mean=mu,
*

*> sd=sigma))
*

*>
*

*> }
*

*> f_y <- rbind(apply(mat, 2, prod), exp(gauss_numer$nodes),
*

*> gauss_numer$weights)
*

*> sum(apply(f_y,2,prod))
*

*> }
*

*>
*

*> class.denom <- function(score,items, mu=0, sigma=1){
*

*> gauss_denom <- gauss.quad.prob(49, dist='normal', mu=mu, sigma=sigma)
*

*> mat <-
*

*> rbind(like.mat(score,items,gauss_denom$nodes),gauss_denom$weights)
*

*> sum(apply(mat, 2, prod))
*

*> }
*

*>
*

*> class.acc <-function(score,items,prof_cut, mu=0, sigma=1,
*

*> aboveQ=TRUE){
*

*> result <- class.numer(score,items,prof_cut, mu,sigma,
*

*> aboveQ)/class.denom(score,items, mu, sigma)
*

*> return(result)
*

*> }
*

*>
*

*> # Test the function "class.acc"
*

*> items <- list(item1 = c(0,1,2), item2 = c(0,1), item3 = c(0,1,2,3,4), item4
*

*> = c(0,1), item5=c(0,1,2,3,4),
*

*> item6=c(0,1,2,3))
*

*> score <- c(2,1,3,1,3,2)
*

*>
*

*> # This is the system time when using the for loop for the like.mat function
*

*> system.time(class.acc(score,items,1,aboveQ=T))
*

*> [1] 0.04 0.00 0.04 NA NA
*

*>
*

*> # This is the system time using the mapply for the like.mat function
*

*> system.time(class.acc(score,items,1,aboveQ=T))
*

*> [1] 0.70 0.00 0.73 NA NA
*

*>
*

*>
*

*> There is a substantial improvement in CPU seconds when the for loop is
*

*> applied rather than using the mapply function. I experimented with adding
*

*> more items and varying the quadrature points and it always turned out the
*

*> for loop was faster.
*

*>
*

*> Given this result, I wonder what advice might be offered. Is there an
*

*> inherent reason one might opt for the mapply function (such as reliability,
*

*> etc) even when it compromises computational speed? Or, should the issue of
*

*> computational speed be considered ahead of common advice to rely on the
*

*> family of apply functions instead of the explicit loops.
*

*>
*

*> Thanks for your consideration of my question,
*

*> Harold
*

*>
*

*> orm i386-pc-mingw32
*

*> arch i386
*

*> os mingw32
*

*> system i386, mingw32
*

*> status
*

*> major 2
*

*> minor 3.0
*

*> year 2006
*

*> month 04
*

*> day 24
*

*> svn rev 37909
*

*> language R
*

*> version.string Version 2.3.0 (2006-04-24)
*

*>
*

*>
*

*>
*

R-help@stat.math.ethz.ch mailing list

https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. Received on Thu Jul 20 23:00:27 2006

Date: Thu 20 Jul 2006 - 22:56:20 EST

Note that if you use mapply in the way I suggested, which is not the same as in your post, then its just as fast. (Also the version of mapply in your post gives different numerical results than the for loop whereas mine gives the same.) like.mat is the for loop version, like.mat2 is your mapply version and like.mat3 is my mapply version.

*> like.mat <- function(score, items, theta){
*

+ like.mat <- matrix(numeric(length(items) * length(theta)), ncol = + length(theta)) + for(i in 1:length(items)) like.mat[i, ] <- pcm(theta, items[[i]],score[[i]])

+ like.mat

+ }

[1] 1.30 0.00 1.34 NA NA

+ matrix(mapply(pcm, rep(theta,length(items)), items, score), + ncol = length(theta), byrow = TRUE)

[1] 5.70 0.00 5.91 NA NA

[1] "Mean relative difference: 1.268095"

+ t(mapply(pcm, items, score, MoreArgs = list(theta = theta)))

[1] 1.32 0.01 1.39 NA NA

> items <- list(item1 = c(0,1,2), item2 = c(0,1), item3 = c(0,1,2,3,4), item4

R-help@stat.math.ethz.ch mailing list

https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. Received on Thu Jul 20 23:00:27 2006

Archive maintained by Robert King, hosted by
the discipline of
statistics at the
University of Newcastle,
Australia.

Archive generated by hypermail 2.1.8, at Fri 21 Jul 2006 - 02:19:57 EST.

*
Mailing list information is available at https://stat.ethz.ch/mailman/listinfo/r-help.
Please read the posting
guide before posting to the list.
*