From: Frank E Harrell Jr <f.harrell_at_vanderbilt.edu>

Date: Thu 20 Jul 2006 - 23:13:02 EST

>> like.mat <- function(score, items, theta){

>> system.time(for(i in 1:100) like.mat(score,items,theta))

>> like.mat2 <- function(score, items, theta)

>> system.time(for(i in 1:100) like.mat2(score,items,theta))

>> all.equal(like.mat(score, items, theta), like.mat2(score, items, theta))

>> like.mat3 <- function(score, items, theta)

>> system.time(for(i in 1:100) like.mat3(score,items,theta))

>> m3 <- like.mat3(score, items, theta)

*>> dimnames(m3) <- NULL
*

*>> all.equal(like.mat(score, items, theta), m3)
*

*>>
*

*>>
*

>> List:

*>>
*

*>> Thank you for the replies to my post yesterday. Gabor and Phil also gave
*

*>> useful replies on how to improve the function by relying on mapply rather
*

*>> than the explicit for loop. In general, I try and use the family of apply
*

*>> functions rather than the looping constructs such as for, while etc as a
*

*>> matter of practice.
*

*>>
*

*>> However, it seems the mapply function in this case is slower (in terms of
*

*>> CPU speed) than the for loop. Here is an example.
*

*>>
*

*>> # data needed for example
*

*>>
*

*>> items <- list(item1 = c(0,1,2), item2 = c(0,1), item3 = c(0,1,2,3,4), item4
*

*>> = c(0,1), item5=c(0,1,2,3,4),
*

*>> item6=c(0,1,2,3))
*

*>> score <- c(2,1,3,1,3,2)
*

*>> theta <- c(-1,-.5,0,.5,1)
*

*>>
*

*>> # My old function using the for loop
*

*>>
*

*>> like.mat <- function(score, items, theta){
*

*>> like.mat <- matrix(numeric(length(items) * length(theta)), ncol =
*

*>> length(theta))
*

*>> for(i in 1:length(items)) like.mat[i, ] <- pcm(theta, items[[i]],
*

*>> score[[i]])
*

*>> like.mat
*

*>> }
*

*>>
*

*>> system.time(like.mat(score,items,theta))
*

*>> [1] 0 0 0 NA NA
*

*>>
*

*>> # Revised using mapply
*

*>>
*

*>> like.mat <- function(score, items, theta){
*

*>> matrix(mapply(pcm,rep(theta,length(items)),items,score),ncol=length(theta),byrow=TRUE)
*

*>> }
*

*>>
*

*>>> system.time(like.mat(score,items,theta))
*

*>> [1] 0.03 0.00 0.03 NA NA
*

*>>
*

*>>
*

*>> It is obviously slower to use mapply, but nominaly. So, let's actually look
*

*>> at this within the context of the full program I am working on. For context,
*

*>> I am evaluating an integral using Gaussian quadrature. This is a
*

*>> psychometric problem where the function 'pcm" is Master's partial credit
*

*>> model and 'score' is the student's score on test item i. When an item has
*

*>> two categories (0,1), pcm reduces to the Rasch model for dichotomous data.
*

*>> The dnorm is set at N(0,1) by default, but the parameters of the population
*

*>> distribution are estimated from a separate procedure and are normally input
*

*>> into the function, but this default works for the example.
*

*>>
*

*>> Here is the full program.
*

*>>
*

*>> library(statmod)
*

*>>
*

*>> # Master's partial credit model
*

*>> pcm <- function(theta,d,score){
*

*>> exp(rowSums(outer(theta,d[1:score],'-')))/
*

*>> apply(exp(apply(outer(theta,d, '-'), 1, cumsum)), 2, sum)
*

*>> }
*

*>>
*

*>> like.mat <- function(score, items, theta){
*

*>> like.mat <- matrix(numeric(length(items) * length(theta)), ncol =
*

*>> length(theta))
*

*>> for(i in 1:length(items)) like.mat[i, ] <- pcm(theta, items[[i]],
*

*>> score[[i]])
*

*>> like.mat
*

*>> }
*

*>>
*

*>> # turn this off for now
*

*>> #like.mat <- function(score, items, theta){
*

*>> #matrix(mapply(pcm,rep(theta,length(items)),items,score),ncol=length(theta),byrow=TRUE)
*

*>> #}
*

*>>
*

*>> class.numer <- function(score,items, prof_cut, mu=0, sigma=1, aboveQ){
*

*>> gauss_numer <- gauss.quad(49,kind="laguerre")
*

*>> if(aboveQ==FALSE){
*

*>> mat <- rbind(like.mat(score,items, (prof_cut-gauss_numer$nodes)),
*

*>> dnorm(prof_cut-gauss_numer$nodes, mean=mu, sd=sigma))
*

*>>
*

*>> } else { mat <- rbind(like.mat(score,items,
*

*>> (gauss_numer$nodes+prof_cut)),
*

*>> dnorm(gauss_numer$nodes+prof_cut, mean=mu,
*

*>> sd=sigma))
*

*>>
*

*>> }
*

*>> f_y <- rbind(apply(mat, 2, prod), exp(gauss_numer$nodes),
*

*>> gauss_numer$weights)
*

*>> sum(apply(f_y,2,prod))
*

*>> }
*

*>>
*

*>> class.denom <- function(score,items, mu=0, sigma=1){
*

*>> gauss_denom <- gauss.quad.prob(49, dist='normal', mu=mu, sigma=sigma)
*

*>> mat <-
*

*>> rbind(like.mat(score,items,gauss_denom$nodes),gauss_denom$weights)
*

*>> sum(apply(mat, 2, prod))
*

*>> }
*

*>>
*

*>> class.acc <-function(score,items,prof_cut, mu=0, sigma=1,
*

*>> aboveQ=TRUE){
*

*>> result <- class.numer(score,items,prof_cut, mu,sigma,
*

*>> aboveQ)/class.denom(score,items, mu, sigma)
*

*>> return(result)
*

*>> }
*

*>>
*

*>> # Test the function "class.acc"
*

*>> items <- list(item1 = c(0,1,2), item2 = c(0,1), item3 = c(0,1,2,3,4), item4
*

*>> = c(0,1), item5=c(0,1,2,3,4),
*

*>> item6=c(0,1,2,3))
*

*>> score <- c(2,1,3,1,3,2)
*

*>>
*

*>> # This is the system time when using the for loop for the like.mat function
*

*>> system.time(class.acc(score,items,1,aboveQ=T))
*

*>> [1] 0.04 0.00 0.04 NA NA
*

*>>
*

*>> # This is the system time using the mapply for the like.mat function
*

*>> system.time(class.acc(score,items,1,aboveQ=T))
*

*>> [1] 0.70 0.00 0.73 NA NA
*

*>>
*

*>>
*

*>> There is a substantial improvement in CPU seconds when the for loop is
*

*>> applied rather than using the mapply function. I experimented with adding
*

*>> more items and varying the quadrature points and it always turned out the
*

*>> for loop was faster.
*

*>>
*

*>> Given this result, I wonder what advice might be offered. Is there an
*

*>> inherent reason one might opt for the mapply function (such as reliability,
*

*>> etc) even when it compromises computational speed? Or, should the issue of
*

*>> computational speed be considered ahead of common advice to rely on the
*

*>> family of apply functions instead of the explicit loops.
*

*>>
*

*>> Thanks for your consideration of my question,
*

*>> Harold
*

R-help@stat.math.ethz.ch mailing list

https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. Received on Fri Jul 21 00:32:29 2006

Date: Thu 20 Jul 2006 - 23:13:02 EST

Gabor Grothendieck wrote:

> Note that if you use mapply in the way I suggested, which is not > the same as in your post, then its just as fast. (Also the version > of mapply in your post gives different numerical results than > the for loop whereas mine gives the same.) like.mat is the for > loop version, like.mat2 is your mapply version and like.mat3 > is my mapply version.

You might also test mApply in Hmisc.

Cheers

Frank

>

>> like.mat <- function(score, items, theta){

> + like.mat <- matrix(numeric(length(items) * length(theta)), ncol = > + length(theta)) > + for(i in 1:length(items)) like.mat[i, ] <- pcm(theta, items[[i]], > score[[i]]) > + like.mat > + }

>> system.time(for(i in 1:100) like.mat(score,items,theta))

> [1] 1.30 0.00 1.34 NA NA

>> like.mat2 <- function(score, items, theta)

> + matrix(mapply(pcm, rep(theta,length(items)), items, score), > + ncol = length(theta), byrow = TRUE)

>> system.time(for(i in 1:100) like.mat2(score,items,theta))

> [1] 5.70 0.00 5.91 NA NA

>> all.equal(like.mat(score, items, theta), like.mat2(score, items, theta))

> [1] "Mean relative difference: 1.268095"

>> like.mat3 <- function(score, items, theta)

> + t(mapply(pcm, items, score, MoreArgs = list(theta = theta)))

>> system.time(for(i in 1:100) like.mat3(score,items,theta))

> [1] 1.32 0.01 1.39 NA NA

>> m3 <- like.mat3(score, items, theta)

> [1] TRUE > > On 7/20/06, Doran, Harold <HDoran@air.org> wrote:

>> List:

R-help@stat.math.ethz.ch mailing list

https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. Received on Fri Jul 21 00:32:29 2006

Archive maintained by Robert King, hosted by
the discipline of
statistics at the
University of Newcastle,
Australia.

Archive generated by hypermail 2.1.8, at Fri 21 Jul 2006 - 02:20:14 EST.

*
Mailing list information is available at https://stat.ethz.ch/mailman/listinfo/r-help.
Please read the posting
guide before posting to the list.
*