Re: [R] Translating R code + library into Fortran?

From: Paul Gilbert <pgilbert_at_bank-banque-canada.ca>
Date: Mon 11 Sep 2006 - 19:49:52 GMT

I have the impression (after only a quick glance at your code, so I may have missed something) that you are generating multiple datasets and then using them. As a general strategy this does not work very well. You will get slightly improved performance with compiled code, but the real problem is that you grab too much memory and start swapping, and then things will be very slow.

It is better to generate a dataset, use it and save partial results, then generate the next dataset, using the same variable name so the memory use does not increase. There are examples of this in the dse2 package in the dse bundle on CRAN. (In fact, you may be able to use some of the structure in that package.)

Paul Gilbert

Mike Lawrence wrote:
> Hi all,
>
> I'm running a monte carlo test of a neural network tool I've developed,
> and it looks like it's going to take a very long time if I run it in R
> so I'm interested in translating my code (included below) into something
> faster like Fortran (which I'll have to learn from scratch). However, as
> you'll see my code loads the nnet library and uses it quite a bit, and I
> don't have a good sense of how this impacts the translation process;
> will I have to translate all the code for the nnet library itself as well?
>
> Any pointers would be greatly appreciated! Here's my code:
>
> #This code replicates the simulation performed by Rouder et al (2005),
> #which attempts to test the estimation of weibull distribution parameters
> #from sample data. In this implementation, their HB estimation method is
> #replaced by an iterative neural network approach.
>
> library(nnet)
>
> data.gen=function(iterations,min.sample.size,max.sample.size,min.shift,max.shift,min.scale,max.scale,min.shape,max.shape){
> #set up some collection vectors
> sample.size=vector(mode="numeric",length=iterations)
> exp.shift=vector(mode="numeric",length=iterations)
> exp.scale=vector(mode="numeric",length=iterations)
> exp.shape=vector(mode="numeric",length=iterations)
> for(i in 1:iterations){
> #sample from the parameter space
>
> sample.size[i]=round(runif(1,min.sample.size,max.sample.size),digits=0)
> exp.shift[i]=runif(1,min.shift,max.shift)
> exp.scale[i]=runif(1,min.scale,max.scale)
> exp.shape[i]=runif(1,min.shape,max.shape)
> #generate rt data and record summary stats
>
> obs.rt=rweibull(sample.size[i],exp.shape[i],exp.scale[i])+exp.shift[i]
> if(i==1){
> obs.stats=summary(obs.rt)
> }else{
> obs.stats=rbind(obs.stats,summary(obs.rt))
> }
> }
> row.names(obs.stats)=c(1:iterations)
> obs.stats=as.data.frame(obs.stats)
>
> obs=as.data.frame(cbind(obs.stats,sample.size,exp.shift,exp.scale,exp.shape))
>
> names(obs)=c("min","q1","med","mean","q3","max","samples","exp.shift","exp.scale","exp.shape")
> return(obs)
> }
>
> #set working directory
> setwd("E:/Various Data/NNEst/NetWeibull/Rouder data")
>
> stadler=read.table("bayest.par")
> names(stadler)=c("exp.shift","exp.scale","exp.shape")
>
> cell.size=20
> sim.size=600
> #first train initial neural nets
> training.data=data.gen(1e4,cell.size,cell.size,.1,1,.1,1,1,4)
> #train nn.shift with error checking
> ok=F
> while(ok==F){
>
> nn1.shift=nnet(exp.shift~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F)
> cor.shift=predict(nn.shift,training.data[,c(1:7)],type="raw")
> temp=hist(cor.shift,plot=F)
> if(length(temp$counts[temp$counts>0])>10){
> ok=T
> }
> }
> #train nn.scale with error checking
> ok=F
> while(ok==F){
>
> nn1.scale=nnet(exp.scale~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F)
> cor.scale=predict(nn.scale,training.data[,c(1:7)],type="raw")
> temp=hist(cor.scale,plot=F)
> if(length(temp$counts[temp$counts>0])>10){
> ok=T
> }
> }
> #train nn.shape with error checking
> ok=F
> while(ok==F){
>
> nn1.shape=nnet(exp.shape~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F)
> cor.shape=predict(nn.shape,training.data[,c(1:7)],type="raw")
> temp=hist(cor.shape,plot=F)
> if(length(temp$counts[temp$counts>0])>10){
> ok=T
> }
> }
>
>
> #run simulation
> obs.stats=matrix(0,80,7)
> ind.shift.err=matrix(0,80,sim.size)
> ind.scale.err=matrix(0,80,sim.size)
> ind.shape.err=matrix(0,80,sim.size)
> group.shift.err=vector(mode="numeric",length=sim.size)
> group.scale.err=vector(mode="numeric",length=sim.size)
> group.shape.err=vector(mode="numeric",length=sim.size)
> for(i in 1:sim.size){
> for(j in 1:80){
>
> obs.stats[j,]=c(summary(rweibull(cell.size,stadler$exp.shape[j],stadler$exp.scale[j])+stadler$exp.shift[j]),cell.size)
> }
> obs.stats=as.data.frame(obs.stats)
> names(obs.stats)=c("min","q1","med","mean","q3","max","samples")
> #estimation iteration 1
> cor.shift=predict(nn1.shift,obs.stats,type="raw")
> cor.scale=predict(nn1.scale,obs.stats,type="raw")
> cor.shape=predict(nn1.shape,obs.stats,type="raw")
> min.obs.samples=min(obs.stats$samples)
> max.obs.samples=max(obs.stats$samples)
> min.shift=quantile(cor.shift,seq(0,1,.05))[2]
> max.shift=quantile(cor.shift,seq(0,1,.05))[20]
> min.scale=quantile(cor.scale,seq(0,1,.05))[2]
> max.scale=quantile(cor.scale,seq(0,1,.05))[20]
> min.shape=quantile(cor.shape,seq(0,1,.05))[2]
> max.shape=quantile(cor.shape,seq(0,1,.05))[20]
> #re-train nets to reduced parameter space
>
> training.data=data.gen(1e4,min.obs.samples,max.obs.samples,min.shift,max.shift,min.scale,max.scale,min.shape,max.shape)
> #train nn.shift with error checking
> ok=F
> while(ok==F){
>
> nn2.shift=nnet(exp.shift~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F)
> cor.shift=predict(nn2.shift,training.data[,c(1:7)],type="raw")
> temp=hist(cor.shift,plot=F)
> if(length(temp$counts[temp$counts>0])>10){
> ok=T
> }
> }
> #train nn.scale with error checking
> ok=F
> while(ok==F){
>
> nn2.scale=nnet(exp.scale~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F)
> cor.scale=predict(nn2.scale,training.data[,c(1:7)],type="raw")
> temp=hist(cor.scale,plot=F)
> if(length(temp$counts[temp$counts>0])>10){
> ok=T
> }
> }
> #train nn.shape with error checking
> ok=F
> while(ok==F){
>
> nn2.shape=nnet(exp.shape~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F)
> cor.shape=predict(nn2.shape,training.data[,c(1:7)],type="raw")
> temp=hist(cor.shape,plot=F)
> if(length(temp$counts[temp$counts>0])>10){
> ok=T
> }
> }
> #estimation iteration 2
> cor.shift=predict(nn2.shift,obs.stats,type="raw")
> cor.scale=predict(nn2.scale,obs.stats,type="raw")
> cor.shape=predict(nn2.shape,obs.stats,type="raw")
> #record error
> ind.shift.err[,i]=cor.shift-stadler$exp.shift
> ind.scale.err[,i]=cor.scale-stadler$exp.scale
> ind.shape.err[,i]=cor.shape-stadler$exp.shape
> group.shift.err[i]=mean(cor.shift)-mean(stadler$exp.shift)
> group.scale.err[i]=mean(cor.scale)-mean(stadler$exp.scale)
> group.shape.err[i]=mean(cor.shape)-mean(stadler$exp.shape)
> }
>
> results=as.data.frame(rbind(cbind(sd(c(ind.shift.err[,1:162])),sd(c(ind.scale.err[,1:162])),sd(c(ind.shape.err[,1:162]))),cbind(sd(group.shift.err[1:162]),sd(group.scale.err[1:162]),sd(group.shape.err[1:162]))))
> results
>


La version franšaise suit le texte anglais.


This email may contain privileged and/or confidential inform...{{dropped}}



R-help@stat.math.ethz.ch mailing list
https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. Received on Tue Sep 12 05:54:49 2006

Archive maintained by Robert King, hosted by the discipline of statistics at the University of Newcastle, Australia.
Archive generated by hypermail 2.1.8, at Mon 11 Sep 2006 - 20:18:51 GMT.

Mailing list information is available at https://stat.ethz.ch/mailman/listinfo/r-help. Please read the posting guide before posting to the list.