[R] speeding up likelihood computation

From: DEEPANKAR BASU <basu.15_at_osu.edu>
Date: Sun, 02 Dec 2007 12:49:34 -0500

I am trying to estimate a model of fertility behaviour using birth history data with maximum likelihood. My code works but is extremely slow (because of several for loops and my programming inefficiencies); when I use the genetic algorithm to optimize the likelihood function, it takes several days to complete (on a machine with Intel Core 2 processor [2.66GHz] and 2.99 GB RAM). Computing the hessian and using it to calculate the standard errors takes a large chunk of this time.

I am copying the code for my likelihood function below; it would be great if someone could suggest any method to speed up the code (by avoiding the for loops or by any other method).

I am not providing details of my model or what exactly I am trying to do in each step of the computation below; i would be happy to provide these details if they are deemed necessary for re-working the code.

Thanks.
Deepankar

LLK1 <- function(paramets, data.frame, ...) { # DEFINING THE LOGLIKELIHOOD FUNCTION

# paramets IS A 1x27 VECTOR OF PARAMETERS OVER WHICH THE FUNCTION WILL BE MAXIMISED
# data.frame IS A DATA FRAME. THE DATA FRAME CONTAINS OBSERVATIONS ON SEVERAL VARIABLES
# (LIKE EDUCATION, AGE, ETC.) FOR EACH RESPONDENT. COLUMNS REFER TO VARIABLES AND ROWS REFER
# TO OBSERVATIONS.

```########## PARAMETERS ###############################

```

# alpha: interaction between son targeting and family size
# beta : son targeting
# gamma : family size
# delta : a 1x6 vector of probabilities of male birth at various parities (q1, q2, q3, q4, q5, q6)
# zeta : a 1x11 vector of conditional probabilities with zeta[1]=1 always

```alpha <- paramets[1]      # FIRST PARAMETER
beta <- paramets[2:9]     # SECOND TO SEVENTH PARAMETER
```
gamma <- paramets[10:16]
delta <- paramets[17]
zeta <- paramets[18:27] # LAST 10 PARAMETERS
```################# VARIABLES ###############################
```

# READING IN THE VARIABLES IN THE DATA FRAME
# AND RENAMING THEM FOR USE IN THE LIKELIHOOD FUNCTION
everborn <- data.frame\$v201
alive <- data.frame\$alive
age <- data.frame\$age
edu <- data.frame\$edu
rural <- data.frame\$rur
rich <- data.frame\$rich
middle <- data.frame\$middle
poor <- data.frame\$poor
work <- data.frame\$work
jointfam <- data.frame\$jfam
contracep <- data.frame\$contra
hindu <- data.frame\$hindu
```muslim <- data.frame\$muslim
scaste <- data.frame\$scaste
stribe <- data.frame\$stribe
```

obc <- data.frame\$obc
ucaste <- data.frame\$ucaste
N <- data.frame\$dfsize
indN <- data.frame\$dfsize1 # INDICATOR FUNCTION THAT dfsize==alive nb <- data.frame\$nboy
ng <- data.frame\$ngirl
```ncord1 <- data.frame\$ncord1  # FIRST CHILD: BOY=0; GIRL=1
ncord2 <- data.frame\$ncord2  #SECOND CHILD: BOY=0; GIRL=1
ncord3 <- data.frame\$ncord3
ncord4 <- data.frame\$ncord4
ncord5 <- data.frame\$ncord5
ncord6 <- data.frame\$ncord6  # SIXTH CHILD: BOY=0; GIRL=1

######### POSITION OF i^th BOY ################################################
boy1 <- data.frame\$boy1     # BIRTH POSITION OF FIRST BOY (ZERO IF THE FAMILY HAS NO BOYS)
boy2 <- data.frame\$boy2     # BIRTH POSITION OF SECOND BOY (ZERO IF THE FAMILY HAS ONLY ONE BOY)
boy3 <- data.frame\$boy3
boy4 <- data.frame\$boy4
boy5 <- data.frame\$boy5
boy6 <- data.frame\$boy6     # BIRTH POSITION OF SIXTH BOY (ZERO IF THE FAMILY HAS ONLY FIVE BOYS)

######################## CONDITIONAL PROBABILITIES ##########################
```
qq21 <- 1

qq31 <- 1/(1+exp(zeta[1]))
qq32 <- exp(zeta[1])/(1+exp(zeta[1]))

```qq41 <- 1/(1+exp(zeta[2])+exp(zeta[3]))
qq42 <- exp(zeta[2])/(1+exp(zeta[2])+exp(zeta[3]))
qq43 <- exp(zeta[3])/(1+exp(zeta[2])+exp(zeta[3]))

qq51 <- 1/(1+exp(zeta[4])+exp(zeta[5])+exp(zeta[6]))
qq52 <- exp(zeta[4])/(1+exp(zeta[4])+exp(zeta[5])+exp(zeta[6]))
```
qq53 <- exp(zeta[5])/(1+exp(zeta[4])+exp(zeta[5])+exp(zeta[6])) qq54 <- exp(zeta[6])/(1+exp(zeta[4])+exp(zeta[5])+exp(zeta[6]))
```qq61 <- 1/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10]))
qq62 <- exp(zeta[7])/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10]))
qq63 <- exp(zeta[8])/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10]))
qq64 <- exp(zeta[9])/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10]))
qq65 <- exp(zeta[10])/(1+exp(zeta[7])+exp(zeta[8])+exp(zeta[9])+exp(zeta[10]))

```

zeta1 <- c(qq21,qq31,qq32,qq41,qq42,qq43,qq51,qq52,qq53,qq54,qq61,qq62,qq63,qq64,qq65)

```#########################################################################

n <- length(N)         # LENGTH OF SAMPLE; SIZE OF THE MAIN LOOP

lglik <- numeric(n)    # INITIALIZING THE LOGLIKELIHOOD FUNCTION
# CREATES A 1xn VECTOR OF ZEROS

for (j in 1:n) {      # START OF MAIN LOOP

S <- matrix(0, 6, 6)  # CREATE A 6x6 MATRIX OF ZEROS
y <- numeric(15)      # CREATE A 1x15 VECTOR OF ZEROS
N1 <- N[j]       # DESIRED FAMILY SIZE

q <- 1/(1+exp(delta))   # PROBABILITY OF MALE BIRTH

```
```	for (i in 1:(N1-1)) {
S[N1,i] <- (q^(nb[j]))*((1-q)^(ng[j]))
}
```

}

else {

```	for (i in 1:(N1-1)) {
S[N1,i] <- 0
}

```

}

######### CREATE A 1x6 VECTOR WITH POSITION OF BOYS WITHIN FAMILY

x <- c(boy1[j], boy2[j], boy3[j], boy4[j], boy5[j])

```      if (N1>1) {
for (i in 1:(N1-1)) {
if (alive[j]>x[i] & x[i]>0) {
S[N1,i] <- 0
}
if (x[i] == alive[j] ) {
S[N1,i] <- (q^(nb[j]))*((1-q)^(ng[j]))
}
}
}

```

y <- c(S[2,1],S[3,1],S[3,2],S[4,1],S[4,2],S[4,3],S[5,1],S[5,2],S[5,3],S[5,4],S[6,1],S[6,2],S[6,3],S[6,4],S[6,5])

z1 <- c(age[j],edu[j],work[j],rural[j],poor[j],middle[j],hindu[j]) # DETERMINANTS OF FAMILY SIZE    z2 <- c(1,age[j],edu[j],work[j],contracep[j],rural[j],jointfam[j],hindu[j]) # DETERMINANTS OF SON TARGETING

t1 <- (indN[j])*((q^(nb[j]))*((1-q)^(ng[j])))*(exp(-exp(sum(z1*gamma)))*((exp(sum(z1*gamma)))^N1)*pnorm(-sum(z2*beta)))/factorial(N1)    t2 <- (sum(y*zeta1))*(exp(-exp(sum(z1*gamma) + alpha))*((exp(sum(z1*gamma) + alpha))^N1)*(1-pnorm(-sum(z2*beta)))/factorial(N1))    lglik[j] <- log(t1+t2)
}

return(-sum(lglik)) # RETURNING THE NEGATIVE OF THE LOGLIKELIHOOD

```                     # SUMMED OVER ALL OBSERVATIONS

```

}

• end code ----------------------

R-help_at_r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. Received on Sun 02 Dec 2007 - 17:52:18 GMT

Archive maintained by Robert King, hosted by the discipline of statistics at the University of Newcastle, Australia.
Archive generated by hypermail 2.2.0, at Mon 03 Dec 2007 - 02:30:16 GMT.

Mailing list information is available at https://stat.ethz.ch/mailman/listinfo/r-help. Please read the posting guide before posting to the list.