# Re: [R] r programming help II

From: Gabor Grothendieck <ggrothendieck_at_gmail.com>
Date: Fri 24 Jun 2005 - 15:20:00 EST

On 6/24/05, Mohammad Ehsanul Karim <wildscop@yahoo.com> wrote:
> Dear List,
>
> Suppose we have a variable K.JUN defined as (with
> 1=wet, 0=dry):
>
> K.JUN1984 = c(1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1,
> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
> K.JUN1985 = c(0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1,
> 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1)
> K.JUN1986 = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1)
> K.JUN1987 = c(0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0,
> 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0)
> K.JUN1988 = c(1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
> 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0)
> K.JUN1989 = c(0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1,
> 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1)
> K.JUN1990 = c(1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
> 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0)
> K.JUN1991 = c(0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
> 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0)
> K.JUN1992 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
> 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0)
> K.JUN1993 = c(0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0,
> 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0)
> K.JUN1994 = c(0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1,
> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1)
> K.JUN1995 = c(0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1,
> 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0)
> K.JUN1996 = c(0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0,
> 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1)
> K.JUN1997 = c(0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1,
> 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
> K.JUN1998 = c(1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0,
> 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0)
> K.JUN1999 = c(0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0,
> 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)
> K.JUN2000 = c(1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1,
> 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0)
> K.JUN2001 = c(1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
> 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1)
> K.JUN2002 = c(1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
> 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1)
>
> K.JUN<-c(K.JUN1984,K.JUN1985,K.JUN1986,K.JUN1987,K.JUN1988,K.JUN1989,K.JUN1990,K.JUN1991,K.JUN1992,K.JUN1993,K.JUN1994,K.JUN1995,K.JUN1996,K.JUN1997,K.JUN1998,K.JUN1999,K.JUN2000,K.JUN2001,K.JUN2002)
>
> Our motivation is to count number of wet days (1's) in
> each weeks. But counting number of wet days for entire
> K.JUN will not do.
> Thus in r console,
>
> > k<-0;j<-0;i<-(1:7)+30*j+7*k;K.JUN[i];rle(K.JUN[i])
> [1] 1 0 1 1 1 1 1
> Run Length Encoding
> lengths: int [1:3] 1 1 5
> values : num [1:3] 1 0 1
>
> where k=0,1,2,3 for each j=0 to 18 (k indicating weeks
> of any June, and j indicates years 1984-2002
> respectively).
>
> Now we need to sum the run 'lengths' corresponding to
> each 'values' "1" (that is 'lengths' of each "0"
> 'values' need to be excluded) for all k=0,1,2,3 for
> each j=0 to 18 (for example, for k<-0;j<-0 we find
> 'lengths' 1 and 5 for 'values' "1" in the above: then
> we sum it as
> sum(rle(K.JUN[(1:7)+30*0+7*0])\$lengths[c(1,3)])
> manually).
>
> Doing so for all k=0,1,2,3 for each j=0 to 18 manually
> like this
> k<-0;j<-0;i<-(1:7)+30*j+7*k;K.JUN[i];rle(K.JUN[i])
> k<-1;j<-0;i<-(1:7)+30*j+7*k;K.JUN[i];rle(K.JUN[i])
> ...
> k<-3;j<-18;i<-(1:7)+30*j+7*k;K.JUN[i];rle(K.JUN[i])
> we observe the run 'lengths' corresponding to all "1"
> 'values' and sum them under a new variable JUN.w as
> follows (a cumbersome process obviously):
>
>
> JUN.w<-c(sum(rle(K.JUN[(1:7)+30*0+7*0])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*0+7*1])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*0+7*2])\$lengths[1]),sum(rle(K.JUN[(1:7)+30*0+7*3])\$lengths[1]),
> sum(rle(K.JUN[(1:7)+30*1+7*0])\$lengths[c(2,4,6)]),sum(rle(K.JUN[(1:7)+30*1+7*1])\$lengths[c(2,4)]),sum(rle(K.JUN[(1:7)+30*1+7*2])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*1+7*3])\$lengths[2]),
> sum(rle(K.JUN[(1:7)+30*2+7*0])\$lengths[2]),sum(rle(K.JUN[(1:7)+30*2+7*1])\$lengths[2]),sum(rle(K.JUN[(1:7)+30*2+7*2])\$lengths[2]),sum(rle(K.JUN[(1:7)+30*2+7*3])\$lengths[1]),
> sum(rle(K.JUN[(1:7)+30*3+7*0])\$lengths[2]),sum(rle(K.JUN[(1:7)+30*3+7*1])\$lengths[c(1,3,5)]),sum(rle(K.JUN[(1:7)+30*3+7*2])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*3+7*3])\$lengths[1]),
> sum(rle(K.JUN[(1:7)+30*4+7*0])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*4+7*1])\$lengths[1]),sum(rle(K.JUN[(1:7)+30*4+7*2])\$lengths[1]),sum(rle(K.JUN[(1:7)+30*4+7*3])\$lengths[1]),
> sum(rle(K.JUN[(1:7)+30*5+7*0])\$lengths[c(2,4)]),sum(rle(K.JUN[(1:7)+30*5+7*1])\$lengths[c(2,4)]),sum(rle(K.JUN[(1:7)+30*5+7*2])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*5+7*3])\$lengths[1]),
> sum(rle(K.JUN[(1:7)+30*6+7*0])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*6+7*1])\$lengths[c(2,4)]),sum(rle(K.JUN[(1:7)+30*6+7*2])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*6+7*3])\$lengths[c(1,3)]),
> sum(rle(K.JUN[(1:7)+30*7+7*0])\$lengths[c(2,4)]),sum(rle(K.JUN[(1:7)+30*7+7*1])\$lengths[1]),sum(rle(K.JUN[(1:7)+30*7+7*2])\$lengths[1]),sum(rle(K.JUN[(1:7)+30*7+7*3])\$lengths[2]),
> 0,sum(rle(K.JUN[(1:7)+30*8+7*1])\$lengths[2]),sum(rle(K.JUN[(1:7)+30*8+7*2])\$lengths[c(2,4)]),sum(rle(K.JUN[(1:7)+30*8+7*3])\$lengths[1]),
> sum(rle(K.JUN[(1:7)+30*9+7*0])\$lengths[2]),sum(rle(K.JUN[(1:7)+30*9+7*1])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*9+7*2])\$lengths[2]),sum(rle(K.JUN[(1:7)+30*9+7*3])\$lengths[1]),
> sum(rle(K.JUN[(1:7)+30*10+7*0])\$lengths[2]),sum(rle(K.JUN[(1:7)+30*10+7*1])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*10+7*2])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*10+7*3])\$lengths[c(1,3)]),
> sum(rle(K.JUN[(1:7)+30*11+7*0])\$lengths[2]),sum(rle(K.JUN[(1:7)+30*11+7*1])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*11+7*2])\$lengths[c(2,4)]),sum(rle(K.JUN[(1:7)+30*11+7*3])\$lengths[c(1,3)]),
> sum(rle(K.JUN[(1:7)+30*12+7*0])\$lengths[2]),sum(rle(K.JUN[(1:7)+30*12+7*1])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*12+7*2])\$lengths[2]),sum(rle(K.JUN[(1:7)+30*12+7*3])\$lengths[1]),
> sum(rle(K.JUN[(1:7)+30*13+7*0])\$lengths[c(2,4)]),sum(rle(K.JUN[(1:7)+30*13+7*1])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*13+7*2])\$lengths[2]),sum(rle(K.JUN[(1:7)+30*13+7*3])\$lengths[1]),
> sum(rle(K.JUN[(1:7)+30*14+7*0])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*14+7*1])\$lengths[c(2,4)]),sum(rle(K.JUN[(1:7)+30*14+7*2])\$lengths[c(2,4)]),sum(rle(K.JUN[(1:7)+30*14+7*3])\$lengths[c(2,4)]),
> sum(rle(K.JUN[(1:7)+30*15+7*0])\$lengths[2]),sum(rle(K.JUN[(1:7)+30*15+7*1])\$lengths[2]),sum(rle(K.JUN[(1:7)+30*15+7*2])\$lengths[2]),sum(rle(K.JUN[(1:7)+30*15+7*3])\$lengths[1]),
> sum(rle(K.JUN[(1:7)+30*16+7*0])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*16+7*1])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*16+7*2])\$lengths[2]),sum(rle(K.JUN[(1:7)+30*16+7*3])\$lengths[1]),
> sum(rle(K.JUN[(1:7)+30*17+7*0])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*17+7*1])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*17+7*2])\$lengths[1]),sum(rle(K.JUN[(1:7)+30*17+7*3])\$lengths[c(1,3)]),
> sum(rle(K.JUN[(1:7)+30*18+7*0])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*18+7*1])\$lengths[c(1,3)]),sum(rle(K.JUN[(1:7)+30*18+7*2])\$lengths[1]),sum(rle(K.JUN[(1:7)+30*18+7*3])\$lengths[c(1,3)]))
> # calculating the observed distribution
> # This only considers 4 weeks a month, leaving 2 days
> of each June
> > table(JUN.w)
> JUN.w
> 0 1 2 3 4 5 6 7
> 1 6 9 13 8 17 15 7
>
>
> Now, i know this is a huge problem and time consuming
> for us, but is there any way we can solve this
> automatically only by specifying K.JUN data variable
> (or its particles K.JUN1984,...,K.JUN2002) by means of
> R programming?
>
> Thank you for your time. Any hint, help, support,
> references will be highly appreciated.
>

My understanding is that you want to create a sum for each of the first 4 weeks in each of 19 months and then tabulate the frequencies of those 76 numbers.

1. First create a 30x19 matrix of the numbers where each column is a month. Take the first 28 rows, i.e. the first 28 days of each month. Call this j2.
2. Now reshape j2 into a 7x4x19 array such that the first dimension is the 7 days of the week, the second dimension is the 4 weeks in the month and the last dimension is the 19 years. Call this 3-dimensional array j3.
3. Perform the required sum over each week using apply, then flatten the resulting 4x19 matrix into a vector with 'c', calling the answer jj. Note that this is the same as your JUN.w.
4. Finally, create a table of frequencies. This gives the same answer as you displayed.

Here is the code:

```r
j2 <- matrix(K.JUN, 30)[1:28,]
j3 <- array(j2, c(7,4,19))
jj <- c(apply(j3, 2:3, sum))
table(jj)
```

R-help@stat.math.ethz.ch mailing list