[Rd] data is getting corrupted

From: Jeff D. Hamann <jeff.hamann_at_forestinformatics.com>
Date: Tue 30 Nov 2004 - 04:27:42 EST


I've been attempting to perform some analysis on a model that was interfaced with R (R calls a library that takes SEXPs and converts the data frames into the internal structures of data), and I notice that for small data.frames the vectors don't get corrupt (n<200-ish). When I pass in larger data.frames, the vectors will become corrupt. I've been PROTECTING the heck out of everything (as best as I can from the examples) to make sure that something is not overlooked. I know the code in my library works fine becuase when I attempt to do the same thing (with much larger data arrays) none of this behaviour occurs.

An example of the corruption is,

    1  1714   ARPA   0.00   0.0000   0.00   0.0000   3.64  0.000     1   
20.00   0.00   0.00     0
    1  1715   ARPA   0.00   0.0000   0.00   0.0000   3.14  0.000     1   
20.00   0.00   0.00     0

    1 1716 ARPA

97538806975312948000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.00
  0.0000   0.00   0.0000   4.68  0.000     1    20.00   0.00   0.00   
 0
    1  1717   ARPA   0.00   0.0000   0.00   0.0000   2.50  0.000     1   
20.00   0.00   0.00     0
    1  1718   ARPA   0.00   0.0000   0.00   0.0000   4.78  0.000     1   
20.00   0.00   0.00     0
    1  1719   ARPA   0.00   0.0000   0.00   0.0000   4.04  0.000     1   
20.00   0.00   0.00     0
    1  1720   ARPA   0.00   0.0000   0.00   0.0000   2.60  0.000     1   
20.00   0.00   0.00     0

    1 1721 ARPA

1141566538356936100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.00
  0.0000   0.00   0.0000   3.57  0.000     1    20.00   0.00   0.00   
 0
    1  1722   ARPA   0.00   0.0000   0.00   0.0000   2.29  0.000     1   
20.00   0.00   0.00     0


And it's the same columns that become corrupt. I wanted to make sure I'm using PROTECT correctly (the examples in the docs don't appear very thourough) and included a snippet below,

SEXP r_write_sample_to_file( SEXP sample_in,

                             SEXP filename )
{

   unsigned long return_code;
   struct SAMPLE_RECORD *sample_ptr;
   SEXP ans;

   PROTECT( filename = AS_CHARACTER( filename ) );    PROTECT(ans = allocVector(INTSXP, 1));

   PROTECT( sample_in = AS_LIST( sample_in ) );    sample_ptr = build_sample_from_sexp( sample_in );

   write_sample_to_file(

      &return_code,
      CHAR(STRING_ELT(filename, 0)),
      sample_ptr,
      N_SPECIES,
      SPECIES_PTR );

   if( return_code != CONIFERS_SUCCESS )    {

      Rprintf( "unable to write %s\n", CHAR(STRING_ELT(filename, 0)) );
      INTEGER(ans)[0] = -1;
      UNPROTECT(2);
      UNPROTECT( 1 );
      return ans;

   }

   INTEGER(ans)[0] = 0;
   UNPROTECT(2);
   UNPROTECT( 1 );    return ans;

}

which calls this rather lengthy function (but I thought I should include the entire function for completeness),

/* this function converts the sample list	*/
/* from R into the internal structure		*/
struct SAMPLE_RECORD *build_sample_from_sexp( SEXP sample ) {

   int i;

/* plots variables */

   SEXP plot_list;
   SEXP plot_plot_sexp;
   SEXP plot_lat_sexp;
   SEXP plot_long_sexp;
   SEXP plot_elev_sexp;
   SEXP plot_slp_sexp;
   SEXP plot_asp_sexp;
   SEXP plot_h20_sexp;
   SEXP plot_map_sexp;

/* plants variables */

   SEXP plant_list;
   SEXP plant_plot_sexp;
   SEXP plant_plant_sexp;
   SEXP plant_sp_code_sexp;
   SEXP plant_d6_sexp;
   SEXP plant_d6_area_sexp;
   SEXP plant_dbh_sexp;
   SEXP plant_basal_area_sexp;
   SEXP plant_tht_sexp;
   SEXP plant_cr_sexp;
   SEXP plant_n_stems_sexp;
   SEXP plant_expf_sexp;
   SEXP plant_crown_width_sexp;
   SEXP plant_crown_area_sexp;
   SEXP plant_user_code_sexp;

   char                    temp_sp_code[16];
   struct SAMPLE_RECORD *s_ptr;
   struct SPECIES_RECORD *sp_ptr;

   s_ptr = (struct SAMPLE_RECORD *)calloc( 1, sizeof( struct SAMPLE_RECORD ) );
/* s_ptr = (struct SAMPLE_RECORD *)Calloc( 1, struct SAMPLE_RECORD ); */

/* *fill in the header info */

   strcpy( s_ptr->forest,
CHAR(STRING_ELT(get_list_element(sample,"forest"), 0)) ) ;

   strcpy( s_ptr->subunit,
CHAR(STRING_ELT(get_list_element(sample,"subunit"), 0)) );

   strcpy( s_ptr->stand_name,
CHAR(STRING_ELT(get_list_element(sample,"stand.name"), 0)) );

   strcpy( s_ptr->legal, CHAR(STRING_ELT(get_list_element(sample,"legal"), 0)) );

   s_ptr->elevation = asInteger( get_list_element( sample, "elevation" ) );
   s_ptr->acreage = asReal( get_list_element( sample, "acreage" ) );
   s_ptr->age = asInteger( get_list_element( sample, "age" ) );
   s_ptr->sampled_month = asInteger( get_list_element( sample,
"sampled.month" ) );

   s_ptr->sampled_day = asInteger( get_list_element( sample, "sampled.day" ) );

   s_ptr->sampled_year = asInteger( get_list_element( sample, "sampled.year" ) );

   s_ptr->current_year = asInteger( get_list_element( sample, "current.year" ) );

   s_ptr->x0 = asReal( get_list_element( sample, "x0" ) );

   if( s_ptr->age <= 0 )
   {

      s_ptr->age = 0;
   }

   if( s_ptr->sampled_month <= 0 )
   {

      s_ptr->sampled_month = 0;
   }

/* build the plots vector */

   s_ptr->n_points = asInteger( get_list_element( sample, "n.points" ) );   s_ptr->plots_ptr = (struct PLOT_RECORD*)calloc(

         s_ptr->n_points, sizeof( struct PLOT_RECORD ) );

/* s_ptr->plots_ptr = (struct PLOT_RECORD*)Calloc( */ /* s_ptr->n_points, struct PLOT_RECORD ); */

   plot_list = get_list_element( sample, "plots" );    PROTECT( plot_list = AS_LIST( plot_list ) ); // PROTECT( plot_list );

   plot_plot_sexp = get_list_element( plot_list, "plot" );
   plot_lat_sexp = get_list_element( plot_list, "latitude" );
   plot_long_sexp = get_list_element( plot_list, "longitude" );
   plot_elev_sexp = get_list_element( plot_list, "elevation" );
   plot_slp_sexp = get_list_element( plot_list, "slope" );
   plot_asp_sexp = get_list_element( plot_list, "aspect" );
   plot_h20_sexp = get_list_element( plot_list, "whc" );
   plot_map_sexp = get_list_element( plot_list, "map" );

   PROTECT( plot_plot_sexp = coerceVector( plot_plot_sexp, INTSXP ) );
   PROTECT( plot_lat_sexp = coerceVector( plot_lat_sexp, REALSXP ) );
   PROTECT( plot_long_sexp = coerceVector( plot_long_sexp, REALSXP ) );
   PROTECT( plot_elev_sexp = coerceVector( plot_elev_sexp, REALSXP ) );
   PROTECT( plot_slp_sexp = coerceVector( plot_slp_sexp, REALSXP ) );
   PROTECT( plot_asp_sexp = coerceVector( plot_asp_sexp, REALSXP ) );
   PROTECT( plot_h20_sexp = coerceVector( plot_h20_sexp, REALSXP ) );    PROTECT( plot_map_sexp = coerceVector( plot_map_sexp, REALSXP ) );

/* assign the plot array */

   for( i = 0; i < s_ptr->n_points; i++ )    {

      s_ptr->plots_ptr[i].plot = INTEGER( plot_plot_sexp )[i];
      s_ptr->plots_ptr[i].latitude = REAL( plot_lat_sexp )[i];
      s_ptr->plots_ptr[i].longitude = REAL( plot_long_sexp )[i];
      s_ptr->plots_ptr[i].elevation = REAL( plot_elev_sexp )[i];
      s_ptr->plots_ptr[i].slope = REAL( plot_slp_sexp )[i];
      s_ptr->plots_ptr[i].aspect = REAL( plot_asp_sexp )[i];
      s_ptr->plots_ptr[i].water_capacity = REAL( plot_h20_sexp )[i];
      s_ptr->plots_ptr[i].mean_annual_precip = REAL( plot_map_sexp )[i];
   }

// UNPROTECT( 8 );
/* build the plants vector */

   s_ptr->n_plants = asInteger( get_list_element( sample, "n.plants" ) );   s_ptr->plants_ptr = (struct PLANT_RECORD*)calloc(

     s_ptr->n_plants, sizeof( struct PLANT_RECORD ) );

/* s_ptr->plants_ptr = (struct PLANT_RECORD*)Calloc( */ /* s_ptr->n_plants, struct PLANT_RECORD ); */

/* build the plots vector */

  plant_list = get_list_element( sample, "plants" );    PROTECT( plant_list = AS_LIST( plant_list ) ); // PROTECT( plant_list );

   plant_plot_sexp = get_list_element( plant_list, "plot" );
   plant_plant_sexp = get_list_element( plant_list, "plant" );
   plant_sp_code_sexp = get_list_element( plant_list, "sp.code" );
   plant_d6_sexp = get_list_element( plant_list, "d6" );
   plant_d6_area_sexp = get_list_element( plant_list, "d6.area" );
   plant_dbh_sexp = get_list_element( plant_list, "dbh" );
   plant_basal_area_sexp = get_list_element( plant_list, "basal.area" );
   plant_tht_sexp = get_list_element( plant_list, "tht" );
   plant_cr_sexp = get_list_element( plant_list, "cr" );
   plant_n_stems_sexp = get_list_element( plant_list, "n.stems" );
   plant_expf_sexp = get_list_element( plant_list, "expf" );
   plant_crown_width_sexp = get_list_element( plant_list, "crown.width" );
   plant_crown_area_sexp = get_list_element( plant_list, "crown.area" );
   plant_user_code_sexp = get_list_element( plant_list, "user.code" );


/* read the plants */

   PROTECT( plant_plot_sexp = coerceVector( plant_plot_sexp, INTSXP ) );
   PROTECT( plant_plant_sexp = coerceVector( plant_plant_sexp, INTSXP ) );
   PROTECT( plant_sp_code_sexp = coerceVector( plant_sp_code_sexp, STRSXP
) );

   PROTECT( plant_d6_sexp = coerceVector( plant_d6_sexp, REALSXP ) );    PROTECT( plant_d6_area_sexp = coerceVector( plant_d6_area_sexp, REALSXP ) );

   PROTECT( plant_dbh_sexp = coerceVector( plant_dbh_sexp, REALSXP ) );    PROTECT( plant_basal_area_sexp = coerceVector( plant_basal_area_sexp, REALSXP ) );

   PROTECT( plant_tht_sexp = coerceVector( plant_tht_sexp, REALSXP ) );
   PROTECT( plant_cr_sexp = coerceVector( plant_cr_sexp, REALSXP ) );
   PROTECT( plant_n_stems_sexp = coerceVector( plant_n_stems_sexp, INTSXP
) );

   PROTECT( plant_expf_sexp = coerceVector( plant_expf_sexp, REALSXP ) );    PROTECT( plant_crown_width_sexp = coerceVector( plant_crown_width_sexp, REALSXP ) );
   PROTECT( plant_crown_area_sexp = coerceVector( plant_crown_area_sexp, REALSXP ) );
   PROTECT( plant_user_code_sexp = coerceVector( plant_user_code_sexp, INTSXP ) );     /* sort the species codes based on sp_code */     qsort( (void*)SPECIES_PTR,

            (size_t)(N_SPECIES),
            sizeof( struct SPECIES_RECORD ),
	        compare_species_by_sp_code );

/* assign the plot array */

   for( i = 0; i < s_ptr->n_plants; i++ )    {

      s_ptr->plants_ptr[i].plot = INTEGER( plant_plot_sexp )[i];
      s_ptr->plants_ptr[i].plant = INTEGER( plant_plant_sexp )[i];
      strcpy( temp_sp_code, CHAR( STRING_ELT( plant_sp_code_sexp, i ) ) );

      /* get the species code and look up the correct index */
      sp_ptr = get_species_entry_from_code(    N_SPECIES,
					       SPECIES_PTR,
					       temp_sp_code );
      if( !sp_ptr )
      {
	 Rprintf( "couldn't find the species code for %s, %s\n",
		  temp_sp_code, CHAR( STRING_ELT( plant_sp_code_sexp, i ) ) );
	 continue;
      }

      /* this is the index of the "unsorted" array */
      s_ptr->plants_ptr[i].sp_idx = sp_ptr->idx;
      s_ptr->plants_ptr[i].d6 = REAL( plant_d6_sexp )[i];
      s_ptr->plants_ptr[i].d6_area = REAL( plant_d6_area_sexp )[i];
      s_ptr->plants_ptr[i].dbh = REAL( plant_dbh_sexp )[i];
      s_ptr->plants_ptr[i].basal_area = REAL( plant_basal_area_sexp )[i];
      s_ptr->plants_ptr[i].tht = REAL( plant_tht_sexp )[i];
      s_ptr->plants_ptr[i].cr = REAL( plant_cr_sexp )[i];
      s_ptr->plants_ptr[i].n_stems = INTEGER( plant_n_stems_sexp )[i];
      s_ptr->plants_ptr[i].expf = REAL( plant_expf_sexp )[i];
      s_ptr->plants_ptr[i].crown_width = REAL( plant_crown_width_sexp )[i];
      s_ptr->plants_ptr[i].crown_area = REAL( plant_crown_area_sexp )[i];
      s_ptr->plants_ptr[i].user_code = INTEGER( plant_user_code_sexp )[i];

      /* Rprintf( "dbh = %lf\n", s_ptr->plants_ptr[i].dbh );  */
      /* perform some basic error checking here */
      /* see if you can use the ISNAN macro here */

      /* try the isnan macro */
/*       if( ISNAN( REAL( plant_d6_sexp )[i] ) || s_ptr->plants_ptr[i].d6
< 0.0 ) */
/*       { */
/* 	 s_ptr->plants_ptr[i].d6 = 0.0; */
/*       } */

      if( ISNA( REAL( plant_d6_sexp )[i] ) ||
	  ISNAN( REAL( plant_d6_sexp )[i] )  ||
	  s_ptr->plants_ptr[i].d6 < 0.0 )
      {
	 s_ptr->plants_ptr[i].d6 = 0.0;
      }

      if( ISNA( REAL( plant_dbh_sexp )[i] ) ||
	  ISNAN( REAL( plant_dbh_sexp )[i] )  ||
	  s_ptr->plants_ptr[i].dbh < 0.0 )
      {
	 s_ptr->plants_ptr[i].dbh = 0.0;
      }

      if( ISNAN( REAL( plant_tht_sexp )[i] )  || s_ptr->plants_ptr[i].expf
< 0.0 )
      {
	 s_ptr->plants_ptr[i].tht = 0.0;
      }

      if( ISNAN( REAL( plant_cr_sexp )[i] )  || s_ptr->plants_ptr[i].cr <
0.0 )
      {
	 s_ptr->plants_ptr[i].cr = 0.0;
      }

      if( ISNAN( REAL( plant_expf_sexp )[i] )  ||
s_ptr->plants_ptr[i].expf < 0.0 )
      {
	 s_ptr->plants_ptr[i].expf = 0.0;
      }

      if( ISNAN( REAL( plant_crown_width_sexp )[i] )  ||
s_ptr->plants_ptr[i].crown_width < 0.0 )
      {
	 s_ptr->plants_ptr[i].crown_width = 0.0;
      }

      if( ISNAN( REAL( plant_crown_area_sexp )[i] )  ||
s_ptr->plants_ptr[i].crown_area < 0.0 )
      {
	 s_ptr->plants_ptr[i].crown_area = 0.0;
      }

/*       s_ptr->plants_ptr[i].crown_area = REAL( plant_crown_area_sexp
)[i]; */

   }

/* now sort the species back to the "native" order (by index) */
   qsort( (void*)SPECIES_PTR,

	   (size_t)(N_SPECIES),
	   sizeof( struct SPECIES_RECORD ),
	   compare_species_by_idx );

   UNPROTECT( 8 ); /* plot lists */
   UNPROTECT( 14 );    UNPROTECT( 1 ); /* plot_list */
   UNPROTECT( 1 ); /* plant list */

   return s_ptr;

}

I appolgise for the long email, but I'd rather appolgize than ask permission.

I'm sure there's something I don't understand about the PROTECT/UNPROTECT sequence as this seems to work on smaller data.frames

Thanks,
Jeff.

-- 
Jeff D. Hamann
Forest Informatics, Inc.
PO Box 1421
Corvallis, Oregon 97339-1421
phone 541-754-1428
fax 541-752-0288
jeff.hamann@forestinformatics.com
http://www.forestinformatics.com

______________________________________________
R-devel@stat.math.ethz.ch mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel
Received on Tue Nov 30 04:34:00 2004

This archive was generated by hypermail 2.1.8 : Fri 18 Mar 2005 - 09:01:54 EST