[Rd] writeForeignSAS and potential extensions

From: Stephen Weigand <Weigand.Stephen_at_mayo.edu>
Date: Thu 13 Jul 2006 - 18:48:19 GMT


Dear R-devel,

I've made some potential extensions to writeForeignSAS in 'foreign' that I wanted to pass along if anyone is interested. I've attached the diff -u output against the version found in foreign_0.8-15 and an .R file with my changes. (In this .R file, the function is named writeForeignSAS7 to simplify testing/comparisons.)

I've tried to alter the current version as little as possible while making the following changes:

If it'd be helpful to make any changes or add anything, I'd be happy try to do so.

Finally, some testing code that works in SAS 6.12, 8.2, and 9.

d <-
  structure(list(a.b = as.integer(c(1, 2)),

                 alphabetsoup = 
                 structure(as.integer(c(1, 2)),
                   .Label = c("A", "B"), 
                   class = "factor"),
                 datevar1 = structure(c(13342, 12977), 
                                      class = "Date"),
                 datetimevar1 = structure(c(1152802685, 
                                            1152716285),
                   class = c("POSIXt", "POSIXct")),
                 charactervariable = c("L", 
                    "Last, First")),
            .Names = c("a.b", "alphabetsoup",
                "datevar1", "datetimevar1",
                "charactervariable"),
            row.names = c("1", "2"),
            class = "data.frame")

require(foreign)

### adQuote here to (temporarily) avoid ':::' adQuote <- function (x) paste("\"", x, "\"", sep = "")

dfile <- file.path(tempdir(), "test.dat") cfile <- file.path(tempdir(), "test.sas") write.foreign(d, datafile = dfile, codefile = cfile,

              package = "SAS7", validvarname = "V6") file.show(dfile)
file.show(cfile)

Sincerely,

Stephen

::::::::::::::::::::::::::::::::::  

Stephen Weigand
Division of Biostatistics
Mayo Clinic Rochester, Minn., USA
Phone (507) 266-1650, fax 284-9542

+  x <- sub("^([0-9])", "_\\1", varnames)
+  x <- gsub("[^a-zA-Z0-9_]", "_", x)
+  x <- abbreviate(x, minlength = nmax)
+  
+  if (any(nchar(x) > nmax) || any(duplicated(x)))
+    stop("Cannot uniquely abbreviate the variable names to ",
+         nmax, " or fewer characters")
+  names(x) <- varnames
+  x
+}
+
+make.SAS.formats <- function(varnames){  
+  x <- sub("^([0-9])", "_\\1", varnames)
+  x <- gsub("[^a-zA-Z0-9_]", "_", x)
+  x <- sub("([0-9])$", "\\1f", x) # can't end in digit so append 'f'
+  x <- abbreviate(x, minlength = 8)
+  
+  if(any(nchar(x) > 8) || any(duplicated(x)))
+    stop("Cannot uniquely abbreviate format names to conform to ",
+         " eight-character limit and not ending in a digit")
+  names(x) <- varnames
+  x  
+}
+
+writeForeignSAS7<-function(df,datafile,codefile,dataname="rdata",
+                          validvarname = c("V7", "V6")){

   factors <- sapply(df, is.factor)
   strings <- sapply(df, is.character)
-

+  dates <- sapply(df, FUN = function(x) inherits(x, "Date"))
+  datetimes <- sapply(df, FUN = function(x) inherits(x, "POSIXct"))
+  

   varlabels <- names(df)

-  varnames <- abbreviate(names(df), 8)
-  if (any(sapply(varnames, nchar) > 8)) 
-    stop("Cannot abbreviate the variable names to eight or fewer letters")
-  if (any(abbreviated <- (varnames != varlabels))) 
-    message("Some variable names were abbreviated.")
+  varnames <- make.SAS.names(names(df), validvarname = validvarname)
+ if (any(varnames != varlabels))
+ message("Some variable names were abbreviated or otherwise altered.")        

   dfn<-df
   if (any(factors))
     dfn[factors]<-lapply(dfn[factors], as.numeric)

+  if (any(datetimes))
+    dfn[datetimes] <- lapply(dfn[datetimes],
+                             FUN = function(x) format(x, "%d%b%Y %H:%M:%S"))
   write.table(dfn, file = datafile, row = FALSE, col = FALSE, 
-              sep = ",", quote = FALSE, na = ".")
+              sep = ",", quote = TRUE, na = "")

   lrecl<-max(sapply(readLines(datafile),nchar))+4  

   cat("* Written by R;\n", file=codefile) @@ -22,24 +53,50 @@

   cat("* ",deparse(sys.call(-2))[1],";\n\n",file=codefile,append=TRUE)    if (any(factors)){
     cat("PROC FORMAT;\n",file=codefile,append=TRUE)

-    for(v in 1:ncol(df)){
-      if (factors[v]){
-        cat("value ",varnames[v],"\n",file=codefile,append=TRUE)
-        values<-levels(df[[v]])
+    fmtnames <- make.SAS.formats(varnames[factors])
+    fmt.values <- lapply(df[, factors, drop = FALSE], levels)
+    names(fmt.values) <- fmtnames
+    for (f in fmtnames){
+      cat("value",f,"\n",file=codefile,append = TRUE)
+      values<-fmt.values[[f]]
         for(i in 1:length(values)){
           cat("    ",i,"=",adQuote(values[i]),"\n",file=codefile,append=TRUE)
         }
         cat(";\n\n",file=codefile,append=TRUE)
-      }
-      }
+     }

   }  

   cat("DATA ",dataname,";\n",file=codefile,append=TRUE)

+
+  if (any(strings)){    
+    cat("LENGTH", file = codefile, append = TRUE)
+    lengths <- sapply(df[,strings, drop = FALSE],
+                      FUN = function(x) max(nchar(x)))
+    names(lengths) <- varnames[strings]
+    for(v in varnames[strings])
+      cat("\n", v, "$", lengths[v],file=codefile,append=TRUE)
+    cat("\n;\n\n", file = codefile, append = TRUE)
+  }
+
+  if (any(dates)){    
+    cat("INFORMAT", file = codefile, append = TRUE)
+    for(v in varnames[dates])
+      cat("\n", v, file = codefile, append = TRUE)
+    cat("\n YYMMDD10.\n;\n\n", file = codefile, append = TRUE)
+  }                  
+
+  if (any(datetimes)){    
+    cat("INFORMAT", file = codefile, append = TRUE)
+    for(v in varnames[datetimes])
+      cat("\n", v, file = codefile, append = TRUE)
+    cat("\n DATETIME18.\n;\n\n", file = codefile, append = TRUE)
+  }
+  
   cat("INFILE ",adQuote(datafile),
-      "\n     DELIMITER=','",
+      "\n     DSD", 
       "\n     LRECL=",lrecl,";\n",
       file=codefile,append=TRUE)

-
+

   cat("INPUT",file=codefile,append=TRUE)    for(v in 1:ncol(df)){
     cat("\n",varnames[v],file=codefile,append=TRUE) @@ -49,16 +106,26 @@

   cat("\n;\n",file=codefile,append=TRUE)  

   for(v in 1:ncol(df)){
- if (abbreviated[v])
+ if (varnames[v] != names(varnames)[v])

       cat("LABEL ",varnames[v],"=",adQuote(varlabels[v]),";\n",
           file=codefile,append=TRUE)
-  } 
-
-  for(v in 1:ncol(df)){
-    if(factors[v])
-      cat("FORMAT ",varnames[v],paste(varnames[v],".",sep=""),";\n",
+  }
+  
+  if (any(factors)){
+    for (f in 1:length(fmtnames)) 
+      cat("FORMAT", names(fmtnames)[f],paste(fmtnames[f],".",sep = ""),";\n",
           file=codefile,append=TRUE)

   }
-
+  
+  if (any(dates)){    
+    for(v in varnames[dates])
+      cat("FORMAT", v, "yymmdd10.;\n", file = codefile, append = TRUE)
+  }        
+  
+  if (any(datetimes)){    
+    for(v in varnames[datetimes])
+      cat("FORMAT", v, "datetime18.;\n", file = codefile, append = TRUE)
+  }
+  

   cat("RUN;\n",file=codefile,append=TRUE)  }

make.SAS.names <- function(varnames, validvarname = c("V7", "V6")){   validvarname <- match.arg(validvarname)   nmax <- if(validvarname == "V7") 32 else 8   

  x <- sub("^([0-9])", "_\\1", varnames)
  x <- gsub("[^a-zA-Z0-9_]", "_", x)
  x <- abbreviate(x, minlength = nmax)
  

  if (any(nchar(x) > nmax) || any(duplicated(x)))     stop("Cannot uniquely abbreviate the variable names to ",

         nmax, " or fewer characters")
  names(x) <- varnames
  x
}

make.SAS.formats <- function(varnames){

  x <- sub("^([0-9])", "_\\1", varnames)
  x <- gsub("[^a-zA-Z0-9_]", "_", x)
  x <- sub("([0-9])$", "\\1f", x) # can't end in digit so append 'f'
  x <- abbreviate(x, minlength = 8)
  

  if(any(nchar(x) > 8) || any(duplicated(x)))     stop("Cannot uniquely abbreviate format names to conform to ",

         " eight-character limit and not ending in a digit")   names(x) <- varnames
  x
}

writeForeignSAS7<-function(df,datafile,codefile,dataname="rdata",

                          validvarname = c("V7", "V6")){
  factors <- sapply(df, is.factor)
  strings <- sapply(df, is.character)
  dates <- sapply(df, FUN = function(x) inherits(x, "Date"))   datetimes <- sapply(df, FUN = function(x) inherits(x, "POSIXct"))   

  varlabels <- names(df)
  varnames <- make.SAS.names(names(df), validvarname = validvarname)   if (any(varnames != varlabels))
    message("Some variable names were abbreviated or otherwise altered.")      

  dfn<-df
  if (any(factors))
    dfn[factors]<-lapply(dfn[factors], as.numeric)   if (any(datetimes))
    dfn[datetimes] <- lapply(dfn[datetimes],

                             FUN = function(x) format(x, "%d%b%Y %H:%M:%S"))
  write.table(dfn, file = datafile, row = FALSE, col = FALSE, 
              sep = ",", quote = TRUE, na = "")
  lrecl<-max(sapply(readLines(datafile),nchar))+4
  cat("* Written by R;\n", file=codefile)
  cat("* ",deparse(sys.call(-2))[1],";\n\n",file=codefile,append=TRUE)
  if (any(factors)){

    cat("PROC FORMAT;\n",file=codefile,append=TRUE)     fmtnames <- make.SAS.formats(varnames[factors])     fmt.values <- lapply(df[, factors, drop = FALSE], levels)     names(fmt.values) <- fmtnames
    for (f in fmtnames){
      cat("value",f,"\n",file=codefile,append = TRUE)
      values<-fmt.values[[f]]
        for(i in 1:length(values)){
          cat("    ",i,"=",adQuote(values[i]),"\n",file=codefile,append=TRUE)
        }
        cat(";\n\n",file=codefile,append=TRUE)
     }

  }

  cat("DATA ",dataname,";\n",file=codefile,append=TRUE)

  if (any(strings)){
    cat("LENGTH", file = codefile, append = TRUE)     lengths <- sapply(df[,strings, drop = FALSE],

                      FUN = function(x) max(nchar(x)))
    names(lengths) <- varnames[strings]
    for(v in varnames[strings])
      cat("\n", v, "$", lengths[v],file=codefile,append=TRUE)     cat("\n;\n\n", file = codefile, append = TRUE)   }

  if (any(dates)){
    cat("INFORMAT", file = codefile, append = TRUE)     for(v in varnames[dates])
      cat("\n", v, file = codefile, append = TRUE)     cat("\n YYMMDD10.\n;\n\n", file = codefile, append = TRUE)   }

  if (any(datetimes)){
    cat("INFORMAT", file = codefile, append = TRUE)     for(v in varnames[datetimes])
      cat("\n", v, file = codefile, append = TRUE)     cat("\n DATETIME18.\n;\n\n", file = codefile, append = TRUE)   }   

  cat("INFILE ",adQuote(datafile),
      "\n     DSD", 
      "\n     LRECL=",lrecl,";\n",
      file=codefile,append=TRUE)
  

  cat("INPUT",file=codefile,append=TRUE)   for(v in 1:ncol(df)){
    cat("\n",varnames[v],file=codefile,append=TRUE)     if(strings[v])
      cat(" $ ",file=codefile,append=TRUE)   }
  cat("\n;\n",file=codefile,append=TRUE)

  for(v in 1:ncol(df)){
    if (varnames[v] != names(varnames)[v])

      cat("LABEL ",varnames[v],"=",adQuote(varlabels[v]),";\n",
          file=codefile,append=TRUE)

  }   

  if (any(factors)){
    for (f in 1:length(fmtnames))

      cat("FORMAT", names(fmtnames)[f],paste(fmtnames[f],".",sep = ""),";\n",
          file=codefile,append=TRUE)

  }   

  if (any(dates)){
    for(v in varnames[dates])

      cat("FORMAT", v, "yymmdd10.;\n", file = codefile, append = TRUE)
  }        
  

  if (any(datetimes)){
    for(v in varnames[datetimes])
      cat("FORMAT", v, "datetime18.;\n", file = codefile, append = TRUE)   }   

  cat("RUN;\n",file=codefile,append=TRUE) }



R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel Received on Fri Jul 14 05:03:09 2006

Archive maintained by Robert King, hosted by the discipline of statistics at the University of Newcastle, Australia.
Archive generated by hypermail 2.1.8, at Fri 14 Jul 2006 - 06:24:20 GMT.

Mailing list information is available at https://stat.ethz.ch/mailman/listinfo/r-help. Please read the posting guide before posting to the list.