R-alpha: converting S-style help files

Thomas Lumley (thomas@biostat.washington.edu)
Tue, 24 Sep 1996 08:53:34 -0700 (PDT)


Date: Tue, 24 Sep 1996 08:53:34 -0700 (PDT)
From: Thomas Lumley <thomas@biostat.washington.edu>
To: R testers <r-testers@stat.math.ethz.ch>
Subject: R-alpha: converting S-style help files
In-Reply-To: <Pine.SUN.3.91.960920165618.3386A-100000@zen>


While most S code runs on R without modification, the same is not true for 
S help files.  Attached is a sed/awk script to convert files from the S 
troff format to R help files.  It is a modification of a program by 
Chambers & Hastie for converting S help files to TeX. 

It does not hyperlink the SEE ALSO section of the help page -- that still 
has to be done by hand. Also note that S help files can contain 
unbalanced parentheses, either by design (e.g. the half-open interval (0,1]) 
or by accident. These need to be fixed or the R help tools will crash.

The code uses "nawk" instead of the original "awk".  On some systems (our 
SunOS 4.1 ones) awk is an older version that can't escape double quotes 
and nawk is the version you need.  Under Linux, awk works.  If your Unix 
doesn't have nawk then awk should work.


thomas lumley       Penguin: the name is supposed to come from 
PhD student         the Welsh _pen_gwyn_ meaning "white head". 
Biostatistics       As penguins have black heads and do not live 
U. Washington       within 10000 miles of Wales it is difficult 
Seattle WA 98195    to see how this theory arose.

----------------------------------------------
#!/bin/sh
# Converts S(-PLUS) documentation files to R format (more-or-less)
#
# Usage: this-script S-help-file > R-help-file
#
# The S troff source is first rewritten by sed (font changes, escapes,
# quoting, a few well-known names wrapped in LANG(...)), then an awk
# program translates the troff macro lines (.BG, .FN, .AG, ...) into the
# R help markup (TITLE(...), USAGE(...), ARGUMENTS(...), ...).
#
# BUGS: Doesn't hyperlink SEEALSO properly.
#       Subsequent R tools fail if there are unbalanced parentheses
#       in the *input* text.
#       Every .FN line after the first still writes a "<name>.tex" file in
#       the current directory (left over from the TeX converter).
#
# adapted from doc_to_tex (in s.to.latex, by Chambers & Hastie)
# thomas lumley
#

# Run the awk program given as arguments.  nawk is preferred (some old
# systems ship an "awk" that cannot escape double quotes); plain awk is
# used when nawk is not installed.
run_awk() {
  if command -v nawk >/dev/null 2>&1; then
    nawk "$@"
  else
    awk "$@"
  fi
}

# Stage 1 (stdin -> stdout): inline troff markup to intermediate markup.
# The order of the expressions matters: \f2/\f1 are parked as @em/em@ so
# that the later backslash and brace escaping does not touch them.
s_markup_to_macros() {
  sed \
    -e 's/\\f2/@em /g' -e 's/\\f1/em@/g' \
    -e 's/\\\~/\~/g' \
    -e 's/\\\.\(.*\)$/COMMENT(\1)/g' \
    -e "s/$(printf '\t')/    /g" -e 's/\\/&(&backslash&)/g' -e 's/[{}]/\\&/g' \
    -e 's/@em/{\\em/g' -e 's/em@/}/g' \
    -e '/^\./s/""/@/g' -e '/^\./s/"//g' -e '/^\./s/\@/"/g' \
    -e "s/\`\([^']*\)'/LANG(\1)/g" -e 's/\$/\\$/g' \
    -e 's/GLIM/LANG(glim)/g' \
    -e 's/GENSTAT/LANG(genstat)/g' \
    -e 's/FORTRAN/LANG(fortran)/g' \
    -e 's/GAM/LANG(gam)/g' \
    -e 's/GLM/LANG(glm)/g' \
    -e 's/LM/LANG(lm)/g' \
    -e 's/UNIX/LANG(unix)/g' \
    -e 's/ASCII/LANG(ASCII)/g' \
    -e 's/LINPACK/LANG(LINPACK)/g'
}

# Stage 2 (stdin -> stdout): troff macro lines to R help markup.
# NOTE: the awk program lives inside single quotes, so it must not
# contain an apostrophe anywhere, comments included.
macros_to_rhelp() {
  run_awk '
# Send a diagnostic to standard error.  (A bare ">2" in awk would create
# a file called "2" instead.)
function warn(msg) {
    print msg | "cat 1>&2"
}
# Fields 2..NF of the current record joined by single blanks.
function restOfLine(    i, text) {
    text = $2
    for (i = 3; i <= NF; i++) text = text " " $i
    return text
}
# Start a bold paragraph heading, closing any open groups first.
function paragraph(name, doarg) {
    Check(doarg)
    print "PARA BOLD(" name ")"
}
# Close an open USAGE(/EXAMPLES( group; when doarg is true also close an
# open ARGUMENTS( group.
function Check(doarg) {
    if (inExample) {
        if (inExample > 1) print ")"
        inExample = 0
        inbodyEX = 0
    }
    if (doarg && inArgument) {
        inArgument = 0
        print ")"
    }
}
# Print fields 2..NF, each followed by a blank.  The field is passed as
# data, never as the printf format, so "%" in help text is safe.
function printFields(newline,    i) {
    for (i = 2; i <= NF; i++) printf "%s ", $i
    if (newline) printf "\n"
}
# One argument/component entry.
function itemline() {
    print "ARG(" restOfLine() "@@)"
}
# Lazily open the group announced by .EX (EXAMPLES) or .CS/.Cs (USAGE)
# when its first body line arrives.  inExample: 0 none, 1 pending, 2 open.
function checkExample() {
    if (inExample == 1) {
        if (Eheader != 0) {
            print "BLANK\n"
            paragraph(Eheader, 1)
            print "BLANK\n"
            print "EXAMPLES("
        } else {
            print "USAGE("
        }
        inExample = 2
    }
}
/^\.I/ {
    if (NF > 1) {
        # A short last field (punctuation) is moved next to the closing
        # parenthesis instead of being printed as a separate word.
        if (NF > 2 && length($NF) <= 2) { trailer = $NF ")\n"; NF-- }
        else trailer = ")\n"
        printf "ITALIC( "; printFields(0); printf "%s", trailer
    } else {
        print "LANG("
        atEnd = 1
        # Copy lines up to the next macro line; stop at end of input
        # rather than looping forever.
        while ((getline) > 0) {
            if ($1 ~ /^\./) { atEnd = 0; break }
            print
        }
        print ")"
        # At end of input the last line has already been printed; do not
        # let the rules below see it again.
        if (atEnd) next
        # otherwise go on to match something else with the macro line
    }
}
/^\.BG/ {
    nf = inArgument = inExample = inbodyEX = 0
    if ($2 == "D") header = "COMPONENTS"
    else header = "VALUE"
}
/^\.FN/ {
    if (nf++) {
        # Additional names: cross-reference stub in TeX form.
        out = $2 ".tex"
        print "\\seeTitle{" $2 "}{See {\\tt " fun "}}" > out
    }
    else fun = $2
}
/^\.TL/ {
    getline
    print "TITLE(" fun " @@ " $0 " )"
    next
}
/^\.EX/ {
    Eheader = " "
    inExample = 1
}
/^\.CS/ {
    Eheader = 0
    inExample = 1
}
/^\.Cs/ {
    Eheader = 0
    inExample = 1
    inbodyEX = 1
}
/^\.RC/ {
    Check(0)
    itemline()
}
/^\.AG/ {
    Check(0)
    if (!inArgument) {
        inArgument++
        print "ARGUMENTS("
    }
    itemline()
}
/^\.AO/ {
    Check(0)
    print ""
    printf "Arguments to "
    if (NF == 2) printf "%s()", $2
    else if (NF == 3) printf "%s() and %s()", $2, $3
    else {
        for (i = 2; i < NF; i++) printf "%s(), ", $i
        printf "and %s()", $NF
    }
    printf " can also be supplied\n"
}
/^\.Xp/ {
    Check(0)
    print ""
    print "For an example of the output of LANG(" fun "()), see Figure \\ref{" $2 "} on page \\pageref{" $2 "}."
}
/^\.GE/ {
    Check(0)
    print ""
    print "This is a generic function."
    print "Functions with names beginning in LANG(\"" fun ".\") will be methods for this function."
}
/^\.ME/ {
    Check(0)
    print ""
    print "This function is a method for the generic function LANG(" $2 "()) for class LANG(\"" $3 "\")."
    print "It can be invoked by calling LANG(" $2 "(x)) for an object LANG(x)"
    print "of the appropriate class, or directly by calling LANG(" $2 "." $3 "()),"
    print "regardless of the class of the object."
}
/^\.PP/ { print "" }
/^\.RT/ { paragraph(header, 1); header = "" }
/^\.SA/ { paragraph("SEE ALSO", 1) }
/^\.SE/ { paragraph("SIDE EFFECTS", 1) }
/^\.SH/ { paragraph(restOfLine(), 1) }
/^\.Ce/ { Check(0) }
/^\.WR/ {
    Check(1)
    print "PARA ITALIC(File automatically converted from S(-PLUS) help format)"
}
/^\.BL/ {
    print ""
    listType = "itemize"
}
/^\.NL/ {
    print ""
    listType = "enumerate"
}
/^\.LE/ { print "" }
/^\.LI/ { print "PARA BOLD(-) " }
/^\.C[io]/ { print "LANG(" $2 ")" $3 }
/^\.Cq/ { print "LANG(\"" $2 "\")" }
/^\.EQ/ {
    warn("Warning: .EQ stuff in " fun)
    # Skip the equation; stop at .EN or at end of input.
    while ((getline) > 0) {
        if ($1 ~ /^\.EN/) break
    }
    next
}
/^\.GR/ {
    Check(0)
    print ""
    print "Graphical parameters (see LANG(par())) may also be supplied as arguments to this function."
}
/^\.NA/ { print "Missing values (LANG(NA)s) are allowed." }
/^\.PI/ { warn("Documentation for " fun " expects a figure") }
/^\.Si/ { checkExample(); printf "> "; printFields(1) }
/^\.S\+/ { checkExample(); printf "+ "; printFields(1) }
/^\.Tl/ {
    print "In addition, the high-level graphics control arguments described under LANG(plot.default())"
    print "and the arguments to LANG(title()) may be supplied to this function."
}
# Ordinary text: anything that does not start with a dot.
/^[^.]/ {
    checkExample()
    if (inbodyEX) printf "  "
    print
}'
}

# Convert the S help file named by $1, writing R help markup to stdout.
# Returns non-zero (with a message on stderr) if the file is unreadable.
convert_s_help() {
  if [ ! -r "$1" ]; then
    printf '%s: cannot read "%s"\n' "${0##*/}" "$1" >&2
    return 1
  fi
  s_markup_to_macros < "$1" | macros_to_rhelp
}

main() {
  if [ "$#" -lt 1 ]; then
    printf 'Usage: %s S-help-file > R-help-file\n' "${0##*/}" >&2
    return 2
  fi
  convert_s_help "$1"
}

main "$@"






=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
r-testers mailing list -- To (un)subscribe, send
subscribe	or	unsubscribe
(in the "body", not the subject !)  To: r-testers-request@stat.math.ethz.ch
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-