Re: [R] Can I index a dataframe with a reference from/to a second dataframe?

From: jim holtman <jholtman_at_gmail.com>
Date: Fri, 8 Feb 2008 18:48:34 -0500

try this:

Bos$type <- tree$Type[match(Bos$spp, tree$spp)]

On Feb 8, 2008 3:17 PM, Thompson, David (MNR) <David.John.Thompson_at_ontario.ca> wrote:
> Hello,
>
> I am unable to figure out how to code a new column in a data frame based
> on an existing column that matches a column in a reference data frame,
> in a relational-db fashion. I would like this to maintain a minimum set
> of reference tables that may be reused over several similar datasets.
>
> Specifically, I have two data frames as listed below, 'Bos' and 'tree.'
> For each case in 'Bos' I want to look up the matching 'spp' code in
> 'tree' and insert the associated 'type' code into a new 'type' column in
> 'Bos' as in:
>
> # add type and keep factors from reference list
> Bos$type <- tree[as.character(tree$spp)==as.character(Bos$spp),
> 'type']
> Bos$keep <- tree[tree$spp==Bos$spp, 'keep']
>
> And I know I have seen this before but, can't remember where. I have
> filtered through many of the threads referencing
> 'as.numeric(levels(Bos$spp))[as.integer(Bos$spp)]' handling of factors,
> any kind of sql reference I could think of, anything 'ODBC'-ish but, I
> think this may be an indexing issue. I am trying to compare elements of
> two different sized (list) objects (different type objects even?) and
> not cycling through Bos$spp to find matches in tree$spp as expected, . .
> . , has this an apply solution?
>
> My data frames:
> > dput(head(Bos, 30))
> structure(list(oplt = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), rplt = c(3,
> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> 0, 0, 0, 0, 0, 0, 0, 0), tree = c(32, 101, 102, 103, 104, 105,
> 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118,
> 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129), spp =
> structure(c(10L,
> 10L, 12L, 14L, 10L, 10L, 14L, 10L, 15L, 10L, 9L, 3L, 10L, 10L,
> 12L, 10L, 13L, 12L, 12L, 10L, 12L, 10L, 10L, 8L, 5L, 2L, 10L,
> 2L, 12L, 10L), .Label = c("AW", "BD", "BE", "BF", "BW", "BY",
> "CB", "HE", "IW", "MH", "MR", "OR", "PO", "SW", "SA"), class =
> "factor"),
> dbh = c(12.1, 10.1, 63.3, 9, 7.1, 12.1, 13.9, 6.3, 6.1, 7.9,
> 5.1, 9.8, 7.1, 18.7, 44.2, 28.7, 19.8, 28, 46.6, 9, 61.6,
> 3.3, 9.1, 8.7, 5.8, 3.1, 11.1, 12.3, 28, 8.6), cc = structure(c(2L,
> 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L,
> 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L), .Label =
> c("dom",
> "sup"), class = "factor"), ba = c(114.990145103020,
> 80.1184666481737,
> 3147.0040469356, 63.6172512351933, 39.5919214168654,
> 114.990145103020,
> 151.746779150021, 31.1724531052447, 29.2246656600190,
> 49.0166993776348,
> 20.4282062299676, 75.429639612691, 39.5919214168654,
> 274.645883758454,
> 1534.38526793979, 646.924613208844, 307.907495978336,
> 615.7521601036,
> 1705.53923570736, 63.6172512351933, 2980.24045490142,
> 8.55298599939821,
> 65.0388219109427, 59.4467869875528, 26.4207942166902,
> 7.54767635024948,
> 96.7689077121996, 118.8228881404, 615.7521601036, 58.0880481648753
> )), .Names = c("oplt", "rplt", "tree", "spp", "dbh", "cc",
> "ba"), row.names = c("1", "2", "3", "4", "5", "6", "7", "8",
> "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
> "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30"
> ), class = "data.frame")
>
> > dput(tree)
> structure(list(spp = structure(1:33, .Label = c("AB", "AS", "AW",
> "BD", "BE", "BF", "BW", "BY", "CA", "CB", "CC", "CE", "DL", "DP",
> "EA", "HE", "IW", "LC", "MH", "MM", "MR", "OR", "PO", "PR", "PV",
> "PW", "RS", "SA", "SB", "SM", "SW", "VC", "VL"), class = "factor"),
> spp.orig = structure(c(1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 12L,
> 10L, 13L, 11L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
> 23L, 24L, 25L, 26L, 27L, 28L, 31L, 29L, 30L, 32L, 33L, 34L
> ), .Label = c("AB", "AMEHUM", "AMESPP", "AW", "BD", "BE",
> "BF", "BW", "BY", "CB", "CE", "CORALT", "CORCOR", "DIELON",
> "DIRPAL", "EA", "HE", "IW", "LONCAN", "MH", "MM", "MR", "OR",
> "PO", "PR", "PRUVIR", "PW", "RIBSPP", "SB", "SM", "SORAME",
> "SW", "VIBACE", "VIBALN"), class = "factor"), OPL = structure(c(15L,
>
> 7L, 14L, 29L, 13L, 2L, 9L, 8L, 10L, 23L, 1L, 28L, 11L, 12L,
> 31L, 30L, 17L, 16L, 5L, 6L, 4L, 25L, 22L, 20L, 24L, 21L,
> 26L, 27L, 19L, 3L, 18L, 32L, 33L), .Label = c("HCORCAN",
> "WABIBAL", "WACEPEN", "WACERUB", "WACESAS", "WACESPI", "WAMESPP",
> "WBETALL", "WBETPAP", "WCORALT", "WDIELON", "WDIRPAL", "WFAGGRA",
> "WFRAAME", "WFRANIG", "WLONCAN", "WOSTVIR", "WPICGLA", "WPICMAR",
> "WPINRES", "WPINSTR", "WPOPTRE", "WPRUSER", "WPRUVIV", "WQUERUB",
> "WRIBAME", "WSORAME", "WTHUOCC", "WTILAME", "WTSUCAN", "WULMAME",
> "WVIBACE", "WVIBLAO"), class = "factor"), form = c(1.1, 1.2,
> 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.2, 1.1, 2, 1.1, 1.2, 1.2,
> 1.1, 1.1, 1.1, 1.2, 1.1, 1.2, 1.1, 1.1, 1.1, 1.1, 1.2, 1.1,
> 1.2, 1.1, 1.1, 1.2, 1.1, 1.2, 1.2), Type = structure(c(2L,
> 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
> 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
> 2L, 2L), .Label = c("H", "W"), class = "factor"), keep =
> structure(c(1L,
> 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
> 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
> 1L, 1L), .Label = c("no", "yes"), class = "factor"), Sname =
> structure(c(15L,
> 6L, 14L, 29L, 13L, 1L, 8L, 7L, 9L, 23L, 10L, 28L, 11L, 12L,
> 31L, 30L, 17L, 16L, 4L, 5L, 3L, 25L, 22L, 20L, 24L, 21L,
> 26L, 27L, 19L, 2L, 18L, 32L, 33L), .Label = c("Abies balsamea",
> "Acer pensylvanicum", "Acer rubrum", "Acer saccharum", "Acer
> spicatum",
> "Amelanchier", "Betula alleghaniensis", "Betula papyrifera",
> "Cornus alternifolia", "Cornus canadensis", "Diervilla lonicera",
> "Dirca palustris", "Fagus grandifolia", "Fraxinus americana",
> "Fraxinus nigra", "Lonicera canadensis", "Ostrya virginiana",
> "Picea glauca", "Picea mariana", "Pinus resinosa", "Pinus strobus",
> "Populus tremuloides", "Prunus serotina", "Prunus virginiana",
> "Quercus rubra", "Ribes ", "Sorbus americana", "Thuja occidentalis",
>
> "Tilia americana", "Tsuga canadensis", "Ulmus americana",
> "Viburnum acerifolium", "Viburnum lantanoides"), class = "factor"),
> Cname = structure(c(7L, 27L, 30L, 2L, 3L, 6L, 31L, 33L, 1L,
> 8L, 10L, 15L, 11L, 21L, 4L, 14L, 20L, 17L, 18L, 23L, 25L,
> 24L, 29L, 26L, 12L, 16L, 13L, 5L, 9L, 28L, 32L, 22L, 19L), .Label =
> c("Alternate-leaved Dogwood",
> "American Basswood", "American Beech", "American Elm", "American
> Mountain-ash",
> "Balsam Fir", "Black Ash", "Black Cherry", "Black Spruce",
> "Bunchberry", "Bush Honeysuckle", "Choke Cherry", "Currant",
> "Eastern Hemlock", "Eastern White Cedar", "Eastern White Pine",
> "Fly Honeysuckle", "Hard Maple", "Hobblebush", "Ironwood",
> "Leatherwood", "Maple-leaved Viburnum", "Mountain Maple",
> "Northern Red Oak", "Red Maple", "Red Pine", "Serviceberry",
> "Striped Maple", "Trembling Aspen", "White Ash", "White Birch",
> "White Spruce", "Yellow Birch"), class = "factor")), .Names =
> c("spp",
> "spp.orig", "OPL", "form", "Type", "keep", "Sname", "Cname"), row.names
> = c("1",
> "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14",
> "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25",
> "26", "27", "28", "29", "30", "31", "32", "33", "34"), class =
> "data.frame")
>
> Thanks, DaveT.
> *************************************
> Silviculture Data Analyst
> Ontario Forest Research Institute
> Ontario Ministry of Natural Resources
> david.john.thompson_at_ontario.ca
> http://ofri.mnr.gov.on.ca
>
> ______________________________________________
> R-help_at_r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>

-- 
Jim Holtman
Cincinnati, OH
+1 513 646 9390

What is the problem you are trying to solve?

______________________________________________
R-help_at_r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.
Received on Fri 08 Feb 2008 - 23:53:15 GMT

Archive maintained by Robert King, hosted by the discipline of statistics at the University of Newcastle, Australia.
Archive generated by hypermail 2.2.0, at Sat 09 Feb 2008 - 00:30:13 GMT.

Mailing list information is available at https://stat.ethz.ch/mailman/listinfo/r-help. Please read the posting guide before posting to the list.

list of date sections of archive