Date: Fri 16 Dec 2005 - 04:04:37 EST

As a toy example, given an input matrix called "input", my algorithm looks like this:

## code start

# Toy example: repeatedly merge pairs of rows that differ in exactly one
# position, replacing that position with "x", until no such pair is left.

ncolumns <- 6

# all 2^ncolumns binary combinations, one per row (from package e1071)
input <- e1071::bincombinations(ncolumns)

# subset, let's say 97% of rows
keep <- sample(2^ncolumns, round(2^ncolumns * 0.97, 0))
input <- input[keep, , drop = FALSE]

repeat {

  # marks the rows of input that get merged with another row in this pass
  minimized <- logical(nrow(input))

  # a comparison needs at least two rows; stop once fewer are left
  if (nrow(input) < 2) {
    break
  }

  # every pair of row indices, one pair per row (from package combinat)
  to.be.compared <- combinat::combn2(seq_len(nrow(input)))

  # the following line takes _a lot_ of time, for millions of comparisons
  # (one column per pair: TRUE where the two rows hold the same value)
  logical.result <- apply(to.be.compared, 1, function(idx) {
    input[idx[1], ] == input[idx[2], ]
  })

  # pairs whose two rows differ in exactly one position
  compare.minimized <- which(colSums(!logical.result) == 1)

  # no such pair: nothing was merged in this pass, so we are done
  if (length(compare.minimized) == 0) {
    break
  }

  # drop = FALSE keeps a matrix even when a single pair qualifies
  logical.result <- logical.result[, compare.minimized, drop = FALSE]

  # first row of every qualifying pair, one column per pair
  result <- t(input[to.be.compared[compare.minimized, 1], , drop = FALSE])

  # the one differing position of each pair is replaced by "x"
  # (this turns result, and from then on input, into character matrices)
  result[!logical.result] <- "x"

  minimized[unique(as.vector(to.be.compared[compare.minimized, ]))] <- TRUE

  # keep the rows that could not be merged and append the merged ones
  input <- rbind(input[!minimized, , drop = FALSE], unique(t(result)))
}

## code end

Any suggestion is welcome. Thank you very much in advance. Adrian

