**From:** David States (*dstates@bioinformatics.med.umich.edu*)

**Date:** Tue 04 Feb 2003 - 10:37:09 EST

**Next message:**Archaux Frederic: "[R] basic question"**Previous message:**Yang, Richard: "Re: [R] Mtext and xyplot"

Message-id: <8D9D9C12B61BD242AD67248A007140270A2901@mail.bicc.med.umich.edu>

Last year there was a request seeking functions to plot Venn diagrams in

R. Seeing no reply and for other reasons needing this, I wrote a quick

routine. The general problem of creating a Venn diagram with overlap

areas proportional to the number of counts in each overlap is over

determined. An approximate solution is to make the area of the circles

and pair wise overlap areas proportional to the number of counts in

each. Using just the pair wise overlaps ignores (or hopes for the best

on :) ) the area of the three-way overlap. R code that implements this

is shown below.

Usage: A, B and C are Boolean vectors of equal length, e.g.

A = runif(100) < 0.5

B = runif(100) < 0.4

C = runif(100) < 0.6

d = list()

d$table = table(A,B,C)

d$labels = c("A","B","C")

plot.venn.diagram(d)

This is pre-alpha code, caveat emptor. In particular, the code to

position the labels needs work. If someone wants to convert this to a

package or add it to an existing package, feel free to do so.

To optimize the solution over all of the overlaps or if you have more

than three sets, you can use a random sampling approach, but

implementing this in R is too slow to be useful.

David

David J. States, M.D., Ph.D.

Professor of Human Genetics

Director of Bioinformatics

University of Michigan School of Medicine

Medical Science Building I, Room 5443

Ann Arbor, MI 48109

USA

venn.overlap <-

function(r, a, b, target = 0)

{

#

# calculate the overlap area for circles of radius a and b

# with centers separated by r

# target is included for the root finding code

#

pi = acos(-1)

if(r >= a + b) {

return( - target)

}

if(r < a - b) {

return(pi * b * b - target)

}

if(r < b - a) {

return(pi * a * a - target)

}

s = (a + b + r)/2

triangle.area = sqrt(s * (s - a) * (s - b) * (s - r))

h = (2 * triangle.area)/r

aa = 2 * atan(sqrt(((s - r) * (s - a))/(s * (s - b))))

ab = 2 * atan(sqrt(((s - r) * (s - b))/(s * (s - a))))

sector.area = aa * (a * a) + ab * (b * b)

overlap = sector.area - 2 * triangle.area

return(overlap - target)

}

plot.venn.diagram <-

function(d)

{

#

# Draw Venn diagrams with proportional overlaps

# d$table = 3 way table of overlaps

# d$labels = array of character string to use as labels

#

pi = acos(-1)

csz = 0.1

# Normalize the data

n = length(dim(d$table))

c1 = vector(length = n)

c1[1] = sum(d$table[2, , ])

c1[2] = sum(d$table[, 2, ])

c1[3] = sum(d$table[, , 2])

n1 = c1

#

c2 = matrix(nrow = n, ncol = n, 0)

c2[1, 2] = sum(d$table[2, 2, ])

c2[2, 1] = c2[1, 2]

c2[1, 3] = sum(d$table[2, , 2])

c2[3, 1] = c2[1, 3]

c2[2, 3] = sum(d$table[, 2, 2])

c2[3, 2] = c2[2, 3]

n2 = c2

#

c3 = d$table[2, 2, 2]

n3 = c3

c2 = c2/sum(c1)

c3 = c3/sum(c1)

c1 = c1/sum(c1)

n = length(c1)

# Radii are set so the area is proporitional to number of counts

pi = acos(-1)

r = sqrt(c1/pi)

r12 = uniroot(venn.overlap, interval = c(max(r[1] - r[2], r[2] - r[1],

0) + 0.01, r[1] + r[2] - 0.01), a = r[1], b = r[

2], target = c2[1, 2])$root

r13 = uniroot(venn.overlap, interval = c(max(r[1] - r[3], r[3] - r[1],

0) + 0.01, r[1] + r[3] - 0.01), a = r[1], b = r[

3], target = c2[1, 3])$root

r23 = uniroot(venn.overlap, interval = c(max(r[2] - r[3], r[3] - r[2],

0) + 0.01, r[2] + r[3] - 0.01), a = r[2], b = r[

3], target = c2[2, 3])$root

s = (r12 + r13 + r23)/2

x = vector()

y = vector()

x[1] = 0

y[1] = 0

x[2] = r12

y[2] = 0

angle = 2 * atan(sqrt(((s - r12) * (s - r13))/(s * (s - r13))))

x[3] = r13 * cos(angle)

y[3] = r13 * sin(angle)

xc = cos(seq(from = 0, to = 2 * pi, by = 0.01))

yc = sin(seq(from = 0, to = 2 * pi, by = 0.01))

cmx = sum(x * c1)

cmy = sum(y * c1)

x = x - cmx

y = y - cmy

rp=sqrt(x*x + y*y)

frame()

par(usr = c(-1, 1, -1, 1), pty = "s")

box()

for(i in 1:3) {

lines(xc * r[i] + x[i], yc * r[i] + y[i])

}

xl = (rp[1] + (0.7 * r[1])) * x[1]/rp[1]

yl = (rp[1] + (0.7 * r[1])) * y[1]/rp[1]

text(xl, yl, d$labels[1])

text(xl, yl - csz, d$table[2, 1, 1])

xl = (rp[2] + (0.7 * r[2])) * x[2]/rp[2]

yl = (rp[2] + (0.7 * r[2])) * y[2]/rp[2]

text(xl, yl, d$labels[2])

text(xl, yl - csz, d$table[1, 2, 1])

xl = (rp[3] + (0.7 * r[3])) * x[3]/rp[3]

yl = (rp[3] + (0.7 * r[3])) * y[3]/rp[3]

text(xl, yl, d$labels[3])

text(xl, yl - csz, d$table[1, 1, 2])

#

text((x[1] + x[2])/2, (y[1] + y[2])/2, d$table[2, 2, 1])

text((x[1] + x[3])/2, (y[1] + y[3])/2, d$table[2, 1, 2])

text((x[2] + x[3])/2, (y[2] + y[3])/2, d$table[1, 2, 2])

#

text(0, 0, n3)

list(r = r, x = x, y = y, dist = c(r12, r13, r23), count1 = c1, count2 =

c2, labels = d$labels)

}

______________________________________________

R-help@stat.math.ethz.ch mailing list

http://www.stat.math.ethz.ch/mailman/listinfo/r-help

**Next message:**Archaux Frederic: "[R] basic question"**Previous message:**Yang, Richard: "Re: [R] Mtext and xyplot"

*
This archive was generated by hypermail 2.1.3
: Tue 01 Jul 2003 - 09:11:21 EST
*