## ## Various R functions ## (c) 2005-2006, Vincent Zoonekynd ## ############################################################ # Functions that should (eventually) be here: # eda.ts: Exploratory plots for time series # eda: Exploratory plots (hist, qqnorm, ...) # eda2: Exploratory plots for two variables (plot, # density estimation, hexbin, MeanPerFractile, # loess, linear regression, ...) # my.hist: Histogram, density estimation, etc. # p.hist: Position of one or more values wrt a histogram # my.qqnorm: qqnorm, with a histogram in a corner, woth # the p-value of a normality test # barplot2D: 2-dimensional barplot ############################################################ ## ## barplot2D(area, colour) ## ## 2-dimensional barplot, to represent two quantitative ## variables, one of them positive. ## ## The algorithm is not that obvious. ## - Start with a rectangle, representing 100%, to be ## filled by other rectangles. ## - Try to put the first rectangle on the left ## - If it too elongated, try to put two rectangles, on ## top of each other, on the left ## - Go on, until you are satisfied ## - When you have put those rectangles, proceed with the ## remaining of the large rectangle container. ## More precisely, we choose the number of rectables to ## stack so as to minimize the following penalty: ## penalty for the first rectangle in the stack + penalty for the last ## where the penalty of a rectangle is ## ratio - 1.1 ## where "ratio" is the ratio of the longer side by the ## smaller. ## ## Arguments: ## area: vector, containing positive number (NAs are ## discarded), to be used as the area of the ## rectangles ## colour: vector (same length) of strings containing ## the colours You can create it with "rgb", or ## "cm.colors". ## threshold: The maximum acceptable aspect ratio of the ## rectangles ## width, height: Dimensions of the initial rectangle. ## I suggest to plot the picture in a ## rectangular device, e.g., ## pdf(width=6, height=4) ## but to tell the function that this ## rectangle is actually a square, i.e., ## barplot2D(area, colour, width=1, height=1) ## so that the cells be horizontal ## rectangles: you get more space to add ## labels ## ## Returns: ## A matrix, one row per cell, containing the x- and ## y-coordinates of the corners of all the cells (first ## eight columns), and the coordinates of the center of ## those cells (last two columns). ## The rows are in one-to-one correspondance with the ## elements of the "area" vector: if there were missing ## values, we have rows of missing values. ## The row names are the same as the names of the "area" ## vector, in the same order. ## barplot2D <- function (area, colour, threshold=1.1, width=1, height=1) { stopifnot(is.vector(area), is.vector(colour), length(area) == length(colour), !all(is.na(area))) if (is.null(names(area))) { names(area) <- as.character(1:length(area)) } area0 <- area if (any(is.na(area))) { warning("Discarding NAs") i <- which(!is.na(area)) area <- area[i] colour <- colour[i] } stopifnot(all(area>=0), sum(area)>0) i <- order(-area) area <- area[i] colour <- colour[i] n <- length(area) res <- matrix(NA, nr=n, nc=8) colnames(res) <- as.vector(t(outer(LETTERS[1:4], 1:2, paste, sep=""))) rownames(res) <- names(area) A <- c(0,height) B <- c(0,0) C <- c(width,0) D <- c(width,height) plot.new() plot.window(xlim=c(0,1), ylim=c(0,1)) i <- 1 while (i <= n) { lambda <- cumsum(area[i:n]) / sum(area[i:n]) mu <- area[i] / cumsum(area[i:n]) nu <- area[i:n] / cumsum(area[i:n]) penalty1 <- mu * sum(abs(A-B)) / ( lambda * sum(abs(A-D)) ) penalty1 <- ifelse(penalty1 <= threshold, 0, penalty1 - threshold) penalty2 <- lambda * sum(abs(A-D)) / ( nu * sum(abs(A-B)) ) penalty2 <- ifelse(penalty2 <= threshold, 0, penalty2 - threshold) j <- which.min(penalty1 + penalty2)[1] + i - 1 cat(i, " => ", j, "\n") lambda <- sum(area[i:j]) / sum(area[i:n]) A1 <- A B1 <- B C1 <- (1-lambda) * B + lambda * C D1 <- (1-lambda) * A + lambda * D AA <- C1 BB <- C CC <- D DD <- D1 while (i <= j) { lambda <- area[i] / sum(area[i:j]) B2 <- (1-lambda) * A1 + lambda * B1 C2 <- (1-lambda) * D1 + lambda * C1 polygon(rbind(A1, B2, C2, D1), col=colour[i]) res[i,] <- c(A1, B2, C2, D1) A1 <- B2 D1 <- C2 i <- i + 1 } A <- AA B <- BB C <- CC D <- DD } # Main loop res0 <- matrix(NA, nr=length(area0), nc=10) colnames(res0) <- c(colnames(res), "x", "y") rownames(res0) <- names(area0) res0[ names(area), 1:8] <- res res0[, "x"] <- apply(res0[,c("A1","B1","C1","D1")],1,mean) res0[, "y"] <- apply(res0[,c("A2","B2","C2","D2")],1,mean) invisible(res0) } # # Example: # N <- 20 # area <- rlnorm(N) # names(area) <- LETTERS[1:N] # value <- rt(N, df=4) # # Difficult part: compute the colours... # colour <- cm.colors(255)[ # 1 + round( # 254 * (value - min(value, na.rm = TRUE)) / # diff(range(value, na.rm = TRUE)) # ) # ] # r <- barplot2D(area, colour) # title("2-dimensional barplot") # # Add the labels # text(r[,"x"], r[,"y"], names(area), cex=.8) random.rotation.matrix <- function (n=3) { m <- NULL for (i in 1:n) { x <- rnorm(n) x <- x / sqrt(sum(x*x)) y <- rep(0,n) if (i>1) for (j in 1:(i-1)) { y <- y + sum( x * m[,j] ) * m[,j] } x <- x - y x <- x / sqrt(sum(x*x)) m <- cbind(m, x) } m } ## TODO: ## - Stationarity tests eda.ts <- function (x, bands=FALSE) { op <- par(no.readonly = TRUE) par(mar=c(0,0,0,0), oma=c(1,4,2,1)) # Compute the Ljung-Box p-values # (we only display them if needed, i.e., if we have any reason of # thinking it is white noise). p.min <- .05 k <- 15 p <- rep(NA, k) for (i in 1:k) { p[i] <- Box.test(x, i, type = "Ljung-Box")$p.value } if( max(p)>p.min ) { par(mfrow=c(5,1)) } else { par(mfrow=c(4,1)) } if(!is.ts(x)) x <- ts(x) plot(x, axes=FALSE); axis(2); axis(3); box(lwd=2) if(bands) { a <- time(x) i1 <- floor(min(a)) i2 <- ceiling(max(a)) y1 <- par('usr')[3] y2 <- par('usr')[4] if( par("ylog") ){ y1 <- 10^y1 y2 <- 10^y2 } for (i in seq(from=i1, to=i2-1, by=2)) { polygon( c(i,i+1,i+1,i), c(y1,y1,y2,y2), col='grey', border=NA ) } lines(x) } acf(x, axes=FALSE); axis(2, las=2); box(lwd=2); mtext("ACF", side=2, line=2.5) pacf(x, axes=FALSE); axis(2, las=2); box(lwd=2); mtext("ACF", side=2, line=2.5) spectrum(x, col=par('fg'), log="dB", main="", axes=FALSE ) axis(2, las=2); box(lwd=2); mtext("Spectrum", side=2, line=2.5) abline(v=1, lty=2, lwd=2) abline(v=2:10, lty=3) abline(v=1/2:5, lty=3) if( max(p)>p.min ) { main <- plot(p, type='h', ylim=c(0,1), lwd=3, main="", axes=F) axis(2, las=2) box(lwd=2) mtext("Ljung-Box p-value", side=2, line=2.5) abline(h=c(0,.05),lty=3) } par(op) }