##
## Various R functions
## (c) 2005-2006, Vincent Zoonekynd <zoonek@gmail.com>
##
############################################################

# Functions that should (eventually) be here:
#   eda.ts:    Exploratory plots for time series
#   eda:       Exploratory plots (hist, qqnorm, ...)
#   eda2:      Exploratory plots for two variables (plot, 
#              density estimation, hexbin, MeanPerFractile,
#              loess, linear regression, ...)
#   my.hist:   Histogram, density estimation, etc.
#   p.hist:    Position of one or more values wrt a histogram
#   my.qqnorm: qqnorm, with a histogram in a corner, woth
#              the p-value of a normality test
#   barplot2D: 2-dimensional barplot 

############################################################

##
## barplot2D(area, colour)
##
## 2-dimensional barplot, to represent two quantitative
## variables, one of them positive.
##
## The algorithm is not that obvious.
##   - Start with a rectangle, representing 100%, to be
##     filled by other rectangles.
##   - Try to put the first rectangle on the left
##   - If it too elongated, try to put two rectangles, on
##     top of each other, on the left
##   - Go on, until you are satisfied
##   - When you have put those rectangles, proceed with the
##     remaining of the large rectangle container.
## More precisely, we choose the number of rectables to
## stack so as to minimize the following penalty:
##   penalty for the first rectangle in the stack + penalty for the last
## where the penalty of a rectangle is
##   ratio - 1.1
## where "ratio" is the ratio of the longer side by the
## smaller.
##
## Arguments:
##  area:      vector, containing positive number (NAs are
##             discarded), to be used as the area of the
##             rectangles
##  colour:    vector (same length) of strings containing 
##             the colours You can create it with "rgb", or
##             "cm.colors".

##  threshold: The maximum acceptable aspect ratio of the
##             rectangles

##  width, height: Dimensions of the initial rectangle.
##                 I suggest to plot the picture in a
##                 rectangular device, e.g.,
##                   pdf(width=6, height=4)
##                 but to tell the function that this
##                 rectangle is actually a square, i.e.,
##                   barplot2D(area, colour, width=1, height=1)
##                 so that the cells be horizontal
##                 rectangles: you get more space to add
##                 labels
##
## Returns:
##   A matrix, one row per cell, containing the x- and
##   y-coordinates of the corners of all the cells (first
##   eight columns), and the coordinates of the center of
##   those cells (last two columns).
##   The rows are in one-to-one correspondance with the
##   elements of the "area" vector: if there were missing
##   values, we have rows of missing values.
##   The row names are the same as the names of the "area"
##   vector, in the same order.
##

barplot2D <- function (area, colour, 
                       threshold=1.1, 
                       width=1, height=1) {
  stopifnot(is.vector(area), is.vector(colour),
            length(area) == length(colour),
            !all(is.na(area)))
  if (is.null(names(area))) {
    names(area) <- as.character(1:length(area))
  }
  area0 <- area
  if (any(is.na(area))) {
    warning("Discarding NAs")
    i <- which(!is.na(area))
    area <- area[i]
    colour <- colour[i]
  }
  stopifnot(all(area>=0), sum(area)>0)
  i <- order(-area)
  area <- area[i]
  colour <- colour[i]
  n <- length(area)
  res <- matrix(NA, nr=n, nc=8)
  colnames(res) <- as.vector(t(outer(LETTERS[1:4], 1:2, paste, sep="")))
  rownames(res) <- names(area)
  A <- c(0,height)
  B <- c(0,0)
  C <- c(width,0)
  D <- c(width,height)
  plot.new()
  plot.window(xlim=c(0,1), ylim=c(0,1))
  i <- 1
  while (i <= n) {
    lambda <- cumsum(area[i:n]) / sum(area[i:n])
    mu <- area[i]   / cumsum(area[i:n])
    nu <- area[i:n] / cumsum(area[i:n])
    penalty1 <- mu * sum(abs(A-B)) / ( lambda * sum(abs(A-D)) )
    penalty1 <- ifelse(penalty1 <= threshold, 0, penalty1 - threshold)
    penalty2 <- lambda * sum(abs(A-D)) / ( nu * sum(abs(A-B)) )
    penalty2 <- ifelse(penalty2 <= threshold, 0, penalty2 - threshold)
    j <- which.min(penalty1 + penalty2)[1] + i - 1
    cat(i, " => ", j, "\n")
    lambda <- sum(area[i:j]) / sum(area[i:n])
    A1 <- A
    B1 <- B
    C1 <- (1-lambda) * B + lambda * C
    D1 <- (1-lambda) * A + lambda * D
    AA <- C1
    BB <- C
    CC <- D
    DD <- D1
    while (i <= j) {
      lambda <- area[i] / sum(area[i:j])
      B2 <- (1-lambda) * A1 + lambda * B1
      C2 <- (1-lambda) * D1 + lambda * C1
      polygon(rbind(A1, B2, C2, D1), col=colour[i])
      res[i,] <- c(A1, B2, C2, D1)
      A1 <- B2 
      D1 <- C2
      i <- i + 1
    }
    A <- AA
    B <- BB
    C <- CC
    D <- DD
  } # Main loop
  res0 <- matrix(NA, nr=length(area0), nc=10)
  colnames(res0) <- c(colnames(res), "x", "y")
  rownames(res0) <- names(area0)
  res0[ names(area), 1:8] <- res  
  res0[, "x"] <- apply(res0[,c("A1","B1","C1","D1")],1,mean)
  res0[, "y"] <- apply(res0[,c("A2","B2","C2","D2")],1,mean)
  invisible(res0)
}
#  # Example:
#  N <- 20
#  area <- rlnorm(N)
#  names(area) <- LETTERS[1:N]
#  value <- rt(N, df=4)
#  # Difficult part: compute the colours...
#  colour <- cm.colors(255)[
#    1 + round(
#      254 * (value - min(value, na.rm = TRUE)) /
#       diff(range(value, na.rm = TRUE))
#    )
#  ]
#  r <- barplot2D(area, colour)
#  title("2-dimensional barplot")
#  # Add the labels
#  text(r[,"x"], r[,"y"], names(area), cex=.8)


random.rotation.matrix <- function (n=3) {
  m <- NULL
  for (i in 1:n) {
    x <- rnorm(n)
    x <- x / sqrt(sum(x*x))
    y <- rep(0,n)
    if (i>1) for (j in 1:(i-1)) {
      y <- y + sum( x * m[,j] ) * m[,j]
    }
    x <- x - y
    x <- x / sqrt(sum(x*x))
    m <- cbind(m, x)
  }
  m
}

## TODO:
##   - Stationarity tests
eda.ts <- function (x, bands=FALSE) {
  op <- par(no.readonly = TRUE)
  par(mar=c(0,0,0,0), oma=c(1,4,2,1))
  # Compute the Ljung-Box p-values
  # (we only display them if needed, i.e., if we have any reason of
  # thinking it is white noise).
  p.min <- .05
  k <- 15
  p <- rep(NA, k)
  for (i in 1:k) {
    p[i] <- Box.test(x, i, type = "Ljung-Box")$p.value
  }
  if( max(p)>p.min ) {
    par(mfrow=c(5,1))
  } else {
    par(mfrow=c(4,1))
  }
  if(!is.ts(x))
    x <- ts(x)
  plot(x, axes=FALSE);
  axis(2); axis(3); box(lwd=2)
  if(bands) {
    a <- time(x)
    i1 <- floor(min(a))
    i2 <- ceiling(max(a))
    y1 <- par('usr')[3]
    y2 <- par('usr')[4]
    if( par("ylog") ){
      y1 <- 10^y1
      y2 <- 10^y2
    }
    for (i in seq(from=i1, to=i2-1, by=2)) {
      polygon( c(i,i+1,i+1,i), c(y1,y1,y2,y2), col='grey', border=NA )
    }
    lines(x)
  }
  acf(x, axes=FALSE); axis(2, las=2); box(lwd=2); mtext("ACF", side=2, line=2.5)
  pacf(x, axes=FALSE); axis(2, las=2); box(lwd=2); mtext("ACF", side=2, line=2.5)
  spectrum(x, col=par('fg'), log="dB", main="", axes=FALSE )
  axis(2, las=2); box(lwd=2); mtext("Spectrum", side=2, line=2.5)
  abline(v=1, lty=2, lwd=2)
  abline(v=2:10, lty=3)
  abline(v=1/2:5, lty=3)
  if( max(p)>p.min ) {
    main <- 
    plot(p, type='h', ylim=c(0,1), lwd=3, main="", axes=F)
    axis(2, las=2)
    box(lwd=2)
    mtext("Ljung-Box p-value", side=2, line=2.5)
    abline(h=c(0,.05),lty=3)
  }
  par(op)
}