Martin Maechler · Martin Maechler · Martin Maechler · 76c90b28 · 76c90b28 · 76c90b28
--- a/DESCRIPTION
+++ b/DESCRIPTION
 Package: CLA
 Version: 0.90-1
-Date: 2018-01-25
+Date: 2018-02-05
 Title: Critical Line Algorithm in Pure R
 Author: Yanhao Shi <syhelena@163.com>,
 	Martin Maechler <maechler@stat.math.ethz.ch>

--- a/R/findSigMu.R
+++ b/R/findSigMu.R
-## From Yanhao Shi's thesis -- R/Functions/CLA_analysis.R
+#### From Yanhao Shi's thesis -- R/Functions/CLA_analysis.R

-findSig <- function(Mu0, result, covar, equal.tol){ # equal.tol > 1e-16
+## Vectorized in first argument by Martin Maechler
+
+### Version 1 -- of the code, simply "vectorize via lapply" -- not so fast
+
+findMu <- function(Sig0, result, covar, tol.unir = 1e-6, equal.tol = 1e-6) {
+    R <- lapply(Sig0, findMu.1,
+                result=result, covar=covar, tol.unir=tol.unir, equal.tol=equal.tol)
+    ## each R[[i]] has (Mu, weight)
+    list(Mu     = vapply(R, `[[`, numeric(1),           "Mu"),
+         weight = vapply(R, `[[`, numeric(nrow(covar)), "weight"))
+}
+
+findSig <- function(Mu0, result, covar, equal.tol = 1e-6) { # equal.tol > 1e-16
+    R <- lapply(Mu0, findSig.1,
+                result=result, covar=covar, equal.tol=equal.tol)
+    ## each R[[i]] has (Sig, weight):
+    list(Sig    = vapply(R, `[[`, numeric(1),           "Sig"),
+         weight = vapply(R, `[[`, numeric(nrow(covar)), "weight"))
+}
+
+
+##' Now simplified and very close to findSig0() -- just returning weights additionally
+findSig.1 <- function(Mu0, result, covar, equal.tol) { # equal.tol > 1e-16
+  stopifnot(length(Mu0) == 1L)
  ms.w <- result$MS_weight
  nd <- order(ms.w[, "Mu"])
  n <- length(nd)
-
  ## revert order for all (w, mu, sig):
  ms.w <- ms.w[nd, ]
  w <- result$weights_set[, nd]
-  ind.uni <- c(TRUE, sapply(1:(n-1), function(i) !
-                              isTRUE(all.equal(ms.w[i, ], ms.w[i+1, ], tol = equal.tol))))
-
-  p <- rep(1, n)  # pointer, different weights of same ms have same pointer
-  k <- 1
-  for(i in 1:n){
-    if(ind.uni[i]){
-      p[i] <- k
-      k <- k + 1
-    }else{
-      p[i] <- p[i-1]
-    }
-  }
-
-  mu.w <- ms.w[ind.uni, "Mu"]
-  sig.w <- ms.w[ind.uni, "Sig"]
-  if(Mu0 < min(mu.w) || Mu0 > max(mu.w)) stop(sprintf("Mu0 must be in [%g, %g]",
-                                                     max(mu.w), min(mu.w)))
+  mu.w  <- ms.w[, "Mu"]
+  sig.w <- ms.w[, "Sig"]
+  if(Mu0 < min(mu.w) || Mu0 > max(mu.w))
+      stop(sprintf("Mu0 must be in [%g, %g]", min(mu.w), max(mu.w)))
  i <- findInterval(Mu0, mu.w) # [..)...[..]
-  if(i == n) { # Mu0 is max(mu.w)
+  if(i == n || # Mu0 is max(mu.w)
+     isTRUE(all.equal(mu.w[i], mu.w[i+1], tol = equal.tol))) {
    list(Sig = sig.w[i], weight = w[, i])
  } else {
-    m1 <- which(p == i)
-    m2 <- which(p == i+1)
    a <- (Mu0 - mu.w[i+1])/(mu.w[i] - mu.w[i+1])
    # solve for a : Mu0 = a* Mu1 + (1-a)* Mu2 :
-    wm <- matrix(0, n, m1*m2)
-    for(s in m1){
-        wm[,(s-1)*m2 + 1:m2] <- a*w[, s] + (1-a)*w[, m2]
-    }
-    list(Sig = sqrt(colSums(wm *(covar %*% wm))), #return Sig0
-         weight = wm) # might have replicate columns
+    wm <- a*w[, i] + (1-a)*w[, i+1]
+    list(Sig = sqrt(colSums(wm *(covar %*% wm) )),
+         weight = wm)
  }
 }

-
+##' This is much cleaner and logical than the original findSig() ==> use a version of it
 ##' A version of findSig() only used inside  findMu():
 findSig0 <- function(Mu0, result, covar, equal.tol = 1e-8){ # equal.tol > 1e-16
+  stopifnot(length(Mu0) == 1L)
  ms.w <- result$MS_weight
  nd <- order(ms.w[, "Mu"])
  n <- length(nd)
-
  ## revert order for all (w, mu, sig):
  ms.w <- ms.w[nd, ]
  w <- result$weights_set[, nd]
-
  mu.w  <- ms.w[, "Mu"]
  sig.w <- ms.w[, "Sig"]
  i <- findInterval(Mu0, mu.w) # [..)...[..]
-  if(i == n){ # Mu0 is max(mu.w)
-    sig.w[i]
-  } else if(isTRUE(all.equal(mu.w[i], mu.w[i+1], tol = equal.tol))){
+  if(i == n || isTRUE(all.equal(mu.w[i], mu.w[i+1], tol = equal.tol))) {
    sig.w[i]
  } else {
    a <- (Mu0 - mu.w[i+1])/(mu.w[i] - mu.w[i+1])
@@ -69,21 +70,21 @@ findSig0 <- function(Mu0, result, covar, equal.tol = 1e-8){ # equal.tol > 1e-16
  }
 }

-findMu <- function(Sig0, result, covar, tol.unir = 1e-6, equal.tol = 1e-6){
+findMu.1 <- function(Sig0, result, covar, tol.unir = 1e-6, equal.tol = 1e-6) {
+  stopifnot(length(Sig0) == 1L)
  ms.w <- result$MS_weight
  nd <- order(ms.w[, "Sig"])
  n <- length(nd)
  mu.w  <- ms.w[nd, "Mu"]
  sig.w <- ms.w[nd, "Sig"]
  w <- result$weights_set[, nd]
-  if(Sig0 < min(sig.w) || Sig0 > max(sig.w)) stop(sprintf("Sig0 must be in [%g, %g]",
-                                                          max(sig.w), min(sig.w)))
+  if(Sig0 < min(sig.w) || Sig0 > max(sig.w))
+      stop(sprintf("Sig0 must be in [%g, %g]", min(sig.w), max(sig.w)))
  i <- findInterval(Sig0, sig.w)
-  if(i == n){ # Mu0 is max(mu.w)
+  if(i == n || # Mu0 is max(mu.w)
+     isTRUE(all.equal(mu.w[i], mu.w[i+1], tol = equal.tol))) {
    list(Mu = mu.w[i], weight = w[, i])
-  }else if(isTRUE(all.equal(mu.w[i], mu.w[i+1], tol = equal.tol))){
-    list(Mu = mu.w[i], weight = w[, i])
-  } else{
+  } else { ## FIXME: here are using default equal.tol = 1e-8 in findSig0() !
    r <- uniroot(function(mu) findSig0(mu, result, covar) - Sig0,
                 interval = mu.w[c(i, i+1)], tol=tol.unir)
    Mu0 <- r$root
@@ -94,6 +95,54 @@ findMu <- function(Sig0, result, covar, tol.unir = 1e-6, equal.tol = 1e-6){
  }
 }

+
+### Version 2 --- vectorize inside -- only those parts that need it
+
+## not yet !!!  === For first part, look at
+## ===========      findSigMu.R+
+##                  ============
+if(FALSE) ## will be .... not yet
+findMu <- function(Sig0, result, covar, tol.unir = 1e-6, equal.tol = 1e-6){
+  ms.w <- result$MS_weight
+  nd <- order(ms.w[, "Sig"])
+  n <- length(nd)
+   mu.w <- ms.w[nd, "Mu"]
+  sig.w <- ms.w[nd, "Sig"]
+  w <- result$weights_set[, nd]
+  if(Sig0 < min(sig.w) || Sig0 > max(sig.w))
+      stop(sprintf("Sig0 must be in [%g, %g]", min(sig.w), max(sig.w)))
+  ini <- findInterval(Sig0, sig.w)
+  m <- length(Sig0)
+  mu0 <- numeric(m)
+  wt0 <- matrix(NA_real_, n, m)
+  iBnd <- vapply(ini, function(i) {
+      (i == n || # Mu0 is max(mu.w)
+       isTRUE(all.equal(mu.w[i], mu.w[i+1], tol = equal.tol))) # duplicate turning pt
+  }, NA)
+  if(any(iBnd)) {
+      i <- ini[iBnd]
+      mu0[ iBnd] <- mu.w[i]
+      wt0[,iBnd] <- w[, i]
+  }
+  if(any(iIn <- !iBnd)) { # regular case
+      i <- ini[iIn]
+      mus <- vapply(..., function(i) {
+          r <- uniroot(function(mu) findSig0(mu, result, covar) - Sig0[i],
+                       interval = mu.w[c(i, i+1)], tol = tol.unir)
+          ## TODO check convergence?
+          r$root
+      }, numeric(1))
+
+      ## solve for a : mu = a* Mu1 + (1-a)* Mu2  ==>
+      a <- (Mu0 - mu.w[i+1])/(mu.w[i] - mu.w[i+1])
+      w0 <- a* w[, i] + (1-a)*w[, i+1]
+      mu0[ii] <- mu.w[i]
+      wt0[,ii] <- w[, i]
+
+  }
+  list(Mu = Mu0, weight = w0)
+}
+




--- a/TODO
+++ b/TODO
@@ -2,11 +2,11 @@

 * ASAP (no longer "Before release of package")
 ** check arguments  e.g., lB <= uB, sum upper Bounds >= 1
-** TODO References --> (partly done)
+** TODO References --> (mostly done)
 *** DONE 1) References from the thesis, including the "buried" python-paper with *WRONG* algo
 *** TODO 2) Master thesis: I'd like the thesis to be on our web page
 ** DONE CLA() should return a (S3) class, "CLA" w/ print() and plot() methods
-** TODO findMu() and findSig() regression check examples *before* changing R/findSigMu.R
+** TODO findMu() and findSig() regression check examples *before* much changing R/findSigMu.R
 ** TODO Improve plot() method, using hyperbolic interpolation see  R/CLA.R  man/plot.CLA.Rd
 ** TODO  A. Norring's Masters thesis has a small 12-asset example (from a published source).
   We should add that as a minimally small data set to use in examples,

--- a/man/CLA.Rd
+++ b/man/CLA.Rd
@@ -110,4 +110,6 @@ if(require(Matrix)) { ## visualize how weights change "along turning points"
             xlab = "turning point", ylab = "asset number"))
 }
 }
+\keyword{optimize}
+\keyword{arith}

--- a/man/findMu.Rd
+++ b/man/findMu.Rd
 \name{findMu}
 \alias{findMu} %% keep in sync with ./findSig.Rd
-%%                                    ^^^^^^^^^^ %% ??? TODO ??? one help page also for  findSig() ???
+%%                                    ^^^^^^^^^^ ?? TODO ?? one help page also for  findSig() ???
 \title{Find  mu(W) and W, given sigma(W) and CLA result}
 \description{
-  Find \eqn{\mu(W)} and \eqn{W}, given \eqn{\sigma(W}) and \code{\link{CLA}} result.
+  Find \eqn{\mu(W)} and \eqn{W}, given \eqn{\sigma(W}) and
+  \code{\link{CLA}} result.
 }
 \usage{
 findMu(Sig0, result, covar, tol.unir = 1e-06, equal.tol = 1e-06)
 }
 \arguments{
  \item{Sig0}{numeric vector of \eqn{\sigma(W)} values.}
-  \item{result}{ .. }
-  \item{covar}{  ... }
+  \item{result}{a \code{\link{list}} with components \code{MS_weight}
+    and \code{weights_set} as resulting from \code{\link{CLA}()}.}
+  \item{covar}{the same \eqn{n \times n}{n x n} covariance matrix (of
+    asset returns) as the argument of \code{\link{CLA}()}.}
  \item{tol.unir}{numeric tolerance passed to \code{\link{uniroot}}.}
  \item{equal.tol}{numeric tolerance to be used in
-    \code{\link{all.equal}(.., tolerance = equal.tol)}.}% TODO:  is this correct?
+    \code{\link{all.equal}(.., tolerance = equal.tol)} in the check to
+    see if the \eqn{\mu}{mu} of two neighbouring turning points are
+    equal.}
 }
 \value{
  a \code{\link{list}} with components
-  \item{Mu}{.....}
-  \item{weight}{.....}
+  \item{Mu}{numeric vector of same length, say \eqn{M}, as \code{Sig0}.}
+  \item{weight}{numeric \eqn{n \times M}{n x M} matrix of weights.}
 }
 \references{
  Master thesis, p.33
@@ -30,7 +35,21 @@ findMu(Sig0, result, covar, tol.unir = 1e-06, equal.tol = 1e-06)
 \code{\link{CLA}}, \code{\link{MS}}.
 }
 \examples{
-## TODO
+data(muS.sp500)
+## Full data taking too much time for example
+set.seed(2016)
+iS <- sample.int(length(muS.sp500$mu), 17)
+cov17 <- muS.sp500$covar[iS, iS]
+CLsp.17 <- CLA(muS.sp500$mu[iS], covar=cov17, lB=0, uB = 1/2)
+CLsp.17 # 16 turning points
+summary(tpS <- CLsp.17$MS_weights[,"Sig"])
+str(s0 <- seq(0.0186, 0.0477, by = 0.0001))
+mu.. <- findMu(s0, result=CLsp.17, covar=cov17)
+str(mu..)
+stopifnot(dim(mu..$weight) == c(17, length(s0)))
+plot(s0, mu..$Mu, xlab=quote(sigma), ylab = quote(mu),
+     type = "o", cex = 1/4)
+points(CLsp.17$MS_weights, col = "tomato", cex = 1.5)
 }
-%% \keyword{ ~kwd1 }% use one of  RShowDoc("KEYWORDS")
-%% \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
+\keyword{optimize}
+\keyword{arith}
--- a/man/findSig.Rd
+++ b/man/findSig.Rd
@@ -2,37 +2,56 @@
 \alias{findSig} %% keep in sync with ./findMu.Rd
 \title{Find sigma(W) and W, given mu(W) and CLA result}
 \description{
-  Find \eqn{\sigma(W)} and \eqn{W}, given \eqn{\mu(W}) and \code{\link{CLA}} result.
+  Find \eqn{\sigma(W)} and \eqn{W}, given \eqn{\mu(W}) and
+  \code{\link{CLA}} result.
 }
 \usage{
 findSig(Mu0, result, covar, equal.tol)
 }
 \arguments{
-  \item{Mu0}{numeric vector}
-  \item{result}{ .. }
-  \item{covar}{  ... }
+  \item{Mu0}{numeric vector of \eqn{\mu(W)} values.}
+  \item{result}{a \code{\link{list}} with components \code{MS_weight}
+    and \code{weights_set} as resulting from \code{\link{CLA}()}.}
+  \item{covar}{the same \eqn{n \times n}{n x n} covariance matrix (of
+    asset returns) as the argument of \code{\link{CLA}()}.}
  \item{equal.tol}{numeric tolerance to be used in
-    \code{\link{all.equal}(.., tolerance = equal.tol)}.}% TODO:  is this correct?
+    \code{\link{all.equal}(.., tolerance = equal.tol)} in the check to
+    see if the \eqn{\mu}{mu} of two neighbouring turning points are
+    equal.}
 }
 %% \details{
 %% %%  ~~ If necessary, more details than the description above ~~
 %% }
 \value{
  a \code{\link{list}} with components
-  \item{Sig}{.....}
-  \item{weight}{.....}
+  \item{Sig}{numeric vector of same length, say \eqn{M}, as \code{Mu0}.}
+  \item{weight}{numeric \eqn{n \times M}{n x M} matrix of weights.}
 }
 \references{
  Master thesis, p.33
 }
-
 \seealso{
 \code{\link{findMu}},
 \code{\link{CLA}}, \code{\link{MS}}.
 }
 \examples{
-
- ##  ...
-
+data(muS.sp500)
+## Full data taking too much time for example: Subset of n=21:
+set.seed(2018)
+iS <- sample.int(length(muS.sp500$mu), 21)
+cov21 <- muS.sp500$covar[iS, iS]
+CLsp.21 <- CLA(muS.sp500$mu[iS], covar=cov21, lB=0, uB = 1/2)
+CLsp.21 # 14 turning points
+summary(tpM <- CLsp.21$MS_weights[,"Mu"])
+str(m0 <- c(min(tpM),seq(0.00205, 0.00525, by = 0.00005), max(tpM)))
+sig. <- findSig(m0, result=CLsp.21, covar=cov21)
+str(sig.)
+stopifnot(dim(sig.$weight) == c(21, length(m0)))
+plot(sig.$Sig, m0, xlab=quote(sigma), ylab = quote(mu),
+     type = "o", cex = 1/4)
+points(CLsp.21$MS_weights, col = "tomato", cex = 1.5)
+title("Efficient Frontier from CLA()")
+mtext("findSig() to interpolate between turning points", side=3)
 }
-%% \keyword{ ~kwd1 }% use one of  RShowDoc("KEYWORDS")
+\keyword{arith}
+\keyword{dplot}