Martin Maechler · Martin Maechler · 7150fa66 · 7150fa66 · 7150fa66 · 7150fa66
--- a/DESCRIPTION
+++ b/DESCRIPTION
 Package: CLA
-Version: 0.90-2
-Date: 2018-09-10
+Version: 0.95-0
+Date: 2018-10-05
 Title: Critical Line Algorithm in Pure R
 Author: Yanhao Shi <syhelena@163.com>,
 	Martin Maechler <maechler@stat.math.ethz.ch>

--- a/R/CLA.R
+++ b/R/CLA.R
@@ -43,7 +43,8 @@ computeW <- function(lam, inv, wB) {
    ## 1) compute gamma
    g <- (-lam * inv.s[2] + (1- sum(wB) + inv.s[3]))/inv.s[1]
    ## 2) compute free weights
-    list(wF = - inv[,3] + g * inv[,1] + lam * inv[,2], gamma = g)
+    list(wF = - inv[,3] + g * inv[,1] + lam * inv[,2],
+         gamma = g)
 }

 ## computeLambda --------------------------------------------------------------
@@ -80,7 +81,7 @@ MS <- function(weights_set, mu, covar) {


 CLA <- function(mu, covar, lB, uB, tol.lambda = 1e-7,
-                give.MS = TRUE, keep.names = TRUE) {
+                give.MS = TRUE, keep.names = TRUE, trace = 0) {
    ## minimal argument checks
    cl <- match.call()
    n <- length(mu)
@@ -93,15 +94,16 @@ CLA <- function(mu, covar, lB, uB, tol.lambda = 1e-7,
    ans <- initAlgo(mu, lB, uB)
    f <- ans$index
    w <- ans$weights
-    weights_set <- w  # store solution
-    lambdas <- NA  # The first step has no lambda or gamma, add NA instead.
-    gammas <- NA
-    free_indices <- list(f)
+    ## initialize result parts
+    lambdas <- gammas  <- numeric()
+    weights_set <- array(dim = c(n,0L))
+    free_indices <- list()
    lam <- 1 # set non-zero lam
-    while ( lam > 0 && length(f) < length(mu)) {
+    while (lam > 0 && (nf <- length(f)) <= length(mu)) {
+      if(trace) cat(sprintf("while(lam = %g > 0 & ..): |f|=%d ..\n", lam, nf))
      ## 1) case a): Bound one free weight  F -> B
      l_in <- 0
-      if(length(f) > 1 ) {
+      if(nf > 1L) {
        compl <- computeLambda(wB = w[-f], inv = inv, # inv from last step k (k > 1)
                               i = f, bi.input = cbind(lB, uB))
        lam_in <- compl$lambda
@@ -119,13 +121,13 @@ CLA <- function(mu, covar, lB, uB, tol.lambda = 1e-7,
        computeInv(get_i)
      })

-      fi <- length(f) + 1
+      if(nf < length(mu)) {
+          fi <- nf + 1L
          lam_out <- sapply(seq_along(b), function(i) {
              computeLambda(wB = w[b[-i]], inv = inv_list[[i]],
                            i = fi, bi.input = w[b[i]])
          })
-
-      if (length(lambdas) > 1 && any(!(sml <- lam_out < lam*(1-tol.lambda)))) {
+          if (length(lambdas) && any(!(sml <- lam_out < lam*(1-tol.lambda)))) {
              lam_out <- lam_out[sml]
              b       <- b      [sml]
              inv_list <- inv_list[sml]
@@ -134,9 +136,13 @@ CLA <- function(mu, covar, lB, uB, tol.lambda = 1e-7,
          i_out <- b      [k] # one only !
          l_out <- lam_out[k]
          inv_out <- inv_list[[k]]
-
+      } else { ## length(f) == length(mu)  <==>  |b| = 0
+          l_out  <- -Inf
+      }
      ## 3) decide lambda
-      lam <- max(l_in, l_out, 0)
+      lam <- max(l_in, l_out)
+      if(trace) cat(sprintf("l_{in,out}=(%g,%g) => new candidate lam=%g\n",
+                            l_in, l_out, lam))
      if(lam > 0) { # remove i_in from f; or add i_out into f
        if(l_in > l_out) {
          w[i_in] <- bi_in  # set value at the correct boundary
@@ -148,20 +154,18 @@ CLA <- function(mu, covar, lB, uB, tol.lambda = 1e-7,
          f <- c(f, i_out)
          inv <- inv_out
        }
-        compW <- computeW(lam, inv = inv, wB = w[-f])
      }
-      else{ # 4) if max(l_in, l_out) < 0, "stop" when at the min var solution!
-        compW <- computeW(lam = lam, inv = inv, wB = w[-f])
+      else{ # 4) if max(l_in, l_out) <= 0, "stop" when at the min var solution!
+        lam <- 0
        ## muF = 0 not necessary, get1 replaced by getM (ie getM from previous step)
      }
-
-      wF <- compW$wF
+      compW <- computeW(lam, inv = inv, wB = w[-f])
      g    <- compW$gamma
-      w[f] <- wF[seq_along(f)]
+      w[f] <- compW$wF[seq_along(f)]

      lambdas <- c(lambdas, lam)
-      weights_set <- cbind(weights_set, w, deparse.level = 0L) # store solution
      gammas <- c(gammas, g)
+      weights_set <- cbind(weights_set, w, deparse.level = 0L) # store solution
      free_indices <- c(free_indices, list(sort(f)))
    }# end While

@@ -194,7 +198,6 @@ print.CLA <- function(x, ...) {
    invisible(x)
 }

-### TODO:  plot method -- efficient frontier

 ## As basically from  .../YanhaoShi/R/Functions/Plot.R :
 MS_plot <- function(ms, type = "o",

--- a/TODO
+++ b/TODO
 ##-*- org -*-->  Emacs .. [Tab] key + [Org] menu; C-c C-o to follow links

 * ASAP (no longer "Before release of package")
-** check arguments  e.g., lB <= uB, sum upper Bounds >= 1
+** TODO check arguments  e.g., lB <= uB, sum upper Bounds >= 1
 ** TODO References --> (mostly done)
 *** DONE 1) References from the thesis, including the "buried" python-paper with *WRONG* algo
 *** TODO 2) Master thesis: I'd like the thesis to be on our web page
-** DONE CLA() should return a (S3) class, "CLA" w/ print() and plot() methods
-** TODO findMu() and findSig() regression check examples *before* much changing R/findSigMu.R
 ** TODO Improve plot() method, using hyperbolic interpolation see  R/CLA.R  man/plot.CLA.Rd
-** TODO  A. Norring's Masters thesis has a small 12-asset example (from a published source).
+** DONE CLA() should return a (S3) class, "CLA" w/ print() and plot() methods
+** DONE findMu() and findSig() regression check examples *before* much changing: tests/findSigMu-ex.R
+** DONE A. Norring's Masters thesis has a small 10-asset example (from Markowitz & Todd).
   We should add that as a minimally small data set to use in examples,
   e.g. plot().   His thesis is in ~/Betreute-Arbeiten/YanhaoShi/Previous_Work/

+** DONE Fixed Bug: did not get the "border case" lambda=0, e.g. for 10a example,
+   where others (incl A.Norring) *do* return that.
 * With more time, also, e.g., for a short R Journal paper
 ** SparseMatrix plot of the weights
--- a/inst/NEWS.Rd
+++ b/inst/NEWS.Rd
+% Check from R:
+%  news(db = tools:::.build_news_db_from_package_NEWS_Rd("~/R/Pkgs/CLA/inst/NEWS.Rd"))
+\name{NEWS}
+\title{News for \R Package \pkg{CLA}}% MM: look into ../svn-log-from.all
+\newcommand{\CRANpkg}{\href{https://CRAN.R-project.org/package=#1}{\pkg{#1}}}
+
+\section{Changes in version 0.95-0 (2018-10-05)}{
+  \subsection{New Features}{
+    \itemize{
+      \item \emph{Not} back-compatible \emph{changed} result: The very
+      first \dQuote{turning point} from the algorithm with (\code{lambda
+	= NA} etc) is no longer stored as part of the result, compatibly
+      with the literature and other implementations.
+
+      \item Added data set \code{muS.10ex} originally from Markowitz and Todd.
+      \item Added \file{NEWS} file.
+    }
+  }
+  \subsection{Bug Fixes}{
+    \itemize{
+      \item needed \code{, drop=FALSE} in internal \code{getMatrices()},
+      e.g., for the \code{*10ex} example.
+
+      \item Test works with old R 3.3.x.
+
+      \item In some cases such as \code{muS.10ex}, the last turning
+      point with \code{lambda=0} was not computed previously.
+    }
+  }
+}
+
+\section{Changes in version 0.90-1 (2018-02-05)}{
+  \subsection{New Features}{
+    \itemize{
+      \item Add nice \code{plot()} method based on Shi's \code{MS_plot()}
+      \item Literature references in \code{?CLA}
+      \item Vectorized \code{findSig()} and \code{findMu()}.
+    }
+  }
+  \subsection{Bug Fixes}{
+    \itemize{
+      \item \code{findSig.1()} now works.
+      \item test also works in \R configured with \command{--no-long-double}.
+    }
+  }
+}
+
+\section{Changes in version 0.90-0 (2018-01-15)}{
+  \subsection{New Features}{
+    \itemize{
+      \item First release to CRAN
+    }
+  }
+}
+
--- a/man/CLA.Rd
+++ b/man/CLA.Rd
@@ -14,7 +14,7 @@
 }
 \usage{
 CLA(mu, covar, lB, uB, tol.lambda = 1e-07,
-    give.MS = TRUE, keep.names = TRUE)
+    give.MS = TRUE, keep.names = TRUE, trace = 0)
 }
 \arguments{
  \item{mu}{numeric vector of length \code{n} containing the expected
@@ -29,6 +29,8 @@ CLA(mu, covar, lB, uB, tol.lambda = 1e-07,
    should be computed (and returned) as well.}
  \item{keep.names}{\code{\link{logical}} indicating if the
    \code{weights_set} matrix should keep the (asset) \code{names(mu)}.}
+  \item{trace}{an integer (or \code{\link{logical}}) indicating if and
+    how much diagnostic or progress output should be produced.}
 }
 \details{
  The current implementation of the CLA is based (via Norring's)
@@ -49,8 +51,8 @@ CLA(mu, covar, lB, uB, tol.lambda = 1e-07,
    corresponding to the \eqn{m} steps that the CLA has completed or the
    \eqn{m} \dQuote{turning points} it has computed.}
  \item{free_indices}{a \code{\link{list}} of length \code{m}, the
-    \eqn{k}-th component with the indices in \eqn{\{1,\dots,n\}} of
-    those assets whose weights were not at the boundary afater ... }
+    \eqn{k}-th component with the indices in \eqn{{1,\dots,n}} of
+    those assets whose weights were not at the boundary after ... }% <<< FIXME
  \item{gammas}{numeric vector of length \eqn{m} of the values
    \eqn{\gamma_k}{gamma[k]} for CLA step \eqn{k}, \eqn{k=1,\dots,n}.}
  \item{lambdas}{numeric vector of length \eqn{m} of the Lagrange parameters

--- a/man/muS.10ex.Rd
+++ b/man/muS.10ex.Rd
@@ -36,9 +36,11 @@ data("muS.10ex")
 data(muS.10ex)
 str(muS.10ex)

-CLA.10ex <- with(muS.10ex, CLA(mu, covar, lB=0, uB=1)) # works after 'drop = FALSE' fix in getMatrices() !!
+CLA.10ex <- with(muS.10ex, CLA(mu, covar, lB=0, uB=1))
 if(require("Matrix"))
  drop0(zapsmall(CLA.10ex$weights_set))
+## The results, summarized, as in Bailey and López de Prado (Table 2, p.18) :
+with(CLA.10ex, round(cbind(MS_weights[,2:1], lambda=lambdas, t(weights_set)), 3))

 CLA.10ex.1c <- with(muS.10ex, CLA(mu, covar, lB=1/100, uB=1))
 round(CLA.10ex.1c$weights_set, 3)

--- a/tests/SP500-ex.R
+++ b/tests/SP500-ex.R
@@ -33,7 +33,7 @@ if(doExtras) system.time({
    names(tols) <- paste0("10^", round(log10(tols)))
    CLs5c.ls <- lapply(tols, function(tol)
        CLA(muS.sp500$mu, muS.sp500$covar, lB=0, uB=1/20, tol.lambda = tol))
-}) #  78.101 elapsed [nb-mm4]
+}) #  78.101 elapsed [nb-mm4] ; 46.108 [lynne 2018-10]

 if(doExtras) {
    identical(un.call(CLs5c.ls[["10^-7"]]), un.call(CLs5c.0.120))
@@ -49,6 +49,11 @@ if(doExtras) {
        }
    }
 }
+## 2018-10 lynne, 64b Fedora 28
+##  10^-1  vs.  10^-3 : are different [all.equal()]: dim(..[[ 10^-1 ]]$weights_set) = 476 x 47
+##  10^-3  vs.  10^-5 : are different [all.equal()]: dim(..[[ 10^-3 ]]$weights_set) = 476 x 156
+##  10^-5  vs.  10^-6 :  are all.equal()
+##   ................     "   "    "

 op <- options(width = max(500, getOption("width"))) # then it actually fits

@@ -76,7 +81,7 @@ stopifnot(nrow(wts.non0) == 79)

 if(FALSE) # once, manually (into tests/ directory
    saveRDS(wts.non0, "wtsn0.rds")
-file.info("wtsn0.rds")$size  # 27049
+file.info("wtsn0.rds")$size  # 2702049
 wtsn0.ref <- readRDS("wtsn0.rds")

 ## see on all platforms what we get
@@ -90,21 +95,21 @@ stopifnot(all.equal(target = wtsn0.ref, current = wts.non0,
 non.0.assets <- Filter(function(.) . > 0, apply(wts.non0, 1, function(c) sum(c > 0)))


-
-b64.n0 <- c(AAPL = 136L, ADSK = 66L, AET = 147L, AMGN = 3L, ATI = 76L,
+b64.n0 <-
+    c(AAPL = 135L, ADSK = 66L, AET = 147L, AMGN = 3L, ATI = 75L,
      AYE = 56L, AZO = 26L, BAX = 95L, BCR = 35L, BDX = 36L, BIIB = 118L,
-            BNI = 86L, BRL = 23L, BTU = 28L, BUD = 7L, CCE = 54L, CELG = 129L,
-            CI = 69L, CL = 83L, CLX = 53L, CME = 141L, CNX = 17L, COST = 40L,
+      BNI = 86L, BRL = 23L, BTU = 27L, BUD = 7L, CCE = 54L, CELG = 128L,
+      CI = 69L, CL = 83L, CLX = 53L, CME = 140L, CNX = 16L, COST = 40L,
      CTL = 5L, CVS = 102L, DF = 36L, DGX = 33L, DVN = 14L, ED = 32L,
-            EIX = 127L, ESRX = 48L, FCX = 55L, FE = 61L, GILD = 38L, HAL = 31L,
-            HES = 41L, HST = 108L, HUM = 71L, INTU = 48L, JNJ = 34L, K = 61L,
+      EIX = 127L, ESRX = 48L, FCX = 54L, FE = 61L, GILD = 38L, HAL = 31L,
+      HES = 40L, HST = 108L, HUM = 71L, INTU = 48L, JNJ = 34L, K = 61L,
      LH = 80L, LLL = 96L, LMT = 83L, LUK = 72L, MCD = 61L, MDT = 43L,
-            MMC = 7L, MON = 54L, MRO = 137L, MTW = 67L, MUR = 97L, NEM = 45L,
-            NOC = 74L, NUE = 31L, NVDA = 14L, PBG = 72L, PCP = 103L, PDCO = 71L,
-            PEP = 69L, PG = 87L, RAI = 110L, RIG = 121L, RRC = 106L, RTN = 90L,
-            SII = 27L, SSP = 14L, SYK = 19L, SYMC = 13L, TEX = 37L, TIE = 85L,
-            TSO = 116L, TYC = 59L, UST = 127L, WAG = 17L, WFR = 6L, WMT = 6L,
-            X = 44L, XTO = 102L)
+      MMC = 7L, MON = 53L, MRO = 136L, MTW = 66L, MUR = 97L, NEM = 45L,
+      NOC = 74L, NUE = 30L, NVDA = 13L, PBG = 72L, PCP = 102L, PDCO = 71L,
+      PEP = 69L, PG = 87L, RAI = 110L, RIG = 121L, RRC = 105L, RTN = 90L,
+      SII = 27L, SSP = 14L, SYK = 19L, SYMC = 13L, TEX = 36L, TIE = 84L,
+      TSO = 115L, TYC = 59L, UST = 127L, WAG = 17L, WFR = 5L, WMT = 6L,
+      X = 43L, XTO = 102L)

 ## 32-bit Linux (Unfortunately, currently  the results are slighly *platform dependent*)
 nn <- c("AZO", "BAX", "CLX", "COST", "DGX",  "DVN", "ESRX", "LMT", "MUR", "PEP",
@@ -170,15 +175,15 @@ if(all(non.0.assets == non.0.TARG)) { ## show differences:
 ## They have the same names and only differ by  +/- 1:
 stopifnot(
    identical(names(b64.n0), names(b32.n0))
-    ##                              ______      ______
-  , if(b64) identical(non.0.assets, non.0.TARG) # fails on ATLAS, MKL, OpenBLAS
+  , ##                              ______      ______
+    if(b64) identical(non.0.assets, non.0.TARG) # fails on ATLAS, MKL, OpenBLAS
    else if(nonWindows) identical(non.0.assets, b32.n0)
    else ## 32-bit Windows
        TRUE ## for now
  , identical(head(CLs5c.0.120$free_indices, 12),
-              list(295L, c(295L, 453L), 453L, c(453L, 472L), c(19L, 453L, 472L),
+              list(c(295L, 453L), 453L, c(453L, 472L), c(19L, 453L, 472L),
                   c(19L, 453L), 453L, c(15L, 453L), 15L, c(15L, 320L),
-                   c(15L, 105L, 320L), c(105L, 320L)))
+                   c(15L, 105L, 320L), c(105L, 320L), c(105L, 320L, 472L)))
 )

 ## Check some of the 'Env<n>' versions: ---------
@@ -194,17 +199,28 @@ claStrip <- function(res) {

 rCLA <- claStrip(CLs5c.0.120)

+##' Drop "first turning point" (the initial step, stored with lambda = NA) from old, pre-0.95, CLA() result:
+claDrop1st <- function(res) {
+    res$weights_set <- res$weights_set[, -1L , drop=FALSE] # drop 1st column; drop=FALSE keeps it a matrix
+    if(is.matrix(res$MS_weight)) # NOTE(review): CLA() results use MS_weights; the $ operator partial-matches on read only -- confirm name
+        res$MS_weight <- res$MS_weight[ -1L, , drop=FALSE] # drop 1st row
+    for(nm in c("free_indices", "gammas", "lambdas")) # remaining per-step components: drop their 1st entry
+        res[[nm]] <- res[[nm]][-1L]
+    res # the modified copy of the result
+}
+
+## back compatibility to "old" Env8 results:
 nsCLA <- asNamespace("CLA")
 if(is.environment(e8 <- nsCLA$Env8)) local(withAutoprint({
    system.time(r8 <- e8$cla.solve(muS.sp500$mu, muS.sp500$covar,
                                   lB = rep(0,n), uB= rep(1/20, n)))
-    ## lynne: 9.6--9.8 sec
-    stopifnot(all.equal(r8, rCLA, tol = 1e-14)) # they are the same!
+    ## lynne (2017): 9.6--9.8 sec; 2018: 6.1 sec
+    stopifnot(all.equal(claDrop1st(r8), rCLA, tol = 1e-14)) # they are the same!
 }))

 if(is.environment(e9 <- nsCLA$Env9)) local(withAutoprint({
    system.time(r9 <- e9$cla.solve(muS.sp500$mu, muS.sp500$covar,
                                   lB = rep(0,n), uB= rep(1/20, n)))
-    ## lynne: 10.0 sec
-    stopifnot(all.equal(r9, rCLA, tol = 1e-14)) # they are the same!
+    ## lynne(2017): 10.0 sec;  2018: 6.6 sec
+    stopifnot(all.equal(claDrop1st(r9), rCLA, tol = 1e-14)) # they are the same!
 }))
--- a/tests/wtsn0.rds
+++ b/tests/wtsn0.rds