...

Commits (2)
• - 'trace' argument · 66a37749
Martin Maechler authored
- Drop the first ("virtual", lambda=0) turning point from result
- update TODO, and tests (for 64b, for now)
• - 'trace' argument · 7150fa66
Martin Maechler authored
- Drop the first ("virtual", lambda=0) turning point from result
- update TODO, and tests (for 64b, for now)
 Package: CLA Version: 0.90-2 Date: 2018-09-10 Version: 0.95-0 Date: 2018-10-05 Title: Critical Line Algorithm in Pure R Author: Yanhao Shi , Martin Maechler ... ...
 ... ... @@ -43,7 +43,8 @@ computeW <- function(lam, inv, wB) { ## 1) compute gamma g <- (-lam * inv.s + (1- sum(wB) + inv.s))/inv.s ## 2) compute free weights list(wF = - inv[,3] + g * inv[,1] + lam * inv[,2], gamma = g) list(wF = - inv[,3] + g * inv[,1] + lam * inv[,2], gamma = g) } ## computeLambda -------------------------------------------------------------- ... ... @@ -80,7 +81,7 @@ MS <- function(weights_set, mu, covar) { CLA <- function(mu, covar, lB, uB, tol.lambda = 1e-7, give.MS = TRUE, keep.names = TRUE) { give.MS = TRUE, keep.names = TRUE, trace = 0) { ## minimal argument checks cl <- match.call() n <- length(mu) ... ... @@ -93,15 +94,16 @@ CLA <- function(mu, covar, lB, uB, tol.lambda = 1e-7, ans <- initAlgo(mu, lB, uB) f <- ans$index w <- ans$weights weights_set <- w # store solution lambdas <- NA # The first step has no lambda or gamma, add NA instead. gammas <- NA free_indices <- list(f) ## initialize result parts lambdas <- gammas <- numeric() weights_set <- array(dim = c(n,0L)) free_indices <- list() lam <- 1 # set non-zero lam while ( lam > 0 && length(f) < length(mu)) { while (lam > 0 && (nf <- length(f)) <= length(mu)) { if(trace) cat(sprintf("while(lam = %g > 0 & ..): |f|=%d ..\n", lam, nf)) ## 1) case a): Bound one free weight F -> B l_in <- 0 if(length(f) > 1 ) { if(nf > 1L) { compl <- computeLambda(wB = w[-f], inv = inv, # inv from last step k (k > 1) i = f, bi.input = cbind(lB, uB)) lam_in <- compl$lambda ... ... @@ -119,49 +121,51 @@ CLA <- function(mu, covar, lB, uB, tol.lambda = 1e-7, computeInv(get_i) }) fi <- length(f) + 1 lam_out <- sapply(seq_along(b), function(i) { computeLambda(wB = w[b[-i]], inv = inv_list[[i]], i = fi, bi.input = w[b[i]]) }) if (length(lambdas) > 1 && any(!(sml <- lam_out < lam*(1-tol.lambda)))) { lam_out <- lam_out[sml] b <- b [sml] inv_list <- inv_list[sml] if(nf < length(mu)) { fi <- nf + 1L lam_out <- sapply(seq_along(b), function(i) { computeLambda(wB = w[b[-i]], inv = inv_list[[i]], i = fi, bi.input = w[b[i]]) }) if (length(lambdas) && any(!(sml <- lam_out < lam*(1-tol.lambda)))) { lam_out <- lam_out[sml] b <- b [sml] inv_list <- inv_list[sml] } k <- which.max(lam_out) i_out <- b [k] # one only ! l_out <- lam_out[k] inv_out <- inv_list[[k]] } else { ## length(f) == length(mu) <==> |b| = 0 l_out <- -Inf } k <- which.max(lam_out) i_out <- b [k] # one only ! l_out <- lam_out[k] inv_out <- inv_list[[k]] ## 3) decide lambda lam <- max(l_in, l_out, 0) lam <- max(l_in, l_out) if(trace) cat(sprintf("l_{in,out}=(%g,%g) => new candidate lam=%g\n", l_in, l_out, lam)) if(lam > 0) { # remove i_in from f; or add i_out into f if(l_in > l_out ) { if(l_in > l_out) { w[i_in] <- bi_in # set value at the correct boundary f <- f[f != i_in] getM <- getMatrices(mu, covar, w, f) inv <- computeInv(getM) } else { f <- c(f,i_out) f <- c(f, i_out) inv <- inv_out } compW <- computeW(lam, inv = inv, wB = w[-f]) } else{ # 4) if max(l_in, l_out) < 0, "stop" when at the min var solution! compW <- computeW(lam = lam, inv = inv, wB = w[-f]) else{ # 4) if max(l_in, l_out) <= 0, "stop" when at the min var solution! lam <- 0 ## muF = 0 not necessary, get1 replaced by getM (ie getM from previous step) } wF <- compW$wF g <- compW$gamma w[f] <- wF[seq_along(f)] compW <- computeW(lam, inv = inv, wB = w[-f]) g <- compW$gamma w[f] <- compW$wF[seq_along(f)] lambdas <- c(lambdas, lam) weights_set <- cbind(weights_set, w, deparse.level = 0L) # store solution gammas <- c(gammas, g) weights_set <- cbind(weights_set, w, deparse.level = 0L) # store solution free_indices <- c(free_indices, list(sort(f))) }# end While ... ... @@ -194,7 +198,6 @@ print.CLA <- function(x, ...) { invisible(x) } ### TODO: plot method -- efficient frontier ## As basically from .../YanhaoShi/R/Functions/Plot.R : MS_plot <- function(ms, type = "o", ... ...  ##-*- org -*--> Emacs .. [Tab] key + [Org] menu; C-c C-o to follow links * ASAP (no longer "Before release of package") ** check arguments e.g., lB <= uB, sum upper Bounds >= 1 ** TODO check arguments e.g., lB <= uB, sum upper Bounds >= 1 ** TODO References --> (mostly done) *** DONE 1) References from the thesis, including the "buried" python-paper with *WRONG* algo *** TODO 2) Master thesis: I'd like the thesis to be on our web page ** DONE CLA() should return a (S3) class, "CLA" w/ print() and plot() methods ** TODO findMu() and findSig() regression check examples *before* much changing R/findSigMu.R ** TODO Improve plot() method, using hyperbolic interpolation see R/CLA.R man/plot.CLA.Rd ** TODO A. Norring's Masters thesis has a small 12-asset example (from a published source). ** DONE CLA() should return a (S3) class, "CLA" w/ print() and plot() methods ** DONE findMu() and findSig() regression check examples *before* much changing: tests/findSigMu-ex.R ** DONE A. Norring's Masters thesis has small 10-asset example (from Markovitz & Todd). We should add that as a minimally small data set to use in examples, e.g. plot(). His thesis is in ~/Betreute-Arbeiten/YanhaoShi/Previous_Work/ e.g. plot(). His thesis is in ~/Betreute-Arbeiten/YanhaoShi/Previous_Work/ ** DONE Fixed Bug: did not get the "border case" lambda=0, e.g. for 10a example, where others (incl A.Norring) *do* return that. * With more time, also, e.g., for a short R Journal paper ** SparseMatrix plot of the weights inst/NEWS.Rd 0 → 100644  % Check from R: % news(db = tools:::.build_news_db_from_package_NEWS_Rd("~/R/Pkgs/CLA/inst/NEWS.Rd")) \name{NEWS} \title{News for \R Package \pkg{CLA}}% MM: look into ../svn-log-from.all \newcommand{\CRANpkg}{\href{https://CRAN.R-project.org/package=#1}{\pkg{#1}}} \section{Changes in version 0.95-0 (2018-10-05)}{ \subsection{New Features}{ \itemize{ \item \emph{Not} back-compatible \emph{changed} result: The very first \dQuote{turning point} from the algorithm with (\code{lambda = NA} etc) is no longer stored as part of the result, compatibly with the litterature and other implementations. \item Added data set \code{muS.10ex} orginally from Markowitz and Todd. \item Added \file{NEWS} file. } } \subsection{Bug Fixes}{ \itemize{ \item needed \code{, drop=FALSE} in internal \code{getMatrices()}, e.g., for the \code{*10ex} example. \item Test works with old R 3.3.x. \item In some cases such as \code{muS.10ex}, the last turning point with \code{lambda=0} was not computed previously. } } } \section{Changes in version 0.90-1 (2018-02-05)}{ \subsection{New Features}{ \itemize{ \item Add nice \code{plot()} method base on Shi's \code{MS_plot()} \item Literature references in \code{?CLA} \item Vectorized \code{findSig()} and \code{findMu()}. } } \subsection{Bug Fixes}{ \itemize{ \item \code{findSig.1()} now works. \item test also works in \R configured with \command{--no-long-double}. } } } \section{Changes in version 0.90-0 (2018-01-15)}{ \subsection{New Features}{ \itemize{ \item First release to CRAN } } }  ... ... @@ -14,7 +14,7 @@ } \usage{ CLA(mu, covar, lB, uB, tol.lambda = 1e-07, give.MS = TRUE, keep.names = TRUE) give.MS = TRUE, keep.names = TRUE, trace = 0) } \arguments{ \item{mu}{numeric vector of length \code{n} containing the expected ... ... @@ -29,6 +29,8 @@ CLA(mu, covar, lB, uB, tol.lambda = 1e-07, should be computed (and returned) as well.} \item{keep.names}{\code{\link{logical}} indicating if the \code{weights_set} matrix should keep the (asset) \code{names(mu)}.} \item{trace}{an integer (or \code{\link{logical}}) indicating if and how much diagnostic or progress output should be produced.} } \details{ The current implementation of the CLA is based (via Norring's) ... ... @@ -49,8 +51,8 @@ CLA(mu, covar, lB, uB, tol.lambda = 1e-07, corresponding to the \eqn{m} steps that the CLA has completed or the \eqn{m} \dQuote{turning points} it has computed.} \item{free_indices}{a \code{\link{list}} of length \code{m}, the \eqn{k}-th component with the indices in \eqn{\{1,\dots,n\}} of those assets whose weights were not at the boundary afater ... } \eqn{k}-th component with the indices in \eqn{{1,\dots,n}} of those assets whose weights were not at the boundary after ... }% <<< FIXME \item{gammas}{numeric vector of length \eqn{m} of the values \eqn{\gamma_k}{gamma[k]} for CLA step \eqn{k}, \eqn{k=1,\dots,n}.} \item{lambdas}{numeric vector of length \eqn{m} of the Lagrange parameters ... ...  ... ... @@ -36,9 +36,11 @@ data("muS.10ex") data(muS.10ex) str(muS.10ex) CLA.10ex <- with(muS.10ex, CLA(mu, covar, lB=0, uB=1)) # works after 'drop = FALSE' fix in getMatrices() !! CLA.10ex <- with(muS.10ex, CLA(mu, covar, lB=0, uB=1)) if(require("Matrix")) drop0(zapsmall(CLA.10ex$weights_set)) ## The results, summarized, as in Bayley and López de Prado (Table 2, p.18) : with(CLA.10ex, round(cbind(MS_weights[,2:1], lambda=lambdas, t(weights_set)), 3)) CLA.10ex.1c <- with(muS.10ex, CLA(mu, covar, lB=1/100, uB=1)) round(CLA.10ex.1c$weights_set, 3) ... ...  ... ... @@ -33,7 +33,7 @@ if(doExtras) system.time({ names(tols) <- paste0("10^", round(log10(tols))) CLs5c.ls <- lapply(tols, function(tol) CLA(muS.sp500$mu, muS.sp500$covar, lB=0, uB=1/20, tol.lambda = tol)) }) # 78.101 elapsed [nb-mm4] }) # 78.101 elapsed [nb-mm4] ; 46.108 [lynne 2018-10] if(doExtras) { identical(un.call(CLs5c.ls[["10^-7"]]), un.call(CLs5c.0.120)) ... ... @@ -49,6 +49,11 @@ if(doExtras) { } } } ## 2018-10 lynne, 64b Fedora 28 ## 10^-1 vs. 10^-3 : are different [all.equal()]: dim(..[[ 10^-1 ]]$weights_set) = 476 x 47 ## 10^-3 vs. 10^-5 : are different [all.equal()]: dim(..[[ 10^-3 ]]$weights_set) = 476 x 156 ## 10^-5 vs. 10^-6 : are all.equal() ## ................ " " " op <- options(width = max(500, getOption("width"))) # then it actually fits ... ... @@ -76,7 +81,7 @@ stopifnot(nrow(wts.non0) == 79) if(FALSE) # once, manually (into tests/ directory saveRDS(wts.non0, "wtsn0.rds") file.info("wtsn0.rds")$size # 27049 file.info("wtsn0.rds")$size # 2702049 wtsn0.ref <- readRDS("wtsn0.rds") ## see on all platforms what we get ... ... @@ -90,21 +95,21 @@ stopifnot(all.equal(target = wtsn0.ref, current = wts.non0, non.0.assets <- Filter(function(.) . > 0, apply(wts.non0, 1, function(c) sum(c > 0))) b64.n0 <- c(AAPL = 136L, ADSK = 66L, AET = 147L, AMGN = 3L, ATI = 76L, AYE = 56L, AZO = 26L, BAX = 95L, BCR = 35L, BDX = 36L, BIIB = 118L, BNI = 86L, BRL = 23L, BTU = 28L, BUD = 7L, CCE = 54L, CELG = 129L, CI = 69L, CL = 83L, CLX = 53L, CME = 141L, CNX = 17L, COST = 40L, CTL = 5L, CVS = 102L, DF = 36L, DGX = 33L, DVN = 14L, ED = 32L, EIX = 127L, ESRX = 48L, FCX = 55L, FE = 61L, GILD = 38L, HAL = 31L, HES = 41L, HST = 108L, HUM = 71L, INTU = 48L, JNJ = 34L, K = 61L, LH = 80L, LLL = 96L, LMT = 83L, LUK = 72L, MCD = 61L, MDT = 43L, MMC = 7L, MON = 54L, MRO = 137L, MTW = 67L, MUR = 97L, NEM = 45L, NOC = 74L, NUE = 31L, NVDA = 14L, PBG = 72L, PCP = 103L, PDCO = 71L, PEP = 69L, PG = 87L, RAI = 110L, RIG = 121L, RRC = 106L, RTN = 90L, SII = 27L, SSP = 14L, SYK = 19L, SYMC = 13L, TEX = 37L, TIE = 85L, TSO = 116L, TYC = 59L, UST = 127L, WAG = 17L, WFR = 6L, WMT = 6L, X = 44L, XTO = 102L) b64.n0 <- c(AAPL = 135L, ADSK = 66L, AET = 147L, AMGN = 3L, ATI = 75L, AYE = 56L, AZO = 26L, BAX = 95L, BCR = 35L, BDX = 36L, BIIB = 118L, BNI = 86L, BRL = 23L, BTU = 27L, BUD = 7L, CCE = 54L, CELG = 128L, CI = 69L, CL = 83L, CLX = 53L, CME = 140L, CNX = 16L, COST = 40L, CTL = 5L, CVS = 102L, DF = 36L, DGX = 33L, DVN = 14L, ED = 32L, EIX = 127L, ESRX = 48L, FCX = 54L, FE = 61L, GILD = 38L, HAL = 31L, HES = 40L, HST = 108L, HUM = 71L, INTU = 48L, JNJ = 34L, K = 61L, LH = 80L, LLL = 96L, LMT = 83L, LUK = 72L, MCD = 61L, MDT = 43L, MMC = 7L, MON = 53L, MRO = 136L, MTW = 66L, MUR = 97L, NEM = 45L, NOC = 74L, NUE = 30L, NVDA = 13L, PBG = 72L, PCP = 102L, PDCO = 71L, PEP = 69L, PG = 87L, RAI = 110L, RIG = 121L, RRC = 105L, RTN = 90L, SII = 27L, SSP = 14L, SYK = 19L, SYMC = 13L, TEX = 36L, TIE = 84L, TSO = 115L, TYC = 59L, UST = 127L, WAG = 17L, WFR = 5L, WMT = 6L, X = 43L, XTO = 102L) ## 32-bit Linux (Unfortunately, currently the results are slighly *platform dependent*) nn <- c("AZO", "BAX", "CLX", "COST", "DGX", "DVN", "ESRX", "LMT", "MUR", "PEP", ... ... @@ -170,15 +175,15 @@ if(all(non.0.assets == non.0.TARG)) { ## show differences: ## They have the same names and only differ by +/- 1: stopifnot( identical(names(b64.n0), names(b32.n0)) ## ______ ______ , if(b64) identical(non.0.assets, non.0.TARG) # fails on ATLAS, MKL, OpenBLAS , ## ______ ______ if(b64) identical(non.0.assets, non.0.TARG) # fails on ATLAS, MKL, OpenBLAS else if(nonWindows) identical(non.0.assets, b32.n0) else ## 32-bit Windows TRUE ## for now , identical(head(CLs5c.0.120$free_indices, 12), list(295L, c(295L, 453L), 453L, c(453L, 472L), c(19L, 453L, 472L), list(c(295L, 453L), 453L, c(453L, 472L), c(19L, 453L, 472L), c(19L, 453L), 453L, c(15L, 453L), 15L, c(15L, 320L), c(15L, 105L, 320L), c(105L, 320L))) c(15L, 105L, 320L), c(105L, 320L), c(105L, 320L, 472L))) ) ## Check some of the 'Env' versions: --------- ... ... @@ -194,17 +199,28 @@ claStrip <- function(res) { rCLA <- claStrip(CLs5c.0.120) ##' Drop "first turning point" from old, pre-0.95, CLA() result: claDrop1st <- function(res) { res$weights_set <- res$weights_set[, -1L , drop=FALSE] # drop 1st column if(is.matrix(res$MS_weight)) res$MS_weight <- res$MS_weight[ -1L, , drop=FALSE] # drop 1st row for(nm in c("free_indices", "gammas", "lambdas")) res[[nm]] <- res[[nm]][-1L] res } ## back compatibility to "old" Env8 results: nsCLA <- asNamespace("CLA") if(is.environment(e8 <- nsCLA$Env8)) local(withAutoprint({ system.time(r8 <- e8$cla.solve(muS.sp500$mu, muS.sp500$covar, lB = rep(0,n), uB= rep(1/20, n))) ## lynne: 9.6--9.8 sec stopifnot(all.equal(r8, rCLA, tol = 1e-14)) # they are the same! ## lynne (2017): 9.6--9.8 sec; 2018: 6.1 sec stopifnot(all.equal(claDrop1st(r8), rCLA, tol = 1e-14)) # they are the same! })) if(is.environment(e9 <- nsCLA$Env9)) local(withAutoprint({ system.time(r9 <- e9$cla.solve(muS.sp500$mu, muS.sp500\$covar, lB = rep(0,n), uB= rep(1/20, n))) ## lynne: 10.0 sec stopifnot(all.equal(r9, rCLA, tol = 1e-14)) # they are the same! ## lynne(2017): 10.0 sec; 2018: 6.6 sec stopifnot(all.equal(claDrop1st(r9), rCLA, tol = 1e-14)) # they are the same! }))
No preview for this file type