Skip to content
Commits on Source (2)
  • Martin Maechler's avatar
    - 'trace' argument · 66a37749
    Martin Maechler authored
    - Drop the first ("virtual", lambda=0) turning point from result
    - update TODO, and tests (for 64b, for now)
    66a37749
  • Martin Maechler's avatar
    - 'trace' argument · 7150fa66
    Martin Maechler authored
    - Drop the first ("virtual", lambda=0) turning point from result
    - update TODO, and tests (for 64b, for now)
    7150fa66
Package: CLA
Version: 0.90-2
Date: 2018-09-10
Version: 0.95-0
Date: 2018-10-05
Title: Critical Line Algorithm in Pure R
Author: Yanhao Shi <syhelena@163.com>,
Martin Maechler <maechler@stat.math.ethz.ch>
......
......@@ -43,7 +43,8 @@ computeW <- function(lam, inv, wB) {
## 1) compute gamma
g <- (-lam * inv.s[2] + (1- sum(wB) + inv.s[3]))/inv.s[1]
## 2) compute free weights
list(wF = - inv[,3] + g * inv[,1] + lam * inv[,2], gamma = g)
list(wF = - inv[,3] + g * inv[,1] + lam * inv[,2],
gamma = g)
}
## computeLambda --------------------------------------------------------------
......@@ -80,7 +81,7 @@ MS <- function(weights_set, mu, covar) {
CLA <- function(mu, covar, lB, uB, tol.lambda = 1e-7,
give.MS = TRUE, keep.names = TRUE) {
give.MS = TRUE, keep.names = TRUE, trace = 0) {
## minimal argument checks
cl <- match.call()
n <- length(mu)
......@@ -93,15 +94,16 @@ CLA <- function(mu, covar, lB, uB, tol.lambda = 1e-7,
ans <- initAlgo(mu, lB, uB)
f <- ans$index
w <- ans$weights
weights_set <- w # store solution
lambdas <- NA # The first step has no lambda or gamma, add NA instead.
gammas <- NA
free_indices <- list(f)
## initialize result parts
lambdas <- gammas <- numeric()
weights_set <- array(dim = c(n,0L))
free_indices <- list()
lam <- 1 # set non-zero lam
while ( lam > 0 && length(f) < length(mu)) {
while (lam > 0 && (nf <- length(f)) <= length(mu)) {
if(trace) cat(sprintf("while(lam = %g > 0 & ..): |f|=%d ..\n", lam, nf))
## 1) case a): Bound one free weight F -> B
l_in <- 0
if(length(f) > 1 ) {
if(nf > 1L) {
compl <- computeLambda(wB = w[-f], inv = inv, # inv from last step k (k > 1)
i = f, bi.input = cbind(lB, uB))
lam_in <- compl$lambda
......@@ -119,13 +121,13 @@ CLA <- function(mu, covar, lB, uB, tol.lambda = 1e-7,
computeInv(get_i)
})
fi <- length(f) + 1
if(nf < length(mu)) {
fi <- nf + 1L
lam_out <- sapply(seq_along(b), function(i) {
computeLambda(wB = w[b[-i]], inv = inv_list[[i]],
i = fi, bi.input = w[b[i]])
})
if (length(lambdas) > 1 && any(!(sml <- lam_out < lam*(1-tol.lambda)))) {
if (length(lambdas) && any(!(sml <- lam_out < lam*(1-tol.lambda)))) {
lam_out <- lam_out[sml]
b <- b [sml]
inv_list <- inv_list[sml]
......@@ -134,9 +136,13 @@ CLA <- function(mu, covar, lB, uB, tol.lambda = 1e-7,
i_out <- b [k] # one only !
l_out <- lam_out[k]
inv_out <- inv_list[[k]]
} else { ## length(f) == length(mu) <==> |b| = 0
l_out <- -Inf
}
## 3) decide lambda
lam <- max(l_in, l_out, 0)
lam <- max(l_in, l_out)
if(trace) cat(sprintf("l_{in,out}=(%g,%g) => new candidate lam=%g\n",
l_in, l_out, lam))
if(lam > 0) { # remove i_in from f; or add i_out into f
if(l_in > l_out) {
w[i_in] <- bi_in # set value at the correct boundary
......@@ -148,20 +154,18 @@ CLA <- function(mu, covar, lB, uB, tol.lambda = 1e-7,
f <- c(f, i_out)
inv <- inv_out
}
compW <- computeW(lam, inv = inv, wB = w[-f])
}
else{ # 4) if max(l_in, l_out) < 0, "stop" when at the min var solution!
compW <- computeW(lam = lam, inv = inv, wB = w[-f])
else{ # 4) if max(l_in, l_out) <= 0, "stop" when at the min var solution!
lam <- 0
## muF = 0 not necessary, get1 replaced by getM (ie getM from previous step)
}
wF <- compW$wF
compW <- computeW(lam, inv = inv, wB = w[-f])
g <- compW$gamma
w[f] <- wF[seq_along(f)]
w[f] <- compW$wF[seq_along(f)]
lambdas <- c(lambdas, lam)
weights_set <- cbind(weights_set, w, deparse.level = 0L) # store solution
gammas <- c(gammas, g)
weights_set <- cbind(weights_set, w, deparse.level = 0L) # store solution
free_indices <- c(free_indices, list(sort(f)))
}# end While
......@@ -194,7 +198,6 @@ print.CLA <- function(x, ...) {
invisible(x)
}
### TODO: plot method -- efficient frontier
## As basically from .../YanhaoShi/R/Functions/Plot.R :
MS_plot <- function(ms, type = "o",
......
##-*- org -*--> Emacs .. [Tab] key + [Org] menu; C-c C-o to follow links
* ASAP (no longer "Before release of package")
** check arguments e.g., lB <= uB, sum upper Bounds >= 1
** TODO check arguments e.g., lB <= uB, sum upper Bounds >= 1
** TODO References --> (mostly done)
*** DONE 1) References from the thesis, including the "buried" python-paper with *WRONG* algo
*** TODO 2) Master thesis: I'd like the thesis to be on our web page
** DONE CLA() should return a (S3) class, "CLA" w/ print() and plot() methods
** TODO findMu() and findSig() regression check examples *before* much changing R/findSigMu.R
** TODO Improve plot() method, using hyperbolic interpolation see R/CLA.R man/plot.CLA.Rd
** TODO A. Norring's Masters thesis has a small 12-asset example (from a published source).
** DONE CLA() should return a (S3) class, "CLA" w/ print() and plot() methods
** DONE findMu() and findSig() regression check examples *before* much changing: tests/findSigMu-ex.R
** DONE A. Norring's Masters thesis has small 10-asset example (from Markovitz & Todd).
We should add that as a minimally small data set to use in examples,
e.g. plot(). His thesis is in ~/Betreute-Arbeiten/YanhaoShi/Previous_Work/
** DONE Fixed Bug: did not get the "border case" lambda=0, e.g. for 10a example,
where others (incl A.Norring) *do* return that.
* With more time, also, e.g., for a short R Journal paper
** SparseMatrix plot of the weights
% Check from R:
% news(db = tools:::.build_news_db_from_package_NEWS_Rd("~/R/Pkgs/CLA/inst/NEWS.Rd"))
\name{NEWS}
\title{News for \R Package \pkg{CLA}}% MM: look into ../svn-log-from.all
\newcommand{\CRANpkg}{\href{https://CRAN.R-project.org/package=#1}{\pkg{#1}}}
\section{Changes in version 0.95-0 (2018-10-05)}{
\subsection{New Features}{
\itemize{
\item \emph{Not} back-compatible \emph{changed} result: The very
first \dQuote{turning point} from the algorithm with (\code{lambda
= NA} etc) is no longer stored as part of the result, compatibly
with the litterature and other implementations.
\item Added data set \code{muS.10ex} orginally from Markowitz and Todd.
\item Added \file{NEWS} file.
}
}
\subsection{Bug Fixes}{
\itemize{
\item needed \code{, drop=FALSE} in internal \code{getMatrices()},
e.g., for the \code{*10ex} example.
\item Test works with old R 3.3.x.
\item In some cases such as \code{muS.10ex}, the last turning
point with \code{lambda=0} was not computed previously.
}
}
}
\section{Changes in version 0.90-1 (2018-02-05)}{
\subsection{New Features}{
\itemize{
\item Add nice \code{plot()} method base on Shi's \code{MS_plot()}
\item Literature references in \code{?CLA}
\item Vectorized \code{findSig()} and \code{findMu()}.
}
}
\subsection{Bug Fixes}{
\itemize{
\item \code{findSig.1()} now works.
\item test also works in \R configured with \command{--no-long-double}.
}
}
}
\section{Changes in version 0.90-0 (2018-01-15)}{
\subsection{New Features}{
\itemize{
\item First release to CRAN
}
}
}
......@@ -14,7 +14,7 @@
}
\usage{
CLA(mu, covar, lB, uB, tol.lambda = 1e-07,
give.MS = TRUE, keep.names = TRUE)
give.MS = TRUE, keep.names = TRUE, trace = 0)
}
\arguments{
\item{mu}{numeric vector of length \code{n} containing the expected
......@@ -29,6 +29,8 @@ CLA(mu, covar, lB, uB, tol.lambda = 1e-07,
should be computed (and returned) as well.}
\item{keep.names}{\code{\link{logical}} indicating if the
\code{weights_set} matrix should keep the (asset) \code{names(mu)}.}
\item{trace}{an integer (or \code{\link{logical}}) indicating if and
how much diagnostic or progress output should be produced.}
}
\details{
The current implementation of the CLA is based (via Norring's)
......@@ -49,8 +51,8 @@ CLA(mu, covar, lB, uB, tol.lambda = 1e-07,
corresponding to the \eqn{m} steps that the CLA has completed or the
\eqn{m} \dQuote{turning points} it has computed.}
\item{free_indices}{a \code{\link{list}} of length \code{m}, the
\eqn{k}-th component with the indices in \eqn{\{1,\dots,n\}} of
those assets whose weights were not at the boundary afater ... }
\eqn{k}-th component with the indices in \eqn{{1,\dots,n}} of
those assets whose weights were not at the boundary after ... }% <<< FIXME
\item{gammas}{numeric vector of length \eqn{m} of the values
\eqn{\gamma_k}{gamma[k]} for CLA step \eqn{k}, \eqn{k=1,\dots,n}.}
\item{lambdas}{numeric vector of length \eqn{m} of the Lagrange parameters
......
......@@ -36,9 +36,11 @@ data("muS.10ex")
data(muS.10ex)
str(muS.10ex)
CLA.10ex <- with(muS.10ex, CLA(mu, covar, lB=0, uB=1)) # works after 'drop = FALSE' fix in getMatrices() !!
CLA.10ex <- with(muS.10ex, CLA(mu, covar, lB=0, uB=1))
if(require("Matrix"))
drop0(zapsmall(CLA.10ex$weights_set))
## The results, summarized, as in Bayley and López de Prado (Table 2, p.18) :
with(CLA.10ex, round(cbind(MS_weights[,2:1], lambda=lambdas, t(weights_set)), 3))
CLA.10ex.1c <- with(muS.10ex, CLA(mu, covar, lB=1/100, uB=1))
round(CLA.10ex.1c$weights_set, 3)
......
......@@ -33,7 +33,7 @@ if(doExtras) system.time({
names(tols) <- paste0("10^", round(log10(tols)))
CLs5c.ls <- lapply(tols, function(tol)
CLA(muS.sp500$mu, muS.sp500$covar, lB=0, uB=1/20, tol.lambda = tol))
}) # 78.101 elapsed [nb-mm4]
}) # 78.101 elapsed [nb-mm4] ; 46.108 [lynne 2018-10]
if(doExtras) {
identical(un.call(CLs5c.ls[["10^-7"]]), un.call(CLs5c.0.120))
......@@ -49,6 +49,11 @@ if(doExtras) {
}
}
}
## 2018-10 lynne, 64b Fedora 28
## 10^-1 vs. 10^-3 : are different [all.equal()]: dim(..[[ 10^-1 ]]$weights_set) = 476 x 47
## 10^-3 vs. 10^-5 : are different [all.equal()]: dim(..[[ 10^-3 ]]$weights_set) = 476 x 156
## 10^-5 vs. 10^-6 : are all.equal()
## ................ " " "
op <- options(width = max(500, getOption("width"))) # then it actually fits
......@@ -76,7 +81,7 @@ stopifnot(nrow(wts.non0) == 79)
if(FALSE) # once, manually (into tests/ directory
saveRDS(wts.non0, "wtsn0.rds")
file.info("wtsn0.rds")$size # 27049
file.info("wtsn0.rds")$size # 2702049
wtsn0.ref <- readRDS("wtsn0.rds")
## see on all platforms what we get
......@@ -90,21 +95,21 @@ stopifnot(all.equal(target = wtsn0.ref, current = wts.non0,
non.0.assets <- Filter(function(.) . > 0, apply(wts.non0, 1, function(c) sum(c > 0)))
b64.n0 <- c(AAPL = 136L, ADSK = 66L, AET = 147L, AMGN = 3L, ATI = 76L,
b64.n0 <-
c(AAPL = 135L, ADSK = 66L, AET = 147L, AMGN = 3L, ATI = 75L,
AYE = 56L, AZO = 26L, BAX = 95L, BCR = 35L, BDX = 36L, BIIB = 118L,
BNI = 86L, BRL = 23L, BTU = 28L, BUD = 7L, CCE = 54L, CELG = 129L,
CI = 69L, CL = 83L, CLX = 53L, CME = 141L, CNX = 17L, COST = 40L,
BNI = 86L, BRL = 23L, BTU = 27L, BUD = 7L, CCE = 54L, CELG = 128L,
CI = 69L, CL = 83L, CLX = 53L, CME = 140L, CNX = 16L, COST = 40L,
CTL = 5L, CVS = 102L, DF = 36L, DGX = 33L, DVN = 14L, ED = 32L,
EIX = 127L, ESRX = 48L, FCX = 55L, FE = 61L, GILD = 38L, HAL = 31L,
HES = 41L, HST = 108L, HUM = 71L, INTU = 48L, JNJ = 34L, K = 61L,
EIX = 127L, ESRX = 48L, FCX = 54L, FE = 61L, GILD = 38L, HAL = 31L,
HES = 40L, HST = 108L, HUM = 71L, INTU = 48L, JNJ = 34L, K = 61L,
LH = 80L, LLL = 96L, LMT = 83L, LUK = 72L, MCD = 61L, MDT = 43L,
MMC = 7L, MON = 54L, MRO = 137L, MTW = 67L, MUR = 97L, NEM = 45L,
NOC = 74L, NUE = 31L, NVDA = 14L, PBG = 72L, PCP = 103L, PDCO = 71L,
PEP = 69L, PG = 87L, RAI = 110L, RIG = 121L, RRC = 106L, RTN = 90L,
SII = 27L, SSP = 14L, SYK = 19L, SYMC = 13L, TEX = 37L, TIE = 85L,
TSO = 116L, TYC = 59L, UST = 127L, WAG = 17L, WFR = 6L, WMT = 6L,
X = 44L, XTO = 102L)
MMC = 7L, MON = 53L, MRO = 136L, MTW = 66L, MUR = 97L, NEM = 45L,
NOC = 74L, NUE = 30L, NVDA = 13L, PBG = 72L, PCP = 102L, PDCO = 71L,
PEP = 69L, PG = 87L, RAI = 110L, RIG = 121L, RRC = 105L, RTN = 90L,
SII = 27L, SSP = 14L, SYK = 19L, SYMC = 13L, TEX = 36L, TIE = 84L,
TSO = 115L, TYC = 59L, UST = 127L, WAG = 17L, WFR = 5L, WMT = 6L,
X = 43L, XTO = 102L)
## 32-bit Linux (Unfortunately, currently the results are slighly *platform dependent*)
nn <- c("AZO", "BAX", "CLX", "COST", "DGX", "DVN", "ESRX", "LMT", "MUR", "PEP",
......@@ -170,15 +175,15 @@ if(all(non.0.assets == non.0.TARG)) { ## show differences:
## They have the same names and only differ by +/- 1:
stopifnot(
identical(names(b64.n0), names(b32.n0))
## ______ ______
, if(b64) identical(non.0.assets, non.0.TARG) # fails on ATLAS, MKL, OpenBLAS
, ## ______ ______
if(b64) identical(non.0.assets, non.0.TARG) # fails on ATLAS, MKL, OpenBLAS
else if(nonWindows) identical(non.0.assets, b32.n0)
else ## 32-bit Windows
TRUE ## for now
, identical(head(CLs5c.0.120$free_indices, 12),
list(295L, c(295L, 453L), 453L, c(453L, 472L), c(19L, 453L, 472L),
list(c(295L, 453L), 453L, c(453L, 472L), c(19L, 453L, 472L),
c(19L, 453L), 453L, c(15L, 453L), 15L, c(15L, 320L),
c(15L, 105L, 320L), c(105L, 320L)))
c(15L, 105L, 320L), c(105L, 320L), c(105L, 320L, 472L)))
)
## Check some of the 'Env<n>' versions: ---------
......@@ -194,17 +199,28 @@ claStrip <- function(res) {
rCLA <- claStrip(CLs5c.0.120)
##' Drop "first turning point" from old, pre-0.95, CLA() result:
claDrop1st <- function(res) {
res$weights_set <- res$weights_set[, -1L , drop=FALSE] # drop 1st column
if(is.matrix(res$MS_weight))
res$MS_weight <- res$MS_weight[ -1L, , drop=FALSE] # drop 1st row
for(nm in c("free_indices", "gammas", "lambdas"))
res[[nm]] <- res[[nm]][-1L]
res
}
## back compatibility to "old" Env8 results:
nsCLA <- asNamespace("CLA")
if(is.environment(e8 <- nsCLA$Env8)) local(withAutoprint({
system.time(r8 <- e8$cla.solve(muS.sp500$mu, muS.sp500$covar,
lB = rep(0,n), uB= rep(1/20, n)))
## lynne: 9.6--9.8 sec
stopifnot(all.equal(r8, rCLA, tol = 1e-14)) # they are the same!
## lynne (2017): 9.6--9.8 sec; 2018: 6.1 sec
stopifnot(all.equal(claDrop1st(r8), rCLA, tol = 1e-14)) # they are the same!
}))
if(is.environment(e9 <- nsCLA$Env9)) local(withAutoprint({
system.time(r9 <- e9$cla.solve(muS.sp500$mu, muS.sp500$covar,
lB = rep(0,n), uB= rep(1/20, n)))
## lynne: 10.0 sec
stopifnot(all.equal(r9, rCLA, tol = 1e-14)) # they are the same!
## lynne(2017): 10.0 sec; 2018: 6.6 sec
stopifnot(all.equal(claDrop1st(r9), rCLA, tol = 1e-14)) # they are the same!
}))
No preview for this file type