diff --git a/Code_Python/main_ME.py b/Code_Python/main_ME.py
new file mode 100644
index 0000000..21b51f3
--- /dev/null
+++ b/Code_Python/main_ME.py
@@ -0,0 +1,207 @@
+import numpy as np
+import scipy.stats as st
+
+class MEBOOT:
+    def __init__(self, x, trimval=0.1, seed=None):
+        '''
+        x: multivariate time series, N x T (N series, T observations)
+        trimval: trim fraction for the trimmed mean (default 0.1)
+        '''
+
+        self.sd = np.random.RandomState(seed)    # seeded random generator
+        m, n = x.shape
+
+        self.meanx = x.mean(axis=1)
+        self.sdx = x.std(axis=1)
+
+        # Sort each series, keeping the ordering indexes to restore the
+        # time dependence later.
+        self.ordxx = np.argsort(x, axis=1)
+        xx = np.take_along_axis(x, self.ordxx, axis=1)
+
+        # Intermediate points of the sorted series.
+        self.z = 0.5*(xx[:, 1:] + xx[:, :-1])
+
+        # The trimmed mean of the absolute consecutive differences sets the
+        # width of the uniform left and right tails.
+        dv = np.abs(np.diff(x, axis=1))
+        dvtrim = st.trim_mean(dv, trimval, axis=1)
+
+        self.xmin = xx[:, 0] - dvtrim
+        self.xmax = xx[:, -1] + dvtrim
+
+        # Interval means (m_t in the reference paper) satisfying the
+        # mean-preserving constraint.
+        tmp = np.array([[0.25]*(n-2) + [0.5]*(n-2) + [0.25]*(n-2)])
+        cmd = np.column_stack((xx[:, :n-2], xx[:, 1:n-1], xx[:, 2:n]))*tmp
+
+        aux = np.array([cmd[:, i::n-2].sum(axis=1) for i in range(n-2)]).T
+
+        self.desintxb = np.column_stack((0.75*xx[:, :1] + 0.25*xx[:, 1:2],
+                                         aux,
+                                         0.25*xx[:, -2:-1] + 0.75*xx[:, -1:]))
+
+    def _mrapproxpy(self, p, z, desintxb):
+        # Vectorized port of the 'mrapprox' C routine: linear interpolation
+        # of the quantiles within the interior intervals, plus the
+        # mean-preserving shift.
+        m, n = p.shape
+
+        q = -np.inf*np.ones(n*m)
+
+        a = (p//(1/n) - 1).astype(int)
+
+        hs = np.arange(n-2)
+
+        dz = np.column_stack(([-np.inf]*m, np.diff(z, axis=1)*n, [0]*m)).ravel()
+        sz = np.column_stack(([0]*m, (0.5*(z[:, hs+1] + z[:, hs]))[:, hs], [0]*m)).ravel()
+        zt = np.column_stack(([-np.inf]*m, z[:, hs], [-np.inf]*m)).ravel()
+        dh = np.column_stack(([-np.inf]*m, desintxb[:, hs], [0]*m)).ravel()
+
+        plus = (n*np.arange(m))[np.newaxis].T
+        jx = (np.tile(range(n), (m, 1)) + plus).ravel()
+
+        ixo = a + 1
+        ix = (ixo + plus).ravel()
+
+        tmp = zt[ix] + dh[ix] - sz[ix]
+
+        q[jx] = dz[ix]*(p.ravel()[jx] - ixo.ravel()/n) + tmp
+
+        return q.reshape((m, n))
+
+    def _expandSD(self, bt, fiv):
+        # Expand the standard deviation of the replicates by up to 'fiv'
+        # percent, so the ensemble does not shrink around the observed series.
+        obt = len(bt.shape)
+
+        if obt == 2:
+            bt = bt[np.newaxis]
+
+        sd = self.sdx
+
+        bt = np.swapaxes(bt, 0, 1)
+
+        sdf = np.column_stack((sd, bt.std(axis=2)))
+
+        sdfa = sdf/sdf[:, :1]    # ratio replicate sd / observed sd
+        sdfd = sdf[:, :1]/sdf    # inverse ratio
+
+        mx = 1 + fiv/100
+
+        # Replicates whose sd fell below the observed one get a random
+        # expansion factor in [1, mx); use the seeded generator for
+        # reproducibility.
+        idx = np.where(sdfa < 1)
+        sdfa[idx] = self.sd.uniform(1, mx, size=len(idx[0]))
+
+        sdfdXsdfa = sdfd[:, 1:]*sdfa[:, 1:]
+
+        bt *= np.moveaxis(sdfdXsdfa[np.newaxis], 0, -1)
+        bt = np.swapaxes(bt, 0, 1)
+
+        if obt == 2:
+            bt = bt[0]
+
+        return bt
+
+    def _adjust(self, bt):
+        # Scale adjustment: rescale the replicates so that the standard
+        # deviation of the ME density matches the observed one.
+        zz = np.column_stack((self.xmin[np.newaxis].T, self.z,
+                              self.xmax[np.newaxis].T))
+
+        v = np.diff(zz**2, axis=1)/12
+
+        xb = self.meanx[np.newaxis].T
+
+        s1 = ((self.desintxb - xb)**2).sum(axis=1)
+
+        act_sd = np.sqrt((s1 + v.sum(axis=1))/(self.z.shape[1] + 1))
+        des_sd = self.sdx
+
+        kappa = (des_sd/act_sd - 1)[np.newaxis].T
+
+        return bt + kappa*(bt - xb)
+
+    def bootstrap(self, fiv=5, adjust_sd=True):
+        '''
+        Single realization of the ME bootstrap for the multivariate time
+        series.
+
+        fiv: percentage increment of the standard deviation (default 5%)
+        adjust_sd: rescale the replicate so its standard deviation matches
+                   the observed series.
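+
+        Example (illustrative sketch, shapes assumed):
+
+        >>> x = np.random.rand(3, 50)     # 3 series, 50 observations each
+        >>> eng = MEBOOT(x, seed=42)
+        >>> rep = eng.bootstrap(fiv=5)    # one replicate, shape (3, 50)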
+        '''
+
+        m, n = self.z.shape
+        n += 1
+
+        p = self.sd.uniform(0, 1, size=(m, n))
+
+        # Quantiles in the interior intervals.
+        q = self._mrapproxpy(p, self.z, self.desintxb[:, 1:])
+
+        f_low = np.column_stack((self.xmin[np.newaxis].T, self.z[:, 0]))
+        f_hi = np.column_stack((self.z[:, -1], self.xmax[np.newaxis].T))
+
+        low = p < 1/n
+        hi = p > (n-1)/n
+
+        for i in range(m):
+            # Left and right tails: linear interpolation towards xmin/xmax.
+            q[i][low[i]] = np.interp(p[i][low[i]], [0, 1/n], f_low[i])
+            q[i][hi[i]] = np.interp(p[i][hi[i]], [(n-1)/n, 1], f_hi[i])
+
+            # Restore the time dependence of the original series.
+            qseq = np.sort(q[i])
+            q[i][self.ordxx[i]] = qseq
+
+        if fiv is not None:
+            q = self._expandSD(q, fiv)
+        if adjust_sd:
+            q = self._adjust(q)
+        return q
+
+    def bootstrap_clt(self, nt, fiv=5, adjust_sd=True):
+        '''
+        Multiple ME bootstrap copies, forcing the central limit theorem on
+        the ensemble. Warning: all bootstrap copies are computed at once,
+        which may require a lot of memory.
+
+        nt: number of bootstrap copies
+        fiv: percentage increment of the standard deviation (default 5%)
+        adjust_sd: rescale each copy so its standard deviation matches the
+                   observed series.
+        '''
+
+        bt = np.array([self.bootstrap(fiv=None) for i in range(nt)])
+        if fiv is not None:
+            bt = self._expandSD(bt, fiv)
+
+        bt = np.swapaxes(bt, 0, 1)
+
+        N, nt, T = bt.shape
+
+        gm = self.meanx
+        s = self.sdx
+
+        smean = s/np.sqrt(nt)
+
+        xbar = bt.mean(axis=2)
+        sortxbar = np.sort(xbar, axis=1)
+        oo = np.argsort(xbar, axis=1)
+
+        # Gaussian grid of means for the sorted copies.
+        newbar = gm[np.newaxis].T \
+                 + st.norm.ppf((np.arange(1, nt+1)/(nt+1))[np.newaxis])*smean[np.newaxis].T
+
+        scn = st.zscore(newbar, axis=1)
+        newm = scn*smean[np.newaxis].T + gm[np.newaxis].T
+
+        meanfix = newm - sortxbar
+
+        # Inverse permutation of 'oo', to restore the original copy order.
+        oinv = np.argsort(oo, axis=1)
+
+        out = np.array([(bt[i][oo[i]] + meanfix[i][np.newaxis].T)[oinv[i]]
+                        for i in range(bt.shape[0])])
+        out = np.swapaxes(out, 0, 1)
+
+        if adjust_sd:
+            out = self._adjust(out)
+
+        return out
diff --git a/Code_Python/meBoot_2.py b/Code_Python/meBoot_2.py
new file mode 100644
index 0000000..7a089f1
--- /dev/null
+++ b/Code_Python/meBoot_2.py
@@ -0,0 +1,102 @@
+"""
+    MEBOOT.PY - Python module for the meboot (Maximum Entropy Bootstrap)
+    algorithm for time series.
+    Author: Fabian Brix
+    Method by H.D. Vinod, Fordham University.
+"""
+import numpy as np
+import matplotlib.pyplot as plt
+
+def sort(series):
+    ind_sorted = np.argsort(series)
+    s_sorted = series[ind_sorted]
+    return s_sorted, ind_sorted
+
+def get_trm_mean(series, percent):
+    # Trimmed mean of the absolute consecutive differences.
+    dev = np.abs(series[1:] - series[:-1])
+    n = len(dev)
+    k = int(round(n*(percent/100.0)/2.0))
+    return np.mean(dev[k:n-k])
+
+def get_intermed_pts(series, s_sorted, percent):
+    # Intermediate points, with uniform tails of width m_trm on both sides.
+    zt = (s_sorted[:-1] + s_sorted[1:])/2.0
+    m_trm = get_trm_mean(series, percent)
+    z0 = s_sorted[0] - m_trm
+    zT = s_sorted[-1] + m_trm
+    z = np.hstack((z0, zt, zT))
+    return z
+
+def get_intervals(z):
+    return np.vstack((z[:-1], z[1:])).T
+
+def get_me_density(intervals):
+    # Uniform maximum entropy density within each interval.
+    return 1.0/(intervals[:, 1] - intervals[:, 0])
+
+def get_cpf(me_density, intervals):
+    # Cumulative distribution over the intervals, normalized to end at 1.
+    cpf = np.array([sum(me_density[:i+1]) for i in range(me_density.shape[0])])
+    return cpf/np.max(cpf)
+
+def get_quantiles(cpf, intervals, series):
+    quantiles = []
+    T = float(len(series))
+    t = np.arange(T+1)
+    Rt = np.vstack((t[:-1]/T, t[1:]/T)).T
+    for d in range(series.shape[0]):
+        u = np.random.uniform(0, 1)
+        for i in range(cpf.shape[0]):
+            cp = cpf[i]
+            if u <= cp:
+                cpm = cpf[i-1]
+                if i == 0:
+                    cpm = 0
+                # Invert the piecewise-linear CDF within interval i.
+                m = (cp - cpm)/(intervals[i, 1] - intervals[i, 0])
+                xp = (u - cpm)/m + intervals[i, 0]
+                quantiles.append(xp)
+                break
+    return np.array(quantiles)
+
+def meboot(series, replicates):
+    # The input is sorted in ascending order by default.
+    print(series)
+    np.random.seed(0)
+
+    s_sorted, ind_sorted = sort(series)
+
+    z = get_intermed_pts(series, s_sorted, 10)
+    print('z ', z)
+
+    intervals = get_intervals(z)
+    print('intervals ', intervals)
+
+    me_density = get_me_density(intervals)
+    print('uni dens ', me_density)
+
+    cpf = get_cpf(me_density, intervals)
+    print('cpf ', cpf)
+
+    quantiles = get_quantiles(cpf, intervals, series)
+    print('quantiles ', quantiles)
+
+    quantiles = np.sort(quantiles)
+
+    replicate = quantiles[ind_sorted]
+    print('replicate ', replicate)
+
+    # TODO: Understand and add the repeat mechanism.
+    # plt.plot(series, color='r')
+    # plt.plot(replicate, color='b')
+    # plt.ylim(0,30)
+    # plt.show()
+
+series = np.array([4, 12, 36, 20, 8])
+meboot(series, 1)
diff --git a/Code_Python/meboot.r b/Code_Python/meboot.r
deleted file mode 100644
index d717178..0000000
--- a/Code_Python/meboot.r
+++ /dev/null
@@ -1,246 +0,0 @@
-
-#meboot <- function(x, reps=999, trim=0.10, reachbnd=TRUE,
-#                   expand.sd=TRUE, force.clt=TRUE, elaps=FALSE,
-#                   colsubj, coldata, coltimes, ...){
-#  UseMethod("meboot", x)
-#}
-
-meboot <- function(x, reps=999, trim=list(trim=0.10, xmin=NULL, xmax=NULL), reachbnd=TRUE,
-                   expand.sd=TRUE, force.clt=TRUE,
-                   scl.adjustment = FALSE, sym = FALSE, elaps=FALSE,
-                   colsubj, coldata, coltimes, ...)
-{
-  if ("pdata.frame" %in% class(x))
-  {
-    res <- meboot.pdata.frame (x, reps, trim$trim, reachbnd,
-                               expand.sd, force.clt, scl.adjustment, sym, elaps,
-                               colsubj, coldata, coltimes, ...)
-    return(res)
-  }
-
-  if (reps == 1 && isTRUE(force.clt))
-  {
-    force.clt <- FALSE
-    warning("force.clt was set to FALSE since the ensemble contains only one replicate.")
-  }
-
-  if (!is.list(trim)) {
-    trimval <- trim
-  } else {
-    trimval <- if (is.null(trim$trim)) 0.1 else trim$trim
-  }
-
-  ptm1 <- proc.time()
-
-  n <- length(x)
-
-  # Sort the original data in increasing order and
-  # store the ordering index vector.
-
-  xx <- sort(x)
-  ordxx <- order(x)
-  #ordxx <- sort.int(x, index.return=TRUE)
-
-  # symmetry
-
-  if (sym)
-  {
-    xxr <- rev(xx) #reordered values
-    xx.sym <- mean(xx) + 0.5*(xx - xxr) #symmetrized order stats
-    xx <- xx.sym #replace order stats by symmetrized ones
-  }
-
-  # Compute intermediate points on the sorted series.
-
-  z <- rowMeans(embed(xx, 2))
-
-  # Compute lower limit for left tail ('xmin') and
-  # upper limit for right tail ('xmax').
-  # This is done by computing the 'trim' (e.g. 10%) trimmed mean
-  # of deviations among all consecutive observations ('dv').
-  # Thus the tails are uniform distributed.
-
-  dv <- abs(diff(as.numeric(x)))
-  dvtrim <- mean(dv, trim=trimval)
-
-  if (is.list(trim))
-  {
-    if (is.null(trim$xmin))
-    {
-      xmin <- xx[1] - dvtrim
-    } else
-      xmin <- trim$xmin
-
-    if (is.null(trim$xmax))
-    {
-      xmax <- xx[n] + dvtrim
-    } else
-      xmax <- trim$xmax
-
-    if (!is.null(trim$xmin) || !is.null(trim$xmax))
-    {
-      if (isTRUE(force.clt))
-      {
-        expand.sd <- FALSE
-        force.clt <- FALSE
-        warning("expand.sd and force.clt were set to FALSE in order to ",
-                "enforce the limits xmin/xmax.")
-      }
-    }
-  } else {
-    xmin <- xx[1] - dvtrim
-    xmax <- xx[n] + dvtrim
-  }
-
-  # do this here so that these warnings are printed after
-  # the above warnings (if necessary)
-
-  if (is.list(trim))
-  {
-    if (!is.null(trim$xmin) && trim$xmin > min(x))
-      warning("the lower limit trim$xmin may not be satisfied in the replicates ",
-              "since it is higher than the minimum value observed ",
-              "in the input series x")
-    if (!is.null(trim$xmax) && trim$xmax < max(x))
-      warning("the upper limit trim$xmax may not be satisfied in the replicates ",
-              "since it is lower than the maximum value observed ",
-              "in the input series x")
-  }
-
-  # Compute the mean of the maximum entropy density within each
-  # interval in such a way that the 'mean preserving constraint'
-  # is satisfied. (Denoted as m_t in the reference paper.)
-  # The first and last interval means have distinct formulas.
-  # See Theil and Laitinen (1980) for details.
-
-  aux <- colSums( t(embed(xx, 3))*c(0.25,0.5,0.25) )
-  desintxb <- c(0.75*xx[1]+0.25*xx[2], aux, 0.25*xx[n-1]+0.75*xx[n])
-
-  # Generate random numbers from the [0,1] uniform interval and
-  # compute sample quantiles at those points.
-
-  # Generate random numbers from the [0,1] uniform interval.
-
-  ensemble <- matrix(x, nrow=n, ncol=reps)
-  ensemble <- apply(ensemble, 2, meboot.part,
-                    n, z, xmin, xmax, desintxb, reachbnd)
-
-  # So far the object 'ensemble' contains the quantiles.
-  # Now give them time series dependence and heterogeneity.
-
-  qseq <- apply(ensemble, 2, sort)
-
-  # 'qseq' has monotonic series, the correct series is obtained
-  # after applying the order according to 'ordxx' defined above.
-
-  ensemble[ordxx,] <- qseq
-  #ensemble[ordxx$ix,] <- qseq
-
-  if(expand.sd)
-    ensemble <- expand.sd(x=x, ensemble=ensemble, ...)
-  if(force.clt)
-    ensemble <- force.clt(x=x, ensemble=ensemble)
-
-  # scale adjustment
-
-  if (scl.adjustment)
-  {
-    zz <- c(xmin,z,xmax) #extended list of z values
-    #v <- rep(NA, n) #storing within variances
-    #for (i in 2:(n+1)) {
-    #  v[i-1] <- ((zz[i] - zz[i-1])^2) / 12
-    #}
-    v <- diff(zz^2) / 12
-    xb <- mean(x)
-    s1 <- sum((desintxb - xb)^2)
-    uv <- (s1 + sum(v)) / n
-    desired.sd <- sd(x)
-    actualME.sd <- sqrt(uv)
-    if (actualME.sd <= 0)
-      stop("actualME.sd<=0 Error")
-    out <- desired.sd / actualME.sd
-    kappa <- out - 1
-
-    ensemble <- ensemble + kappa * (ensemble - xb)
-  } else
-    kappa <- NULL
-
-  #ensemble <- cbind(x, ensemble)
-  if(is.ts(x)){
-    ensemble <- ts(ensemble, frequency=frequency(x), start=start(x))
-    dimnames(ensemble)[[2]] <- paste("Series", 1:reps)
-    #dimnames(ensemble)[[2]] <- c("original", paste("Series", 1:reps))
-  }
-
-  # Computation time
-  ptm2 <- proc.time(); elapsr <- elapsedtime(ptm1, ptm2)
-  if(elaps)
-    cat("\n  Elapsed time:", elapsr$elaps,
-        paste(elapsr$units, ".", sep=""), "\n")
-
-  list(x=x, ensemble=ensemble, xx=xx, z=z, dv=dv, dvtrim=dvtrim, xmin=xmin,
-       xmax=xmax, desintxb=desintxb, ordxx=ordxx, kappa = kappa, elaps=elapsr)
-}
-
-meboot.part <- function(x, n, z, xmin, xmax, desintxb, reachbnd)
-{
-  # Generate random numbers from the [0,1] uniform interval
-
-  p <- runif(n, min=0, max=1)
-
-  # Compute sample quantiles by linear interpolation at
-  # those 'p'-s (if any) ...
-
-  # ... 'p'-s within the (i1/n, (i1+1)/n)] interval (i1=1,...,n-2).
-
-  q <- .C("mrapprox", p=as.double(p), n=as.integer(n), z=as.double(z),
-          desintxb=as.double(desintxb[-1]), ref23=double(n), qq=double(1),
-          q=double(n), PACKAGE="meboot")$q
-
-  # ... 'p'-s within the [0, (1/n)] interval. (Left tail.)
-
-  ref1 <- which(p <= (1/n))
-  if(length(ref1) > 0){
-    qq <- approx(c(0,1/n), c(xmin,z[1]), p[ref1])$y
-    q[ref1] <- qq
-    if(!reachbnd)  q[ref1] <- qq + desintxb[1]-0.5*(z[1]+xmin)
-  }
-
-  # ... 'p'-s equal to (n-1)/n.
-
-  ref4 <- which(p == ((n-1)/n))
-  if(length(ref4) > 0)
-    q[ref4] <- z[n-1]
-
-  # ... 'p'-s greater than (n-1)/n. (Right tail.)
-
-  ref5 <- which(p > ((n-1)/n))
-  if(length(ref5) > 0){
-    # Right tail proportion p[i]
-    qq <- approx(c((n-1)/n,1), c(z[n-1],xmax), p[ref5])$y
-    q[ref5] <- qq # this implicitly shifts xmax for algorithm
-    if(!reachbnd)  q[ref5] <- qq + desintxb[n]-0.5*(z[n-1]+xmax)
-    # such that the algorithm gives xmax when p[i]=1
-    # this is the meaning of reaching the bounds xmax and xmin
-  }
-
-  q
-}
-
-elapsedtime <- function(ptm1, ptm2)
-{
-  elaps <- (ptm2 - ptm1)[3]
-
-  if(elaps < 60)
-    units <- "seconds"
-  else if(elaps < 3600){
-    elaps <- elaps/60
-    units <- "minutes" }
-  else if(elaps < 86400){
-    elaps <- elaps/3600
-    units <- "hours" }
-  else { elaps <- elaps/86400
-    units <- "days" }
-
-  list(elaps=as.numeric(elaps), units=units)
-}
diff --git a/Code_Python/mrapprox.c b/Code_Python/mrapprox.c
deleted file mode 100644
index aee066a..0000000
--- a/Code_Python/mrapprox.c
+++ /dev/null
@@ -1,40 +0,0 @@
-
-// q <- .C("mrapprox", p=as.double(p), n=as.integer(n), z=as.double(z),
-//         desintxb=as.double(desintxb[-1]), ref23=double(n), qq=double(1),
-//         q=double(n), PACKAGE="meboot")$q
-
-void mrapprox(double *p, int *n, double *z, double *desintxb, double *ref23,
-              double *qq, double *q)
-{
-  int i, j, ii1, k;
-  double i1, nn;
-
-  nn = *n;
-  for(i=0; i < *n; i=i+1){
-    q[i] = -99999;
-    ref23[i] = -99999;
-  }
-
-  j = 0;
-  i1 = 1.0;
-  for(ii1=0; ii1 < *n-2; ii1=ii1+1){
-
-    j = 0;
-    for(i=0; i < *n; i=i+1){
-
-      if( p[i] > i1/nn && p[i] <= (i1+1)/nn ){
-        ref23[j] = i;
-        j = j+1;
-      }
-
-    }
-
-    for(i=0; i < j; i=i+1){
-
-      k = ref23[i];
-      qq[0] = z[ii1] + ( (z[ii1+1]- z[ii1]) / ((i1+1)/nn - i1/nn) ) * (p[k] - i1/nn);
-      q[k] = qq[0] + desintxb[ii1] - 0.5*(z[ii1] + z[ii1+1]);
-    }
-    i1 = i1+1;
-  }
-}
diff --git a/Code_Python/synthetic.py b/Code_Python/synthetic.py
index 8cb6743..f7695b7 100644
--- a/Code_Python/synthetic.py
+++ b/Code_Python/synthetic.py
@@ -154,6 +154,10 @@ def synthetic_FFT(X, multiv=False):
     - univariate and single time-series
     - univariate and multi-time series (can be used to generate multi from same)
     - multi-variate multi-time series
+
+    generate more than n_samples, remove multi-time series for uni-variate,
+    use a 3D matrix looping on the time series for multi-variate
+
     """
     n_samples, n_series = X.shape
@@ -196,7 +200,7 @@ def synthetic_sampling(X, replace=True):
     """
-    generate more than n_samples?
+    generate more than n_samples, remove multi-time series
     """
     n_samples, n_series = X.shape
     X_synt = np.zeros_like(X)
@@ -231,30 +235,53 @@ def synthetic_MEboot(X, alpha=0.1):
         # Sort the time series keeping track of the original position
         idx = np.argsort(X[:, ts])
         Y = X[idx, ts]
-        print(idx, idx.shape)
-        print(Y, Y.shape)
 
-        # Compute the trimmed mean
+        # Compute the trimmed mean
+        # trimmed_mean = np.abs(np.diff(x)).mean()
         g = int(np.floor(n * alpha))
         r = n * alpha - g
-        print(n, g, r)
         m_trm = ((1.0 - r) * (Y[g] + Y[n-g-1]) + Y[g+1:n-g-1].sum()) \
             / (n * (1.0 - 2.0 * alpha))
-        print(m_trm)
 
         # Compute the intermediate points
         Z = np.zeros(n+1)
-        Z[1:-1] = (Y[0:-1] + Y[1:]) / 2.0
         Z[0] = Y[0] - m_trm
+        Z[1:-1] = (Y[0:-1] + Y[1:]) / 2.0
         Z[n] = Y[n-1] + m_trm
-        print(Z, Z.shape)
 
         # Compute the interval means
         mt = np.zeros(n)
         mt[0] = 0.75 * Y[0] + 0.25 * Y[1]
         mt[1:n-1] = 0.25 * Y[0:n-2] + 0.5 * Y[1:n-1] + 0.25 * Y[2:n]
         mt[n-1] = 0.25 * Y[n-2] + 0.75 * Y[n-1]
-        print(mt)
 
+        # Randomly generate new points
+        t_w = np.random.rand(n)
+        # t_w = np.array([0.12, 0.83, 0.53, 0.59, 0.11])
+        # TODO: should the ordering be done here, and the correction inside
+        # the intervals be removed?
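+        # (The draws in t_w act as uniform quantile levels: np.interp below
+        # maps each one through the piecewise-linear ME quantile function
+        # defined by the knots (t, Z).)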
+        # Interpolate new points
+        t = np.linspace(0.0, 1.0, num=n+1)
+        w_int = np.interp(t_w, t, Z)
+        print('w_int=', w_int)
+
+        # Correct the new points to satisfy the mass constraint
+        idw = (np.floor_divide(t_w, 1.0 / n)).astype(int)
+        print('idw=', idw)
+        w_corr = w_int + mt[idw] - (Z[idw] + Z[idw+1]) / 2.0
+        print('w_corr', w_corr)
+
+        # Enforce limits
+        # w_corr = np.fmin(np.fmax(w_corr, Z[0]), Z[n])
+
+        # Re-order the sampled values
+        w_ord = np.sort(w_corr)
+        # print(w_ord)
+        # w_ord = np.array([5.85, 6.7, 8.9, 10.7, 23.95])
+
+        # Recover the time dependencies
+        W = np.zeros(n)
+        W[idx] = w_ord
+
+    return W
diff --git a/Code_Python/test.py b/Code_Python/test.py
index c3fdeb1..eccca84 100644
--- a/Code_Python/test.py
+++ b/Code_Python/test.py
@@ -13,8 +13,8 @@ ToDo:
 - example for alpha-beta-gamma using variable sigma as in financial time
   series (see Ehler)
 - example using noisy multi-sine-waves
-- synt: boot, paper Vinod (as a class?)
-  vectors must be ( .., 1)
+- synt: remove multi-series unless multi-variate, and add the number of
+  instances (fft 3D)
 """
 import sys
@@ -97,5 +97,5 @@ np.random.seed(1294404794)
 # bb = syn.synthetic_boot(aa, replace=False)
 # print(bb)
 aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1)
-# print(aa)
-syn.synthetic_MEboot(aa)
+W = syn.synthetic_MEboot(aa, alpha=0.1)
+# print(W.sum())
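A minimal usage sketch for the new MEBOOT class (module path, data shapes, and
seed are illustrative assumptions, not part of the patch):

    import numpy as np
    from main_ME import MEBOOT

    x = np.random.rand(2, 100)            # 2 series, 100 observations each
    eng = MEBOOT(x, trimval=0.1, seed=0)

    rep = eng.bootstrap(fiv=5)            # one ME replicate, shape (2, 100)
    ens = eng.bootstrap_clt(nt=50)        # 50 replicates with the CLT forced
    print(rep.shape, ens.shape)           # (2, 100) (50, 2, 100)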