diff --git a/Code_Python/meboot.r b/Code_Python/meboot.r
new file mode 100644
index 0000000..d717178
--- /dev/null
+++ b/Code_Python/meboot.r
@@ -0,0 +1,246 @@
+
+#meboot <- function(x, reps=999, trim=0.10, reachbnd=TRUE,
+#  expand.sd=TRUE, force.clt=TRUE, elaps=FALSE,
+#  colsubj, coldata, coltimes, ...){
+#  UseMethod("meboot", x)
+#}
+
+meboot <- function(x, reps=999, trim=list(trim=0.10, xmin=NULL, xmax=NULL), reachbnd=TRUE,
+  expand.sd=TRUE, force.clt=TRUE,
+  scl.adjustment=FALSE, sym=FALSE, elaps=FALSE,
+  colsubj, coldata, coltimes, ...)
+{
+  if ("pdata.frame" %in% class(x))
+  {
+    res <- meboot.pdata.frame(x, reps, trim$trim, reachbnd,
+      expand.sd, force.clt, scl.adjustment, sym, elaps,
+      colsubj, coldata, coltimes, ...)
+    return(res)
+  }
+
+  if (reps == 1 && isTRUE(force.clt))
+  {
+    force.clt <- FALSE
+    warning("force.clt was set to FALSE since the ensemble contains only one replicate.")
+  }
+
+  if (!is.list(trim)) {
+    trimval <- trim
+  } else {
+    trimval <- if (is.null(trim$trim)) 0.1 else trim$trim
+  }
+
+  ptm1 <- proc.time()
+
+  n <- length(x)
+
+  # Sort the original data in increasing order and
+  # store the ordering index vector.
+
+  xx <- sort(x)
+  ordxx <- order(x)
+  #ordxx <- sort.int(x, index.return=TRUE)
+
+  # Symmetry
+
+  if (sym)
+  {
+    xxr <- rev(xx) # reversed order stats
+    xx.sym <- mean(xx) + 0.5*(xx - xxr) # symmetrized order stats
+    xx <- xx.sym # replace the order stats by the symmetrized ones
+  }
+
+  # Compute intermediate points on the sorted series.
+
+  z <- rowMeans(embed(xx, 2))
+
+  # Compute the lower limit for the left tail ('xmin') and
+  # the upper limit for the right tail ('xmax').
+  # This is done by computing the 'trim' (e.g. 10%) trimmed mean
+  # of the deviations between all consecutive observations ('dv').
+  # The tails are thus uniformly distributed.
+
+  dv <- abs(diff(as.numeric(x)))
+  dvtrim <- mean(dv, trim=trimval)
+
+  if (is.list(trim))
+  {
+    if (is.null(trim$xmin))
+    {
+      xmin <- xx[1] - dvtrim
+    } else
+      xmin <- trim$xmin
+
+    if (is.null(trim$xmax))
+    {
+      xmax <- xx[n] + dvtrim
+    } else
+      xmax <- trim$xmax
+
+    if (!is.null(trim$xmin) || !is.null(trim$xmax))
+    {
+      if (isTRUE(force.clt))
+      {
+        expand.sd <- FALSE
+        force.clt <- FALSE
+        warning("expand.sd and force.clt were set to FALSE in order to ",
+          "enforce the limits xmin/xmax.")
+      }
+    }
+  } else {
+    xmin <- xx[1] - dvtrim
+    xmax <- xx[n] + dvtrim
+  }
+
+  # Do this here so that these warnings are printed after
+  # the warnings above (if necessary).
+
+  if (is.list(trim))
+  {
+    if (!is.null(trim$xmin) && trim$xmin > min(x))
+      warning("the lower limit trim$xmin may not be satisfied in the replicates ",
+        "since it is higher than the minimum value observed ",
+        "in the input series x")
+    if (!is.null(trim$xmax) && trim$xmax < max(x))
+      warning("the upper limit trim$xmax may not be satisfied in the replicates ",
+        "since it is lower than the maximum value observed ",
+        "in the input series x")
+  }
+
+  # Compute the mean of the maximum entropy density within each
+  # interval in such a way that the 'mean preserving constraint'
+  # is satisfied. (Denoted as m_t in the reference paper.)
+  # The first and last interval means have distinct formulas.
+  # See Theil and Laitinen (1980) for details.
+
+  aux <- colSums( t(embed(xx, 3))*c(0.25, 0.5, 0.25) )
+  desintxb <- c(0.75*xx[1]+0.25*xx[2], aux, 0.25*xx[n-1]+0.75*xx[n])
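+
+  # For illustration, with the series x = (4, 12, 36, 20, 8) used in
+  # test.py (cf. the worked example in the Vinod and Lopez-de-Lacalle
+  # reference), the formulas above give:
+  #   sorted data        xx = (4, 8, 12, 20, 36)
+  #   midpoints          z  = (6, 10, 16, 28)
+  #   trimmed mean   dvtrim = 15, so xmin = -11 and xmax = 51
+  #   interval means desintxb = (5, 8, 13, 22, 32)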
+
+  # Generate random numbers from the [0,1] uniform interval and
+  # compute sample quantiles at those points.
+
+  ensemble <- matrix(x, nrow=n, ncol=reps)
+  ensemble <- apply(ensemble, 2, meboot.part,
+    n, z, xmin, xmax, desintxb, reachbnd)
+
+  # So far the object 'ensemble' contains the quantiles.
+  # Now give them time series dependence and heterogeneity.
+
+  qseq <- apply(ensemble, 2, sort)
+
+  # 'qseq' contains monotonic series; the correct series are obtained
+  # by applying the ordering 'ordxx' defined above.
+
+  ensemble[ordxx,] <- qseq
+  #ensemble[ordxx$ix,] <- qseq
+
+  if(expand.sd)
+    ensemble <- expand.sd(x=x, ensemble=ensemble, ...)
+  if(force.clt)
+    ensemble <- force.clt(x=x, ensemble=ensemble)
+
+  # Scale adjustment
+
+  if (scl.adjustment)
+  {
+    zz <- c(xmin, z, xmax) # extended list of z values
+    #v <- rep(NA, n) # storing within variances
+    #for (i in 2:(n+1)) {
+    #  v[i-1] <- ((zz[i] - zz[i-1])^2) / 12
+    #}
+    v <- diff(zz)^2 / 12 # within-interval variances, as in the loop above
+    xb <- mean(x)
+    s1 <- sum((desintxb - xb)^2)
+    uv <- (s1 + sum(v)) / n
+    desired.sd <- sd(x)
+    actualME.sd <- sqrt(uv)
+    if (actualME.sd <= 0)
+      stop("actualME.sd <= 0 error")
+    out <- desired.sd / actualME.sd
+    kappa <- out - 1
+
+    ensemble <- ensemble + kappa * (ensemble - xb)
+  } else
+    kappa <- NULL
+
+  #ensemble <- cbind(x, ensemble)
+  if(is.ts(x)){
+    ensemble <- ts(ensemble, frequency=frequency(x), start=start(x))
+    dimnames(ensemble)[[2]] <- paste("Series", 1:reps)
+    #dimnames(ensemble)[[2]] <- c("original", paste("Series", 1:reps))
+  }
+
+  # Computation time
+  ptm2 <- proc.time(); elapsr <- elapsedtime(ptm1, ptm2)
+  if(elaps)
+    cat("\n  Elapsed time:", elapsr$elaps,
+      paste(elapsr$units, ".", sep=""), "\n")
+
+  list(x=x, ensemble=ensemble, xx=xx, z=z, dv=dv, dvtrim=dvtrim, xmin=xmin,
+    xmax=xmax, desintxb=desintxb, ordxx=ordxx, kappa=kappa, elaps=elapsr)
+}
+
+meboot.part <- function(x, n, z, xmin, xmax, desintxb, reachbnd)
+{
+  # Generate random numbers from the [0,1] uniform interval.
+
+  p <- runif(n, min=0, max=1)
+
+  # Compute sample quantiles by linear interpolation at
+  # those 'p'-s (if any) ...
+
+  # ... 'p'-s within the (i1/n, (i1+1)/n] intervals (i1=1,...,n-2).
+
+  q <- .C("mrapprox", p=as.double(p), n=as.integer(n), z=as.double(z),
+    desintxb=as.double(desintxb[-1]), ref23=double(n), qq=double(1),
+    q=double(n), PACKAGE="meboot")$q
+
+  # ... 'p'-s within the [0, 1/n] interval. (Left tail.)
+
+  ref1 <- which(p <= (1/n))
+  if(length(ref1) > 0){
+    qq <- approx(c(0, 1/n), c(xmin, z[1]), p[ref1])$y
+    q[ref1] <- qq
+    if(!reachbnd) q[ref1] <- qq + desintxb[1] - 0.5*(z[1]+xmin)
+  }
+
+  # ... 'p'-s equal to (n-1)/n.
+
+  ref4 <- which(p == ((n-1)/n))
+  if(length(ref4) > 0)
+    q[ref4] <- z[n-1]
+
+  # ... 'p'-s greater than (n-1)/n. (Right tail.)
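+  # (Linear interpolation between z[n-1] and xmax. With reachbnd=TRUE the
+  # interpolated value is kept, so the quantile reaches xmax exactly at p=1;
+  # with reachbnd=FALSE it is shifted to preserve the last interval mean
+  # desintxb[n] instead.)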
+
+  ref5 <- which(p > ((n-1)/n))
+  if(length(ref5) > 0){
+    # Right tail proportion p[i]
+    qq <- approx(c((n-1)/n, 1), c(z[n-1], xmax), p[ref5])$y
+    q[ref5] <- qq # this implicitly shifts the quantile towards xmax
+    if(!reachbnd) q[ref5] <- qq + desintxb[n] - 0.5*(z[n-1]+xmax)
+    # so that the algorithm gives xmax when p[i]=1;
+    # this is the meaning of reaching the bounds xmax and xmin
+  }
+
+  q
+}
+
+elapsedtime <- function(ptm1, ptm2)
+{
+  elaps <- (ptm2 - ptm1)[3]
+
+  if(elaps < 60)
+    units <- "seconds"
+  else if(elaps < 3600){
+    elaps <- elaps/60
+    units <- "minutes"
+  } else if(elaps < 86400){
+    elaps <- elaps/3600
+    units <- "hours"
+  } else {
+    elaps <- elaps/86400
+    units <- "days"
+  }
+
+  list(elaps=as.numeric(elaps), units=units)
+}
diff --git a/Code_Python/mrapprox.c b/Code_Python/mrapprox.c
new file mode 100644
index 0000000..aee066a
--- /dev/null
+++ b/Code_Python/mrapprox.c
@@ -0,0 +1,40 @@
+
+// Called from meboot.part in meboot.r as:
+// q <- .C("mrapprox", p=as.double(p), n=as.integer(n), z=as.double(z),
+//         desintxb=as.double(desintxb[-1]), ref23=double(n), qq=double(1),
+//         q=double(n), PACKAGE="meboot")$q
+
+void mrapprox(double *p, int *n, double *z, double *desintxb, double *ref23,
+              double *qq, double *q)
+{
+    int i, j, ii1, k;
+    double i1, nn;
+
+    nn = *n;
+    for(i=0; i < *n; i=i+1){
+        q[i] = -99999;
+        ref23[i] = -99999;
+    }
+
+    j = 0;
+    i1 = 1.0;
+    for(ii1=0; ii1 < *n-2; ii1=ii1+1){
+
+        // Collect the indices of the p's falling in (i1/n, (i1+1)/n]
+        j = 0;
+        for(i=0; i < *n; i=i+1){
+            if( p[i] > i1/nn && p[i] <= (i1+1)/nn ){
+                ref23[j] = i;
+                j = j+1;
+            }
+        }
+
+        // Interpolate linearly between z[ii1] and z[ii1+1], then shift so
+        // that the interval mean desintxb[ii1] is preserved
+        for(i=0; i < j; i=i+1){
+            k = ref23[i];
+            qq[0] = z[ii1] + ( (z[ii1+1] - z[ii1]) / ((i1+1)/nn - i1/nn) ) * (p[k] - i1/nn);
+            q[k] = qq[0] + desintxb[ii1] - 0.5*(z[ii1] + z[ii1+1]);
+        }
+        i1 = i1+1;
+    }
+}
diff --git a/Code_Python/synthetic.py b/Code_Python/synthetic.py
index a172e84..8cb6743 100644
--- a/Code_Python/synthetic.py
+++ b/Code_Python/synthetic.py
@@ -61,97 +61,101 @@ def scale_data(X, param=()):
     return Xs, param
 
 
-def value2diff(X, mode=None):
+def value2diff(X, percent=True):
     """
-    from value to difference in abs or %
-    diff in value first element is zero
-    diff in % first element is one
+    From values to differences, absolute or percent.
+    dX has one row fewer than X.
     """
-
-    # Difference in value
-    if (mode == 'V'):
-        dX = np.zeros_like(X)
-        dX[1:, :] = X[1:, :] - X[:-1, :]
-    # Difference in percent
+    # Difference in percent
+    if (percent):
+        dX = X[1:, :] / X[:-1, :] - 1.0
+
+    # Difference in value
     else:
-        dX = np.ones_like(X)
-        dX[1:, :] = X[1:, :] / X[:-1, :] - 1.0
+        dX = X[1:, :] - X[:-1, :]
 
     return dX
 
 
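+# Round-trip example: for X = [[100.], [110.], [99.]], value2diff(X) with
+# percent=True gives dX = [[0.10], [-0.10]]; diff2value(dX) then returns
+# [[1.00], [1.10], [0.99]], and multiplying by the starting value X0 = 100
+# recovers the original series (100, 110, 99).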
-def diff2value(dX, mode=None):
+def diff2value(dX, percent=True):
     """
-    from difference in abs or % to value (first row should be all zeros but
-    will be over-written
+    From differences, absolute or percent, back to values.
+    X gains one row with respect to dX.
 
-    Reference X[0,:] is assumed to be zero. If X0[0,:] is the desired
-    reference, the actual vector X can be determined as X0+X
+    Value from percent: the first row is set to one. If X0 defines the
+    starting values, then X0*X gives the actual values.
 
-    Reference X[0,:] is assumed to be one. If X0[0,:] is the desired
-    reference, the actual vector X can be determined as X0*X
+    Value from difference: the first row is set to zero. If X0 defines the
+    starting values, then X0+X gives the actual values.
     """
-    # Value from the difference (first row equal to zero)
-    # X[0, :] = 0
-    # X[1, :] = X[0, :] + dX[1, :] = dX[1, :]
-    # X[2, :] = X[0, :] + dX[1, :] + dX[2, :] = dX[1, :] + dX[2, :]
-    # ....
-    if (mode == 'V'):
-        X = np.zeros_like(dX)
-        X[1:, :] = np.cumsum(dX[1:, :], axis=0)
-
-    # Value from percent (first row equal to 1)
+    n_rows, n_cols = dX.shape
+    X = np.zeros((n_rows+1, n_cols))
+
+    # Value from percent
     # X[0, :] = 1
-    # X[1, :] = X[0, :] * (1 + dX[1, :]) = (1 + dX[1, :])
-    # X[2, :] = X[1, :] * (1 + dX[2, :])
-    #         = X[0, :] * (1 + dX[1, :]) * (1 + dX[2, :])
-    #         = (1 + dX[1, :]) * (1 + dX[2, :])
+    # X[1, :] = X[0, :] * (1 + dX[0, :]) = (1 + dX[0, :])
+    # X[2, :] = X[1, :] * (1 + dX[1, :])
+    #         = (1 + dX[0, :]) * (1 + dX[1, :])
     # ....
-    else:
-        X = np.ones_like(dX)
+    if (percent):
+        X[0, :] = 1.0
         X[1:, :] = np.cumprod((1.0 + dX), axis=0)
 
+    # Value from difference
+    # X[0, :] = 0
+    # X[1, :] = X[0, :] + dX[0, :] = dX[0, :]
+    # X[2, :] = X[0, :] + dX[0, :] + dX[1, :] = dX[0, :] + dX[1, :]
+    # ....
+    else:
+        # First row already set to zero
+        X[1:, :] = np.cumsum(dX, axis=0)
+
     return X
 
 
-def synthetic_wave(per, amp=None, pha=None, num=1000):
+def synthetic_wave(P, A=None, phi=None, num=1000):
     """
     Generates a multi-sinewave.
-        P = [ P1 P2 ... Pn ]            Periods
-        A = [ A1 A2 ... An ]            Amplitudes
-        PH = [PH1 PH2 ... PHn]          Phases (rad)
+        P   = [P_1, P_2, ... P_n]           Periods
+        A   = [A_1, A_2, ... A_n]           Amplitudes
+        phi = [phi_1, phi_2, ... phi_n]     Phases (rad)
 
     Default amplitudes are ones
     Default phases are zeros
     Time is from 0 to largest period (default 1000 steps)
     """
-    n_waves = len(per)
-    per = np.asarray(per)
+    n_waves = len(P)
+    P = np.asarray(P)
 
-    # Define amplitudes and phases
-    if (amp is None):
-        amp = np.ones(n_waves)
+    # Define amplitudes
+    if (A is None):
+        A = np.ones(n_waves)
     else:
-        amp = np.asarray(amp)
-    if (pha is None):
-        pha = np.zeros(n_waves)
+        A = np.asarray(A)
+
+    # Define phases
+    if (phi is None):
+        phi = np.zeros(n_waves)
     else:
-        pha = np.asarray(pha)
+        phi = np.asarray(phi)
 
     # Add all the waves
-    t = np.linspace(0.0, np.amax(per), num=num)
+    t = np.linspace(0.0, np.amax(P), num=num)
     f = np.zeros(len(t))
     for i in range(n_waves):
-        f = f + amp[i] * np.sin(2.0 * np.pi * t / per[i] + pha[i])
+        f = f + A[i] * np.sin(2.0 * np.pi * t / P[i] + phi[i])
 
     return t, f
 
 
-def synthetic_series(X, multiv=False):
+def synthetic_FFT(X, multiv=False):
     """
+    Generates synthetic (surrogate) time-series via FFT. Handles:
+    - univariate, single time-series
+    - univariate, multiple time-series (can be used to generate several
+      synthetic series from the same one)
+    - multivariate, multiple time-series
     """
-    n_samples, n_series = data.shape
+    n_samples, n_series = X.shape
 
     # The number of samples must be odd (if the number is even drop the last value)
     if ((n_samples % 2) == 0):
@@ -188,3 +192,69 @@
     X_synt = np.real(np.fft.ifft(synt_fft, axis=0))
 
     return X_synt
+
+
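+# Note: synthetic_FFT follows the surrogate-data approach of Prichard and
+# Theiler (see the README references): the FFT amplitudes of the input are
+# kept and the phases are randomized, so each synthetic series preserves
+# the power spectrum (and hence the autocorrelation) of the original.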
+ """ + n_samples, n_series = X.shape + X_synt = np.zeros_like(X) + + # Sampling with replacement + if (replace): + idx = np.random.randint(0, n_samples, size=(n_samples, n_series)) + i = np.arange(n_series) + X_synt[:, i] = X[idx[:, i], i] + + # Sampling without replacement + else: + idx = np.zeros_like(X) + for j in range(n_series): + idx[:, j] = np.random.permutation(n_samples) + i = np.arange(n_series) + X_synt[:, i] = X[idx[:, i], i] + + return X_synt + + +def synthetic_MEboot(X, alpha=0.1): + """ + """ + n_samples, n_series = X.shape + X_synt = np.zeros_like(X) + + # Loop over time-series + n = n_samples + for ts in range(n_series): + + # Sort the time series keeping track of the original position + idx = np.argsort(X[:, ts]) + Y = X[idx, ts] + print(idx, idx.shape) + print(Y, Y.shape) + + # Compute the trimmed mean + g = int(np.floor(n * alpha)) + r = n * alpha - g + print(n, g, r) + m_trm = ((1.0 - r) * (Y[g] + Y[n-g-1]) + Y[g+1:n-g-1].sum()) \ + / (n * (1.0 - 2.0 * alpha)) + print(m_trm) + + # Compute the intermediate points + Z = np.zeros(n+1) + Z[1:-1] = (Y[0:-1] + Y[1:]) / 2.0 + Z[0] = Y[0] - m_trm + Z[n] = Y[n-1] + m_trm + print(Z, Z.shape) + + # Compute the interval means + mt = np.zeros(n) + mt[0] = 0.75 * Y[0] + 0.25 * Y[1] + mt[1:n-1] = 0.25 * Y[0:n-2] + 0.5 * Y[1:n-1] + 0.25 * Y[2:n] + mt[n-1] = 0.25 * Y[n-2] + 0.75 * Y[n-1] + print(mt) + + + diff --git a/Code_Python/test.py b/Code_Python/test.py index b1b889b..c3fdeb1 100644 --- a/Code_Python/test.py +++ b/Code_Python/test.py @@ -15,8 +15,6 @@ ToDo: - example using noisy multi-sine-waves - synt: boot, paper Vinod (as a class?) - vectors must be ( .., 1) -- reduce the vector diff by one and pass initial value - (with zero/one as default) """ import sys @@ -56,30 +54,48 @@ np.random.seed(1294404794) # signals = (spx.data[:, 0], Yc[:, 0], Yp[:, 0]) # utl.plot_signals(signals, 0, 50) -# t, f = utl.synthetic_wave([1., 2., 3.], A=None, PH=None, num=30) +# t, f = syn.synthetic_wave([1., 2., 3.], A=None, phi=None, num=100) # plt.plot(t,f) # plt.show() -aa = np.array([ - [ 0.8252, 0.2820], - [ 1.3790, 0.0335], - [-1.0582, -1.3337], - [-0.4686, 1.1275], - [-0.2725, 0.3502], - [ 1.0984, -0.2991], - [-0.2779, 0.0229], - [ 0.7015, -0.2620], - [-2.0518, -1.7502], - [-0.3538, -0.2857], - [-0.8236, -0.8314], - [-1.5771, -0.9792], - [ 0.5080, -1.1564]]) -# synt_aa = utl.synthetic_series(data, False) -# plt.plot(synt_aa) +# aa = np.array([ +# [ 0.8252, 0.2820], +# [ 1.3790, 0.0335], +# [-1.0582, -1.3337], +# [-0.4686, 1.1275], +# [-0.2725, 0.3502], +# [ 1.0984, -0.2991], +# [-0.2779, 0.0229], +# [ 0.7015, -0.2620], +# [-2.0518, -1.7502], +# [-0.3538, -0.2857], +# [-0.8236, -0.8314], +# [-1.5771, -0.9792], +# [ 0.5080, -1.1564]]) +# synt_data1 = syn.synthetic_FFT(data, False) +# synt_data2 = syn.synthetic_FFT(data, False) +# plt.plot(synt_data1) +# plt.plot(synt_data2) # plt.plot(data) +# names = ['syn1', 'syn2', 'spx'] +# plt.legend(names) # plt.show() -print(data[0:10, :]) -bb = syn.value2diff(data, mode='V') -print(bb[0:10, :]) -bb[0, 0] = 1399.48 -cc = syn.diff2value(bb, mode='V') -print(cc[0:10, :]) \ No newline at end of file +# percent = False +# print(data[0:10, :]) +# bb = syn.value2diff(data, percent) +# print(bb[0:10, :]) +# cc = syn.diff2value(bb, percent) +# print(cc[0:10, :]+1399.48) +# aa = np.arange(10,28).reshape(6,3) +# print(aa) +# idx = np.zeros_like(aa) +# bb = np.zeros_like(aa) +# for i in range(aa.shape[1]): +# idx[:, i] = np.random.permutation(aa.shape[0]) +# print(idx) +# i = np.arange(aa.shape[1]) +# bb[:, i] = 
+# bb = syn.synthetic_sampling(aa, replace=False)
+# print(bb)
+aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1)
+# print(aa)
+aa_synt = syn.synthetic_MEboot(aa)
+# print(aa_synt)
diff --git a/README.md b/README.md
index 5435712..ecbbc45 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,8 @@
 - D. Prichard, and J. Theiler, "[Generating surrogate data for time series with several simultaneously measured variables](https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.73.951)."
 
+- H. Vinod, and J. Lopez-de-Lacalle, "[Maximum entropy bootstrap for time series: the meboot R package](https://www.jstatsoft.org/article/view/v029i05)."
+
 ## Characteristics
 
 ## Parameters
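+
+## Example
+
+A minimal sketch of the generators defined in `Code_Python/synthetic.py`
+(inputs are arrays shaped `(n_samples, n_series)`; names as in this PR):
+
+```python
+import numpy as np
+import synthetic as syn
+
+X = np.array([4.0, 12.0, 36.0, 20.0, 8.0]).reshape(5, 1)
+X_fft = syn.synthetic_FFT(X)                 # phase-randomized surrogate
+X_smp = syn.synthetic_sampling(X)            # resampling with replacement
+X_meb = syn.synthetic_MEboot(X, alpha=0.1)   # maximum entropy bootstrap
+```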