start meboot function

master
Gabriele Gilardi 2020-06-22 21:39:53 +09:00
rodzic f080286c3d
commit 9763bbb652
5 zmienionych plików z 446 dodań i 72 usunięć

Wyświetl plik

@ -0,0 +1,246 @@
#meboot <- function(x, reps=999, trim=0.10, reachbnd=TRUE,
# expand.sd=TRUE, force.clt=TRUE, elaps=FALSE,
# colsubj, coldata, coltimes, ...){
# UseMethod("meboot", x)
#}
meboot <- function(x, reps=999, trim=list(trim=0.10, xmin=NULL, xmax=NULL), reachbnd=TRUE,
expand.sd=TRUE, force.clt=TRUE,
scl.adjustment = FALSE, sym = FALSE, elaps=FALSE,
colsubj, coldata, coltimes,...)
{
if ("pdata.frame" %in% class(x))
{
res <- meboot.pdata.frame (x, reps, trim$trim, reachbnd,
expand.sd, force.clt, scl.adjustment, sym, elaps,
colsubj, coldata, coltimes, ...)
return(res)
}
if (reps == 1 && isTRUE(force.clt))
{
force.clt <- FALSE
warning("force.clt was set to FALSE since the ensemble contains only one replicate.")
}
if (!is.list(trim)) {
trimval <- trim
} else {
trimval <- if (is.null(trim$trim)) 0.1 else trim$trim
}
ptm1 <- proc.time()
n <- length(x)
# Sort the original data in increasing order and
# store the ordering index vector.
xx <- sort(x)
ordxx <- order(x)
#ordxx <- sort.int(x, index.return=TRUE)
# symmetry
if (sym)
{
xxr <- rev(xx) #reordered values
xx.sym <- mean(xx) + 0.5*(xx - xxr) #symmetrized order stats
xx <- xx.sym #replace order stats by symmetrized ones
}
# Compute intermediate points on the sorted series.
z <- rowMeans(embed(xx, 2))
# Compute lower limit for left tail ('xmin') and
# upper limit for right tail ('xmax').
# This is done by computing the 'trim' (e.g. 10%) trimmed mean
# of deviations among all consecutive observations ('dv').
# Thus the tails are uniform distributed.
dv <- abs(diff(as.numeric(x)))
dvtrim <- mean(dv, trim=trimval)
if (is.list(trim))
{
if (is.null(trim$xmin))
{
xmin <- xx[1] - dvtrim
} else
xmin <- trim$xmin
if (is.null(trim$xmax))
{
xmax <- xx[n] + dvtrim
} else
xmax <- trim$xmax
if (!is.null(trim$xmin) || !is.null(trim$xmax))
{
if (isTRUE(force.clt))
{
expand.sd <- FALSE
force.clt <- FALSE
warning("expand.sd and force.clt were set to FALSE in order to ",
"enforce the limits xmin/xmax.")
}
}
} else {
xmin <- xx[1] - dvtrim
xmax <- xx[n] + dvtrim
}
# do this here so that this warnings are printed after
# the above warnings (if necessary)
if (is.list(trim))
{
if (!is.null(trim$xmin) && trim$xmin > min(x))
warning("the lower limit trim$xmin may not be satisfied in the replicates ",
"since it is higher than the minimum value observed ",
"in the input series x")
if (!is.null(trim$xmax) && trim$xmax < max(x))
warning("the upper limit trim$xmax may not be satisfied in the replicates ",
"since it is lower than the maximum value observed ",
"in the input series x")
}
# Compute the mean of the maximum entropy density within each
# interval in such a way that the 'mean preserving constraint'
# is satisfied. (Denoted as m_t in the reference paper.)
# The first and last interval means have distinct formulas.
# See Theil and Laitinen (1980) for details.
aux <- colSums( t(embed(xx, 3))*c(0.25,0.5,0.25) )
desintxb <- c(0.75*xx[1]+0.25*xx[2], aux, 0.25*xx[n-1]+0.75*xx[n])
# Generate random numbers from the [0,1] uniform interval and
# compute sample quantiles at those points.
# Generate random numbers from the [0,1] uniform interval.
ensemble <- matrix(x, nrow=n, ncol=reps)
ensemble <- apply(ensemble, 2, meboot.part,
n, z, xmin, xmax, desintxb, reachbnd)
# So far the object 'ensemble' contains the quantiles.
# Now give them time series dependence and heterogeneity.
qseq <- apply(ensemble, 2, sort)
# 'qseq' has monotonic series, the correct series is obtained
# after applying the order according to 'ordxx' defined above.
ensemble[ordxx,] <- qseq
#ensemble[ordxx$ix,] <- qseq
if(expand.sd)
ensemble <- expand.sd(x=x, ensemble=ensemble, ...)
if(force.clt)
ensemble <- force.clt(x=x, ensemble=ensemble)
# scale adjustment
if (scl.adjustment)
{
zz <- c(xmin,z,xmax) #extended list of z values
#v <- rep(NA, n) #storing within variances
#for (i in 2:(n+1)) {
# v[i-1] <- ((zz[i] - zz[i-1])^2) / 12
#}
v <- diff(zz^2) / 12
xb <- mean(x)
s1 <- sum((desintxb - xb)^2)
uv <- (s1 + sum(v)) / n
desired.sd <- sd(x)
actualME.sd <- sqrt(uv)
if (actualME.sd <= 0)
stop("actualME.sd<=0 Error")
out <- desired.sd / actualME.sd
kappa <- out - 1
ensemble <- ensemble + kappa * (ensemble - xb)
} else
kappa <- NULL
#ensemble <- cbind(x, ensemble)
if(is.ts(x)){
ensemble <- ts(ensemble, frequency=frequency(x), start=start(x))
dimnames(ensemble)[[2]] <- paste("Series", 1:reps)
#dimnames(ensemble)[[2]] <- c("original", paste("Series", 1:reps))
}
# Computation time
ptm2 <- proc.time(); elapsr <- elapsedtime(ptm1, ptm2)
if(elaps)
cat("\n Elapsed time:", elapsr$elaps,
paste(elapsr$units, ".", sep=""), "\n")
list(x=x, ensemble=ensemble, xx=xx, z=z, dv=dv, dvtrim=dvtrim, xmin=xmin,
xmax=xmax, desintxb=desintxb, ordxx=ordxx, kappa = kappa, elaps=elapsr)
}
meboot.part <- function(x, n, z, xmin, xmax, desintxb, reachbnd)
{
# Generate random numbers from the [0,1] uniform interval
p <- runif(n, min=0, max=1)
# Compute sample quantiles by linear interpolation at
# those 'p'-s (if any) ...
# ... 'p'-s within the (i1/n, (i1+1)/n)] interval (i1=1,...,n-2).
q <- .C("mrapprox", p=as.double(p), n=as.integer(n), z=as.double(z),
desintxb=as.double(desintxb[-1]), ref23=double(n), qq=double(1),
q=double(n), PACKAGE="meboot")$q
# ... 'p'-s within the [0, (1/n)] interval. (Left tail.)
ref1 <- which(p <= (1/n))
if(length(ref1) > 0){
qq <- approx(c(0,1/n), c(xmin,z[1]), p[ref1])$y
q[ref1] <- qq
if(!reachbnd) q[ref1] <- qq + desintxb[1]-0.5*(z[1]+xmin)
}
# ... 'p'-s equal to (n-1)/n.
ref4 <- which(p == ((n-1)/n))
if(length(ref4) > 0)
q[ref4] <- z[n-1]
# ... 'p'-s greater than (n-1)/n. (Right tail.)
ref5 <- which(p > ((n-1)/n))
if(length(ref5) > 0){
# Right tail proportion p[i]
qq <- approx(c((n-1)/n,1), c(z[n-1],xmax), p[ref5])$y
q[ref5] <- qq # this implicitly shifts xmax for algorithm
if(!reachbnd) q[ref5] <- qq + desintxb[n]-0.5*(z[n-1]+xmax)
# such that the algorithm gives xmax when p[i]=1
# this is the meaning of reaching the bounds xmax and xmin
}
q
}
elapsedtime <- function(ptm1, ptm2)
{
elaps <- (ptm2 - ptm1)[3]
if(elaps < 60)
units <- "seconds"
else if(elaps < 3600){
elaps <- elaps/60
units <- "minutes" }
else if(elaps < 86400){
elaps <- elaps/3600
units <- "hours" }
else { elaps <- elaps/86400
units <- "days" }
list(elaps=as.numeric(elaps), units=units)
}

Wyświetl plik

@ -0,0 +1,40 @@
// q <- .C("mrapprox", p=as.double(p), n=as.integer(n), z=as.double(z),
// desintxb=as.double(desintxb[-1]), ref23=double(n), qq=double(1),
// q=double(n), PACKAGE="meboot")$q
void mrapprox(double *p, int *n, double *z, double *desintxb, double *ref23,
double *qq, double *q)
{
int i, j, ii1, k;
double i1, nn;
nn = *n;
for(i=0; i < *n; i=i+1){
q[i] = -99999;
ref23[i] = -99999;
}
j = 0;
i1 = 1.0;
for(ii1=0; ii1 < *n-2; ii1=ii1+1){
j = 0;
for(i=0; i < *n; i=i+1){
if( p[i] > i1/nn && p[i] <= (i1+1)/nn ){
ref23[j] = i;
j = j+1;
}
}
for(i=0; i < j; i=i+1){
k = ref23[i];
qq[0] = z[ii1] + ( (z[ii1+1]- z[ii1]) / ((i1+1)/nn - i1/nn) ) * (p[k] - i1/nn);
q[k] = qq[0] + desintxb[ii1] - 0.5*(z[ii1] + z[ii1+1]);
}
i1 = i1+1;
}
}

Wyświetl plik

@ -61,97 +61,101 @@ def scale_data(X, param=()):
return Xs, param
def value2diff(X, mode=None):
def value2diff(X, percent=True):
"""
from value to difference in abs or %
diff in value first element is zero
diff in % first element is one
dx is reduced by 1 row
"""
# Difference in value
if (mode == 'V'):
dX = np.zeros_like(X)
dX[1:, :] = X[1:, :] - X[:-1, :]
# Difference in percent
if (percent):
dX = X[1:, :] / X[:-1, :] - 1.0
# Difference in value
else:
dX = np.ones_like(X)
dX[1:, :] = X[1:, :] / X[:-1, :] - 1.0
dX = X[1:, :] - X[:-1, :]
return dX
def diff2value(dX, mode=None):
def diff2value(dX, percent=True):
"""
from difference in abs or % to value (first row should be all zeros but
will be over-written
from difference in abs or % to value
X is increased by one row
Reference X[0,:] is assumed to be zero. If X0[0,:] is the desired
reference, the actual vector X can be determined as X0+X
Value from percent: first row set to one. If X0 defines the starting
values, then X0*X would be the ???
Reference X[0,:] is assumed to be one. If X0[0,:] is the desired
reference, the actual vector X can be determined as X0*X
Value from difference: first row set to zero. If X0 defines the starting
values, then X0+X would be the ???
"""
# Value from the difference (first row equal to zero)
# X[0, :] = 0
# X[1, :] = X[0, :] + dX[1, :] = dX[1, :]
# X[2, :] = X[0, :] + dX[1, :] + dX[2, :] = dX[1, :] + dX[2, :]
# ....
if (mode == 'V'):
X = np.zeros_like(dX)
X[1:, :] = np.cumsum(dX[1:, :], axis=0)
# Value from percent (first row equal to 1)
n_rows, n_cols = dX.shape
X = np.zeros((n_rows+1, n_cols))
# Value from percent
# X[0, :] = 1
# X[1, :] = X[0, :] * (1 + dX[1, :]) = (1 + dX[1, :])
# X[2, :] = X[1, :] * (1 + dX[2, :])
# = X[0, :] * (1 + dX[1, :]) * (1 + dX[2, :])
# = (1 + dX[1, :]) * (1 + dX[2, :])
# ....
else:
X = np.ones_like(dX)
if (percent):
X[0, :] = 1.0
X[1:, :] = np.cumprod((1.0 + dX), axis=0)
# Value from difference
# X[0, :] = 0
# X[1, :] = X[0, :] + dX[1, :] = dX[1, :]
# X[2, :] = X[0, :] + dX[1, :] + dX[2, :] = dX[1, :] + dX[2, :]
# ....
else:
# First row already set to zero
X[1:, :] = np.cumsum(dX, axis=0)
return X
def synthetic_wave(per, amp=None, pha=None, num=1000):
def synthetic_wave(P, A=None, phi=None, num=1000):
"""
Generates a multi-sinewave.
P = [ P1 P2 ... Pn ] Periods
A = [ A1 A2 ... An ] Amplitudes
PH = [PH1 PH2 ... PHn] Phases (rad)
P = [P_1, P_2, ... P_n] Periods
A = [A_1, A_2, ... A_n] Amplitudes
phi = [phi_1, phi_2, ... phi_n] Phases (rad)
Default amplitudes are ones
Default phases are zeros
Time is from 0 to largest period (default 1000 steps)
"""
n_waves = len(per)
per = np.asarray(per)
n_waves = len(P)
P = np.asarray(P)
# Define amplitudes and phases
if (amp is None):
amp = np.ones(n_waves)
# Define amplitudes
if (A is None):
A = np.ones(n_waves)
else:
amp = np.asarray(amp)
if (pha is None):
pha = np.zeros(n_waves)
A = np.asarray(A)
# Define phases
if (phi is None):
phi = np.zeros(n_waves)
else:
pha = np.asarray(pha)
phi = np.asarray(phi)
# Add all the waves
t = np.linspace(0.0, np.amax(per), num=num)
t = np.linspace(0.0, np.amax(P), num=num)
f = np.zeros(len(t))
for i in range(n_waves):
f = f + amp[i] * np.sin(2.0 * np.pi * t / per[i] + pha[i])
f = f + A[i] * np.sin(2.0 * np.pi * t / P[i] + phi[i])
return t, f
def synthetic_series(X, multiv=False):
def synthetic_FFT(X, multiv=False):
"""
- univariate and single time-series
- univariate and multi-time series (can be used to generate multi from same)
- multi-variate multi-time series
"""
n_samples, n_series = data.shape
n_samples, n_series = X.shape
# The number of samples must be odd (if the number is even drop the last value)
if ((n_samples % 2) == 0):
@ -188,3 +192,69 @@ def synthetic_series(X, multiv=False):
X_synt = np.real(np.fft.ifft(synt_fft, axis=0))
return X_synt
def synthetic_sampling(X, replace=True):
"""
generate more than n_samples?
"""
n_samples, n_series = X.shape
X_synt = np.zeros_like(X)
# Sampling with replacement
if (replace):
idx = np.random.randint(0, n_samples, size=(n_samples, n_series))
i = np.arange(n_series)
X_synt[:, i] = X[idx[:, i], i]
# Sampling without replacement
else:
idx = np.zeros_like(X)
for j in range(n_series):
idx[:, j] = np.random.permutation(n_samples)
i = np.arange(n_series)
X_synt[:, i] = X[idx[:, i], i]
return X_synt
def synthetic_MEboot(X, alpha=0.1):
"""
"""
n_samples, n_series = X.shape
X_synt = np.zeros_like(X)
# Loop over time-series
n = n_samples
for ts in range(n_series):
# Sort the time series keeping track of the original position
idx = np.argsort(X[:, ts])
Y = X[idx, ts]
print(idx, idx.shape)
print(Y, Y.shape)
# Compute the trimmed mean
g = int(np.floor(n * alpha))
r = n * alpha - g
print(n, g, r)
m_trm = ((1.0 - r) * (Y[g] + Y[n-g-1]) + Y[g+1:n-g-1].sum()) \
/ (n * (1.0 - 2.0 * alpha))
print(m_trm)
# Compute the intermediate points
Z = np.zeros(n+1)
Z[1:-1] = (Y[0:-1] + Y[1:]) / 2.0
Z[0] = Y[0] - m_trm
Z[n] = Y[n-1] + m_trm
print(Z, Z.shape)
# Compute the interval means
mt = np.zeros(n)
mt[0] = 0.75 * Y[0] + 0.25 * Y[1]
mt[1:n-1] = 0.25 * Y[0:n-2] + 0.5 * Y[1:n-1] + 0.25 * Y[2:n]
mt[n-1] = 0.25 * Y[n-2] + 0.75 * Y[n-1]
print(mt)

Wyświetl plik

@ -15,8 +15,6 @@ ToDo:
- example using noisy multi-sine-waves
- synt: boot, paper Vinod (as a class?)
- vectors must be ( .., 1)
- reduce the vector diff by one and pass initial value
(with zero/one as default)
"""
import sys
@ -56,30 +54,48 @@ np.random.seed(1294404794)
# signals = (spx.data[:, 0], Yc[:, 0], Yp[:, 0])
# utl.plot_signals(signals, 0, 50)
# t, f = utl.synthetic_wave([1., 2., 3.], A=None, PH=None, num=30)
# t, f = syn.synthetic_wave([1., 2., 3.], A=None, phi=None, num=100)
# plt.plot(t,f)
# plt.show()
aa = np.array([
[ 0.8252, 0.2820],
[ 1.3790, 0.0335],
[-1.0582, -1.3337],
[-0.4686, 1.1275],
[-0.2725, 0.3502],
[ 1.0984, -0.2991],
[-0.2779, 0.0229],
[ 0.7015, -0.2620],
[-2.0518, -1.7502],
[-0.3538, -0.2857],
[-0.8236, -0.8314],
[-1.5771, -0.9792],
[ 0.5080, -1.1564]])
# synt_aa = utl.synthetic_series(data, False)
# plt.plot(synt_aa)
# aa = np.array([
# [ 0.8252, 0.2820],
# [ 1.3790, 0.0335],
# [-1.0582, -1.3337],
# [-0.4686, 1.1275],
# [-0.2725, 0.3502],
# [ 1.0984, -0.2991],
# [-0.2779, 0.0229],
# [ 0.7015, -0.2620],
# [-2.0518, -1.7502],
# [-0.3538, -0.2857],
# [-0.8236, -0.8314],
# [-1.5771, -0.9792],
# [ 0.5080, -1.1564]])
# synt_data1 = syn.synthetic_FFT(data, False)
# synt_data2 = syn.synthetic_FFT(data, False)
# plt.plot(synt_data1)
# plt.plot(synt_data2)
# plt.plot(data)
# names = ['syn1', 'syn2', 'spx']
# plt.legend(names)
# plt.show()
print(data[0:10, :])
bb = syn.value2diff(data, mode='V')
print(bb[0:10, :])
bb[0, 0] = 1399.48
cc = syn.diff2value(bb, mode='V')
print(cc[0:10, :])
# percent = False
# print(data[0:10, :])
# bb = syn.value2diff(data, percent)
# print(bb[0:10, :])
# cc = syn.diff2value(bb, percent)
# print(cc[0:10, :]+1399.48)
# aa = np.arange(10,28).reshape(6,3)
# print(aa)
# idx = np.zeros_like(aa)
# bb = np.zeros_like(aa)
# for i in range(aa.shape[1]):
# idx[:, i] = np.random.permutation(aa.shape[0])
# print(idx)
# i = np.arange(aa.shape[1])
# bb[:, i] = aa[idx[:, i], i]
# bb = syn.synthetic_boot(aa, replace=False)
# print(bb)
aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1)
# print(aa)
syn.synthetic_MEboot(aa)

Wyświetl plik

@ -8,6 +8,8 @@
- D. Prichard, and J. Theiler, "[Generating surrogate data for time series with several simultaneously measured variables](https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.73.951)."
- H. Vinod, and J. Lopez-de-Lacalle, "[Maximum entropy bootstrap for time series: the meboot R package](https://www.jstatsoft.org/article/view/v029i05)."
## Characteristics
## Parameters