work on function meboot

master
Gabriele Gilardi 2020-06-23 20:21:52 +09:00
parent 9763bbb652
commit 1f9b4f9489
6 changed files with 348 additions and 298 deletions

View file

@@ -0,0 +1,207 @@
import numpy as np
import scipy.stats as st


class MEBOOT:
    def __init__(self, x, trimval=0.1, seed=None):
        '''
        x: multivariate time series, N x T
        trimval: trim fraction for the trimmed mean (default 0.1)
        '''
        self.sd = np.random.RandomState(seed)
        m, n = x.shape
        self.meanx = x.mean(axis=1)
        self.sdx = x.std(axis=1)
        self.ordxx = np.argsort(x, axis=1)
        # Sort each row by its own ordering (raveling the index matrix would
        # mix rows, since ordxx holds within-row positions).
        xx = np.take_along_axis(x, self.ordxx, axis=1)
        # Intermediate points between consecutive order statistics.
        self.z = 0.5*(xx[:, 1:] + xx[:, :-1])
        # Tail limits from the trimmed mean of consecutive deviations.
        dv = np.abs(np.diff(x, axis=1))
        dvtrim = st.trim_mean(dv, trimval, axis=1)
        self.xmin = xx[:, 0] - dvtrim
        self.xmax = xx[:, -1] + dvtrim
        # Interval means satisfying the mean-preserving constraint
        # (m_t in Vinod's paper); first and last have distinct formulas.
        tmp = np.array([[0.25]*(n-2) + [0.5]*(n-2) + [0.25]*(n-2)])
        cmd = np.column_stack((xx[:, :n-2], xx[:, 1:n-1], xx[:, 2:n])) * tmp
        aux = np.array([cmd[:, i::n-2].sum(axis=1) for i in range(n-2)]).T
        self.desintxb = np.column_stack((0.75*xx[:, :1] + 0.25*xx[:, 1:2],
                                         aux,
                                         0.25*xx[:, -2:-1] + 0.75*xx[:, -1:]))
    def _mrapproxpy(self, p, z, desintxb):
        m, n = p.shape
        q = -np.inf*np.ones(n*m)
        # Index of the interval (i1/n, (i1+1)/n] that each p falls in.
        a = (p//(1/n) - 1).astype(int)
        hs = np.arange(n-2)
        # Per-interval slope, midpoint, left endpoint and interval mean,
        # padded so interior indices line up after raveling.
        dz = np.column_stack(([-np.inf]*m, np.diff(z, axis=1)*n, [0]*m)).ravel()
        sz = np.column_stack(([0]*m, 0.5*(z[:, hs+1] + z[:, hs]), [0]*m)).ravel()
        zt = np.column_stack(([-np.inf]*m, z[:, hs], [-np.inf]*m)).ravel()
        dh = np.column_stack(([-np.inf]*m, desintxb[:, hs], [0]*m)).ravel()
        plus = (n*np.arange(m))[np.newaxis].T
        jx = (np.tile(range(n), (m, 1)) + plus).ravel()
        ixo = a + 1
        ix = (ixo + plus).ravel()
        # Linear interpolation within the interval, shifted by the interval
        # mean minus the interval midpoint (mean-preserving constraint).
        tmp = zt[ix] + dh[ix] - sz[ix]
        q[jx] = dz[ix]*(p.ravel()[jx] - ixo.ravel()/n) + tmp
        return q.reshape((m, n))
    def _expandSD(self, bt, fiv):
        # Expand the standard deviation of the replicates by up to fiv
        # percent (the expand.sd step of the algorithm).
        obt = len(bt.shape)
        if obt == 2:
            bt = bt[np.newaxis]
        sd = self.sdx
        bt = np.swapaxes(bt, 0, 1)
        sdf = np.column_stack((sd, bt.std(axis=2)))
        sdfa = sdf/sdf[:, :1]    # ratio of each sd to the original sd
        sdfd = sdf[:, :1]/sdf    # inverse ratio
        mx = 1 + fiv/100
        # Where a replicate's sd shrank, expand by a random factor in [1, mx);
        # drawn from the seeded generator for reproducibility.
        idx = np.where(sdfa < 1)
        sdfa[idx] = self.sd.uniform(1, mx, size=len(idx[0]))
        sdfdXsdfa = sdfd[:, 1:]*sdfa[:, 1:]
        bt *= np.moveaxis(sdfdXsdfa[np.newaxis], 0, -1)
        bt = np.swapaxes(bt, 0, 1)
        if obt == 2:
            bt = bt[0]
        return bt
    def _adjust(self, bt):
        # Scale adjustment: rescale around the mean so the replicate sd
        # matches the sd of the observed series.
        zz = np.column_stack((self.xmin[np.newaxis].T, self.z, self.xmax[np.newaxis].T))
        v = np.diff(zz**2, axis=1)/12    # within-interval variances
        xb = self.meanx[np.newaxis].T
        s1 = ((self.desintxb - xb)**2).sum(axis=1)
        act_sd = np.sqrt((s1 + v.sum(axis=1))/(self.z.shape[1] + 1))
        des_sd = self.sdx
        kappa = (des_sd/act_sd - 1)[np.newaxis].T
        return bt + kappa*(bt - xb)
    def bootstrap(self, fiv=5, adjust_sd=True):
        '''
        Single realization of the ME bootstrap for the multivariate time series.
        fiv: percentage increment of the standard deviation (default 5%);
             pass None to skip the expansion step.
        adjust_sd: match the standard deviation of the observed series.
        '''
        m, n = self.z.shape
        n += 1
        # Uniform draws and the corresponding sample quantiles.
        p = self.sd.uniform(0, 1, size=(m, n))
        q = self._mrapproxpy(p, self.z, self.desintxb[:, 1:])
        # Linear interpolation in the two uniform tails.
        f_low = np.column_stack((self.xmin[np.newaxis].T, self.z[:, 0]))
        f_hi = np.column_stack((self.z[:, -1], self.xmax[np.newaxis].T))
        low = p < 1/n
        hi = p > (n-1)/n
        for i in range(m):
            q[i][low[i]] = np.interp(p[i][low[i]], [0, 1/n], f_low[i])
            q[i][hi[i]] = np.interp(p[i][hi[i]], [(n-1)/n, 1], f_hi[i])
            # Restore the time dependence of the original series.
            qseq = np.sort(q[i])
            q[i][self.ordxx[i]] = qseq
        if fiv is not None:
            q = self._expandSD(q, fiv)
        if adjust_sd:
            q = self._adjust(q)
        return q
    def bootstrap_clt(self, nt, fiv=5, adjust_sd=True):
        '''
        Multiple ME bootstrap copies, forcing the central limit theorem.
        Warning: all bootstrap copies are computed at once, which can
        require a lot of memory.
        nt: number of bootstrap copies
        fiv: percentage increment of the standard deviation (default 5%)
        adjust_sd: match the standard deviation of the observed series.
        '''
        bt = np.array([self.bootstrap(fiv=None) for i in range(nt)])
        if fiv is not None:
            bt = self._expandSD(bt, fiv)
        bt = np.swapaxes(bt, 0, 1)
        N, nt, T = bt.shape
        gm = self.meanx
        smean = self.sdx/np.sqrt(nt)
        # Replace the sorted replicate means with Gaussian quantiles so the
        # ensemble of means obeys the central limit theorem.
        xbar = bt.mean(axis=2)
        sortxbar = np.sort(xbar, axis=1)
        oo = np.argsort(xbar, axis=1)
        newbar = gm[np.newaxis].T \
            + st.norm.ppf((np.arange(1, nt+1)/(nt+1))[np.newaxis])*smean[np.newaxis].T
        scn = st.zscore(newbar, axis=1)
        newm = scn*smean[np.newaxis].T + gm[np.newaxis].T
        meanfix = newm - sortxbar
        # Inverse of the ordering permutation, to put copies back in place.
        oinv = np.argsort(oo, axis=1)
        out = np.array([(bt[i][oo[i]] + meanfix[i][np.newaxis].T)[oinv[i]]
                        for i in range(bt.shape[0])])
        out = np.swapaxes(out, 0, 1)
        if adjust_sd:
            out = self._adjust(out)
        return out
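
A short usage sketch for the class above (my addition, not part of the commit; the array sizes are arbitrary): build the object once, then draw single replicates or a CLT-forced ensemble.

# Hypothetical usage of MEBOOT (assumed shapes: N=3 series, T=200 points).
x = np.cumsum(np.random.RandomState(0).normal(size=(3, 200)), axis=1)
mb = MEBOOT(x, trimval=0.1, seed=42)
q = mb.bootstrap(fiv=5, adjust_sd=True)    # one replicate, shape (3, 200)
ens = mb.bootstrap_clt(nt=99, fiv=5)       # 99 replicates, shape (99, 3, 200)
assert q.shape == x.shape and ens.shape == (99, 3, 200)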

View file

@@ -0,0 +1,102 @@
"""
MEBOOT.PY - Python package for the meboot (Maximum Entropy Bootstrap) algorithm for Time Series
Author: Fabian Brix
Method by H.D. Vinod, Fordham University -
"""
import sys
import numpy as np
import matplotlib.pyplot as plt
def sort(series):
ind_sorted = np.argsort(series)
s_sorted = series[ind_sorted]
return s_sorted, ind_sorted
def get_trm_mean(series, percent):
    # Trimmed mean of the absolute deviations between consecutive
    # observations; 'percent' percent of the extremes are cut from each side.
    dev = np.abs(series[1:] - series[:-1])
    n = len(dev)
    k = int(round(n * (percent / 100.0) / 2.0))
    # (For the test series [4, 12, 36, 20, 8] this evaluates to 15.0,
    # the value that was previously hardcoded while debugging.)
    return np.mean(dev[k:n-k])
def get_intermed_pts(series, s_sorted, percent):
    # Intermediate points between consecutive order statistics, extended
    # by the trimmed mean deviation at both tails.
    zt = (s_sorted[:-1] + s_sorted[1:]) / 2.0
    m_trm = get_trm_mean(series, percent)
    z0 = s_sorted[0] - m_trm
    zT = s_sorted[-1] + m_trm
    return np.hstack((z0, zt, zT))
def get_intervals(z):
    # Pair consecutive points into intervals.
    return np.vstack((z[:-1], z[1:])).T


def get_me_density(intervals):
    # Uniform (maximum entropy) density within each interval.
    return 1.0 / (intervals[:, 1] - intervals[:, 0])


def get_cpf(me_density, intervals):
    # Cumulative distribution at the interval boundaries, normalized to 1.
    cpf = np.cumsum(me_density)[:-1]
    return cpf / np.max(cpf)
def get_quantiles(cpf, intervals, series):
    # Invert the piecewise-linear CDF at uniform draws to obtain quantiles.
    quantiles = []
    # Fixed draws in place of np.random.uniform(0, 1), kept for reproducible
    # debugging runs.
    u_draws = np.array([0.12, 0.83, 0.53, 0.59, 0.11])
    for d in range(series.shape[0]):
        u = u_draws[d]
        for i in range(cpf.shape[0]):
            cp = cpf[i]
            if u <= cp:
                cpm = cpf[i-1] if i > 0 else 0.0
                # Slope of the CDF over the interval (division, not product).
                m = (cp - cpm) / (intervals[i, 1] - intervals[i, 0])
                xp = (u - cpm) / m + intervals[i, 0]
                quantiles.append(xp)
                break
    return np.array(quantiles)
def meboot(series, replicates):
    # Sorted in ascending order by default.
    np.random.seed(0)
    s_sorted, ind_sorted = sort(series)
    z = get_intermed_pts(series, s_sorted, 10)
    intervals = get_intervals(z)
    me_density = get_me_density(intervals)
    cpf = get_cpf(me_density, intervals)
    quantiles = np.sort(get_quantiles(cpf, intervals, series))
    # Recover the time dependence: the position that held the t-th smallest
    # observation receives the t-th smallest sampled quantile.
    replicate = np.empty_like(quantiles)
    replicate[ind_sorted] = quantiles
    # TODO: Understand and add the repeat mechanism for multiple replicates.
    # plt.plot(series, color='r')
    # plt.plot(replicate, color='b')
    # plt.ylim(0, 30)
    # plt.show()
    return replicate


series = np.array([4, 12, 36, 20, 8])
meboot(series, 1)
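
A quick hand check of the helper chain above (my addition, not part of the commit): for the test series, the intermediate points can be verified against the sorted order statistics and the trimmed mean of 15.0 computed by get_trm_mean.

# Hand-checked values for series = [4, 12, 36, 20, 8] with percent=10:
#   order statistics: [4, 8, 12, 20, 36]
#   |consecutive diffs| of the raw series: [8, 24, 16, 12] -> trimmed mean 15.0
#   intermediate points z: [-11, 6, 10, 16, 28, 51]
s = np.array([4, 12, 36, 20, 8])
s_sorted, _ = sort(s)
assert np.allclose(get_intermed_pts(s, s_sorted, 10),
                   [-11.0, 6.0, 10.0, 16.0, 28.0, 51.0])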

View file

@@ -1,246 +0,0 @@
#meboot <- function(x, reps=999, trim=0.10, reachbnd=TRUE,
#                   expand.sd=TRUE, force.clt=TRUE, elaps=FALSE,
#                   colsubj, coldata, coltimes, ...){
#  UseMethod("meboot", x)
#}

meboot <- function(x, reps=999, trim=list(trim=0.10, xmin=NULL, xmax=NULL), reachbnd=TRUE,
  expand.sd=TRUE, force.clt=TRUE,
  scl.adjustment = FALSE, sym = FALSE, elaps=FALSE,
  colsubj, coldata, coltimes, ...)
{
  if ("pdata.frame" %in% class(x))
  {
    res <- meboot.pdata.frame(x, reps, trim$trim, reachbnd,
      expand.sd, force.clt, scl.adjustment, sym, elaps,
      colsubj, coldata, coltimes, ...)
    return(res)
  }

  if (reps == 1 && isTRUE(force.clt))
  {
    force.clt <- FALSE
    warning("force.clt was set to FALSE since the ensemble contains only one replicate.")
  }

  if (!is.list(trim)) {
    trimval <- trim
  } else {
    trimval <- if (is.null(trim$trim)) 0.1 else trim$trim
  }
  ptm1 <- proc.time()

  n <- length(x)

  # Sort the original data in increasing order and
  # store the ordering index vector.
  xx <- sort(x)
  ordxx <- order(x)
  #ordxx <- sort.int(x, index.return=TRUE)

  # symmetry
  if (sym)
  {
    xxr <- rev(xx)                      # reordered values
    xx.sym <- mean(xx) + 0.5*(xx - xxr) # symmetrized order stats
    xx <- xx.sym                        # replace order stats by symmetrized ones
  }

  # Compute intermediate points on the sorted series.
  z <- rowMeans(embed(xx, 2))

  # Compute the lower limit for the left tail ('xmin') and the
  # upper limit for the right tail ('xmax') from the 'trim' (e.g. 10%)
  # trimmed mean of the deviations among all consecutive observations
  # ('dv'), so that the tails are uniformly distributed.
  dv <- abs(diff(as.numeric(x)))
  dvtrim <- mean(dv, trim=trimval)

  if (is.list(trim))
  {
    if (is.null(trim$xmin))
    {
      xmin <- xx[1] - dvtrim
    } else
      xmin <- trim$xmin

    if (is.null(trim$xmax))
    {
      xmax <- xx[n] + dvtrim
    } else
      xmax <- trim$xmax

    if (!is.null(trim$xmin) || !is.null(trim$xmax))
    {
      if (isTRUE(force.clt))
      {
        expand.sd <- FALSE
        force.clt <- FALSE
        warning("expand.sd and force.clt were set to FALSE in order to ",
          "enforce the limits xmin/xmax.")
      }
    }
  } else {
    xmin <- xx[1] - dvtrim
    xmax <- xx[n] + dvtrim
  }

  # Do this here so that these warnings are printed after
  # the above warnings (if necessary).
  if (is.list(trim))
  {
    if (!is.null(trim$xmin) && trim$xmin > min(x))
      warning("the lower limit trim$xmin may not be satisfied in the replicates ",
        "since it is higher than the minimum value observed ",
        "in the input series x")
    if (!is.null(trim$xmax) && trim$xmax < max(x))
      warning("the upper limit trim$xmax may not be satisfied in the replicates ",
        "since it is lower than the maximum value observed ",
        "in the input series x")
  }

  # Compute the mean of the maximum entropy density within each
  # interval in such a way that the 'mean preserving constraint'
  # is satisfied. (Denoted as m_t in the reference paper.)
  # The first and last interval means have distinct formulas.
  # See Theil and Laitinen (1980) for details.
  aux <- colSums( t(embed(xx, 3))*c(0.25, 0.5, 0.25) )
  desintxb <- c(0.75*xx[1]+0.25*xx[2], aux, 0.25*xx[n-1]+0.75*xx[n])
  # Generate random numbers from the [0,1] uniform interval and
  # compute sample quantiles at those points.
  ensemble <- matrix(x, nrow=n, ncol=reps)
  ensemble <- apply(ensemble, 2, meboot.part,
    n, z, xmin, xmax, desintxb, reachbnd)

  # So far the object 'ensemble' contains the quantiles.
  # Now give them time series dependence and heterogeneity.
  qseq <- apply(ensemble, 2, sort)

  # 'qseq' holds monotonic series; the correct series are obtained
  # by applying the ordering 'ordxx' defined above.
  ensemble[ordxx,] <- qseq
  #ensemble[ordxx$ix,] <- qseq

  if(expand.sd)
    ensemble <- expand.sd(x=x, ensemble=ensemble, ...)

  if(force.clt)
    ensemble <- force.clt(x=x, ensemble=ensemble)

  # scale adjustment
  if (scl.adjustment)
  {
    zz <- c(xmin, z, xmax)   # extended list of z values
    #v <- rep(NA, n)         # storing within variances
    #for (i in 2:(n+1)) {
    #  v[i-1] <- ((zz[i] - zz[i-1])^2) / 12
    #}
    v <- diff(zz^2) / 12
    xb <- mean(x)
    s1 <- sum((desintxb - xb)^2)
    uv <- (s1 + sum(v)) / n
    desired.sd <- sd(x)
    actualME.sd <- sqrt(uv)
    if (actualME.sd <= 0)
      stop("actualME.sd <= 0 error")
    out <- desired.sd / actualME.sd
    kappa <- out - 1
    ensemble <- ensemble + kappa * (ensemble - xb)
  } else
    kappa <- NULL

  #ensemble <- cbind(x, ensemble)

  if(is.ts(x)){
    ensemble <- ts(ensemble, frequency=frequency(x), start=start(x))
    dimnames(ensemble)[[2]] <- paste("Series", 1:reps)
    #dimnames(ensemble)[[2]] <- c("original", paste("Series", 1:reps))
  }

  # Computation time
  ptm2 <- proc.time(); elapsr <- elapsedtime(ptm1, ptm2)
  if(elaps)
    cat("\n Elapsed time:", elapsr$elaps,
      paste(elapsr$units, ".", sep=""), "\n")

  list(x=x, ensemble=ensemble, xx=xx, z=z, dv=dv, dvtrim=dvtrim, xmin=xmin,
    xmax=xmax, desintxb=desintxb, ordxx=ordxx, kappa = kappa, elaps=elapsr)
}
meboot.part <- function(x, n, z, xmin, xmax, desintxb, reachbnd)
{
  # Generate random numbers from the [0,1] uniform interval.
  p <- runif(n, min=0, max=1)

  # Compute sample quantiles by linear interpolation at
  # those 'p'-s (if any) ...

  # ... 'p'-s within the (i1/n, (i1+1)/n] interval (i1=1,...,n-2).
  q <- .C("mrapprox", p=as.double(p), n=as.integer(n), z=as.double(z),
    desintxb=as.double(desintxb[-1]), ref23=double(n), qq=double(1),
    q=double(n), PACKAGE="meboot")$q

  # ... 'p'-s within the [0, (1/n)] interval. (Left tail.)
  ref1 <- which(p <= (1/n))
  if(length(ref1) > 0){
    qq <- approx(c(0, 1/n), c(xmin, z[1]), p[ref1])$y
    q[ref1] <- qq
    if(!reachbnd) q[ref1] <- qq + desintxb[1] - 0.5*(z[1] + xmin)
  }

  # ... 'p'-s equal to (n-1)/n.
  ref4 <- which(p == ((n-1)/n))
  if(length(ref4) > 0)
    q[ref4] <- z[n-1]

  # ... 'p'-s greater than (n-1)/n. (Right tail.)
  ref5 <- which(p > ((n-1)/n))
  if(length(ref5) > 0){
    # Right tail proportion p[i]
    qq <- approx(c((n-1)/n, 1), c(z[n-1], xmax), p[ref5])$y
    q[ref5] <- qq # this implicitly shifts xmax for the algorithm
    if(!reachbnd) q[ref5] <- qq + desintxb[n] - 0.5*(z[n-1] + xmax)
    # such that the algorithm gives xmax when p[i]=1;
    # this is the meaning of reaching the bounds xmax and xmin
  }

  q
}
elapsedtime <- function(ptm1, ptm2)
{
  elaps <- (ptm2 - ptm1)[3]
  if(elaps < 60)
    units <- "seconds"
  else if(elaps < 3600){
    elaps <- elaps/60
    units <- "minutes"
  } else if(elaps < 86400){
    elaps <- elaps/3600
    units <- "hours"
  } else {
    elaps <- elaps/86400
    units <- "days"
  }
  list(elaps=as.numeric(elaps), units=units)
}
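
For orientation (my summary, not part of the commit), the deleted R code maps onto the new Python class roughly as follows:

# Informal correspondence between the R package code and the MEBOOT class:
#   meboot(x, reps, trim, ...)     -> MEBOOT(x, trimval); bootstrap_clt(nt=reps)
#   z <- rowMeans(embed(xx, 2))    -> self.z = 0.5*(xx[:, 1:] + xx[:, :-1])
#   desintxb (interval means m_t)  -> self.desintxb
#   .C("mrapprox", ...)            -> self._mrapproxpy (vectorized)
#   expand.sd / force.clt          -> self._expandSD / bootstrap_clt
#   scl.adjustment (kappa)         -> self._adjust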

View file

@@ -1,40 +0,0 @@
// q <- .C("mrapprox", p=as.double(p), n=as.integer(n), z=as.double(z),
//         desintxb=as.double(desintxb[-1]), ref23=double(n), qq=double(1),
//         q=double(n), PACKAGE="meboot")$q

void mrapprox(double *p, int *n, double *z, double *desintxb, double *ref23,
              double *qq, double *q)
{
    int i, j, ii1, k;
    double i1, nn;

    nn = *n;
    for(i = 0; i < *n; i = i+1){
        q[i] = -99999;
        ref23[i] = -99999;
    }

    j = 0;
    i1 = 1.0;
    for(ii1 = 0; ii1 < *n-2; ii1 = ii1+1){
        /* Collect the indices of the p's falling in (i1/n, (i1+1)/n]. */
        j = 0;
        for(i = 0; i < *n; i = i+1){
            if( p[i] > i1/nn && p[i] <= (i1+1)/nn ){
                ref23[j] = i;
                j = j+1;
            }
        }
        /* Interpolate linearly between z[ii1] and z[ii1+1], then shift by
           the interval mean minus the midpoint (mean-preserving constraint). */
        for(i = 0; i < j; i = i+1){
            k = ref23[i];
            qq[0] = z[ii1] + ( (z[ii1+1] - z[ii1]) / ((i1+1)/nn - i1/nn) ) * (p[k] - i1/nn);
            q[k] = qq[0] + desintxb[ii1] - 0.5*(z[ii1] + z[ii1+1]);
        }
        i1 = i1+1;
    }
}
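
For reference, a scalar Python transcription of the C routine (my sketch, not part of the commit); the _mrapproxpy method in the class above computes the same interior-interval quantiles without the explicit double loop.

import numpy as np

def mrapprox_py(p, z, desintxb):
    # For each p in the interior interval (i1/n, (i1+1)/n], i1 = 1..n-2,
    # interpolate linearly between z[i1-1] and z[i1], then shift by the
    # interval mean minus the interval midpoint (mean-preserving constraint).
    n = len(p)
    q = np.full(n, -99999.0)
    for i1 in range(1, n-1):
        lo, hi = i1/n, (i1+1)/n
        sel = (p > lo) & (p <= hi)
        slope = (z[i1] - z[i1-1]) / (hi - lo)
        q[sel] = z[i1-1] + slope*(p[sel] - lo) \
                 + desintxb[i1-1] - 0.5*(z[i1-1] + z[i1])
    return q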

View file

@@ -154,6 +154,10 @@ def synthetic_FFT(X, multiv=False):
    - univariate and single time series
    - univariate and multiple time series (can be used to generate multi from same)
    - multivariate, multiple time series
    generate more than n_samples, remove multi-time series for univariate
    use a 3d matrix looping on the time series for multivariate
    """
    n_samples, n_series = X.shape
@@ -196,7 +200,7 @@ def synthetic_FFT(X, multiv=False):
def synthetic_sampling(X, replace=True):
    """
    generate more than n_samples?
    generate more than n_samples, remove multi-time series
    """
    n_samples, n_series = X.shape
    X_synt = np.zeros_like(X)
@@ -231,30 +235,53 @@ def synthetic_MEboot(X, alpha=0.1):
    # Sort the time series keeping track of the original position
    idx = np.argsort(X[:, ts])
    Y = X[idx, ts]
    # Compute the trimmed mean of the absolute consecutive deviations
    # (g whole observations and a fraction r are trimmed from each tail)
    # trimmed_mean = np.abs(np.diff(x)).mean()
    g = int(np.floor(n * alpha))
    r = n * alpha - g
    m_trm = ((1.0 - r) * (Y[g] + Y[n-g-1]) + Y[g+1:n-g-1].sum()) \
            / (n * (1.0 - 2.0 * alpha))
    # Compute the intermediate points
    Z = np.zeros(n+1)
    Z[0] = Y[0] - m_trm
    Z[1:-1] = (Y[0:-1] + Y[1:]) / 2.0
    Z[n] = Y[n-1] + m_trm
    # Compute the interval means (mean-preserving constraint)
    mt = np.zeros(n)
    mt[0] = 0.75 * Y[0] + 0.25 * Y[1]
    mt[1:n-1] = 0.25 * Y[0:n-2] + 0.5 * Y[1:n-1] + 0.25 * Y[2:n]
    mt[n-1] = 0.25 * Y[n-2] + 0.75 * Y[n-1]
    # Randomly generate new points
    t_w = np.random.rand(n)
    # t_w = np.array([0.12, 0.83, 0.53, 0.59, 0.11])
    # order here???? and remove correction inside intervals???
    # Interpolate the new points
    t = np.linspace(0.0, 1.0, num=n+1)
    w_int = np.interp(t_w, t, Z)
    # Correct the new points to satisfy the mass constraint
    idw = (np.floor_divide(t_w, 1.0 / n)).astype(int)
    w_corr = w_int + mt[idw] - (Z[idw] + Z[idw+1]) / 2.0
    # Enforce limits
    # w_corr = np.fmin(np.fmax(w_corr, Z[0]), Z[n])
    # Re-order the sampled values
    w_ord = np.sort(w_corr)
    # w_ord = np.array([5.85, 6.7, 8.9, 10.7, 23.95])
    # Recover the time dependencies
    W = np.zeros(n)
    W[idx] = w_ord
    return W
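
One cheap property test (my addition; it assumes the unshown top of the function defines n and the loop variable ts as in the other synthetic_* routines): since the sorted samples are written back through idx, the replicate preserves the rank order of the input column.

# Hypothetical check that ranks are preserved (no ties in the test vector).
X = np.array([4, 12, 36, 20, 8], dtype=float).reshape(5, 1)
W = synthetic_MEboot(X, alpha=0.1)
assert np.array_equal(np.argsort(W), np.argsort(X[:, 0]))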

View file

@@ -13,8 +13,8 @@ ToDo:
- example for alpha-beta-gamma using variable sigma as in financial time series
(see Ehler)
- example using noisy multi-sine-waves
- synt: boot, paper Vinod (as a class?)
- vectors must be ( .., 1)
- synt: remove multi-series unless multivariate and add number of instances (fft 3D)
"""
import sys
@@ -97,5 +97,5 @@ np.random.seed(1294404794)
# bb = syn.synthetic_boot(aa, replace=False)
# print(bb)
aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1)
# print(aa)
syn.synthetic_MEboot(aa)
W = syn.synthetic_MEboot(aa, alpha=0.1)
# print(bb.sum())