kopia lustrzana https://github.com/gabrielegilardi/SignalFilters
finish function MEboot
rodzic
1f9b4f9489
commit
afad3296ed
|
@ -1,207 +0,0 @@
|
|||
import numpy as np
|
||||
import scipy.stats as st
|
||||
|
||||
class MEBOOT:
|
||||
def __init__(self,x,trimval=0.1,seed=None):
|
||||
'''
|
||||
x: multivariate-time series N x T
|
||||
trimval: trim value (default 0.1)
|
||||
'''
|
||||
|
||||
self.sd = np.random.RandomState(seed)
|
||||
m,n = x.shape
|
||||
|
||||
self.meanx = x.mean(axis=1)
|
||||
|
||||
self.sdx = x.std(axis=1)
|
||||
|
||||
|
||||
self.ordxx = np.argsort(x,axis=1)
|
||||
|
||||
xx = x.ravel()[self.ordxx.ravel()].reshape(x.shape)
|
||||
|
||||
self.z = 0.5*(xx[:,1:]+xx[:,:-1])
|
||||
|
||||
dv = abs(np.diff(x,axis=1))
|
||||
|
||||
dvtrim = st.trim_mean(dv,trimval,axis=1)
|
||||
|
||||
self.xmin = xx[:, 0]- dvtrim
|
||||
self.xmax = xx[:,-1]+ dvtrim
|
||||
|
||||
tmp = np.array([[0.25]*(n-2)+[0.5]*(n-2)+[0.25]*(n-2)])
|
||||
cmd = (np.column_stack((xx[:,:n-2],xx[:,1:n-1],xx[:,2:n])) * tmp)
|
||||
|
||||
aux = np.array([cmd[:,i::n-2].sum(axis=1) for i in range(n-2)]).T
|
||||
|
||||
self.desintxb = np.column_stack((0.75 * xx[:,:1] + 0.25 * xx[:,1:2], aux, 0.25 * xx[:,-2:-1] + 0.75 * xx[:,-1:]))
|
||||
|
||||
|
||||
def _mrapproxpy(self,p,z,desintxb):
|
||||
|
||||
m,n = p.shape
|
||||
|
||||
q = -np.inf*np.ones((n)*m)
|
||||
|
||||
a = (p//(1/n)-1).astype(int)
|
||||
|
||||
hs = np.arange(n-2)
|
||||
|
||||
dz = np.column_stack(([-np.inf]*m,np.diff(z,axis=1)*n,[0]*m)).ravel()
|
||||
|
||||
sz = np.column_stack(([0]*m,(0.5*(z[:,hs+1]+z[:,hs]))[:,hs],[0]*m)).ravel()
|
||||
|
||||
zt = np.column_stack(([-np.inf]*m,z[:,hs],[-np.inf]*m)).ravel()
|
||||
|
||||
dh = np.column_stack(([-np.inf]*m,desintxb[:,hs],[0]*m)).ravel()
|
||||
|
||||
plus = (n*np.arange(m))[np.newaxis].T
|
||||
jx = (np.tile(range(n),(m,1))+plus).ravel()
|
||||
|
||||
ixo = a+1
|
||||
ix = (ixo+plus).ravel()
|
||||
|
||||
tmp = zt[ix]+dh[ix]- sz[ix]
|
||||
|
||||
q[jx] = dz[ix]*(p.ravel()[jx]-(ixo.ravel())/n)+tmp
|
||||
|
||||
return q.reshape((m,n))
|
||||
|
||||
|
||||
def _expandSD(self,bt,fiv):
|
||||
|
||||
obt = len(bt.shape)
|
||||
|
||||
if obt==2:
|
||||
bt = bt[np.newaxis]
|
||||
|
||||
|
||||
sd = self.sdx
|
||||
|
||||
bt = np.swapaxes(bt,0,1)
|
||||
|
||||
sdf = np.column_stack((sd,bt.std(axis=2)))
|
||||
|
||||
sdfa = sdf/sdf[:,:1]
|
||||
|
||||
sdfd = sdf[:,:1]/sdf
|
||||
|
||||
mx = 1+(fiv/100)
|
||||
|
||||
idx = np.where(sdfa<1)
|
||||
|
||||
sdfa[idx] = np.random.uniform(1,mx,size=len(idx[0]))
|
||||
|
||||
sdfdXsdfa = sdfd[:,1:]*sdfa[:,1:]
|
||||
|
||||
bt *= np.moveaxis(sdfdXsdfa[np.newaxis],0,-1)
|
||||
bt = np.swapaxes(bt,0,1)
|
||||
|
||||
if obt==2:
|
||||
bt = bt[0]
|
||||
|
||||
return bt
|
||||
|
||||
def _adjust(self,bt):
|
||||
zz = np.column_stack((self.xmin[np.newaxis].T,self.z,self.xmax[np.newaxis].T))
|
||||
|
||||
v = np.diff(zz**2,axis=1)/12
|
||||
|
||||
xb = self.meanx[np.newaxis].T
|
||||
|
||||
s1 = ((self.desintxb - xb)**2).sum(axis=1)
|
||||
|
||||
act_sd = np.sqrt( (s1+v.sum(axis=1))/(self.z.shape[1]+1) )
|
||||
|
||||
des_sd = self.sdx
|
||||
|
||||
kappa =( des_sd/ act_sd -1)[np.newaxis].T
|
||||
|
||||
bt = bt + kappa* (bt - xb)
|
||||
return bt
|
||||
|
||||
def bootstrap(self,fiv=5,adjust_sd=True):
|
||||
'''
|
||||
Single realization of ME Bootstrap for the multivariate time series.
|
||||
|
||||
fiv: Increment standard deviation (default fiv=5 %)
|
||||
adjust_sd: Fix the standard deviation from the observation.
|
||||
'''
|
||||
|
||||
m,n = self.z.shape
|
||||
n+=1
|
||||
|
||||
p = self.sd.uniform(0,1,size=(m,n))
|
||||
|
||||
q = self._mrapproxpy(p,self.z,self.desintxb[:,1:])
|
||||
|
||||
|
||||
f_low = np.column_stack((self.xmin[np.newaxis].T,self.z[:,0]))
|
||||
f_hi = np.column_stack((self.z[:,-1],self.xmax[np.newaxis].T))
|
||||
|
||||
low = p<1/n
|
||||
hi = p>(n-1)/n
|
||||
|
||||
|
||||
for i in range(m):
|
||||
q[i][low[i]] = np.interp(p[i][low[i]],[0,1/n],f_low[i])
|
||||
q[i][hi[i]] = np.interp(p[i][hi[i]],[(n - 1)/n,1],f_hi[i])
|
||||
|
||||
qseq = np.sort(q[i])
|
||||
q[i][self.ordxx[i]] = qseq
|
||||
|
||||
if fiv!=None:
|
||||
q = self._expandSD(q,fiv)
|
||||
if adjust_sd==True:
|
||||
q = self._adjust(q)
|
||||
return q
|
||||
|
||||
def bootstrap_clt(self,nt,fiv=5,adjust_sd=True):
|
||||
'''
|
||||
Multiple ME boostrap copies.
|
||||
Force the central limit theorem. Warning it requires to compute all
|
||||
bootstrap copies at once, so it could require a lot of memory.
|
||||
|
||||
nt: number of bootstrap copies
|
||||
fiv: Increment standard deviation (default fiv=5 %)
|
||||
adjust_sd: Fix the standard deviation from the observation.
|
||||
'''
|
||||
|
||||
|
||||
bt = np.array([self.bootstrap(fiv=None) for i in range(nt)])
|
||||
if fiv!=None:
|
||||
|
||||
bt = self._expandSD(bt,fiv)
|
||||
|
||||
bt = np.swapaxes(bt,0,1)
|
||||
|
||||
N,nt,T = bt.shape
|
||||
|
||||
gm = self.meanx
|
||||
s = self.sdx
|
||||
|
||||
smean = s/ np.sqrt(nt)
|
||||
|
||||
xbar = bt.mean(axis=2)
|
||||
|
||||
sortxbar = np.sort(xbar,axis=1)
|
||||
|
||||
oo = np.argsort(xbar,axis=1)
|
||||
|
||||
newbar = gm[np.newaxis].T + st.norm.ppf((np.arange(1,nt+1)/(nt+1))[np.newaxis])* smean[np.newaxis].T
|
||||
|
||||
scn = st.zscore(newbar,axis=1)
|
||||
|
||||
newm = scn*smean[np.newaxis].T+gm[np.newaxis].T
|
||||
|
||||
meanfix = newm- sortxbar
|
||||
|
||||
oinv = np.array([np.array(sorted(zip(oo[i],range(len(oo[i])))))[:,1] for i in range(len(oo))])
|
||||
|
||||
out = np.array([(bt[i][oo[i]]+meanfix[i][np.newaxis].T)[oinv[i]] for i in range(bt.shape[0])])
|
||||
out = np.swapaxes(out,0,1)
|
||||
|
||||
if adjust_sd==True:
|
||||
out = self._adjust(out)
|
||||
|
||||
return out
|
|
@ -1,102 +0,0 @@
|
|||
"""
|
||||
MEBOOT.PY - Python package for the meboot (Maximum Entropy Bootstrap) algorithm for Time Series
|
||||
Author: Fabian Brix
|
||||
Method by H.D. Vinod, Fordham University -
|
||||
"""
|
||||
import sys
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
def sort(series):
|
||||
ind_sorted = np.argsort(series)
|
||||
s_sorted = series[ind_sorted]
|
||||
return s_sorted, ind_sorted
|
||||
|
||||
def get_trm_mean(series, percent):
|
||||
# FIXED
|
||||
dev = np.abs(series[1:]-series[:-1])
|
||||
n = len(dev)
|
||||
k = n*(percent/100.0)/2.0
|
||||
k = round(k,0)
|
||||
# return np.mean(dev[k:n-k])
|
||||
return 15.0
|
||||
|
||||
def get_intermed_pts(series, s_sorted, percent):
|
||||
zt = (s_sorted[:-1]+s_sorted[1:])/2.0
|
||||
m_trm = get_trm_mean(series, percent)
|
||||
print(m_trm)
|
||||
z0 = s_sorted[0]-m_trm
|
||||
zT = s_sorted[-1]+m_trm
|
||||
z = np.hstack((z0,zt,zT))
|
||||
return z
|
||||
|
||||
def get_intervals(z):
|
||||
return np.vstack((z[:-1], z[1:])).T
|
||||
|
||||
def get_me_density(intervals):
|
||||
return 1.0/(intervals[:,1]-intervals[:,0])
|
||||
|
||||
def get_cpf(me_density, intervals):
|
||||
cpf = np.array([sum(me_density[:i+1]) for i in range(me_density.shape[0]-1)])
|
||||
print(cpf)
|
||||
return cpf/np.max(cpf)
|
||||
|
||||
def get_quantiles(cpf, intervals, series):
|
||||
quantiles = []
|
||||
T = float(len(series))
|
||||
t = np.arange(T+1)
|
||||
Rt = np.vstack((t[:-1]/T,t[1:]/T)).T
|
||||
# print(Rt)
|
||||
aaa = np.array([0.12, 0.83, 0.53, 0.59, 0.11])
|
||||
for d in range(series.shape[0]):
|
||||
# u = np.random.uniform(0,1)
|
||||
u = aaa[d]
|
||||
# print(d, u)
|
||||
# u = aaa[d]
|
||||
for i in range(cpf.shape[0]):
|
||||
cp = cpf[i]
|
||||
if u <= cp:
|
||||
cpm = cpf[i-1]
|
||||
if i == 0:
|
||||
cpm = 0
|
||||
m = (cp-cpm)/1.0*(intervals[i,1]-intervals[i,0])
|
||||
xp = (u - cpm)*1.0/m+intervals[i,0]
|
||||
quantiles.append(xp)
|
||||
break
|
||||
return np.array(quantiles)
|
||||
|
||||
def meboot(series, replicates):
|
||||
# ASC by default
|
||||
print(series)
|
||||
np.random.seed(0)
|
||||
|
||||
s_sorted, ind_sorted = sort(series)
|
||||
|
||||
z = get_intermed_pts(series, s_sorted, 10)
|
||||
print('z ', z)
|
||||
|
||||
intervals = get_intervals(z)
|
||||
print('intervals ', intervals)
|
||||
|
||||
me_density = get_me_density(intervals)
|
||||
print('uni dens ', me_density)
|
||||
|
||||
cpf = get_cpf(me_density, intervals)
|
||||
print('cpf ', cpf)
|
||||
|
||||
quantiles = get_quantiles(cpf, intervals, series)
|
||||
print('quantiles ', quantiles)
|
||||
|
||||
quantiles = np.sort(quantiles)
|
||||
|
||||
replicate = quantiles[ind_sorted]
|
||||
print('replicate ', replicate)
|
||||
|
||||
# TODO: Undertand and add repeat mechanism
|
||||
# plt.plot(series, color='r')
|
||||
# plt.plot(replicate, color='b')
|
||||
# plt.ylim(0,30)
|
||||
# plt.show()
|
||||
|
||||
series = np.array([4,12,36,20,8])
|
||||
meboot(series, 1)
|
|
@ -198,37 +198,39 @@ def synthetic_FFT(X, multiv=False):
|
|||
return X_synt
|
||||
|
||||
|
||||
def synthetic_sampling(X, replace=True):
|
||||
def synthetic_sampling(X, n_reps=1, replace=True):
|
||||
"""
|
||||
generate more than n_samples, remome multi-time series
|
||||
X must be (n_samples, 1)
|
||||
"""
|
||||
n_samples, n_series = X.shape
|
||||
X_synt = np.zeros_like(X)
|
||||
n_samples = X.shape[0]
|
||||
|
||||
# Sampling with replacement
|
||||
if (replace):
|
||||
idx = np.random.randint(0, n_samples, size=(n_samples, n_series))
|
||||
i = np.arange(n_series)
|
||||
X_synt[:, i] = X[idx[:, i], i]
|
||||
idx = np.random.randint(0, n_samples, size=(n_samples, n_reps))
|
||||
i = np.arange(n_reps)
|
||||
Xe = np.tile(X,(1, n_reps))
|
||||
X_synt = Xe[idx, i]
|
||||
|
||||
# Sampling without replacement
|
||||
else:
|
||||
idx = np.zeros_like(X)
|
||||
for j in range(n_series):
|
||||
idx = np.zeros(n_samples, n_reps)
|
||||
for j in range(n_reps):
|
||||
idx[:, j] = np.random.permutation(n_samples)
|
||||
i = np.arange(n_series)
|
||||
X_synt[:, i] = X[idx[:, i], i]
|
||||
i = np.arange(n_reps)
|
||||
Xe = np.tile(X,(1, n_reps))
|
||||
X_synt = Xe[idx, i]
|
||||
|
||||
return X_synt
|
||||
|
||||
|
||||
def synthetic_MEboot(X, alpha=0.1):
|
||||
def synthetic_MEboot(X, alpha=0.1, bounds=True, scale=False):
|
||||
"""
|
||||
"""
|
||||
n_samples, n_series = X.shape
|
||||
X_synt = np.zeros_like(X)
|
||||
|
||||
# Loop over time-series
|
||||
# Loop over time-series iterate over number of desired series or set parallel
|
||||
# if possile
|
||||
n = n_samples
|
||||
for ts in range(n_series):
|
||||
|
||||
|
@ -256,32 +258,40 @@ def synthetic_MEboot(X, alpha=0.1):
|
|||
mt[n-1] = 0.25 * Y[n-2] + 0.75 * Y[n-1]
|
||||
|
||||
# Randomly generate new points
|
||||
t_w = np.random.rand(n)
|
||||
# t_w = np.array([0.12, 0.83, 0.53, 0.59, 0.11])
|
||||
|
||||
# order here???? and remove correction inside intervals???
|
||||
# t_w = np.random.rand(n)
|
||||
t_w = np.array([0.12, 0.83, 0.53, 0.59, 0.11])
|
||||
t_w = np.sort(t_w)
|
||||
|
||||
# Interpolate new points
|
||||
t = np.linspace(0.0, 1.0, num=n+1)
|
||||
w_int = np.interp(t_w, t, Z)
|
||||
print('w_int=', w_int)
|
||||
|
||||
# Correct the new points to satisfy mass constraint
|
||||
# Correct the new points (first and last interval) to satisfy mass constraint
|
||||
idw = (np.floor_divide(t_w, 1.0 / n)).astype(int)
|
||||
print('idw=', idw)
|
||||
w_corr = w_int + mt[idw] - (Z[idw] + Z[idw+1]) / 2.0
|
||||
print('w_corr', w_corr)
|
||||
corr = np.where(idw == 0, mt[idw] - (Z[idw] + Z[idw+1]) / 2.0, 0.0)
|
||||
w_corr = w_int + corr
|
||||
if (n > 1):
|
||||
corr = np.where(idw == n-1, mt[idw] - (Z[idw] + Z[idw+1]) / 2.0, 0.0)
|
||||
w_corr += corr
|
||||
|
||||
# Enforce limits
|
||||
# w_corr = np.fmin(np.fmax(w_corr, Z[0]), Z[n])
|
||||
if (bounds):
|
||||
w_corr = np.fmin(np.fmax(w_corr, Z[0]), Z[n])
|
||||
|
||||
# Re-order sampled values
|
||||
w_ord = np.sort(w_corr)
|
||||
# print(w_ord)
|
||||
# w_ord = np.array([5.85, 6.7, 8.9, 10.7, 23.95])
|
||||
# Recovery the time-dependencies (done all togheter?)
|
||||
X_synt = np.zeros(n)
|
||||
X_synt[idx] = w_corr
|
||||
|
||||
# Scale (done all together?)
|
||||
if (scale):
|
||||
var_Z = np.diff(Z) ** 2.0 / 12.0
|
||||
X_mean = X.mean(axis=0)
|
||||
var_ME = (((mt - X_mean) ** 2).sum() + var_Z.sum()) / n
|
||||
std_X= X.std(axis=0, ddof=1)
|
||||
std_ME = np.sqrt(var_ME)
|
||||
k_scale = std_X / std_ME - 1.0
|
||||
X_synt = X_synt + k_scale * (X_synt - X_mean)
|
||||
|
||||
return X_synt
|
||||
|
||||
# Recovery the time-dependencies
|
||||
W = np.zeros(n)
|
||||
W[idx] = w_ord
|
||||
|
||||
return W
|
||||
|
|
|
@ -4,10 +4,10 @@ Filters for time series.
|
|||
Copyright (c) 2020 Gabriele Gilardi
|
||||
|
||||
ToDo:
|
||||
- add comments to the code
|
||||
- in comments write what filters do
|
||||
- is necessary to copy X for Y untouched?
|
||||
- decide default values in functions
|
||||
- check conditions on P and N
|
||||
- why lag plot gives errors
|
||||
- fix plotting function
|
||||
- example for alpha-beta-gamma using variable sigma as in financial time series
|
||||
|
@ -96,6 +96,19 @@ np.random.seed(1294404794)
|
|||
# bb[:, i] = aa[idx[:, i], i]
|
||||
# bb = syn.synthetic_boot(aa, replace=False)
|
||||
# print(bb)
|
||||
aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1)
|
||||
W = syn.synthetic_MEboot(aa, alpha=0.1)
|
||||
# aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1)
|
||||
# W = syn.synthetic_MEboot(aa, alpha=0.1, bounds=False, scale=True)
|
||||
# print(bb.sum())
|
||||
# print('W=', W)
|
||||
|
||||
n = 8
|
||||
aa = np.arange(n).reshape(n,1) * 1.1
|
||||
print(aa)
|
||||
idx = np.random.randint(0, n, size=(n, 3))
|
||||
print(idx)
|
||||
i = np.arange(3)
|
||||
# print(i)
|
||||
bb = np.tile(aa,(1, 3))
|
||||
# print(bb)
|
||||
cc = bb[idx, i]
|
||||
print(cc)
|
Ładowanie…
Reference in New Issue