kopia lustrzana https://github.com/gabrielegilardi/SignalFilters
finish function MEboot
rodzic
1f9b4f9489
commit
afad3296ed
|
@ -1,207 +0,0 @@
|
||||||
import numpy as np
|
|
||||||
import scipy.stats as st
|
|
||||||
|
|
||||||
class MEBOOT:
    """
    Maximum Entropy bootstrap for (multivariate) time series.

    Method by H.D. Vinod, Fordham University. Each row of the input is
    treated as one univariate series; bootstrap copies preserve the rank
    ordering of the observations and (approximately) the mean/std of the
    observed data.
    """

    def __init__(self, x, trimval=0.1, seed=None):
        """
        x: multivariate time series, shape (N, T) with T >= 3
        trimval: proportion to cut from each tail when trimming the mean
                 of the absolute first differences (default 0.1)
        seed: seed for the internal random generator (default None)
        """
        # All randomness goes through this generator so that a given
        # seed gives reproducible bootstrap copies.
        self.sd = np.random.RandomState(seed)
        m, n = x.shape

        self.meanx = x.mean(axis=1)
        self.sdx = x.std(axis=1)

        # Sort each row independently, remembering the ordering so the
        # time-dependence can be restored after resampling.
        self.ordxx = np.argsort(x, axis=1)
        # FIX: the original used x.ravel()[self.ordxx.ravel()], which
        # gathers every row's values from row 0 when N > 1.
        xx = np.take_along_axis(x, self.ordxx, axis=1)

        # Interior limits of the ME density intervals (row-wise midpoints
        # of consecutive order statistics).
        self.z = 0.5*(xx[:, 1:] + xx[:, :-1])

        # Trimmed mean of the absolute first differences sets the width
        # of the two open-ended tail intervals.
        dv = abs(np.diff(x, axis=1))
        dvtrim = st.trim_mean(dv, trimval, axis=1)
        self.xmin = xx[:, 0] - dvtrim
        self.xmax = xx[:, -1] + dvtrim

        # Desired interval means (mass-preserving constraint): interior
        # intervals weight consecutive order statistics 0.25/0.5/0.25;
        # the two end intervals weight their pair 0.75/0.25.
        tmp = np.array([[0.25]*(n - 2) + [0.5]*(n - 2) + [0.25]*(n - 2)])
        cmd = np.column_stack((xx[:, :n-2], xx[:, 1:n-1], xx[:, 2:n]))*tmp
        aux = np.array([cmd[:, i::n-2].sum(axis=1) for i in range(n - 2)]).T
        self.desintxb = np.column_stack((
            0.75*xx[:, :1] + 0.25*xx[:, 1:2],
            aux,
            0.25*xx[:, -2:-1] + 0.75*xx[:, -1:]))

    def _mrapproxpy(self, p, z, desintxb):
        """
        Vectorized inverse-CDF evaluation for the interior intervals.

        p: uniform draws, shape (m, n)
        z: interior interval limits, shape (m, n-1)
        desintxb: desired means of the interior intervals, shape (m, n-1)

        Draws falling in the first or last (open-ended) interval are left
        at -inf; the caller interpolates them separately.
        """
        m, n = p.shape
        q = -np.inf*np.ones(n*m)

        # Interval index of each draw: ixo = a+1 in 0..n-1.
        a = (p//(1/n) - 1).astype(int)
        hs = np.arange(n - 2)

        # Per-interval slope, left limit, desired mean and midpoint,
        # padded with -inf/0 fillers for the two tail intervals.
        dz = np.column_stack(([-np.inf]*m, np.diff(z, axis=1)*n, [0]*m)).ravel()
        sz = np.column_stack(([0]*m, 0.5*(z[:, hs+1] + z[:, hs]), [0]*m)).ravel()
        zt = np.column_stack(([-np.inf]*m, z[:, hs], [-np.inf]*m)).ravel()
        dh = np.column_stack(([-np.inf]*m, desintxb[:, hs], [0]*m)).ravel()

        # Flat indices into the padded row-major arrays.
        plus = (n*np.arange(m))[np.newaxis].T
        jx = (np.tile(range(n), (m, 1)) + plus).ravel()
        ixo = a + 1
        ix = (ixo + plus).ravel()

        # Linear interpolation within each interval, shifted so the
        # interval mean matches the desired mean.
        tmp = zt[ix] + dh[ix] - sz[ix]
        q[jx] = dz[ix]*(p.ravel()[jx] - ixo.ravel()/n) + tmp

        return q.reshape((m, n))

    def _expandSD(self, bt, fiv):
        """
        Randomly inflate the standard deviation of bootstrap copies by up
        to fiv percent; copies whose sd already exceeds the observed one
        keep their own sd ratio.

        bt: single copy (N, T) or a stack of copies (nt, N, T)
        fiv: maximum percent increase of the standard deviation
        """
        obt = len(bt.shape)
        if obt == 2:
            bt = bt[np.newaxis]

        sd = self.sdx
        bt = np.swapaxes(bt, 0, 1)          # (N, nt, T)

        sdf = np.column_stack((sd, bt.std(axis=2)))
        sdfa = sdf/sdf[:, :1]               # copy-sd / observed-sd
        sdfd = sdf[:, :1]/sdf               # observed-sd / copy-sd

        mx = 1 + (fiv/100)
        idx = np.where(sdfa < 1)
        # FIX: draw from the seeded generator (was np.random.uniform),
        # otherwise the `seed` argument did not make results reproducible.
        sdfa[idx] = self.sd.uniform(1, mx, size=len(idx[0]))

        sdfdXsdfa = sdfd[:, 1:]*sdfa[:, 1:]
        bt *= np.moveaxis(sdfdXsdfa[np.newaxis], 0, -1)
        bt = np.swapaxes(bt, 0, 1)

        if obt == 2:
            bt = bt[0]
        return bt

    def _adjust(self, bt):
        """
        Scale the copies so their ensemble standard deviation matches the
        observed one (kappa correction).
        """
        zz = np.column_stack((self.xmin[np.newaxis].T, self.z,
                              self.xmax[np.newaxis].T))
        # Variance of a uniform density on each interval is width**2/12.
        # FIX: the original computed np.diff(zz**2)/12 (difference of
        # squares); the squared interval width matches the ME density and
        # the companion implementation (var_Z = np.diff(Z)**2 / 12).
        v = np.diff(zz, axis=1)**2/12

        xb = self.meanx[np.newaxis].T
        s1 = ((self.desintxb - xb)**2).sum(axis=1)
        act_sd = np.sqrt((s1 + v.sum(axis=1))/(self.z.shape[1] + 1))
        des_sd = self.sdx

        kappa = (des_sd/act_sd - 1)[np.newaxis].T
        return bt + kappa*(bt - xb)

    def bootstrap(self, fiv=5, adjust_sd=True):
        """
        Single realization of the ME bootstrap for the multivariate time
        series.

        fiv: percent increment of the standard deviation (default 5);
             None disables the expansion step.
        adjust_sd: fix the standard deviation from the observation.
        """
        m, n = self.z.shape
        n += 1

        p = self.sd.uniform(0, 1, size=(m, n))
        q = self._mrapproxpy(p, self.z, self.desintxb[:, 1:])

        # Linear interpolation in the two open-ended tail intervals
        # (these draws were left at -inf by _mrapproxpy).
        f_low = np.column_stack((self.xmin[np.newaxis].T, self.z[:, 0]))
        f_hi = np.column_stack((self.z[:, -1], self.xmax[np.newaxis].T))
        low = p < 1/n
        hi = p > (n - 1)/n

        for i in range(m):
            q[i][low[i]] = np.interp(p[i][low[i]], [0, 1/n], f_low[i])
            q[i][hi[i]] = np.interp(p[i][hi[i]], [(n - 1)/n, 1], f_hi[i])
            # Restore the observed time-dependence: the k-th smallest
            # sampled value goes to the position of the k-th smallest
            # observation.
            qseq = np.sort(q[i])
            q[i][self.ordxx[i]] = qseq

        if fiv is not None:
            q = self._expandSD(q, fiv)
        if adjust_sd:
            q = self._adjust(q)
        return q

    def bootstrap_clt(self, nt, fiv=5, adjust_sd=True):
        """
        Multiple ME bootstrap copies, forcing the central limit theorem
        on the copy means. Warning: all copies are computed at once, so
        this can require a lot of memory.

        nt: number of bootstrap copies
        fiv: percent increment of the standard deviation (default 5);
             None disables the expansion step.
        adjust_sd: fix the standard deviation from the observation.
        """
        bt = np.array([self.bootstrap(fiv=None) for i in range(nt)])
        if fiv is not None:
            bt = self._expandSD(bt, fiv)

        bt = np.swapaxes(bt, 0, 1)          # (N, nt, T)
        N, nt, T = bt.shape

        gm = self.meanx
        s = self.sdx
        smean = s/np.sqrt(nt)               # CLT spread of the copy means

        # Replace the sorted copy means by Gaussian quantiles with the
        # theoretical CLT spread, then shift each copy accordingly.
        xbar = bt.mean(axis=2)
        sortxbar = np.sort(xbar, axis=1)
        oo = np.argsort(xbar, axis=1)

        newbar = (gm[np.newaxis].T
                  + st.norm.ppf((np.arange(1, nt+1)/(nt+1))[np.newaxis])
                  * smean[np.newaxis].T)
        scn = st.zscore(newbar, axis=1)
        newm = scn*smean[np.newaxis].T + gm[np.newaxis].T
        meanfix = newm - sortxbar

        # Inverse permutation of each row's mean ordering (the original
        # built it with a sorted(zip(...)) detour).
        oinv = np.argsort(oo, axis=1)
        out = np.array([(bt[i][oo[i]] + meanfix[i][np.newaxis].T)[oinv[i]]
                        for i in range(bt.shape[0])])
        out = np.swapaxes(out, 0, 1)

        if adjust_sd:
            out = self._adjust(out)
        return out
|
|
|
@ -1,102 +0,0 @@
|
||||||
"""
|
|
||||||
MEBOOT.PY - Python package for the meboot (Maximum Entropy Bootstrap) algorithm for Time Series
|
|
||||||
Author: Fabian Brix
|
|
||||||
Method by H.D. Vinod, Fordham University -
|
|
||||||
"""
|
|
||||||
import sys
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
|
|
||||||
def sort(series):
    """Return (sorted values, sorting indices) of a 1-D array."""
    order = np.argsort(series)
    return series[order], order
|
|
||||||
|
|
||||||
def get_trm_mean(series, percent):
    """
    Trimmed mean of the absolute first differences of *series*.

    percent: total percentage of the (sorted) differences to discard,
             split evenly between the two tails.

    The original returned a hard-coded 15.0 (debug leftover) with the
    real computation commented out; it also used a float trim count,
    which is not a valid slice index.
    """
    dev = np.abs(series[1:] - series[:-1])
    n = len(dev)
    # Number of elements to cut from each tail; cap so at least one
    # element always remains.
    k = int(round(n*(percent/100.0)/2.0))
    k = min(k, (n - 1)//2)
    return np.mean(np.sort(dev)[k:n-k])
|
|
||||||
|
|
||||||
def get_intermed_pts(series, s_sorted, percent):
    """
    Interval limits for the ME density: midpoints of consecutive sorted
    values, extended at both ends by the trimmed mean of the absolute
    first differences.
    """
    m_trm = get_trm_mean(series, percent)
    print(m_trm)
    inner = (s_sorted[:-1] + s_sorted[1:])/2.0
    return np.hstack((s_sorted[0] - m_trm, inner, s_sorted[-1] + m_trm))
|
|
||||||
|
|
||||||
def get_intervals(z):
    """Pair consecutive limits into (left, right) interval rows."""
    return np.column_stack((z[:-1], z[1:]))
|
|
||||||
|
|
||||||
def get_me_density(intervals):
    """Uniform (maximum-entropy) density on each interval: 1 / width."""
    widths = intervals[:, 1] - intervals[:, 0]
    return 1.0/widths
|
|
||||||
|
|
||||||
def get_cpf(me_density, intervals):
    """
    Normalized cumulative sum of the interval densities.

    NOTE(review): the running sum deliberately stops one short of the
    last interval (matches the original) — confirm this is intended, a
    true CDF would accumulate density*width over all intervals.
    """
    cpf = np.cumsum(me_density)[:-1]
    print(cpf)
    return cpf/np.max(cpf)
|
|
||||||
|
|
||||||
def get_quantiles(cpf, intervals, series):
    """
    Invert the piecewise-linear CPF at one draw per sample and return
    the resulting quantiles.
    """
    T = float(len(series))
    t = np.arange(T + 1)
    Rt = np.vstack((t[:-1]/T, t[1:]/T)).T   # per-interval probabilities (unused)
    # Hard-coded draws instead of np.random.uniform(0, 1) — kept from
    # the original for reproducible debugging.
    aaa = np.array([0.12, 0.83, 0.53, 0.59, 0.11])
    quantiles = []
    for d in range(series.shape[0]):
        u = aaa[d]
        # Find the first interval whose cumulative value covers u and
        # invert the linear segment there.
        for i, cp in enumerate(cpf):
            if u > cp:
                continue
            cpm = 0 if i == 0 else cpf[i - 1]
            m = (cp - cpm)/1.0*(intervals[i, 1] - intervals[i, 0])
            quantiles.append((u - cpm)*1.0/m + intervals[i, 0])
            break
    return np.array(quantiles)
|
|
||||||
|
|
||||||
def meboot(series, replicates):
    """
    Single maximum-entropy bootstrap pass over *series* (debug driver
    with verbose printing).

    series: 1-D array of observations
    replicates: requested number of copies — currently unused; exactly
                one replicate is produced. TODO: add repeat mechanism.

    Returns the replicate (the original returned None; returning it is
    backward-compatible).
    """
    # ASC by default
    print(series)
    np.random.seed(0)

    s_sorted, ind_sorted = sort(series)

    z = get_intermed_pts(series, s_sorted, 10)
    print('z ', z)

    intervals = get_intervals(z)
    print('intervals ', intervals)

    me_density = get_me_density(intervals)
    print('uni dens ', me_density)

    cpf = get_cpf(me_density, intervals)
    print('cpf ', cpf)

    quantiles = get_quantiles(cpf, intervals, series)
    print('quantiles ', quantiles)

    quantiles = np.sort(quantiles)

    # Restore the observed time ordering: the k-th smallest quantile
    # goes to the position that held the k-th smallest observation.
    # FIX: the original used quantiles[ind_sorted], which applies the
    # permutation instead of inverting it; the assignment form below
    # matches the other implementations (W[idx] = w_ord).
    replicate = np.empty_like(quantiles)
    replicate[ind_sorted] = quantiles
    print('replicate ', replicate)

    return replicate
|
|
||||||
|
|
||||||
# Debug driver: run a single ME bootstrap pass on a small test series.
series = np.array([4,12,36,20,8])
meboot(series, 1)
|
|
|
@ -198,37 +198,39 @@ def synthetic_FFT(X, multiv=False):
|
||||||
return X_synt
|
return X_synt
|
||||||
|
|
||||||
|
|
||||||
def synthetic_sampling(X, replace=True):
|
def synthetic_sampling(X, n_reps=1, replace=True):
|
||||||
"""
|
"""
|
||||||
generate more than n_samples, remome multi-time series
|
X must be (n_samples, 1)
|
||||||
"""
|
"""
|
||||||
n_samples, n_series = X.shape
|
n_samples = X.shape[0]
|
||||||
X_synt = np.zeros_like(X)
|
|
||||||
|
|
||||||
# Sampling with replacement
|
# Sampling with replacement
|
||||||
if (replace):
|
if (replace):
|
||||||
idx = np.random.randint(0, n_samples, size=(n_samples, n_series))
|
idx = np.random.randint(0, n_samples, size=(n_samples, n_reps))
|
||||||
i = np.arange(n_series)
|
i = np.arange(n_reps)
|
||||||
X_synt[:, i] = X[idx[:, i], i]
|
Xe = np.tile(X,(1, n_reps))
|
||||||
|
X_synt = Xe[idx, i]
|
||||||
|
|
||||||
# Sampling without replacement
|
# Sampling without replacement
|
||||||
else:
|
else:
|
||||||
idx = np.zeros_like(X)
|
idx = np.zeros(n_samples, n_reps)
|
||||||
for j in range(n_series):
|
for j in range(n_reps):
|
||||||
idx[:, j] = np.random.permutation(n_samples)
|
idx[:, j] = np.random.permutation(n_samples)
|
||||||
i = np.arange(n_series)
|
i = np.arange(n_reps)
|
||||||
X_synt[:, i] = X[idx[:, i], i]
|
Xe = np.tile(X,(1, n_reps))
|
||||||
|
X_synt = Xe[idx, i]
|
||||||
|
|
||||||
return X_synt
|
return X_synt
|
||||||
|
|
||||||
|
|
||||||
def synthetic_MEboot(X, alpha=0.1):
|
def synthetic_MEboot(X, alpha=0.1, bounds=True, scale=False):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
n_samples, n_series = X.shape
|
n_samples, n_series = X.shape
|
||||||
X_synt = np.zeros_like(X)
|
X_synt = np.zeros_like(X)
|
||||||
|
|
||||||
# Loop over time-series
|
# Loop over time-series iterate over number of desired series or set parallel
|
||||||
|
# if possile
|
||||||
n = n_samples
|
n = n_samples
|
||||||
for ts in range(n_series):
|
for ts in range(n_series):
|
||||||
|
|
||||||
|
@ -256,32 +258,40 @@ def synthetic_MEboot(X, alpha=0.1):
|
||||||
mt[n-1] = 0.25 * Y[n-2] + 0.75 * Y[n-1]
|
mt[n-1] = 0.25 * Y[n-2] + 0.75 * Y[n-1]
|
||||||
|
|
||||||
# Randomly generate new points
|
# Randomly generate new points
|
||||||
t_w = np.random.rand(n)
|
# t_w = np.random.rand(n)
|
||||||
# t_w = np.array([0.12, 0.83, 0.53, 0.59, 0.11])
|
t_w = np.array([0.12, 0.83, 0.53, 0.59, 0.11])
|
||||||
|
t_w = np.sort(t_w)
|
||||||
# order here???? and remove correction inside intervals???
|
|
||||||
|
|
||||||
# Interpolate new points
|
# Interpolate new points
|
||||||
t = np.linspace(0.0, 1.0, num=n+1)
|
t = np.linspace(0.0, 1.0, num=n+1)
|
||||||
w_int = np.interp(t_w, t, Z)
|
w_int = np.interp(t_w, t, Z)
|
||||||
print('w_int=', w_int)
|
|
||||||
|
|
||||||
# Correct the new points to satisfy mass constraint
|
# Correct the new points (first and last interval) to satisfy mass constraint
|
||||||
idw = (np.floor_divide(t_w, 1.0 / n)).astype(int)
|
idw = (np.floor_divide(t_w, 1.0 / n)).astype(int)
|
||||||
print('idw=', idw)
|
corr = np.where(idw == 0, mt[idw] - (Z[idw] + Z[idw+1]) / 2.0, 0.0)
|
||||||
w_corr = w_int + mt[idw] - (Z[idw] + Z[idw+1]) / 2.0
|
w_corr = w_int + corr
|
||||||
print('w_corr', w_corr)
|
if (n > 1):
|
||||||
|
corr = np.where(idw == n-1, mt[idw] - (Z[idw] + Z[idw+1]) / 2.0, 0.0)
|
||||||
|
w_corr += corr
|
||||||
|
|
||||||
# Enforce limits
|
# Enforce limits
|
||||||
# w_corr = np.fmin(np.fmax(w_corr, Z[0]), Z[n])
|
if (bounds):
|
||||||
|
w_corr = np.fmin(np.fmax(w_corr, Z[0]), Z[n])
|
||||||
|
|
||||||
# Re-order sampled values
|
# Recovery the time-dependencies (done all togheter?)
|
||||||
w_ord = np.sort(w_corr)
|
X_synt = np.zeros(n)
|
||||||
# print(w_ord)
|
X_synt[idx] = w_corr
|
||||||
# w_ord = np.array([5.85, 6.7, 8.9, 10.7, 23.95])
|
|
||||||
|
# Scale (done all together?)
|
||||||
|
if (scale):
|
||||||
|
var_Z = np.diff(Z) ** 2.0 / 12.0
|
||||||
|
X_mean = X.mean(axis=0)
|
||||||
|
var_ME = (((mt - X_mean) ** 2).sum() + var_Z.sum()) / n
|
||||||
|
std_X= X.std(axis=0, ddof=1)
|
||||||
|
std_ME = np.sqrt(var_ME)
|
||||||
|
k_scale = std_X / std_ME - 1.0
|
||||||
|
X_synt = X_synt + k_scale * (X_synt - X_mean)
|
||||||
|
|
||||||
|
return X_synt
|
||||||
|
|
||||||
# Recovery the time-dependencies
|
|
||||||
W = np.zeros(n)
|
|
||||||
W[idx] = w_ord
|
|
||||||
|
|
||||||
return W
|
|
||||||
|
|
|
@ -4,10 +4,10 @@ Filters for time series.
|
||||||
Copyright (c) 2020 Gabriele Gilardi
|
Copyright (c) 2020 Gabriele Gilardi
|
||||||
|
|
||||||
ToDo:
|
ToDo:
|
||||||
|
- add comments to the code
|
||||||
- in comments write what filters do
|
- in comments write what filters do
|
||||||
- is necessary to copy X for Y untouched?
|
- is necessary to copy X for Y untouched?
|
||||||
- decide default values in functions
|
- decide default values in functions
|
||||||
- check conditions on P and N
|
|
||||||
- why lag plot gives errors
|
- why lag plot gives errors
|
||||||
- fix plotting function
|
- fix plotting function
|
||||||
- example for alpha-beta-gamma using variable sigma as in financial time series
|
- example for alpha-beta-gamma using variable sigma as in financial time series
|
||||||
|
@ -96,6 +96,19 @@ np.random.seed(1294404794)
|
||||||
# bb[:, i] = aa[idx[:, i], i]
|
# bb[:, i] = aa[idx[:, i], i]
|
||||||
# bb = syn.synthetic_boot(aa, replace=False)
|
# bb = syn.synthetic_boot(aa, replace=False)
|
||||||
# print(bb)
|
# print(bb)
|
||||||
aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1)
|
# aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1)
|
||||||
W = syn.synthetic_MEboot(aa, alpha=0.1)
|
# W = syn.synthetic_MEboot(aa, alpha=0.1, bounds=False, scale=True)
|
||||||
# print(bb.sum())
|
# print(bb.sum())
|
||||||
|
# print('W=', W)
|
||||||
|
|
||||||
|
n = 8
|
||||||
|
aa = np.arange(n).reshape(n,1) * 1.1
|
||||||
|
print(aa)
|
||||||
|
idx = np.random.randint(0, n, size=(n, 3))
|
||||||
|
print(idx)
|
||||||
|
i = np.arange(3)
|
||||||
|
# print(i)
|
||||||
|
bb = np.tile(aa,(1, 3))
|
||||||
|
# print(bb)
|
||||||
|
cc = bb[idx, i]
|
||||||
|
print(cc)
|
Ładowanie…
Reference in New Issue