finish function MEboot

master
Gabriele Gilardi 2020-06-24 22:17:49 +09:00
rodzic 1f9b4f9489
commit afad3296ed
4 zmienionych plików z 57 dodań i 343 usunięć

Wyświetl plik

@ -1,207 +0,0 @@
import numpy as np
import scipy.stats as st
class MEBOOT:
    """
    Maximum-entropy bootstrap for a multivariate time series.

    The constructor pre-computes, independently for each row of `x`, the
    order statistics, the interval mid-points `z`, the extended limits
    `xmin`/`xmax` and the desired interval means `desintxb`.  `bootstrap`
    and `bootstrap_clt` then draw replicates that keep the rank ordering of
    each observed row.
    """

    def __init__(self, x, trimval=0.1, seed=None):
        """
        x: multivariate-time series N x T (2-D numpy array, one series per row)
        trimval: trim value passed to scipy.stats.trim_mean (default 0.1)
        seed: seed for the private RandomState used by all random draws
        """
        # NOTE: despite its name, `self.sd` is the random generator.
        self.sd = np.random.RandomState(seed)
        m, n = x.shape
        self.meanx = x.mean(axis=1)
        self.sdx = x.std(axis=1)
        self.ordxx = np.argsort(x, axis=1)

        # Sort every row with its own permutation.  (Indexing the flattened
        # array with per-row indices would read row 0 for every row.)
        xx = np.take_along_axis(x, self.ordxx, axis=1)

        # Mid-points between consecutive order statistics
        self.z = 0.5 * (xx[:, 1:] + xx[:, :-1])

        # Limits: extreme order statistics extended by the trimmed mean of
        # the absolute consecutive differences
        dv = abs(np.diff(x, axis=1))
        dvtrim = st.trim_mean(dv, trimval, axis=1)
        self.xmin = xx[:, 0] - dvtrim
        self.xmax = xx[:, -1] + dvtrim

        # Desired interval means: 0.25/0.5/0.25 weights in the interior,
        # 0.75/0.25 weights at the two ends
        inner = 0.25 * xx[:, :-2] + 0.5 * xx[:, 1:-1] + 0.25 * xx[:, 2:]
        self.desintxb = np.column_stack((
            0.75 * xx[:, :1] + 0.25 * xx[:, 1:2],
            inner,
            0.25 * xx[:, -2:-1] + 0.75 * xx[:, -1:],
        ))

    def _mrapproxpy(self, p, z, desintxb):
        """
        Map uniform draws `p` (m x n) to quantiles for the interior intervals.

        Entries of `p` falling in the first or last interval are returned as
        -inf; `bootstrap` overwrites them by interpolation.
        """
        m, n = p.shape
        q = -np.inf * np.ones((n) * m)
        # Interval index of each draw, shifted by one
        a = (p // (1 / n) - 1).astype(int)
        hs = np.arange(n - 2)

        # Per-row lookup tables, padded on both sides and flattened so they
        # can be addressed with a single flat index
        dz = np.column_stack(([-np.inf] * m, np.diff(z, axis=1) * n, [0] * m)).ravel()
        sz = np.column_stack(([0] * m, (0.5 * (z[:, hs + 1] + z[:, hs]))[:, hs], [0] * m)).ravel()
        zt = np.column_stack(([-np.inf] * m, z[:, hs], [-np.inf] * m)).ravel()
        dh = np.column_stack(([-np.inf] * m, desintxb[:, hs], [0] * m)).ravel()

        # Row offsets into the flattened tables
        plus = (n * np.arange(m))[np.newaxis].T
        jx = (np.tile(range(n), (m, 1)) + plus).ravel()
        ixo = a + 1
        ix = (ixo + plus).ravel()

        tmp = zt[ix] + dh[ix] - sz[ix]
        q[jx] = dz[ix] * (p.ravel()[jx] - (ixo.ravel()) / n) + tmp
        return q.reshape((m, n))

    def _expandSD(self, bt, fiv):
        """
        Rescale replicates so their standard deviation is not below the
        observed one.

        bt: replicates, shape (N, T) or (nt, N, T); modified in place
        fiv: maximum expansion of the standard deviation, in percent
        """
        obt = len(bt.shape)
        if obt == 2:
            bt = bt[np.newaxis]
        sd = self.sdx
        bt = np.swapaxes(bt, 0, 1)

        # Column 0 is the observed sd, the remaining columns the sd of each
        # replicate
        sdf = np.column_stack((sd, bt.std(axis=2)))
        sdfa = sdf / sdf[:, :1]
        sdfd = sdf[:, :1] / sdf

        # Where the ratio is below one, replace it with a random factor in
        # [1, 1 + fiv/100).  Drawn from the instance generator so that `seed`
        # makes the result reproducible.
        mx = 1 + (fiv / 100)
        idx = np.where(sdfa < 1)
        sdfa[idx] = self.sd.uniform(1, mx, size=len(idx[0]))

        sdfdXsdfa = sdfd[:, 1:] * sdfa[:, 1:]
        bt *= np.moveaxis(sdfdXsdfa[np.newaxis], 0, -1)
        bt = np.swapaxes(bt, 0, 1)
        if obt == 2:
            bt = bt[0]
        return bt

    def _adjust(self, bt):
        """
        Rescale replicates around the observed mean so that the spread
        matches the observed standard deviation.
        """
        zz = np.column_stack((self.xmin[np.newaxis].T, self.z, self.xmax[np.newaxis].T))
        # NOTE(review): the variance of a uniform interval is usually
        # width**2 / 12; this uses diff(zz**2) / 12 - confirm against the
        # reference implementation before changing.
        v = np.diff(zz ** 2, axis=1) / 12
        xb = self.meanx[np.newaxis].T
        s1 = ((self.desintxb - xb) ** 2).sum(axis=1)
        act_sd = np.sqrt((s1 + v.sum(axis=1)) / (self.z.shape[1] + 1))
        des_sd = self.sdx
        kappa = (des_sd / act_sd - 1)[np.newaxis].T
        bt = bt + kappa * (bt - xb)
        return bt

    def bootstrap(self, fiv=5, adjust_sd=True):
        """
        Single realization of ME Bootstrap for the multivariate time series.

        fiv: Increment standard deviation (default fiv=5 %); None skips
             the expansion step
        adjust_sd: Fix the standard deviation from the observation.

        Returns an array with the same N x T shape as the input.
        """
        m, n = self.z.shape
        n += 1
        p = self.sd.uniform(0, 1, size=(m, n))
        q = self._mrapproxpy(p, self.z, self.desintxb[:, 1:])

        # End intervals: linear interpolation between the limit and the
        # nearest mid-point
        f_low = np.column_stack((self.xmin[np.newaxis].T, self.z[:, 0]))
        f_hi = np.column_stack((self.z[:, -1], self.xmax[np.newaxis].T))
        low = p < 1 / n
        hi = p > (n - 1) / n
        for i in range(m):
            q[i][low[i]] = np.interp(p[i][low[i]], [0, 1 / n], f_low[i])
            q[i][hi[i]] = np.interp(p[i][hi[i]], [(n - 1) / n, 1], f_hi[i])
            # Put the sorted quantiles back in the observed rank order
            qseq = np.sort(q[i])
            q[i][self.ordxx[i]] = qseq

        if fiv is not None:
            q = self._expandSD(q, fiv)
        if adjust_sd:
            q = self._adjust(q)
        return q

    def bootstrap_clt(self, nt, fiv=5, adjust_sd=True):
        """
        Multiple ME bootstrap copies.

        Force the central limit theorem. Warning it requires to compute all
        bootstrap copies at once, so it could require a lot of memory.

        nt: number of bootstrap copies
        fiv: Increment standard deviation (default fiv=5 %); None skips
             the expansion step
        adjust_sd: Fix the standard deviation from the observation.

        Returns an array of shape (nt, N, T).
        """
        bt = np.array([self.bootstrap(fiv=None) for _ in range(nt)])
        if fiv is not None:
            bt = self._expandSD(bt, fiv)
        bt = np.swapaxes(bt, 0, 1)      # (N, nt, T)

        gm = self.meanx
        s = self.sdx
        smean = s / np.sqrt(nt)

        # Replicate means, their sorted values and the sorting permutation
        xbar = bt.mean(axis=2)
        sortxbar = np.sort(xbar, axis=1)
        oo = np.argsort(xbar, axis=1)

        # Target means: standardized normal quantiles scaled to the observed
        # mean and sd / sqrt(nt)
        newbar = (gm[np.newaxis].T
                  + st.norm.ppf((np.arange(1, nt + 1) / (nt + 1))[np.newaxis])
                  * smean[np.newaxis].T)
        scn = st.zscore(newbar, axis=1)
        newm = scn * smean[np.newaxis].T + gm[np.newaxis].T
        meanfix = newm - sortxbar

        # argsort of a permutation is its inverse
        oinv = np.argsort(oo, axis=1)
        out = np.array([
            (rows[order] + shift[np.newaxis].T)[inverse]
            for rows, order, shift, inverse in zip(bt, oo, meanfix, oinv)
        ])
        out = np.swapaxes(out, 0, 1)    # back to (nt, N, T)

        if adjust_sd:
            out = self._adjust(out)
        return out

Wyświetl plik

@ -1,102 +0,0 @@
"""
MEBOOT.PY - Python package for the meboot (Maximum Entropy Bootstrap) algorithm for Time Series
Author: Fabian Brix
Method by H.D. Vinod, Fordham University -
"""
import sys
import numpy as np
import matplotlib.pyplot as plt
def sort(series):
    """
    Return the values of `series` in ascending order together with the
    index permutation that produces that order.
    """
    order = np.argsort(series)
    return series[order], order
def get_trm_mean(series, percent):
    """
    Return the trimmed mean of the absolute consecutive differences.

    series: 1-D sequence of observations (at least two points)
    percent: total percentage of deviations to discard, split evenly
             between the smallest and the largest ones

    Raises ValueError if `series` has fewer than two points.
    """
    # Sorted so that the trimming removes the extreme deviations
    dev = np.sort(np.abs(np.diff(series)))
    n = len(dev)
    if n == 0:
        raise ValueError("series must contain at least two points")

    # Number of deviations dropped at each end; must be an int to be usable
    # as a slice bound, and is capped so at least one value is left
    k = int(round(n * (percent / 100.0) / 2.0))
    k = max(0, min(k, (n - 1) // 2))
    return float(np.mean(dev[k:n - k]))
def get_intermed_pts(series, s_sorted, percent):
    """
    Build the interval end-points: the mid-points between consecutive
    sorted values, extended at both ends by the trimmed mean returned by
    get_trm_mean (which is also printed).
    """
    midpoints = (s_sorted[:-1] + s_sorted[1:]) / 2.0
    margin = get_trm_mean(series, percent)
    print(margin)
    lower_end = s_sorted[0] - margin
    upper_end = s_sorted[-1] + margin
    return np.hstack((lower_end, midpoints, upper_end))
def get_intervals(z):
    """
    Pair consecutive end-points of the 1-D array `z` into rows
    [lower, upper]; the result has one row per interval.
    """
    lower, upper = z[:-1], z[1:]
    return np.column_stack((lower, upper))
def get_me_density(intervals):
    """
    Return the reciprocal of each interval width, where `intervals` holds
    one [lower, upper] pair per row.
    """
    widths = intervals[:, 1] - intervals[:, 0]
    return 1.0 / widths
def get_cpf(me_density, intervals):
    """
    Return the running sums of `me_density`, excluding the final one,
    divided by their maximum.  The unnormalized sums are printed.

    `intervals` is accepted but not used.
    """
    running = np.cumsum(me_density)[:-1]
    print(running)
    return running / np.max(running)
def get_quantiles(cpf, intervals, series):
    """
    Map one draw per element of `series` to a value inside the first
    interval whose cumulative value in `cpf` is not below the draw.

    The draws are a fixed five-element table rather than random numbers, so
    a series longer than five points raises IndexError.
    NOTE(review): the fixed table looks like a debugging aid standing in
    for np.random.uniform(0, 1) - confirm before relying on it.
    """
    fixed_draws = np.array([0.12, 0.83, 0.53, 0.59, 0.11])
    found = []
    for d in range(series.shape[0]):
        u = fixed_draws[d]
        for i, upper in enumerate(cpf):
            if not u <= upper:
                continue
            # Cumulative value at the start of this interval
            lower = cpf[i - 1] if i > 0 else 0
            # NOTE(review): this multiplies the probability step by the
            # interval width; a CDF slope would divide - confirm intent.
            slope = (upper - lower) / 1.0 * (intervals[i, 1] - intervals[i, 0])
            found.append((u - lower) * 1.0 / slope + intervals[i, 0])
            break
    return np.array(found)
def meboot(series, replicates):
    """
    Run one pass of the step-by-step procedure on `series`, printing every
    intermediate result.

    The global numpy random state is re-seeded with 0.  `replicates` is
    accepted but not used, and nothing is returned.
    """
    # ASC by default
    print(series)
    np.random.seed(0)

    ordered, order = sort(series)

    end_points = get_intermed_pts(series, ordered, 10)
    print('z ', end_points)

    bins = get_intervals(end_points)
    print('intervals ', bins)

    density = get_me_density(bins)
    print('uni dens ', density)

    cumulative = get_cpf(density, bins)
    print('cpf ', cumulative)

    drawn = get_quantiles(cumulative, bins, series)
    print('quantiles ', drawn)

    # Sort the drawn values, then index them with the sorting permutation
    drawn = np.sort(drawn)
    replicate = drawn[order]
    print('replicate ', replicate)

    # TODO: Understand and add repeat mechanism
    # plt.plot(series, color='r')
    # plt.plot(replicate, color='b')
    # plt.ylim(0,30)
    # plt.show()
# Demo run on a five-point example series; executes whenever the module is
# run or imported.
series = np.array([4,12,36,20,8])
meboot(series, 1)

Wyświetl plik

@ -198,37 +198,39 @@ def synthetic_FFT(X, multiv=False):
return X_synt
def synthetic_sampling(X, n_reps=1, replace=True):
    """
    Generate `n_reps` synthetic series by resampling the rows of X.

    X must be (n_samples, 1).
    n_reps: number of synthetic series to generate (one per output column)
    replace: if True sample with replacement, otherwise each output column
             is a random permutation of the rows

    Returns an array of shape (n_samples, n_reps).
    """
    n_samples = X.shape[0]

    # Sampling with replacement
    if (replace):
        idx = np.random.randint(0, n_samples, size=(n_samples, n_reps))

    # Sampling without replacement
    else:
        # The shape must be passed as a tuple and the dtype must be integer
        # for the array to be usable as an index
        idx = np.zeros((n_samples, n_reps), dtype=int)
        for j in range(n_reps):
            idx[:, j] = np.random.permutation(n_samples)

    # X_synt[r, c] = Xe[idx[r, c], c]
    i = np.arange(n_reps)
    Xe = np.tile(X, (1, n_reps))
    X_synt = Xe[idx, i]

    return X_synt
def synthetic_MEboot(X, alpha=0.1):
def synthetic_MEboot(X, alpha=0.1, bounds=True, scale=False):
"""
"""
n_samples, n_series = X.shape
X_synt = np.zeros_like(X)
# Loop over time-series
# Loop over time-series; iterate over number of desired series or set parallel
# if possible
n = n_samples
for ts in range(n_series):
@ -256,32 +258,40 @@ def synthetic_MEboot(X, alpha=0.1):
mt[n-1] = 0.25 * Y[n-2] + 0.75 * Y[n-1]
# Randomly generate new points
t_w = np.random.rand(n)
# t_w = np.array([0.12, 0.83, 0.53, 0.59, 0.11])
# order here???? and remove correction inside intervals???
# t_w = np.random.rand(n)
t_w = np.array([0.12, 0.83, 0.53, 0.59, 0.11])
t_w = np.sort(t_w)
# Interpolate new points
t = np.linspace(0.0, 1.0, num=n+1)
w_int = np.interp(t_w, t, Z)
print('w_int=', w_int)
# Correct the new points to satisfy mass constraint
# Correct the new points (first and last interval) to satisfy mass constraint
idw = (np.floor_divide(t_w, 1.0 / n)).astype(int)
print('idw=', idw)
w_corr = w_int + mt[idw] - (Z[idw] + Z[idw+1]) / 2.0
print('w_corr', w_corr)
corr = np.where(idw == 0, mt[idw] - (Z[idw] + Z[idw+1]) / 2.0, 0.0)
w_corr = w_int + corr
if (n > 1):
corr = np.where(idw == n-1, mt[idw] - (Z[idw] + Z[idw+1]) / 2.0, 0.0)
w_corr += corr
# Enforce limits
# w_corr = np.fmin(np.fmax(w_corr, Z[0]), Z[n])
if (bounds):
w_corr = np.fmin(np.fmax(w_corr, Z[0]), Z[n])
# Re-order sampled values
w_ord = np.sort(w_corr)
# print(w_ord)
# w_ord = np.array([5.85, 6.7, 8.9, 10.7, 23.95])
# Recover the time-dependencies (done all together?)
X_synt = np.zeros(n)
X_synt[idx] = w_corr
# Scale (done all together?)
if (scale):
var_Z = np.diff(Z) ** 2.0 / 12.0
X_mean = X.mean(axis=0)
var_ME = (((mt - X_mean) ** 2).sum() + var_Z.sum()) / n
std_X= X.std(axis=0, ddof=1)
std_ME = np.sqrt(var_ME)
k_scale = std_X / std_ME - 1.0
X_synt = X_synt + k_scale * (X_synt - X_mean)
return X_synt
# Recover the time-dependencies
W = np.zeros(n)
W[idx] = w_ord
return W

Wyświetl plik

@ -4,10 +4,10 @@ Filters for time series.
Copyright (c) 2020 Gabriele Gilardi
ToDo:
- add comments to the code
- in comments write what filters do
- is necessary to copy X for Y untouched?
- decide default values in functions
- check conditions on P and N
- why lag plot gives errors
- fix plotting function
- example for alpha-beta-gamma using variable sigma as in financial time series
@ -96,6 +96,19 @@ np.random.seed(1294404794)
# bb[:, i] = aa[idx[:, i], i]
# bb = syn.synthetic_boot(aa, replace=False)
# print(bb)
aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1)
W = syn.synthetic_MEboot(aa, alpha=0.1)
# aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1)
# W = syn.synthetic_MEboot(aa, alpha=0.1, bounds=False, scale=True)
# print(bb.sum())
# print('W=', W)
# Scratch check of the `array[idx, i]` fancy-indexing pattern: build an
# (n, 1) column, tile it into three identical columns and pick one random
# row per output cell.
n = 8
aa = np.arange(n).reshape(n,1) * 1.1
print(aa)
# One random row index in [0, n) for each of the n x 3 output cells
idx = np.random.randint(0, n, size=(n, 3))
print(idx)
# Column selector, broadcast against the rows of idx
i = np.arange(3)
# print(i)
bb = np.tile(aa,(1, 3))
# print(bb)
# cc[r, c] = bb[idx[r, c], c]
cc = bb[idx, i]
print(cc)