From afad3296ed7e4ccbd56b66717997de98a7cd388f Mon Sep 17 00:00:00 2001 From: Gabriele Gilardi Date: Wed, 24 Jun 2020 22:17:49 +0900 Subject: [PATCH] finish function MEboot --- Code_Python/main_ME.py | 207 --------------------------------------- Code_Python/meBoot_2.py | 102 ------------------- Code_Python/synthetic.py | 72 ++++++++------ Code_Python/test.py | 19 +++- 4 files changed, 57 insertions(+), 343 deletions(-) delete mode 100644 Code_Python/main_ME.py delete mode 100644 Code_Python/meBoot_2.py diff --git a/Code_Python/main_ME.py b/Code_Python/main_ME.py deleted file mode 100644 index 21b51f3..0000000 --- a/Code_Python/main_ME.py +++ /dev/null @@ -1,207 +0,0 @@ -import numpy as np -import scipy.stats as st - -class MEBOOT: - def __init__(self,x,trimval=0.1,seed=None): - ''' - x: multivariate-time series N x T - trimval: trim value (default 0.1) - ''' - - self.sd = np.random.RandomState(seed) - m,n = x.shape - - self.meanx = x.mean(axis=1) - - self.sdx = x.std(axis=1) - - - self.ordxx = np.argsort(x,axis=1) - - xx = x.ravel()[self.ordxx.ravel()].reshape(x.shape) - - self.z = 0.5*(xx[:,1:]+xx[:,:-1]) - - dv = abs(np.diff(x,axis=1)) - - dvtrim = st.trim_mean(dv,trimval,axis=1) - - self.xmin = xx[:, 0]- dvtrim - self.xmax = xx[:,-1]+ dvtrim - - tmp = np.array([[0.25]*(n-2)+[0.5]*(n-2)+[0.25]*(n-2)]) - cmd = (np.column_stack((xx[:,:n-2],xx[:,1:n-1],xx[:,2:n])) * tmp) - - aux = np.array([cmd[:,i::n-2].sum(axis=1) for i in range(n-2)]).T - - self.desintxb = np.column_stack((0.75 * xx[:,:1] + 0.25 * xx[:,1:2], aux, 0.25 * xx[:,-2:-1] + 0.75 * xx[:,-1:])) - - - def _mrapproxpy(self,p,z,desintxb): - - m,n = p.shape - - q = -np.inf*np.ones((n)*m) - - a = (p//(1/n)-1).astype(int) - - hs = np.arange(n-2) - - dz = np.column_stack(([-np.inf]*m,np.diff(z,axis=1)*n,[0]*m)).ravel() - - sz = np.column_stack(([0]*m,(0.5*(z[:,hs+1]+z[:,hs]))[:,hs],[0]*m)).ravel() - - zt = np.column_stack(([-np.inf]*m,z[:,hs],[-np.inf]*m)).ravel() - - dh = 
np.column_stack(([-np.inf]*m,desintxb[:,hs],[0]*m)).ravel() - - plus = (n*np.arange(m))[np.newaxis].T - jx = (np.tile(range(n),(m,1))+plus).ravel() - - ixo = a+1 - ix = (ixo+plus).ravel() - - tmp = zt[ix]+dh[ix]- sz[ix] - - q[jx] = dz[ix]*(p.ravel()[jx]-(ixo.ravel())/n)+tmp - - return q.reshape((m,n)) - - - def _expandSD(self,bt,fiv): - - obt = len(bt.shape) - - if obt==2: - bt = bt[np.newaxis] - - - sd = self.sdx - - bt = np.swapaxes(bt,0,1) - - sdf = np.column_stack((sd,bt.std(axis=2))) - - sdfa = sdf/sdf[:,:1] - - sdfd = sdf[:,:1]/sdf - - mx = 1+(fiv/100) - - idx = np.where(sdfa<1) - - sdfa[idx] = np.random.uniform(1,mx,size=len(idx[0])) - - sdfdXsdfa = sdfd[:,1:]*sdfa[:,1:] - - bt *= np.moveaxis(sdfdXsdfa[np.newaxis],0,-1) - bt = np.swapaxes(bt,0,1) - - if obt==2: - bt = bt[0] - - return bt - - def _adjust(self,bt): - zz = np.column_stack((self.xmin[np.newaxis].T,self.z,self.xmax[np.newaxis].T)) - - v = np.diff(zz**2,axis=1)/12 - - xb = self.meanx[np.newaxis].T - - s1 = ((self.desintxb - xb)**2).sum(axis=1) - - act_sd = np.sqrt( (s1+v.sum(axis=1))/(self.z.shape[1]+1) ) - - des_sd = self.sdx - - kappa =( des_sd/ act_sd -1)[np.newaxis].T - - bt = bt + kappa* (bt - xb) - return bt - - def bootstrap(self,fiv=5,adjust_sd=True): - ''' - Single realization of ME Bootstrap for the multivariate time series. - - fiv: Increment standard deviation (default fiv=5 %) - adjust_sd: Fix the standard deviation from the observation. 
- ''' - - m,n = self.z.shape - n+=1 - - p = self.sd.uniform(0,1,size=(m,n)) - - q = self._mrapproxpy(p,self.z,self.desintxb[:,1:]) - - - f_low = np.column_stack((self.xmin[np.newaxis].T,self.z[:,0])) - f_hi = np.column_stack((self.z[:,-1],self.xmax[np.newaxis].T)) - - low = p<1/n - hi = p>(n-1)/n - - - for i in range(m): - q[i][low[i]] = np.interp(p[i][low[i]],[0,1/n],f_low[i]) - q[i][hi[i]] = np.interp(p[i][hi[i]],[(n - 1)/n,1],f_hi[i]) - - qseq = np.sort(q[i]) - q[i][self.ordxx[i]] = qseq - - if fiv!=None: - q = self._expandSD(q,fiv) - if adjust_sd==True: - q = self._adjust(q) - return q - - def bootstrap_clt(self,nt,fiv=5,adjust_sd=True): - ''' - Multiple ME boostrap copies. - Force the central limit theorem. Warning it requires to compute all - bootstrap copies at once, so it could require a lot of memory. - - nt: number of bootstrap copies - fiv: Increment standard deviation (default fiv=5 %) - adjust_sd: Fix the standard deviation from the observation. - ''' - - - bt = np.array([self.bootstrap(fiv=None) for i in range(nt)]) - if fiv!=None: - - bt = self._expandSD(bt,fiv) - - bt = np.swapaxes(bt,0,1) - - N,nt,T = bt.shape - - gm = self.meanx - s = self.sdx - - smean = s/ np.sqrt(nt) - - xbar = bt.mean(axis=2) - - sortxbar = np.sort(xbar,axis=1) - - oo = np.argsort(xbar,axis=1) - - newbar = gm[np.newaxis].T + st.norm.ppf((np.arange(1,nt+1)/(nt+1))[np.newaxis])* smean[np.newaxis].T - - scn = st.zscore(newbar,axis=1) - - newm = scn*smean[np.newaxis].T+gm[np.newaxis].T - - meanfix = newm- sortxbar - - oinv = np.array([np.array(sorted(zip(oo[i],range(len(oo[i])))))[:,1] for i in range(len(oo))]) - - out = np.array([(bt[i][oo[i]]+meanfix[i][np.newaxis].T)[oinv[i]] for i in range(bt.shape[0])]) - out = np.swapaxes(out,0,1) - - if adjust_sd==True: - out = self._adjust(out) - - return out diff --git a/Code_Python/meBoot_2.py b/Code_Python/meBoot_2.py deleted file mode 100644 index 7a089f1..0000000 --- a/Code_Python/meBoot_2.py +++ /dev/null @@ -1,102 +0,0 @@ -""" - 
MEBOOT.PY - Python package for the meboot (Maximum Entropy Bootstrap) algorithm for Time Series - Author: Fabian Brix - Method by H.D. Vinod, Fordham University - -""" -import sys -import numpy as np -import matplotlib.pyplot as plt - -def sort(series): - ind_sorted = np.argsort(series) - s_sorted = series[ind_sorted] - return s_sorted, ind_sorted - -def get_trm_mean(series, percent): - # FIXED - dev = np.abs(series[1:]-series[:-1]) - n = len(dev) - k = n*(percent/100.0)/2.0 - k = round(k,0) - # return np.mean(dev[k:n-k]) - return 15.0 - -def get_intermed_pts(series, s_sorted, percent): - zt = (s_sorted[:-1]+s_sorted[1:])/2.0 - m_trm = get_trm_mean(series, percent) - print(m_trm) - z0 = s_sorted[0]-m_trm - zT = s_sorted[-1]+m_trm - z = np.hstack((z0,zt,zT)) - return z - -def get_intervals(z): - return np.vstack((z[:-1], z[1:])).T - -def get_me_density(intervals): - return 1.0/(intervals[:,1]-intervals[:,0]) - -def get_cpf(me_density, intervals): - cpf = np.array([sum(me_density[:i+1]) for i in range(me_density.shape[0]-1)]) - print(cpf) - return cpf/np.max(cpf) - -def get_quantiles(cpf, intervals, series): - quantiles = [] - T = float(len(series)) - t = np.arange(T+1) - Rt = np.vstack((t[:-1]/T,t[1:]/T)).T - # print(Rt) - aaa = np.array([0.12, 0.83, 0.53, 0.59, 0.11]) - for d in range(series.shape[0]): - # u = np.random.uniform(0,1) - u = aaa[d] - # print(d, u) - # u = aaa[d] - for i in range(cpf.shape[0]): - cp = cpf[i] - if u <= cp: - cpm = cpf[i-1] - if i == 0: - cpm = 0 - m = (cp-cpm)/1.0*(intervals[i,1]-intervals[i,0]) - xp = (u - cpm)*1.0/m+intervals[i,0] - quantiles.append(xp) - break - return np.array(quantiles) - -def meboot(series, replicates): - # ASC by default - print(series) - np.random.seed(0) - - s_sorted, ind_sorted = sort(series) - - z = get_intermed_pts(series, s_sorted, 10) - print('z ', z) - - intervals = get_intervals(z) - print('intervals ', intervals) - - me_density = get_me_density(intervals) - print('uni dens ', me_density) - - cpf = 
get_cpf(me_density, intervals) - print('cpf ', cpf) - - quantiles = get_quantiles(cpf, intervals, series) - print('quantiles ', quantiles) - - quantiles = np.sort(quantiles) - - replicate = quantiles[ind_sorted] - print('replicate ', replicate) - - # TODO: Undertand and add repeat mechanism - # plt.plot(series, color='r') - # plt.plot(replicate, color='b') - # plt.ylim(0,30) - # plt.show() - -series = np.array([4,12,36,20,8]) -meboot(series, 1) diff --git a/Code_Python/synthetic.py b/Code_Python/synthetic.py index f7695b7..55c7b2c 100644 --- a/Code_Python/synthetic.py +++ b/Code_Python/synthetic.py @@ -198,37 +198,39 @@ def synthetic_FFT(X, multiv=False): return X_synt -def synthetic_sampling(X, replace=True): +def synthetic_sampling(X, n_reps=1, replace=True): """ - generate more than n_samples, remome multi-time series + X must be (n_samples, 1) """ - n_samples, n_series = X.shape - X_synt = np.zeros_like(X) + n_samples = X.shape[0] # Sampling with replacement if (replace): - idx = np.random.randint(0, n_samples, size=(n_samples, n_series)) - i = np.arange(n_series) - X_synt[:, i] = X[idx[:, i], i] + idx = np.random.randint(0, n_samples, size=(n_samples, n_reps)) + i = np.arange(n_reps) + Xe = np.tile(X,(1, n_reps)) + X_synt = Xe[idx, i] # Sampling without replacement else: - idx = np.zeros_like(X) - for j in range(n_series): + idx = np.zeros(n_samples, n_reps) + for j in range(n_reps): idx[:, j] = np.random.permutation(n_samples) - i = np.arange(n_series) - X_synt[:, i] = X[idx[:, i], i] + i = np.arange(n_reps) + Xe = np.tile(X,(1, n_reps)) + X_synt = Xe[idx, i] return X_synt -def synthetic_MEboot(X, alpha=0.1): +def synthetic_MEboot(X, alpha=0.1, bounds=True, scale=False): """ """ n_samples, n_series = X.shape X_synt = np.zeros_like(X) - # Loop over time-series + # Loop over time-series iterate over number of desired series or set parallel + # if possible n = n_samples for ts in range(n_series): @@ -256,32 +258,40 @@ def synthetic_MEboot(X, alpha=0.1): mt[n-1] = 
0.25 * Y[n-2] + 0.75 * Y[n-1] # Randomly generate new points - t_w = np.random.rand(n) - # t_w = np.array([0.12, 0.83, 0.53, 0.59, 0.11]) - -# order here???? and remove correction inside intervals??? + # t_w = np.random.rand(n) + t_w = np.array([0.12, 0.83, 0.53, 0.59, 0.11]) + t_w = np.sort(t_w) # Interpolate new points t = np.linspace(0.0, 1.0, num=n+1) w_int = np.interp(t_w, t, Z) - print('w_int=', w_int) - # Correct the new points to satisfy mass constraint + # Correct the new points (first and last interval) to satisfy mass constraint idw = (np.floor_divide(t_w, 1.0 / n)).astype(int) - print('idw=', idw) - w_corr = w_int + mt[idw] - (Z[idw] + Z[idw+1]) / 2.0 - print('w_corr', w_corr) + corr = np.where(idw == 0, mt[idw] - (Z[idw] + Z[idw+1]) / 2.0, 0.0) + w_corr = w_int + corr + if (n > 1): + corr = np.where(idw == n-1, mt[idw] - (Z[idw] + Z[idw+1]) / 2.0, 0.0) + w_corr += corr # Enforce limits - # w_corr = np.fmin(np.fmax(w_corr, Z[0]), Z[n]) + if (bounds): + w_corr = np.fmin(np.fmax(w_corr, Z[0]), Z[n]) - # Re-order sampled values - w_ord = np.sort(w_corr) - # print(w_ord) - # w_ord = np.array([5.85, 6.7, 8.9, 10.7, 23.95]) + # Recover the time-dependencies (done all together?) + X_synt = np.zeros(n) + X_synt[idx] = w_corr + + # Scale (done all together?) + if (scale): + var_Z = np.diff(Z) ** 2.0 / 12.0 + X_mean = X.mean(axis=0) + var_ME = (((mt - X_mean) ** 2).sum() + var_Z.sum()) / n + std_X= X.std(axis=0, ddof=1) + std_ME = np.sqrt(var_ME) + k_scale = std_X / std_ME - 1.0 + X_synt = X_synt + k_scale * (X_synt - X_mean) + + return X_synt - # Recovery the time-dependencies - W = np.zeros(n) - W[idx] = w_ord - return W diff --git a/Code_Python/test.py b/Code_Python/test.py index eccca84..cf800d4 100644 --- a/Code_Python/test.py +++ b/Code_Python/test.py @@ -4,10 +4,10 @@ Filters for time series. Copyright (c) 2020 Gabriele Gilardi ToDo: +- add comments to the code - in comments write what filters do - is necessary to copy X for Y untouched? 
- decide default values in functions -- check conditions on P and N - why lag plot gives errors - fix plotting function - example for alpha-beta-gamma using variable sigma as in financial time series @@ -96,6 +96,19 @@ np.random.seed(1294404794) # bb[:, i] = aa[idx[:, i], i] # bb = syn.synthetic_boot(aa, replace=False) # print(bb) -aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1) -W = syn.synthetic_MEboot(aa, alpha=0.1) +# aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1) +# W = syn.synthetic_MEboot(aa, alpha=0.1, bounds=False, scale=True) # print(bb.sum()) +# print('W=', W) + +n = 8 +aa = np.arange(n).reshape(n,1) * 1.1 +print(aa) +idx = np.random.randint(0, n, size=(n, 3)) +print(idx) +i = np.arange(3) +# print(i) +bb = np.tile(aa,(1, 3)) +# print(bb) +cc = bb[idx, i] +print(cc) \ No newline at end of file