kopia lustrzana https://github.com/gabrielegilardi/SignalFilters
finish function MEboot
rodzic
1f9b4f9489
commit
afad3296ed
|
@ -1,207 +0,0 @@
|
||||||
import numpy as np
|
|
||||||
import scipy.stats as st
|
|
||||||
|
|
||||||
class MEBOOT:
    """
    Maximum Entropy bootstrap for (multivariate) time series.

    Method by H.D. Vinod, Fordham University. Each row of the input is
    treated as one univariate series; bootstrap copies preserve the rank
    ordering of the observations and (approximately) the mean/std of the
    observed data.
    """

    def __init__(self, x, trimval=0.1, seed=None):
        """
        x: multivariate time series, shape (N, T) with T >= 3
        trimval: proportion to cut from each tail when trimming the mean
                 of the absolute first differences (default 0.1)
        seed: seed for the internal random generator (default None)
        """
        # All randomness goes through this generator so that a given
        # seed gives reproducible bootstrap copies.
        self.sd = np.random.RandomState(seed)
        m, n = x.shape

        self.meanx = x.mean(axis=1)
        self.sdx = x.std(axis=1)

        # Sort each row independently, remembering the ordering so the
        # time-dependence can be restored after resampling.
        self.ordxx = np.argsort(x, axis=1)
        # FIX: the original used x.ravel()[self.ordxx.ravel()], which
        # gathers every row's values from row 0 when N > 1.
        xx = np.take_along_axis(x, self.ordxx, axis=1)

        # Interior limits of the ME density intervals (row-wise midpoints
        # of consecutive order statistics).
        self.z = 0.5*(xx[:, 1:] + xx[:, :-1])

        # Trimmed mean of the absolute first differences sets the width
        # of the two open-ended tail intervals.
        dv = abs(np.diff(x, axis=1))
        dvtrim = st.trim_mean(dv, trimval, axis=1)
        self.xmin = xx[:, 0] - dvtrim
        self.xmax = xx[:, -1] + dvtrim

        # Desired interval means (mass-preserving constraint): interior
        # intervals weight consecutive order statistics 0.25/0.5/0.25;
        # the two end intervals weight their pair 0.75/0.25.
        tmp = np.array([[0.25]*(n - 2) + [0.5]*(n - 2) + [0.25]*(n - 2)])
        cmd = np.column_stack((xx[:, :n-2], xx[:, 1:n-1], xx[:, 2:n]))*tmp
        aux = np.array([cmd[:, i::n-2].sum(axis=1) for i in range(n - 2)]).T
        self.desintxb = np.column_stack((
            0.75*xx[:, :1] + 0.25*xx[:, 1:2],
            aux,
            0.25*xx[:, -2:-1] + 0.75*xx[:, -1:]))

    def _mrapproxpy(self, p, z, desintxb):
        """
        Vectorized inverse-CDF evaluation for the interior intervals.

        p: uniform draws, shape (m, n)
        z: interior interval limits, shape (m, n-1)
        desintxb: desired means of the interior intervals, shape (m, n-1)

        Draws falling in the first or last (open-ended) interval are left
        at -inf; the caller interpolates them separately.
        """
        m, n = p.shape
        q = -np.inf*np.ones(n*m)

        # Interval index of each draw: ixo = a+1 in 0..n-1.
        a = (p//(1/n) - 1).astype(int)
        hs = np.arange(n - 2)

        # Per-interval slope, left limit, desired mean and midpoint,
        # padded with -inf/0 fillers for the two tail intervals.
        dz = np.column_stack(([-np.inf]*m, np.diff(z, axis=1)*n, [0]*m)).ravel()
        sz = np.column_stack(([0]*m, 0.5*(z[:, hs+1] + z[:, hs]), [0]*m)).ravel()
        zt = np.column_stack(([-np.inf]*m, z[:, hs], [-np.inf]*m)).ravel()
        dh = np.column_stack(([-np.inf]*m, desintxb[:, hs], [0]*m)).ravel()

        # Flat indices into the padded row-major arrays.
        plus = (n*np.arange(m))[np.newaxis].T
        jx = (np.tile(range(n), (m, 1)) + plus).ravel()
        ixo = a + 1
        ix = (ixo + plus).ravel()

        # Linear interpolation within each interval, shifted so the
        # interval mean matches the desired mean.
        tmp = zt[ix] + dh[ix] - sz[ix]
        q[jx] = dz[ix]*(p.ravel()[jx] - ixo.ravel()/n) + tmp

        return q.reshape((m, n))

    def _expandSD(self, bt, fiv):
        """
        Randomly inflate the standard deviation of bootstrap copies by up
        to fiv percent; copies whose sd already exceeds the observed one
        keep their own sd ratio.

        bt: single copy (N, T) or a stack of copies (nt, N, T)
        fiv: maximum percent increase of the standard deviation
        """
        obt = len(bt.shape)
        if obt == 2:
            bt = bt[np.newaxis]

        sd = self.sdx
        bt = np.swapaxes(bt, 0, 1)          # (N, nt, T)

        sdf = np.column_stack((sd, bt.std(axis=2)))
        sdfa = sdf/sdf[:, :1]               # copy-sd / observed-sd
        sdfd = sdf[:, :1]/sdf               # observed-sd / copy-sd

        mx = 1 + (fiv/100)
        idx = np.where(sdfa < 1)
        # FIX: draw from the seeded generator (was np.random.uniform),
        # otherwise the `seed` argument did not make results reproducible.
        sdfa[idx] = self.sd.uniform(1, mx, size=len(idx[0]))

        sdfdXsdfa = sdfd[:, 1:]*sdfa[:, 1:]
        bt *= np.moveaxis(sdfdXsdfa[np.newaxis], 0, -1)
        bt = np.swapaxes(bt, 0, 1)

        if obt == 2:
            bt = bt[0]
        return bt

    def _adjust(self, bt):
        """
        Scale the copies so their ensemble standard deviation matches the
        observed one (kappa correction).
        """
        zz = np.column_stack((self.xmin[np.newaxis].T, self.z,
                              self.xmax[np.newaxis].T))
        # Variance of a uniform density on each interval is width**2/12.
        # FIX: the original computed np.diff(zz**2)/12 (difference of
        # squares); the squared interval width matches the ME density and
        # the companion implementation (var_Z = np.diff(Z)**2 / 12).
        v = np.diff(zz, axis=1)**2/12

        xb = self.meanx[np.newaxis].T
        s1 = ((self.desintxb - xb)**2).sum(axis=1)
        act_sd = np.sqrt((s1 + v.sum(axis=1))/(self.z.shape[1] + 1))
        des_sd = self.sdx

        kappa = (des_sd/act_sd - 1)[np.newaxis].T
        return bt + kappa*(bt - xb)

    def bootstrap(self, fiv=5, adjust_sd=True):
        """
        Single realization of the ME bootstrap for the multivariate time
        series.

        fiv: percent increment of the standard deviation (default 5);
             None disables the expansion step.
        adjust_sd: fix the standard deviation from the observation.
        """
        m, n = self.z.shape
        n += 1

        p = self.sd.uniform(0, 1, size=(m, n))
        q = self._mrapproxpy(p, self.z, self.desintxb[:, 1:])

        # Linear interpolation in the two open-ended tail intervals
        # (these draws were left at -inf by _mrapproxpy).
        f_low = np.column_stack((self.xmin[np.newaxis].T, self.z[:, 0]))
        f_hi = np.column_stack((self.z[:, -1], self.xmax[np.newaxis].T))
        low = p < 1/n
        hi = p > (n - 1)/n

        for i in range(m):
            q[i][low[i]] = np.interp(p[i][low[i]], [0, 1/n], f_low[i])
            q[i][hi[i]] = np.interp(p[i][hi[i]], [(n - 1)/n, 1], f_hi[i])
            # Restore the observed time-dependence: the k-th smallest
            # sampled value goes to the position of the k-th smallest
            # observation.
            qseq = np.sort(q[i])
            q[i][self.ordxx[i]] = qseq

        if fiv is not None:
            q = self._expandSD(q, fiv)
        if adjust_sd:
            q = self._adjust(q)
        return q

    def bootstrap_clt(self, nt, fiv=5, adjust_sd=True):
        """
        Multiple ME bootstrap copies, forcing the central limit theorem
        on the copy means. Warning: all copies are computed at once, so
        this can require a lot of memory.

        nt: number of bootstrap copies
        fiv: percent increment of the standard deviation (default 5);
             None disables the expansion step.
        adjust_sd: fix the standard deviation from the observation.
        """
        bt = np.array([self.bootstrap(fiv=None) for i in range(nt)])
        if fiv is not None:
            bt = self._expandSD(bt, fiv)

        bt = np.swapaxes(bt, 0, 1)          # (N, nt, T)
        N, nt, T = bt.shape

        gm = self.meanx
        s = self.sdx
        smean = s/np.sqrt(nt)               # CLT spread of the copy means

        # Replace the sorted copy means by Gaussian quantiles with the
        # theoretical CLT spread, then shift each copy accordingly.
        xbar = bt.mean(axis=2)
        sortxbar = np.sort(xbar, axis=1)
        oo = np.argsort(xbar, axis=1)

        newbar = (gm[np.newaxis].T
                  + st.norm.ppf((np.arange(1, nt+1)/(nt+1))[np.newaxis])
                  * smean[np.newaxis].T)
        scn = st.zscore(newbar, axis=1)
        newm = scn*smean[np.newaxis].T + gm[np.newaxis].T
        meanfix = newm - sortxbar

        # Inverse permutation of each row's mean ordering (the original
        # built it with a sorted(zip(...)) detour).
        oinv = np.argsort(oo, axis=1)
        out = np.array([(bt[i][oo[i]] + meanfix[i][np.newaxis].T)[oinv[i]]
                        for i in range(bt.shape[0])])
        out = np.swapaxes(out, 0, 1)

        if adjust_sd:
            out = self._adjust(out)
        return out
|
|
|
@ -1,102 +0,0 @@
|
||||||
"""
|
|
||||||
MEBOOT.PY - Python package for the meboot (Maximum Entropy Bootstrap) algorithm for Time Series
|
|
||||||
Author: Fabian Brix
|
|
||||||
Method by H.D. Vinod, Fordham University -
|
|
||||||
"""
|
|
||||||
import sys
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
|
|
||||||
def sort(series):
    """Return (sorted values, sorting indices) of a 1-D array."""
    order = np.argsort(series)
    return series[order], order
|
|
||||||
|
|
||||||
def get_trm_mean(series, percent):
    """
    Trimmed mean of the absolute first differences of *series*.

    percent: total percentage of the (sorted) differences to discard,
             split evenly between the two tails.

    The original returned a hard-coded 15.0 (debug leftover) with the
    real computation commented out; it also used a float trim count,
    which is not a valid slice index.
    """
    dev = np.abs(series[1:] - series[:-1])
    n = len(dev)
    # Number of elements to cut from each tail; cap so at least one
    # element always remains.
    k = int(round(n*(percent/100.0)/2.0))
    k = min(k, (n - 1)//2)
    return np.mean(np.sort(dev)[k:n-k])
|
|
||||||
|
|
||||||
def get_intermed_pts(series, s_sorted, percent):
    """
    Interval limits for the ME density: midpoints of consecutive sorted
    values, extended at both ends by the trimmed mean of the absolute
    first differences.
    """
    m_trm = get_trm_mean(series, percent)
    print(m_trm)
    inner = (s_sorted[:-1] + s_sorted[1:])/2.0
    return np.hstack((s_sorted[0] - m_trm, inner, s_sorted[-1] + m_trm))
|
|
||||||
|
|
||||||
def get_intervals(z):
    """Pair consecutive limits into (left, right) interval rows."""
    return np.column_stack((z[:-1], z[1:]))
|
|
||||||
|
|
||||||
def get_me_density(intervals):
    """Uniform (maximum-entropy) density on each interval: 1 / width."""
    widths = intervals[:, 1] - intervals[:, 0]
    return 1.0/widths
|
|
||||||
|
|
||||||
def get_cpf(me_density, intervals):
    """
    Normalized cumulative sum of the interval densities.

    NOTE(review): the running sum deliberately stops one short of the
    last interval (matches the original) — confirm this is intended, a
    true CDF would accumulate density*width over all intervals.
    """
    cpf = np.cumsum(me_density)[:-1]
    print(cpf)
    return cpf/np.max(cpf)
|
|
||||||
|
|
||||||
def get_quantiles(cpf, intervals, series):
    """
    Invert the piecewise-linear CPF at one draw per sample and return
    the resulting quantiles.
    """
    T = float(len(series))
    t = np.arange(T + 1)
    Rt = np.vstack((t[:-1]/T, t[1:]/T)).T   # per-interval probabilities (unused)
    # Hard-coded draws instead of np.random.uniform(0, 1) — kept from
    # the original for reproducible debugging.
    aaa = np.array([0.12, 0.83, 0.53, 0.59, 0.11])
    quantiles = []
    for d in range(series.shape[0]):
        u = aaa[d]
        # Find the first interval whose cumulative value covers u and
        # invert the linear segment there.
        for i, cp in enumerate(cpf):
            if u > cp:
                continue
            cpm = 0 if i == 0 else cpf[i - 1]
            m = (cp - cpm)/1.0*(intervals[i, 1] - intervals[i, 0])
            quantiles.append((u - cpm)*1.0/m + intervals[i, 0])
            break
    return np.array(quantiles)
|
|
||||||
|
|
||||||
def meboot(series, replicates):
    """
    Single maximum-entropy bootstrap pass over *series* (debug driver
    with verbose printing).

    series: 1-D array of observations
    replicates: requested number of copies — currently unused; exactly
                one replicate is produced. TODO: add repeat mechanism.

    Returns the replicate (the original returned None; returning it is
    backward-compatible).
    """
    # ASC by default
    print(series)
    np.random.seed(0)

    s_sorted, ind_sorted = sort(series)

    z = get_intermed_pts(series, s_sorted, 10)
    print('z ', z)

    intervals = get_intervals(z)
    print('intervals ', intervals)

    me_density = get_me_density(intervals)
    print('uni dens ', me_density)

    cpf = get_cpf(me_density, intervals)
    print('cpf ', cpf)

    quantiles = get_quantiles(cpf, intervals, series)
    print('quantiles ', quantiles)

    quantiles = np.sort(quantiles)

    # Restore the observed time ordering: the k-th smallest quantile
    # goes to the position that held the k-th smallest observation.
    # FIX: the original used quantiles[ind_sorted], which applies the
    # permutation instead of inverting it; the assignment form below
    # matches the other implementations (W[idx] = w_ord).
    replicate = np.empty_like(quantiles)
    replicate[ind_sorted] = quantiles
    print('replicate ', replicate)

    return replicate
|
|
||||||
|
|
||||||
# Debug driver: run a single ME bootstrap pass on a small test series.
series = np.array([4,12,36,20,8])
meboot(series, 1)
|
|
|
@ -198,37 +198,39 @@ def synthetic_FFT(X, multiv=False):
|
||||||
return X_synt
|
return X_synt
|
||||||
|
|
||||||
|
|
||||||
def synthetic_sampling(X, replace=True):
|
def synthetic_sampling(X, n_reps=1, replace=True):
|
||||||
"""
|
"""
|
||||||
generate more than n_samples, remome multi-time series
|
X must be (n_samples, 1)
|
||||||
"""
|
"""
|
||||||
n_samples, n_series = X.shape
|
n_samples = X.shape[0]
|
||||||
X_synt = np.zeros_like(X)
|
|
||||||
|
|
||||||
# Sampling with replacement
|
# Sampling with replacement
|
||||||
if (replace):
|
if (replace):
|
||||||
idx = np.random.randint(0, n_samples, size=(n_samples, n_series))
|
idx = np.random.randint(0, n_samples, size=(n_samples, n_reps))
|
||||||
i = np.arange(n_series)
|
i = np.arange(n_reps)
|
||||||
X_synt[:, i] = X[idx[:, i], i]
|
Xe = np.tile(X,(1, n_reps))
|
||||||
|
X_synt = Xe[idx, i]
|
||||||
|
|
||||||
# Sampling without replacement
|
# Sampling without replacement
|
||||||
else:
|
else:
|
||||||
idx = np.zeros_like(X)
|
idx = np.zeros(n_samples, n_reps)
|
||||||
for j in range(n_series):
|
for j in range(n_reps):
|
||||||
idx[:, j] = np.random.permutation(n_samples)
|
idx[:, j] = np.random.permutation(n_samples)
|
||||||
i = np.arange(n_series)
|
i = np.arange(n_reps)
|
||||||
X_synt[:, i] = X[idx[:, i], i]
|
Xe = np.tile(X,(1, n_reps))
|
||||||
|
X_synt = Xe[idx, i]
|
||||||
|
|
||||||
return X_synt
|
return X_synt
|
||||||
|
|
||||||
|
|
||||||
def synthetic_MEboot(X, alpha=0.1):
|
def synthetic_MEboot(X, alpha=0.1, bounds=True, scale=False):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
n_samples, n_series = X.shape
|
n_samples, n_series = X.shape
|
||||||
X_synt = np.zeros_like(X)
|
X_synt = np.zeros_like(X)
|
||||||
|
|
||||||
# Loop over time-series
|
# Loop over time-series iterate over number of desired series or set parallel
|
||||||
|
# if possile
|
||||||
n = n_samples
|
n = n_samples
|
||||||
for ts in range(n_series):
|
for ts in range(n_series):
|
||||||
|
|
||||||
|
@ -256,32 +258,40 @@ def synthetic_MEboot(X, alpha=0.1):
|
||||||
mt[n-1] = 0.25 * Y[n-2] + 0.75 * Y[n-1]
|
mt[n-1] = 0.25 * Y[n-2] + 0.75 * Y[n-1]
|
||||||
|
|
||||||
# Randomly generate new points
|
# Randomly generate new points
|
||||||
t_w = np.random.rand(n)
|
# t_w = np.random.rand(n)
|
||||||
# t_w = np.array([0.12, 0.83, 0.53, 0.59, 0.11])
|
t_w = np.array([0.12, 0.83, 0.53, 0.59, 0.11])
|
||||||
|
t_w = np.sort(t_w)
|
||||||
# order here???? and remove correction inside intervals???
|
|
||||||
|
|
||||||
# Interpolate new points
|
# Interpolate new points
|
||||||
t = np.linspace(0.0, 1.0, num=n+1)
|
t = np.linspace(0.0, 1.0, num=n+1)
|
||||||
w_int = np.interp(t_w, t, Z)
|
w_int = np.interp(t_w, t, Z)
|
||||||
print('w_int=', w_int)
|
|
||||||
|
|
||||||
# Correct the new points to satisfy mass constraint
|
# Correct the new points (first and last interval) to satisfy mass constraint
|
||||||
idw = (np.floor_divide(t_w, 1.0 / n)).astype(int)
|
idw = (np.floor_divide(t_w, 1.0 / n)).astype(int)
|
||||||
print('idw=', idw)
|
corr = np.where(idw == 0, mt[idw] - (Z[idw] + Z[idw+1]) / 2.0, 0.0)
|
||||||
w_corr = w_int + mt[idw] - (Z[idw] + Z[idw+1]) / 2.0
|
w_corr = w_int + corr
|
||||||
print('w_corr', w_corr)
|
if (n > 1):
|
||||||
|
corr = np.where(idw == n-1, mt[idw] - (Z[idw] + Z[idw+1]) / 2.0, 0.0)
|
||||||
|
w_corr += corr
|
||||||
|
|
||||||
# Enforce limits
|
# Enforce limits
|
||||||
# w_corr = np.fmin(np.fmax(w_corr, Z[0]), Z[n])
|
if (bounds):
|
||||||
|
w_corr = np.fmin(np.fmax(w_corr, Z[0]), Z[n])
|
||||||
|
|
||||||
# Re-order sampled values
|
# Recovery the time-dependencies (done all togheter?)
|
||||||
w_ord = np.sort(w_corr)
|
X_synt = np.zeros(n)
|
||||||
# print(w_ord)
|
X_synt[idx] = w_corr
|
||||||
# w_ord = np.array([5.85, 6.7, 8.9, 10.7, 23.95])
|
|
||||||
|
# Scale (done all together?)
|
||||||
|
if (scale):
|
||||||
|
var_Z = np.diff(Z) ** 2.0 / 12.0
|
||||||
|
X_mean = X.mean(axis=0)
|
||||||
|
var_ME = (((mt - X_mean) ** 2).sum() + var_Z.sum()) / n
|
||||||
|
std_X= X.std(axis=0, ddof=1)
|
||||||
|
std_ME = np.sqrt(var_ME)
|
||||||
|
k_scale = std_X / std_ME - 1.0
|
||||||
|
X_synt = X_synt + k_scale * (X_synt - X_mean)
|
||||||
|
|
||||||
|
return X_synt
|
||||||
|
|
||||||
# Recovery the time-dependencies
|
|
||||||
W = np.zeros(n)
|
|
||||||
W[idx] = w_ord
|
|
||||||
|
|
||||||
return W
|
|
||||||
|
|
|
@ -4,10 +4,10 @@ Filters for time series.
|
||||||
Copyright (c) 2020 Gabriele Gilardi
|
Copyright (c) 2020 Gabriele Gilardi
|
||||||
|
|
||||||
ToDo:
|
ToDo:
|
||||||
|
- add comments to the code
|
||||||
- in comments write what filters do
|
- in comments write what filters do
|
||||||
- is necessary to copy X for Y untouched?
|
- is necessary to copy X for Y untouched?
|
||||||
- decide default values in functions
|
- decide default values in functions
|
||||||
- check conditions on P and N
|
|
||||||
- why lag plot gives errors
|
- why lag plot gives errors
|
||||||
- fix plotting function
|
- fix plotting function
|
||||||
- example for alpha-beta-gamma using variable sigma as in financial time series
|
- example for alpha-beta-gamma using variable sigma as in financial time series
|
||||||
|
@ -96,6 +96,19 @@ np.random.seed(1294404794)
|
||||||
# bb[:, i] = aa[idx[:, i], i]
|
# bb[:, i] = aa[idx[:, i], i]
|
||||||
# bb = syn.synthetic_boot(aa, replace=False)
|
# bb = syn.synthetic_boot(aa, replace=False)
|
||||||
# print(bb)
|
# print(bb)
|
||||||
aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1)
|
# aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1)
|
||||||
W = syn.synthetic_MEboot(aa, alpha=0.1)
|
# W = syn.synthetic_MEboot(aa, alpha=0.1, bounds=False, scale=True)
|
||||||
# print(bb.sum())
|
# print(bb.sum())
|
||||||
|
# print('W=', W)
|
||||||
|
|
||||||
|
n = 8
|
||||||
|
aa = np.arange(n).reshape(n,1) * 1.1
|
||||||
|
print(aa)
|
||||||
|
idx = np.random.randint(0, n, size=(n, 3))
|
||||||
|
print(idx)
|
||||||
|
i = np.arange(3)
|
||||||
|
# print(i)
|
||||||
|
bb = np.tile(aa,(1, 3))
|
||||||
|
# print(bb)
|
||||||
|
cc = bb[idx, i]
|
||||||
|
print(cc)
|
Ładowanie…
Reference in New Issue