From afad3296ed7e4ccbd56b66717997de98a7cd388f Mon Sep 17 00:00:00 2001 From: Gabriele Gilardi Date: Wed, 24 Jun 2020 22:17:49 +0900 Subject: [PATCH] finish function MEboot --- Code_Python/main_ME.py | 207 --------------------------------------- Code_Python/meBoot_2.py | 102 ------------------- Code_Python/synthetic.py | 72 ++++++++------ Code_Python/test.py | 19 +++- 4 files changed, 57 insertions(+), 343 deletions(-) delete mode 100644 Code_Python/main_ME.py delete mode 100644 Code_Python/meBoot_2.py diff --git a/Code_Python/main_ME.py b/Code_Python/main_ME.py deleted file mode 100644 index 21b51f3..0000000 --- a/Code_Python/main_ME.py +++ /dev/null @@ -1,207 +0,0 @@ -import numpy as np -import scipy.stats as st - -class MEBOOT: - def __init__(self,x,trimval=0.1,seed=None): - ''' - x: multivariate-time series N x T - trimval: trim value (default 0.1) - ''' - - self.sd = np.random.RandomState(seed) - m,n = x.shape - - self.meanx = x.mean(axis=1) - - self.sdx = x.std(axis=1) - - - self.ordxx = np.argsort(x,axis=1) - - xx = x.ravel()[self.ordxx.ravel()].reshape(x.shape) - - self.z = 0.5*(xx[:,1:]+xx[:,:-1]) - - dv = abs(np.diff(x,axis=1)) - - dvtrim = st.trim_mean(dv,trimval,axis=1) - - self.xmin = xx[:, 0]- dvtrim - self.xmax = xx[:,-1]+ dvtrim - - tmp = np.array([[0.25]*(n-2)+[0.5]*(n-2)+[0.25]*(n-2)]) - cmd = (np.column_stack((xx[:,:n-2],xx[:,1:n-1],xx[:,2:n])) * tmp) - - aux = np.array([cmd[:,i::n-2].sum(axis=1) for i in range(n-2)]).T - - self.desintxb = np.column_stack((0.75 * xx[:,:1] + 0.25 * xx[:,1:2], aux, 0.25 * xx[:,-2:-1] + 0.75 * xx[:,-1:])) - - - def _mrapproxpy(self,p,z,desintxb): - - m,n = p.shape - - q = -np.inf*np.ones((n)*m) - - a = (p//(1/n)-1).astype(int) - - hs = np.arange(n-2) - - dz = np.column_stack(([-np.inf]*m,np.diff(z,axis=1)*n,[0]*m)).ravel() - - sz = np.column_stack(([0]*m,(0.5*(z[:,hs+1]+z[:,hs]))[:,hs],[0]*m)).ravel() - - zt = np.column_stack(([-np.inf]*m,z[:,hs],[-np.inf]*m)).ravel() - - dh = 
np.column_stack(([-np.inf]*m,desintxb[:,hs],[0]*m)).ravel() - - plus = (n*np.arange(m))[np.newaxis].T - jx = (np.tile(range(n),(m,1))+plus).ravel() - - ixo = a+1 - ix = (ixo+plus).ravel() - - tmp = zt[ix]+dh[ix]- sz[ix] - - q[jx] = dz[ix]*(p.ravel()[jx]-(ixo.ravel())/n)+tmp - - return q.reshape((m,n)) - - - def _expandSD(self,bt,fiv): - - obt = len(bt.shape) - - if obt==2: - bt = bt[np.newaxis] - - - sd = self.sdx - - bt = np.swapaxes(bt,0,1) - - sdf = np.column_stack((sd,bt.std(axis=2))) - - sdfa = sdf/sdf[:,:1] - - sdfd = sdf[:,:1]/sdf - - mx = 1+(fiv/100) - - idx = np.where(sdfa<1) - - sdfa[idx] = np.random.uniform(1,mx,size=len(idx[0])) - - sdfdXsdfa = sdfd[:,1:]*sdfa[:,1:] - - bt *= np.moveaxis(sdfdXsdfa[np.newaxis],0,-1) - bt = np.swapaxes(bt,0,1) - - if obt==2: - bt = bt[0] - - return bt - - def _adjust(self,bt): - zz = np.column_stack((self.xmin[np.newaxis].T,self.z,self.xmax[np.newaxis].T)) - - v = np.diff(zz**2,axis=1)/12 - - xb = self.meanx[np.newaxis].T - - s1 = ((self.desintxb - xb)**2).sum(axis=1) - - act_sd = np.sqrt( (s1+v.sum(axis=1))/(self.z.shape[1]+1) ) - - des_sd = self.sdx - - kappa =( des_sd/ act_sd -1)[np.newaxis].T - - bt = bt + kappa* (bt - xb) - return bt - - def bootstrap(self,fiv=5,adjust_sd=True): - ''' - Single realization of ME Bootstrap for the multivariate time series. - - fiv: Increment standard deviation (default fiv=5 %) - adjust_sd: Fix the standard deviation from the observation. 
- ''' - - m,n = self.z.shape - n+=1 - - p = self.sd.uniform(0,1,size=(m,n)) - - q = self._mrapproxpy(p,self.z,self.desintxb[:,1:]) - - - f_low = np.column_stack((self.xmin[np.newaxis].T,self.z[:,0])) - f_hi = np.column_stack((self.z[:,-1],self.xmax[np.newaxis].T)) - - low = p<1/n - hi = p>(n-1)/n - - - for i in range(m): - q[i][low[i]] = np.interp(p[i][low[i]],[0,1/n],f_low[i]) - q[i][hi[i]] = np.interp(p[i][hi[i]],[(n - 1)/n,1],f_hi[i]) - - qseq = np.sort(q[i]) - q[i][self.ordxx[i]] = qseq - - if fiv!=None: - q = self._expandSD(q,fiv) - if adjust_sd==True: - q = self._adjust(q) - return q - - def bootstrap_clt(self,nt,fiv=5,adjust_sd=True): - ''' - Multiple ME boostrap copies. - Force the central limit theorem. Warning it requires to compute all - bootstrap copies at once, so it could require a lot of memory. - - nt: number of bootstrap copies - fiv: Increment standard deviation (default fiv=5 %) - adjust_sd: Fix the standard deviation from the observation. - ''' - - - bt = np.array([self.bootstrap(fiv=None) for i in range(nt)]) - if fiv!=None: - - bt = self._expandSD(bt,fiv) - - bt = np.swapaxes(bt,0,1) - - N,nt,T = bt.shape - - gm = self.meanx - s = self.sdx - - smean = s/ np.sqrt(nt) - - xbar = bt.mean(axis=2) - - sortxbar = np.sort(xbar,axis=1) - - oo = np.argsort(xbar,axis=1) - - newbar = gm[np.newaxis].T + st.norm.ppf((np.arange(1,nt+1)/(nt+1))[np.newaxis])* smean[np.newaxis].T - - scn = st.zscore(newbar,axis=1) - - newm = scn*smean[np.newaxis].T+gm[np.newaxis].T - - meanfix = newm- sortxbar - - oinv = np.array([np.array(sorted(zip(oo[i],range(len(oo[i])))))[:,1] for i in range(len(oo))]) - - out = np.array([(bt[i][oo[i]]+meanfix[i][np.newaxis].T)[oinv[i]] for i in range(bt.shape[0])]) - out = np.swapaxes(out,0,1) - - if adjust_sd==True: - out = self._adjust(out) - - return out diff --git a/Code_Python/meBoot_2.py b/Code_Python/meBoot_2.py deleted file mode 100644 index 7a089f1..0000000 --- a/Code_Python/meBoot_2.py +++ /dev/null @@ -1,102 +0,0 @@ -""" - 
MEBOOT.PY - Python package for the meboot (Maximum Entropy Bootstrap) algorithm for Time Series - Author: Fabian Brix - Method by H.D. Vinod, Fordham University - -""" -import sys -import numpy as np -import matplotlib.pyplot as plt - -def sort(series): - ind_sorted = np.argsort(series) - s_sorted = series[ind_sorted] - return s_sorted, ind_sorted - -def get_trm_mean(series, percent): - # FIXED - dev = np.abs(series[1:]-series[:-1]) - n = len(dev) - k = n*(percent/100.0)/2.0 - k = round(k,0) - # return np.mean(dev[k:n-k]) - return 15.0 - -def get_intermed_pts(series, s_sorted, percent): - zt = (s_sorted[:-1]+s_sorted[1:])/2.0 - m_trm = get_trm_mean(series, percent) - print(m_trm) - z0 = s_sorted[0]-m_trm - zT = s_sorted[-1]+m_trm - z = np.hstack((z0,zt,zT)) - return z - -def get_intervals(z): - return np.vstack((z[:-1], z[1:])).T - -def get_me_density(intervals): - return 1.0/(intervals[:,1]-intervals[:,0]) - -def get_cpf(me_density, intervals): - cpf = np.array([sum(me_density[:i+1]) for i in range(me_density.shape[0]-1)]) - print(cpf) - return cpf/np.max(cpf) - -def get_quantiles(cpf, intervals, series): - quantiles = [] - T = float(len(series)) - t = np.arange(T+1) - Rt = np.vstack((t[:-1]/T,t[1:]/T)).T - # print(Rt) - aaa = np.array([0.12, 0.83, 0.53, 0.59, 0.11]) - for d in range(series.shape[0]): - # u = np.random.uniform(0,1) - u = aaa[d] - # print(d, u) - # u = aaa[d] - for i in range(cpf.shape[0]): - cp = cpf[i] - if u <= cp: - cpm = cpf[i-1] - if i == 0: - cpm = 0 - m = (cp-cpm)/1.0*(intervals[i,1]-intervals[i,0]) - xp = (u - cpm)*1.0/m+intervals[i,0] - quantiles.append(xp) - break - return np.array(quantiles) - -def meboot(series, replicates): - # ASC by default - print(series) - np.random.seed(0) - - s_sorted, ind_sorted = sort(series) - - z = get_intermed_pts(series, s_sorted, 10) - print('z ', z) - - intervals = get_intervals(z) - print('intervals ', intervals) - - me_density = get_me_density(intervals) - print('uni dens ', me_density) - - cpf = 
get_cpf(me_density, intervals) - print('cpf ', cpf) - - quantiles = get_quantiles(cpf, intervals, series) - print('quantiles ', quantiles) - - quantiles = np.sort(quantiles) - - replicate = quantiles[ind_sorted] - print('replicate ', replicate) - - # TODO: Undertand and add repeat mechanism - # plt.plot(series, color='r') - # plt.plot(replicate, color='b') - # plt.ylim(0,30) - # plt.show() - -series = np.array([4,12,36,20,8]) -meboot(series, 1) diff --git a/Code_Python/synthetic.py b/Code_Python/synthetic.py index f7695b7..55c7b2c 100644 --- a/Code_Python/synthetic.py +++ b/Code_Python/synthetic.py @@ -198,37 +198,39 @@ def synthetic_FFT(X, multiv=False): return X_synt -def synthetic_sampling(X, replace=True): +def synthetic_sampling(X, n_reps=1, replace=True): """ - generate more than n_samples, remome multi-time series + X must be (n_samples, 1) """ - n_samples, n_series = X.shape - X_synt = np.zeros_like(X) + n_samples = X.shape[0] # Sampling with replacement if (replace): - idx = np.random.randint(0, n_samples, size=(n_samples, n_series)) - i = np.arange(n_series) - X_synt[:, i] = X[idx[:, i], i] + idx = np.random.randint(0, n_samples, size=(n_samples, n_reps)) + i = np.arange(n_reps) + Xe = np.tile(X,(1, n_reps)) + X_synt = Xe[idx, i] # Sampling without replacement else: - idx = np.zeros_like(X) - for j in range(n_series): + idx = np.zeros(n_samples, n_reps) + for j in range(n_reps): idx[:, j] = np.random.permutation(n_samples) - i = np.arange(n_series) - X_synt[:, i] = X[idx[:, i], i] + i = np.arange(n_reps) + Xe = np.tile(X,(1, n_reps)) + X_synt = Xe[idx, i] return X_synt -def synthetic_MEboot(X, alpha=0.1): +def synthetic_MEboot(X, alpha=0.1, bounds=True, scale=False): """ """ n_samples, n_series = X.shape X_synt = np.zeros_like(X) - # Loop over time-series + # Loop over time-series iterate over number of desired series or set parallel + # if possible n = n_samples for ts in range(n_series): @@ -256,32 +258,40 @@ def synthetic_MEboot(X, alpha=0.1): mt[n-1] = 
0.25 * Y[n-2] + 0.75 * Y[n-1] # Randomly generate new points - t_w = np.random.rand(n) - # t_w = np.array([0.12, 0.83, 0.53, 0.59, 0.11]) - -# order here???? and remove correction inside intervals??? + # t_w = np.random.rand(n) + t_w = np.array([0.12, 0.83, 0.53, 0.59, 0.11]) + t_w = np.sort(t_w) # Interpolate new points t = np.linspace(0.0, 1.0, num=n+1) w_int = np.interp(t_w, t, Z) - print('w_int=', w_int) - # Correct the new points to satisfy mass constraint + # Correct the new points (first and last interval) to satisfy mass constraint idw = (np.floor_divide(t_w, 1.0 / n)).astype(int) - print('idw=', idw) - w_corr = w_int + mt[idw] - (Z[idw] + Z[idw+1]) / 2.0 - print('w_corr', w_corr) + corr = np.where(idw == 0, mt[idw] - (Z[idw] + Z[idw+1]) / 2.0, 0.0) + w_corr = w_int + corr + if (n > 1): + corr = np.where(idw == n-1, mt[idw] - (Z[idw] + Z[idw+1]) / 2.0, 0.0) + w_corr += corr # Enforce limits - # w_corr = np.fmin(np.fmax(w_corr, Z[0]), Z[n]) + if (bounds): + w_corr = np.fmin(np.fmax(w_corr, Z[0]), Z[n]) - # Re-order sampled values - w_ord = np.sort(w_corr) - # print(w_ord) - # w_ord = np.array([5.85, 6.7, 8.9, 10.7, 23.95]) + # Recover the time-dependencies (done all together?) + X_synt = np.zeros(n) + X_synt[idx] = w_corr + + # Scale (done all together?) + if (scale): + var_Z = np.diff(Z) ** 2.0 / 12.0 + X_mean = X.mean(axis=0) + var_ME = (((mt - X_mean) ** 2).sum() + var_Z.sum()) / n + std_X= X.std(axis=0, ddof=1) + std_ME = np.sqrt(var_ME) + k_scale = std_X / std_ME - 1.0 + X_synt = X_synt + k_scale * (X_synt - X_mean) + + return X_synt - # Recovery the time-dependencies - W = np.zeros(n) - W[idx] = w_ord - return W diff --git a/Code_Python/test.py b/Code_Python/test.py index eccca84..cf800d4 100644 --- a/Code_Python/test.py +++ b/Code_Python/test.py @@ -4,10 +4,10 @@ Filters for time series. Copyright (c) 2020 Gabriele Gilardi ToDo: +- add comments to the code - in comments write what filters do - is necessary to copy X for Y untouched? 
- decide default values in functions -- check conditions on P and N - why lag plot gives errors - fix plotting function - example for alpha-beta-gamma using variable sigma as in financial time series @@ -96,6 +96,19 @@ np.random.seed(1294404794) # bb[:, i] = aa[idx[:, i], i] # bb = syn.synthetic_boot(aa, replace=False) # print(bb) -aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1) -W = syn.synthetic_MEboot(aa, alpha=0.1) +# aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1) +# W = syn.synthetic_MEboot(aa, alpha=0.1, bounds=False, scale=True) # print(bb.sum()) +# print('W=', W) + +n = 8 +aa = np.arange(n).reshape(n,1) * 1.1 +print(aa) +idx = np.random.randint(0, n, size=(n, 3)) +print(idx) +i = np.arange(3) +# print(i) +bb = np.tile(aa,(1, 3)) +# print(bb) +cc = bb[idx, i] +print(cc) \ No newline at end of file