start meboot function

2020-06-22 21:39:53 +09:00 · 2020-06-22 21:39:53 +09:00 · 9763bbb652
commit 9763bbb652
--- a/Code_Python/meboot.r
+++ b/Code_Python/meboot.r
@ -0,0 +1,246 @@
+
+#meboot <- function(x, reps=999, trim=0.10, reachbnd=TRUE,
+#  expand.sd=TRUE, force.clt=TRUE, elaps=FALSE,
+#  colsubj, coldata, coltimes, ...){
+#  UseMethod("meboot", x)
+#}
+
+meboot <- function(x, reps=999, trim=list(trim=0.10, xmin=NULL, xmax=NULL), reachbnd=TRUE,
+  expand.sd=TRUE, force.clt=TRUE, 
+  scl.adjustment = FALSE, sym = FALSE, elaps=FALSE, 
+  colsubj, coldata, coltimes,...)
+{
+  if ("pdata.frame" %in% class(x))
+  {
+     res <- meboot.pdata.frame (x, reps, trim$trim, reachbnd,
+      expand.sd, force.clt, scl.adjustment, sym, elaps,
+      colsubj, coldata, coltimes, ...)
+     return(res)
+  }
+
+  if (reps == 1 && isTRUE(force.clt))
+  {
+    force.clt <- FALSE
+    warning("force.clt was set to FALSE since the ensemble contains only one replicate.")
+  }
+
+  if (!is.list(trim)) {
+    trimval <- trim
+  } else {
+    trimval <- if (is.null(trim$trim)) 0.1 else trim$trim
+  }
+
+  ptm1 <- proc.time()
+
+  n <- length(x)
+
+  # Sort the original data in increasing order and
+  # store the ordering index vector.
+
+  xx <- sort(x)
+  ordxx <- order(x)
+  #ordxx <- sort.int(x, index.return=TRUE)
+
+  # symmetry
+
+  if (sym)
+  {
+    xxr <- rev(xx) #reordered values
+    xx.sym <- mean(xx) + 0.5*(xx - xxr) #symmetrized order stats
+    xx <- xx.sym #replace order stats by symmetrized ones
+  }
+
+  # Compute intermediate points on the sorted series.
+
+  z <- rowMeans(embed(xx, 2))
+
+  # Compute lower limit for left tail ('xmin') and
+  # upper limit for right tail ('xmax').
+  # This is done by computing the 'trim' (e.g. 10%) trimmed mean
+  # of deviations among all consecutive observations ('dv').
+  # Thus the tails are uniform distributed.
+
+  dv <- abs(diff(as.numeric(x)))
+  dvtrim <- mean(dv, trim=trimval)
+
+  if (is.list(trim))
+  {
+    if (is.null(trim$xmin))
+    {
+      xmin <- xx[1] - dvtrim
+    } else
+      xmin <- trim$xmin
+
+    if (is.null(trim$xmax))
+    {
+      xmax <- xx[n] + dvtrim
+    } else
+      xmax <- trim$xmax
+
+    if (!is.null(trim$xmin) || !is.null(trim$xmax))
+    {
+      if (isTRUE(force.clt))
+      {
+        expand.sd <- FALSE
+        force.clt <- FALSE
+        warning("expand.sd and force.clt were set to FALSE in order to ",
+          "enforce the limits xmin/xmax.")
+      }
+    }
+  } else {
+    xmin <- xx[1] - dvtrim
+    xmax <- xx[n] + dvtrim
+  }
+
+  # do this here so that this warnings are printed after
+  # the above warnings (if necessary)
+
+  if (is.list(trim))
+  {
+    if (!is.null(trim$xmin) && trim$xmin > min(x))
+      warning("the lower limit trim$xmin may not be satisfied in the replicates ",
+       "since it is higher than the minimum value observed ",
+       "in the input series x")
+    if (!is.null(trim$xmax) && trim$xmax < max(x))
+      warning("the upper limit trim$xmax may not be satisfied in the replicates ",
+       "since it is lower than the maximum value observed ",
+       "in the input series x")
+  }
+
+  # Compute the mean of the maximum entropy density within each
+  # interval in such a way that the 'mean preserving constraint'
+  # is satisfied. (Denoted as m_t in the reference paper.)
+  # The first and last interval means have distinct formulas.
+  # See Theil and Laitinen (1980) for details.
+
+  aux <- colSums( t(embed(xx, 3))*c(0.25,0.5,0.25) )
+  desintxb <- c(0.75*xx[1]+0.25*xx[2], aux, 0.25*xx[n-1]+0.75*xx[n])
+
+  # Generate random numbers from the [0,1] uniform interval and
+  # compute sample quantiles at those points.
+
+  # Generate random numbers from the [0,1] uniform interval.
+
+  ensemble <- matrix(x, nrow=n, ncol=reps)
+  ensemble <- apply(ensemble, 2, meboot.part,
+                n, z, xmin, xmax, desintxb, reachbnd)
+
+  # So far the object 'ensemble' contains the quantiles.
+  # Now give them time series dependence and heterogeneity.
+
+  qseq <- apply(ensemble, 2, sort)
+
+  # 'qseq' has monotonic series, the correct series is obtained
+  # after applying the order according to 'ordxx' defined above.
+
+  ensemble[ordxx,] <- qseq
+  #ensemble[ordxx$ix,] <- qseq
+
+  if(expand.sd)
+    ensemble <- expand.sd(x=x, ensemble=ensemble, ...)
+  if(force.clt)
+    ensemble <- force.clt(x=x, ensemble=ensemble)
+
+  # scale adjustment
+
+  if (scl.adjustment)
+  {
+    zz <- c(xmin,z,xmax) #extended list of z values
+    #v <- rep(NA, n) #storing within variances
+    #for (i in 2:(n+1)) {
+    #  v[i-1] <- ((zz[i] - zz[i-1])^2) / 12
+    #}
+    v <- diff(zz^2) / 12
+    xb <- mean(x)
+    s1 <- sum((desintxb - xb)^2)
+    uv <- (s1 + sum(v)) / n
+    desired.sd <- sd(x)
+    actualME.sd <- sqrt(uv)
+    if (actualME.sd <= 0) 
+      stop("actualME.sd<=0 Error")
+    out <- desired.sd / actualME.sd
+    kappa <- out - 1
+
+    ensemble <- ensemble + kappa * (ensemble - xb)
+  } else
+    kappa <- NULL
+
+  #ensemble <- cbind(x, ensemble)
+  if(is.ts(x)){
+    ensemble <- ts(ensemble, frequency=frequency(x), start=start(x))
+    dimnames(ensemble)[[2]] <- paste("Series", 1:reps)
+    #dimnames(ensemble)[[2]] <- c("original", paste("Series", 1:reps))
+  }
+
+  # Computation time
+  ptm2 <- proc.time(); elapsr <- elapsedtime(ptm1, ptm2)
+  if(elaps)
+    cat("\n  Elapsed time:", elapsr$elaps, 
+                             paste(elapsr$units, ".", sep=""), "\n")
+
+  list(x=x, ensemble=ensemble, xx=xx, z=z, dv=dv, dvtrim=dvtrim, xmin=xmin,
+       xmax=xmax, desintxb=desintxb, ordxx=ordxx, kappa = kappa, elaps=elapsr)
+}
+
+meboot.part <- function(x, n, z, xmin, xmax, desintxb, reachbnd)
+{
+  # Generate random numbers from the [0,1] uniform interval
+
+  p <- runif(n, min=0, max=1)
+
+  # Compute sample quantiles by linear interpolation at
+  # those 'p'-s (if any) ...
+
+  # ... 'p'-s within the (i1/n, (i1+1)/n)] interval (i1=1,...,n-2).
+
+  q <- .C("mrapprox", p=as.double(p), n=as.integer(n), z=as.double(z),
+      desintxb=as.double(desintxb[-1]), ref23=double(n), qq=double(1), 
+      q=double(n), PACKAGE="meboot")$q
+
+  # ... 'p'-s within the [0, (1/n)] interval. (Left tail.)
+
+  ref1 <- which(p <= (1/n))
+  if(length(ref1) > 0){
+    qq <- approx(c(0,1/n), c(xmin,z[1]), p[ref1])$y
+    q[ref1] <- qq
+    if(!reachbnd)  q[ref1] <- qq + desintxb[1]-0.5*(z[1]+xmin)
+  }
+
+  # ... 'p'-s equal to (n-1)/n.
+
+  ref4 <- which(p == ((n-1)/n))
+  if(length(ref4) > 0)
+    q[ref4] <- z[n-1]
+
+  # ... 'p'-s greater than (n-1)/n. (Right tail.)
+
+  ref5 <- which(p > ((n-1)/n))
+  if(length(ref5) > 0){
+    # Right tail proportion p[i]
+    qq <- approx(c((n-1)/n,1), c(z[n-1],xmax), p[ref5])$y
+    q[ref5] <- qq   # this implicitly shifts xmax for algorithm
+    if(!reachbnd)  q[ref5] <- qq + desintxb[n]-0.5*(z[n-1]+xmax)
+    # such that the algorithm gives xmax when p[i]=1
+    # this is the meaning of reaching the bounds xmax and xmin
+  }
+
+  q
+}
+
+elapsedtime <- function(ptm1, ptm2)
+{
+  elaps <- (ptm2 - ptm1)[3]
+
+  if(elaps < 60)
+         units <- "seconds"
+  else if(elaps < 3600){
+         elaps <- elaps/60
+         units <- "minutes" }
+  else if(elaps < 86400){
+         elaps <- elaps/3600
+         units <- "hours" }
+  else { elaps <- elaps/86400
+         units <- "days" }
+
+  list(elaps=as.numeric(elaps), units=units)
+}
--- a/Code_Python/mrapprox.c
+++ b/Code_Python/mrapprox.c
@ -0,0 +1,40 @@
+
+// q <- .C("mrapprox", p=as.double(p), n=as.integer(n), z=as.double(z),
+//      desintxb=as.double(desintxb[-1]), ref23=double(n), qq=double(1), 
+//      q=double(n), PACKAGE="meboot")$q
+
+void mrapprox(double *p, int *n, double *z, double *desintxb, double *ref23,
+              double *qq, double *q)
+{
+    int i, j, ii1, k;
+    double i1, nn;
+
+    nn = *n;
+    for(i=0; i < *n; i=i+1){
+        q[i] = -99999;
+        ref23[i] = -99999;
+    }
+
+    j = 0;
+    i1 = 1.0;
+    for(ii1=0; ii1 < *n-2; ii1=ii1+1){
+
+        j = 0;
+        for(i=0; i < *n; i=i+1){
+
+            if( p[i] > i1/nn && p[i] <= (i1+1)/nn ){
+                ref23[j] = i;
+                j = j+1;
+            }
+
+        }
+
+        for(i=0; i < j; i=i+1){
+
+            k = ref23[i];
+            qq[0] = z[ii1] + ( (z[ii1+1]- z[ii1]) / ((i1+1)/nn - i1/nn) ) * (p[k] - i1/nn);
+            q[k] = qq[0] + desintxb[ii1] - 0.5*(z[ii1] + z[ii1+1]);
+        }
+        i1 = i1+1;
+    }
+}
--- a/Code_Python/synthetic.py
+++ b/Code_Python/synthetic.py
@ -61,97 +61,101 @@ def scale_data(X, param=()):
        return Xs, param


-def value2diff(X, mode=None):
+def value2diff(X, percent=True):
    """
    from value to difference in abs or %
-    diff in value first element is zero
-    diff in % first element is one
+    dx is reduced by 1 row
    """
-
-    # Difference in value
-    if (mode == 'V'):
-        dX = np.zeros_like(X)
-        dX[1:, :] = X[1:, :] - X[:-1, :]
-    
    # Difference in percent
+    if (percent):
+        dX = X[1:, :] / X[:-1, :] - 1.0
+    
+    # Difference in value
    else:
-        dX = np.ones_like(X)
-        dX[1:, :] = X[1:, :] / X[:-1, :] - 1.0
+        dX = X[1:, :] - X[:-1, :]
    
    return dX


-def diff2value(dX, mode=None):
+def diff2value(dX, percent=True):
    """
-    from difference in abs or % to value (first row should be all zeros but
-    will be over-written
+    from difference in abs or % to value
+    X is increased by one row

-    Reference X[0,:] is assumed to be zero. If X0[0,:] is the desired
-    reference, the actual vector X can be determined as X0+X
+    Value from percent: first row set to one. If X0 defines the starting
+    values, then X0*X would be the ???

-    Reference X[0,:] is assumed to be one. If X0[0,:] is the desired
-    reference, the actual vector X can be determined as X0*X
+    Value from difference: first row set to zero. If X0 defines the starting
+    values, then X0+X would be the ???
    """
-    # Value from the difference (first row equal to zero)
-    # X[0, :] = 0
-    # X[1, :] = X[0, :] + dX[1, :] = dX[1, :]
-    # X[2, :] = X[0, :] + dX[1, :] + dX[2, :] = dX[1, :] + dX[2, :]
-    # ....
-    if (mode == 'V'):
-        X = np.zeros_like(dX)
-        X[1:, :] = np.cumsum(dX[1:, :], axis=0)
-    
-    # Value from percent (first row equal to 1)
+    n_rows, n_cols = dX.shape
+    X = np.zeros((n_rows+1, n_cols))
+
+    # Value from percent
    # X[0, :] = 1
    # X[1, :] = X[0, :] * (1 + dX[1, :]) = (1 + dX[1, :])
    # X[2, :] = X[1, :] * (1 + dX[2, :])
    #         = X[0, :] * (1 + dX[1, :]) * (1 + dX[2, :])
    #         = (1 + dX[1, :]) * (1 + dX[2, :])
    # ....
-    else:
-        X = np.ones_like(dX)
+    if (percent):
+        X[0, :] = 1.0
        X[1:, :] = np.cumprod((1.0 + dX), axis=0)
    
+    # Value from difference
+    # X[0, :] = 0
+    # X[1, :] = X[0, :] + dX[1, :] = dX[1, :]
+    # X[2, :] = X[0, :] + dX[1, :] + dX[2, :] = dX[1, :] + dX[2, :]
+    # ....
+    else:
+        # First row already set to zero
+        X[1:, :] = np.cumsum(dX, axis=0)
+    
    return X


-def synthetic_wave(per, amp=None, pha=None, num=1000):
+def synthetic_wave(P, A=None, phi=None, num=1000):
    """
    Generates a multi-sinewave.
-    P = [ P1 P2 ... Pn ]      Periods
-    A = [ A1 A2 ... An ]      Amplitudes
-    PH = [PH1 PH2 ... PHn]    Phases (rad)
+    P = [P_1, P_2, ... P_n]                 Periods
+    A = [A_1, A_2, ... A_n]                 Amplitudes
+    phi = [phi_1, phi_2, ... phi_n]         Phases (rad)

    Default amplitudes are ones
    Default phases are zeros
    Time is from 0 to largest period (default 1000 steps)
    """
-    n_waves = len(per)
-    per = np.asarray(per)
+    n_waves = len(P)
+    P = np.asarray(P)

-    # Define amplitudes and phases
-    if (amp is None):
-        amp = np.ones(n_waves)
+    # Define amplitudes
+    if (A is None):
+        A = np.ones(n_waves)
    else:
-        amp = np.asarray(amp)
-    if (pha is None):
-        pha = np.zeros(n_waves)
+        A = np.asarray(A)
+
+    # Define phases
+    if (phi is None):
+        phi = np.zeros(n_waves)
    else:
-        pha = np.asarray(pha)
+        phi = np.asarray(phi)

    # Add all the waves
-    t = np.linspace(0.0, np.amax(per), num=num)
+    t = np.linspace(0.0, np.amax(P), num=num)
    f = np.zeros(len(t))
    for i in range(n_waves):
-        f = f + amp[i] * np.sin(2.0 * np.pi * t / per[i] + pha[i])
+        f = f + A[i] * np.sin(2.0 * np.pi * t / P[i] + phi[i])

    return t, f


-def synthetic_series(X, multiv=False):
+def synthetic_FFT(X, multiv=False):
    """
+    - univariate and single time-series
+    - univariate and multi-time series (can be used to generate multi from same)
+    - multi-variate multi-time series
    """
-    n_samples, n_series = data.shape
+    n_samples, n_series = X.shape

    # The number of samples must be odd (if the number is even drop the last value)
    if ((n_samples % 2) == 0):
@ -188,3 +192,69 @@ def synthetic_series(X, multiv=False):
    X_synt = np.real(np.fft.ifft(synt_fft, axis=0))

    return X_synt
+
+
+def synthetic_sampling(X, replace=True):
+    """
+    generate more than n_samples?
+    """
+    n_samples, n_series = X.shape
+    X_synt = np.zeros_like(X)
+
+    # Sampling with replacement
+    if (replace):
+        idx = np.random.randint(0, n_samples, size=(n_samples, n_series))
+        i = np.arange(n_series)
+        X_synt[:, i] = X[idx[:, i], i]
+
+    # Sampling without replacement
+    else:
+        idx = np.zeros_like(X)
+        for j in range(n_series):
+            idx[:, j] = np.random.permutation(n_samples)
+        i = np.arange(n_series)
+        X_synt[:, i] = X[idx[:, i], i]
+
+    return X_synt
+
+
+def synthetic_MEboot(X, alpha=0.1):
+    """
+    """
+    n_samples, n_series = X.shape
+    X_synt = np.zeros_like(X)
+
+    # Loop over time-series
+    n = n_samples
+    for ts in range(n_series):
+        
+        # Sort the time series keeping track of the original position
+        idx = np.argsort(X[:, ts])
+        Y = X[idx, ts]
+        print(idx, idx.shape)
+        print(Y, Y.shape)
+
+        # Compute the trimmed mean
+        g = int(np.floor(n * alpha))
+        r = n * alpha - g
+        print(n, g, r)
+        m_trm = ((1.0 - r) * (Y[g] + Y[n-g-1]) + Y[g+1:n-g-1].sum()) \
+                 / (n * (1.0 - 2.0 * alpha))
+        print(m_trm)
+
+        # Compute the intermediate points
+        Z = np.zeros(n+1)
+        Z[1:-1] = (Y[0:-1] + Y[1:]) / 2.0
+        Z[0] = Y[0] - m_trm
+        Z[n] = Y[n-1] + m_trm
+        print(Z, Z.shape)
+
+        # Compute the interval means
+        mt = np.zeros(n)
+        mt[0] = 0.75 * Y[0] + 0.25 * Y[1]
+        mt[1:n-1] = 0.25 * Y[0:n-2] + 0.5 * Y[1:n-1] + 0.25 * Y[2:n]
+        mt[n-1] = 0.25 * Y[n-2] + 0.75 * Y[n-1]
+        print(mt)
+
+
+
--- a/Code_Python/test.py
+++ b/Code_Python/test.py
@ -15,8 +15,6 @@ ToDo:
 - example using noisy multi-sine-waves
 - synt: boot, paper Vinod (as a class?)
 - vectors must be ( .., 1)
- reduce the vector diff by one and pass initial value
-  (with zero/one as default)
 """

 import sys
@ -56,30 +54,48 @@ np.random.seed(1294404794)
 # signals = (spx.data[:, 0], Yc[:, 0], Yp[:, 0])
 # utl.plot_signals(signals, 0, 50)

-# t, f = utl.synthetic_wave([1., 2., 3.], A=None, PH=None, num=30)
+# t, f = syn.synthetic_wave([1., 2., 3.], A=None, phi=None, num=100)
 # plt.plot(t,f)
 # plt.show()
-aa = np.array([
-        [ 0.8252,  0.2820],
-        [ 1.3790,  0.0335],
-        [-1.0582, -1.3337],
-        [-0.4686,  1.1275],
-        [-0.2725,  0.3502],
-        [ 1.0984, -0.2991],
-        [-0.2779,  0.0229],
-        [ 0.7015, -0.2620],
-        [-2.0518, -1.7502],
-        [-0.3538, -0.2857],
-        [-0.8236, -0.8314],
-        [-1.5771, -0.9792],
-        [ 0.5080, -1.1564]])
-# synt_aa = utl.synthetic_series(data, False)
-# plt.plot(synt_aa)
+# aa = np.array([
+#         [ 0.8252,  0.2820],
+#         [ 1.3790,  0.0335],
+#         [-1.0582, -1.3337],
+#         [-0.4686,  1.1275],
+#         [-0.2725,  0.3502],
+#         [ 1.0984, -0.2991],
+#         [-0.2779,  0.0229],
+#         [ 0.7015, -0.2620],
+#         [-2.0518, -1.7502],
+#         [-0.3538, -0.2857],
+#         [-0.8236, -0.8314],
+#         [-1.5771, -0.9792],
+#         [ 0.5080, -1.1564]])
+# synt_data1 = syn.synthetic_FFT(data, False)
+# synt_data2 = syn.synthetic_FFT(data, False)
+# plt.plot(synt_data1)
+# plt.plot(synt_data2)
 # plt.plot(data)
+# names = ['syn1', 'syn2', 'spx']
+# plt.legend(names)
 # plt.show()
-print(data[0:10, :])
-bb = syn.value2diff(data, mode='V')
-print(bb[0:10, :])
-bb[0, 0] = 1399.48
-cc = syn.diff2value(bb, mode='V')
-print(cc[0:10, :])
+# percent = False
+# print(data[0:10, :])
+# bb = syn.value2diff(data, percent)
+# print(bb[0:10, :])
+# cc = syn.diff2value(bb, percent)
+# print(cc[0:10, :]+1399.48)
+# aa = np.arange(10,28).reshape(6,3)
+# print(aa)
+# idx = np.zeros_like(aa)
+# bb = np.zeros_like(aa)
+# for i in range(aa.shape[1]):
+#     idx[:, i] = np.random.permutation(aa.shape[0])
+# print(idx)
+# i = np.arange(aa.shape[1])
+# bb[:, i] = aa[idx[:, i], i]
+# bb = syn.synthetic_boot(aa, replace=False)
+# print(bb)
+aa = np.array([4, 12, 36, 20, 8]).reshape(5, 1)
+# print(aa)
+syn.synthetic_MEboot(aa)
--- a/README.md
+++ b/README.md
@ -8,6 +8,8 @@

 - D. Prichard, and J. Theiler, "[Generating surrogate data for time series with several simultaneously measured variables](https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.73.951)."

+- H. Vinod, and J. Lopez-de-Lacalle, "[Maximum entropy bootstrap for time series: the meboot R package](https://www.jstatsoft.org/article/view/v029i05)."
+
 ## Characteristics

 ## Parameters