diff --git a/resampler/resample.c b/resampler/resample.c
index 3d157aa..5144434 100644
--- a/resampler/resample.c
+++ b/resampler/resample.c
@@ -57,16 +57,15 @@
    The latter both reduces CPU time and makes the algorithm more SIMD-friendly.
 */
 
-
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #ifdef OUTSIDE_SPEEX
 #include <stdlib.h>
-static void *speex_alloc(int size) {return calloc(size,1);}
-static void *speex_realloc(void *ptr, int size) {return realloc(ptr, size);}
-static void speex_free(void *ptr) {free(ptr);}
+static void* speex_alloc(int size) { return calloc(size, 1); }
+static void* speex_realloc(void* ptr, int size) { return realloc(ptr, size); }
+static void speex_free(void* ptr) { free(ptr); }
 #ifndef EXPORT
 #define EXPORT
 #endif
@@ -74,9 +73,9 @@ static void speex_free(void *ptr) {free(ptr);}
 #include "arch.h"
 #else /* OUTSIDE_SPEEX */
 
-#include "resampler/speex_resampler.h"
-#include "resampler/arch.h"
-#include "resampler/os_support.h"
+#include "speex/speex_resampler.h"
+#include "arch.h"
+#include "os_support.h"
 #endif /* OUTSIDE_SPEEX */
 
 #include <math.h>
@@ -97,7 +96,7 @@ static void speex_free(void *ptr) {free(ptr);}
 #define UINT32_MAX 4294967295U
 #endif
 
-#if defined(__SSE__) && !defined(FIXED_POINT)
+#ifdef USE_SSE
 #include "resample_sse.h"
 #endif
 
@@ -112,39 +111,39 @@ static void speex_free(void *ptr) {free(ptr);}
 #define FIXED_STACK_ALLOC 1024
 #endif
 
-typedef int (*resampler_basic_func)(SpeexResamplerState *, spx_uint32_t , const spx_word16_t *, spx_uint32_t *, spx_word16_t *, spx_uint32_t *);
+typedef int (*resampler_basic_func)(SpeexResamplerState*, spx_uint32_t, const spx_word16_t*, spx_uint32_t*, spx_word16_t*, spx_uint32_t*);
 
 struct SpeexResamplerState_ {
-   spx_uint32_t in_rate;
-   spx_uint32_t out_rate;
-   spx_uint32_t num_rate;
-   spx_uint32_t den_rate;
+    spx_uint32_t in_rate;
+    spx_uint32_t out_rate;
+    spx_uint32_t num_rate;
+    spx_uint32_t den_rate;
 
-   int    quality;
-   spx_uint32_t nb_channels;
-   spx_uint32_t filt_len;
-   spx_uint32_t mem_alloc_size;
-   spx_uint32_t buffer_size;
-   int          int_advance;
-   int          frac_advance;
-   float  cutoff;
-   spx_uint32_t oversample;
-   int          initialised;
-   int          started;
+    int    quality;
+    spx_uint32_t nb_channels;
+    spx_uint32_t filt_len;
+    spx_uint32_t mem_alloc_size;
+    spx_uint32_t buffer_size;
+    int          int_advance;
+    int          frac_advance;
+    float  cutoff;
+    spx_uint32_t oversample;
+    int          initialised;
+    int          started;
 
-   /* These are per-channel */
-   spx_int32_t  *last_sample;
-   spx_uint32_t *samp_frac_num;
-   spx_uint32_t *magic_samples;
+    /* These are per-channel */
+    spx_int32_t* last_sample;
+    spx_uint32_t* samp_frac_num;
+    spx_uint32_t* magic_samples;
 
-   spx_word16_t *mem;
-   spx_word16_t *sinc_table;
-   spx_uint32_t sinc_table_length;
-   resampler_basic_func resampler_ptr;
+    spx_word16_t* mem;
+    spx_word16_t* sinc_table;
+    spx_uint32_t sinc_table_length;
+    resampler_basic_func resampler_ptr;
 
-   int    in_stride;
-   int    out_stride;
-} ;
+    int    in_stride;
+    int    out_stride;
+};
 
 static const double kaiser12_table[68] = {
    0.99859849, 1.00000000, 0.99859849, 0.99440475, 0.98745105, 0.97779076,
@@ -158,7 +157,7 @@ static const double kaiser12_table[68] = {
    0.02584161, 0.02127838, 0.01736250, 0.01402878, 0.01121463, 0.00886058,
    0.00691064, 0.00531256, 0.00401805, 0.00298291, 0.00216702, 0.00153438,
    0.00105297, 0.00069463, 0.00043489, 0.00025272, 0.00013031, 0.0000527734,
-   0.00001000, 0.00000000};
+   0.00001000, 0.00000000 };
 /*
 static const double kaiser12_table[36] = {
    0.99440475, 1.00000000, 0.99440475, 0.97779076, 0.95066529, 0.91384741,
@@ -174,7 +173,7 @@ static const double kaiser10_table[36] = {
    0.56155915, 0.50119680, 0.44221549, 0.38553619, 0.33194107, 0.28205962,
    0.23636152, 0.19515633, 0.15859932, 0.12670280, 0.09935205, 0.07632451,
    0.05731132, 0.04193980, 0.02979584, 0.02044510, 0.01345224, 0.00839739,
-   0.00488951, 0.00257636, 0.00115101, 0.00035515, 0.00000000, 0.00000000};
+   0.00488951, 0.00257636, 0.00115101, 0.00035515, 0.00000000, 0.00000000 };
 
 static const double kaiser8_table[36] = {
    0.99635258, 1.00000000, 0.99635258, 0.98548012, 0.96759014, 0.94302200,
@@ -182,7 +181,7 @@ static const double kaiser8_table[36] = {
    0.63451750, 0.58014482, 0.52566725, 0.47185369, 0.41941150, 0.36897272,
    0.32108304, 0.27619388, 0.23465776, 0.19672670, 0.16255380, 0.13219758,
    0.10562887, 0.08273982, 0.06335451, 0.04724088, 0.03412321, 0.02369490,
-   0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000};
+   0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000 };
 
 static const double kaiser6_table[36] = {
    0.99733006, 1.00000000, 0.99733006, 0.98935595, 0.97618418, 0.95799003,
@@ -190,28 +189,28 @@ static const double kaiser6_table[36] = {
    0.71712752, 0.67172623, 0.62508937, 0.57774224, 0.53019925, 0.48295561,
    0.43647969, 0.39120616, 0.34752997, 0.30580127, 0.26632152, 0.22934058,
    0.19505503, 0.16360756, 0.13508755, 0.10953262, 0.08693120, 0.06722600,
-   0.05031820, 0.03607231, 0.02432151, 0.01487334, 0.00752000, 0.00000000};
+   0.05031820, 0.03607231, 0.02432151, 0.01487334, 0.00752000, 0.00000000 };
 
 struct FuncDef {
-   const double *table;
-   int oversample;
+    const double* table;
+    int oversample;
 };
 
-static const struct FuncDef kaiser12_funcdef = {kaiser12_table, 64};
+static const struct FuncDef kaiser12_funcdef = { kaiser12_table, 64 };
 #define KAISER12 (&kaiser12_funcdef)
-static const struct FuncDef kaiser10_funcdef = {kaiser10_table, 32};
+static const struct FuncDef kaiser10_funcdef = { kaiser10_table, 32 };
 #define KAISER10 (&kaiser10_funcdef)
-static const struct FuncDef kaiser8_funcdef = {kaiser8_table, 32};
+static const struct FuncDef kaiser8_funcdef = { kaiser8_table, 32 };
 #define KAISER8 (&kaiser8_funcdef)
-static const struct FuncDef kaiser6_funcdef = {kaiser6_table, 32};
+static const struct FuncDef kaiser6_funcdef = { kaiser6_table, 32 };
 #define KAISER6 (&kaiser6_funcdef)
 
 struct QualityMapping {
-   int base_length;
-   int oversample;
-   float downsample_bandwidth;
-   float upsample_bandwidth;
-   const struct FuncDef *window_func;
+    int base_length;
+    int oversample;
+    float downsample_bandwidth;
+    float upsample_bandwidth;
+    const struct FuncDef* window_func;
 };
 
 
@@ -238,324 +237,324 @@ static const struct QualityMapping quality_map[11] = {
    {256, 32, 0.975f, 0.975f, KAISER12}, /* Q10 */ /* 96.6% cutoff (~100 dB stop) 10 */
 };
 /*8,24,40,56,80,104,128,160,200,256,320*/
-static double compute_func(float x, const struct FuncDef *func)
+static double compute_func(float x, const struct FuncDef* func)
 {
-   float y, frac;
-   double interp[4];
-   int ind;
-   y = x*func->oversample;
-   ind = (int)floor(y);
-   frac = (y-ind);
-   /* CSE with handle the repeated powers */
-   interp[3] =  -0.1666666667*frac + 0.1666666667*(frac*frac*frac);
-   interp[2] = frac + 0.5*(frac*frac) - 0.5*(frac*frac*frac);
-   /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac;*/
-   interp[0] = -0.3333333333*frac + 0.5*(frac*frac) - 0.1666666667*(frac*frac*frac);
-   /* Just to make sure we don't have rounding problems */
-   interp[1] = 1.f-interp[3]-interp[2]-interp[0];
+    float y, frac;
+    double interp[4];
+    int ind;
+    y = x * func->oversample;
+    ind = (int)floor(y);
+    frac = (y - ind);
+    /* CSE will handle the repeated powers */
+    interp[3] = -0.1666666667 * frac + 0.1666666667 * (frac * frac * frac);
+    interp[2] = frac + 0.5 * (frac * frac) - 0.5 * (frac * frac * frac);
+    /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac;*/
+    interp[0] = -0.3333333333 * frac + 0.5 * (frac * frac) - 0.1666666667 * (frac * frac * frac);
+    /* Just to make sure we don't have rounding problems */
+    interp[1] = 1.f - interp[3] - interp[2] - interp[0];
 
-   /*sum = frac*accum[1] + (1-frac)*accum[2];*/
-   return interp[0]*func->table[ind] + interp[1]*func->table[ind+1] + interp[2]*func->table[ind+2] + interp[3]*func->table[ind+3];
+    /*sum = frac*accum[1] + (1-frac)*accum[2];*/
+    return interp[0] * func->table[ind] + interp[1] * func->table[ind + 1] + interp[2] * func->table[ind + 2] + interp[3] * func->table[ind + 3];
 }
 
 #if 0
 #include <stdio.h>
-int main(int argc, char **argv)
+int main(int argc, char** argv)
 {
-   int i;
-   for (i=0;i<256;i++)
-   {
-      printf ("%f\n", compute_func(i/256., KAISER12));
-   }
-   return 0;
+    int i;
+    for (i = 0; i < 256; i++)
+    {
+        printf("%f\n", compute_func(i / 256., KAISER12));
+    }
+    return 0;
 }
 #endif
 
 #ifdef FIXED_POINT
 /* The slow way of computing a sinc for the table. Should improve that some day */
-static spx_word16_t sinc(float cutoff, float x, int N, const struct FuncDef *window_func)
+static spx_word16_t sinc(float cutoff, float x, int N, const struct FuncDef* window_func)
 {
-   /*fprintf (stderr, "%f ", x);*/
-   float xx = x * cutoff;
-   if (fabs(x)<1e-6f)
-      return WORD2INT(32768.*cutoff);
-   else if (fabs(x) > .5f*N)
-      return 0;
-   /*FIXME: Can it really be any slower than this? */
-   return WORD2INT(32768.*cutoff*sin(M_PI*xx)/(M_PI*xx) * compute_func(fabs(2.*x/N), window_func));
+    /*fprintf (stderr, "%f ", x);*/
+    float xx = x * cutoff;
+    if (fabs(x) < 1e-6f)
+        return WORD2INT(32768. * cutoff);
+    else if (fabs(x) > .5f * N)
+        return 0;
+    /*FIXME: Can it really be any slower than this? */
+    return WORD2INT(32768. * cutoff * sin(M_PI * xx) / (M_PI * xx) * compute_func(fabs(2. * x / N), window_func));
 }
 #else
 /* The slow way of computing a sinc for the table. Should improve that some day */
-static spx_word16_t sinc(float cutoff, float x, int N, const struct FuncDef *window_func)
+static spx_word16_t sinc(float cutoff, float x, int N, const struct FuncDef* window_func)
 {
-   /*fprintf (stderr, "%f ", x);*/
-   float xx = x * cutoff;
-   if (fabs(x)<1e-6)
-      return cutoff;
-   else if (fabs(x) > .5*N)
-      return 0;
-   /*FIXME: Can it really be any slower than this? */
-   return cutoff*sin(M_PI*xx)/(M_PI*xx) * compute_func(fabs(2.*x/N), window_func);
+    /*fprintf (stderr, "%f ", x);*/
+    float xx = x * cutoff;
+    if (fabs(x) < 1e-6)
+        return cutoff;
+    else if (fabs(x) > .5 * N)
+        return 0;
+    /*FIXME: Can it really be any slower than this? */
+    return cutoff * sin(M_PI * xx) / (M_PI * xx) * compute_func(fabs(2. * x / N), window_func);
 }
 #endif
 
 #ifdef FIXED_POINT
 static void cubic_coef(spx_word16_t x, spx_word16_t interp[4])
 {
-   /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation
-   but I know it's MMSE-optimal on a sinc */
-   spx_word16_t x2, x3;
-   x2 = MULT16_16_P15(x, x);
-   x3 = MULT16_16_P15(x, x2);
-   interp[0] = PSHR32(MULT16_16(QCONST16(-0.16667f, 15),x) + MULT16_16(QCONST16(0.16667f, 15),x3),15);
-   interp[1] = EXTRACT16(EXTEND32(x) + SHR32(SUB32(EXTEND32(x2),EXTEND32(x3)),1));
-   interp[3] = PSHR32(MULT16_16(QCONST16(-0.33333f, 15),x) + MULT16_16(QCONST16(.5f,15),x2) - MULT16_16(QCONST16(0.16667f, 15),x3),15);
-   /* Just to make sure we don't have rounding problems */
-   interp[2] = Q15_ONE-interp[0]-interp[1]-interp[3];
-   if (interp[2]<32767)
-      interp[2]+=1;
+    /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation
+    but I know it's MMSE-optimal on a sinc */
+    spx_word16_t x2, x3;
+    x2 = MULT16_16_P15(x, x);
+    x3 = MULT16_16_P15(x, x2);
+    interp[0] = PSHR32(MULT16_16(QCONST16(-0.16667f, 15), x) + MULT16_16(QCONST16(0.16667f, 15), x3), 15);
+    interp[1] = EXTRACT16(EXTEND32(x) + SHR32(SUB32(EXTEND32(x2), EXTEND32(x3)), 1));
+    interp[3] = PSHR32(MULT16_16(QCONST16(-0.33333f, 15), x) + MULT16_16(QCONST16(.5f, 15), x2) - MULT16_16(QCONST16(0.16667f, 15), x3), 15);
+    /* Just to make sure we don't have rounding problems */
+    interp[2] = Q15_ONE - interp[0] - interp[1] - interp[3];
+    if (interp[2] < 32767)
+        interp[2] += 1;
 }
 #else
 static void cubic_coef(spx_word16_t frac, spx_word16_t interp[4])
 {
-   /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation
-   but I know it's MMSE-optimal on a sinc */
-   interp[0] =  -0.16667f*frac + 0.16667f*frac*frac*frac;
-   interp[1] = frac + 0.5f*frac*frac - 0.5f*frac*frac*frac;
-   /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac;*/
-   interp[3] = -0.33333f*frac + 0.5f*frac*frac - 0.16667f*frac*frac*frac;
-   /* Just to make sure we don't have rounding problems */
-   interp[2] = 1.-interp[0]-interp[1]-interp[3];
+    /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation
+    but I know it's MMSE-optimal on a sinc */
+    interp[0] = -0.16667f * frac + 0.16667f * frac * frac * frac;
+    interp[1] = frac + 0.5f * frac * frac - 0.5f * frac * frac * frac;
+    /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac;*/
+    interp[3] = -0.33333f * frac + 0.5f * frac * frac - 0.16667f * frac * frac * frac;
+    /* Just to make sure we don't have rounding problems */
+    interp[2] = 1. - interp[0] - interp[1] - interp[3];
 }
 #endif
 
-static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len)
+static int resampler_basic_direct_single(SpeexResamplerState* st, spx_uint32_t channel_index, const spx_word16_t* in, spx_uint32_t* in_len, spx_word16_t* out, spx_uint32_t* out_len)
 {
-   const int N = st->filt_len;
-   int out_sample = 0;
-   int last_sample = st->last_sample[channel_index];
-   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
-   const spx_word16_t *sinc_table = st->sinc_table;
-   const int out_stride = st->out_stride;
-   const int int_advance = st->int_advance;
-   const int frac_advance = st->frac_advance;
-   const spx_uint32_t den_rate = st->den_rate;
-   spx_word32_t sum;
+    const int N = st->filt_len;
+    int out_sample = 0;
+    int last_sample = st->last_sample[channel_index];
+    spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
+    const spx_word16_t* sinc_table = st->sinc_table;
+    const int out_stride = st->out_stride;
+    const int int_advance = st->int_advance;
+    const int frac_advance = st->frac_advance;
+    const spx_uint32_t den_rate = st->den_rate;
+    spx_word32_t sum;
 
-   while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
-   {
-      const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
-      const spx_word16_t *iptr = & in[last_sample];
+    while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
+    {
+        const spx_word16_t* sinct = &sinc_table[samp_frac_num * N];
+        const spx_word16_t* iptr = &in[last_sample];
 
 #ifndef OVERRIDE_INNER_PRODUCT_SINGLE
-      int j;
-      sum = 0;
-      for(j=0;j<N;j++) sum += MULT16_16(sinct[j], iptr[j]);
+        int j;
+        sum = 0;
+        for (j = 0; j < N; j++) sum += MULT16_16(sinct[j], iptr[j]);
 
-/*    This code is slower on most DSPs which have only 2 accumulators.
-      Plus this this forces truncation to 32 bits and you lose the HW guard bits.
-      I think we can trust the compiler and let it vectorize and/or unroll itself.
-      spx_word32_t accum[4] = {0,0,0,0};
-      for(j=0;j<N;j+=4) {
-        accum[0] += MULT16_16(sinct[j], iptr[j]);
-        accum[1] += MULT16_16(sinct[j+1], iptr[j+1]);
-        accum[2] += MULT16_16(sinct[j+2], iptr[j+2]);
-        accum[3] += MULT16_16(sinct[j+3], iptr[j+3]);
-      }
-      sum = accum[0] + accum[1] + accum[2] + accum[3];
-*/
-      sum = SATURATE32PSHR(sum, 15, 32767);
+        /*    This code is slower on most DSPs which have only 2 accumulators.
+              Plus this forces truncation to 32 bits and you lose the HW guard bits.
+              I think we can trust the compiler and let it vectorize and/or unroll itself.
+              spx_word32_t accum[4] = {0,0,0,0};
+              for(j=0;j<N;j+=4) {
+                accum[0] += MULT16_16(sinct[j], iptr[j]);
+                accum[1] += MULT16_16(sinct[j+1], iptr[j+1]);
+                accum[2] += MULT16_16(sinct[j+2], iptr[j+2]);
+                accum[3] += MULT16_16(sinct[j+3], iptr[j+3]);
+              }
+              sum = accum[0] + accum[1] + accum[2] + accum[3];
+        */
+        sum = SATURATE32PSHR(sum, 15, 32767);
 #else
-      sum = inner_product_single(sinct, iptr, N);
+        sum = inner_product_single(sinct, iptr, N);
 #endif
 
-      out[out_stride * out_sample++] = sum;
-      last_sample += int_advance;
-      samp_frac_num += frac_advance;
-      if (samp_frac_num >= den_rate)
-      {
-         samp_frac_num -= den_rate;
-         last_sample++;
-      }
-   }
+        out[out_stride * out_sample++] = sum;
+        last_sample += int_advance;
+        samp_frac_num += frac_advance;
+        if (samp_frac_num >= den_rate)
+        {
+            samp_frac_num -= den_rate;
+            last_sample++;
+        }
+    }
 
-   st->last_sample[channel_index] = last_sample;
-   st->samp_frac_num[channel_index] = samp_frac_num;
-   return out_sample;
+    st->last_sample[channel_index] = last_sample;
+    st->samp_frac_num[channel_index] = samp_frac_num;
+    return out_sample;
 }
 
 #ifdef FIXED_POINT
 #else
 /* This is the same as the previous function, except with a double-precision accumulator */
-static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len)
+static int resampler_basic_direct_double(SpeexResamplerState* st, spx_uint32_t channel_index, const spx_word16_t* in, spx_uint32_t* in_len, spx_word16_t* out, spx_uint32_t* out_len)
 {
-   const int N = st->filt_len;
-   int out_sample = 0;
-   int last_sample = st->last_sample[channel_index];
-   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
-   const spx_word16_t *sinc_table = st->sinc_table;
-   const int out_stride = st->out_stride;
-   const int int_advance = st->int_advance;
-   const int frac_advance = st->frac_advance;
-   const spx_uint32_t den_rate = st->den_rate;
-   double sum;
+    const int N = st->filt_len;
+    int out_sample = 0;
+    int last_sample = st->last_sample[channel_index];
+    spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
+    const spx_word16_t* sinc_table = st->sinc_table;
+    const int out_stride = st->out_stride;
+    const int int_advance = st->int_advance;
+    const int frac_advance = st->frac_advance;
+    const spx_uint32_t den_rate = st->den_rate;
+    double sum;
 
-   while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
-   {
-      const spx_word16_t *sinct = & sinc_table[samp_frac_num*N];
-      const spx_word16_t *iptr = & in[last_sample];
+    while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
+    {
+        const spx_word16_t* sinct = &sinc_table[samp_frac_num * N];
+        const spx_word16_t* iptr = &in[last_sample];
 
 #ifndef OVERRIDE_INNER_PRODUCT_DOUBLE
-      int j;
-      double accum[4] = {0,0,0,0};
+        int j;
+        double accum[4] = { 0,0,0,0 };
 
-      for(j=0;j<N;j+=4) {
-        accum[0] += sinct[j]*iptr[j];
-        accum[1] += sinct[j+1]*iptr[j+1];
-        accum[2] += sinct[j+2]*iptr[j+2];
-        accum[3] += sinct[j+3]*iptr[j+3];
-      }
-      sum = accum[0] + accum[1] + accum[2] + accum[3];
+        for (j = 0; j < N; j += 4) {
+            accum[0] += sinct[j] * iptr[j];
+            accum[1] += sinct[j + 1] * iptr[j + 1];
+            accum[2] += sinct[j + 2] * iptr[j + 2];
+            accum[3] += sinct[j + 3] * iptr[j + 3];
+        }
+        sum = accum[0] + accum[1] + accum[2] + accum[3];
 #else
-      sum = inner_product_double(sinct, iptr, N);
+        sum = inner_product_double(sinct, iptr, N);
 #endif
 
-      out[out_stride * out_sample++] = PSHR32(sum, 15);
-      last_sample += int_advance;
-      samp_frac_num += frac_advance;
-      if (samp_frac_num >= den_rate)
-      {
-         samp_frac_num -= den_rate;
-         last_sample++;
-      }
-   }
+        out[out_stride * out_sample++] = PSHR32(sum, 15);
+        last_sample += int_advance;
+        samp_frac_num += frac_advance;
+        if (samp_frac_num >= den_rate)
+        {
+            samp_frac_num -= den_rate;
+            last_sample++;
+        }
+    }
 
-   st->last_sample[channel_index] = last_sample;
-   st->samp_frac_num[channel_index] = samp_frac_num;
-   return out_sample;
+    st->last_sample[channel_index] = last_sample;
+    st->samp_frac_num[channel_index] = samp_frac_num;
+    return out_sample;
 }
 #endif
 
-static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len)
+static int resampler_basic_interpolate_single(SpeexResamplerState* st, spx_uint32_t channel_index, const spx_word16_t* in, spx_uint32_t* in_len, spx_word16_t* out, spx_uint32_t* out_len)
 {
-   const int N = st->filt_len;
-   int out_sample = 0;
-   int last_sample = st->last_sample[channel_index];
-   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
-   const int out_stride = st->out_stride;
-   const int int_advance = st->int_advance;
-   const int frac_advance = st->frac_advance;
-   const spx_uint32_t den_rate = st->den_rate;
-   spx_word32_t sum;
+    const int N = st->filt_len;
+    int out_sample = 0;
+    int last_sample = st->last_sample[channel_index];
+    spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
+    const int out_stride = st->out_stride;
+    const int int_advance = st->int_advance;
+    const int frac_advance = st->frac_advance;
+    const spx_uint32_t den_rate = st->den_rate;
+    spx_word32_t sum;
 
-   while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
-   {
-      const spx_word16_t *iptr = & in[last_sample];
+    while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
+    {
+        const spx_word16_t* iptr = &in[last_sample];
 
-      const int offset = samp_frac_num*st->oversample/st->den_rate;
+        const int offset = samp_frac_num * st->oversample / st->den_rate;
 #ifdef FIXED_POINT
-      const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate);
+        const spx_word16_t frac = PDIV32(SHL32((samp_frac_num * st->oversample) % st->den_rate, 15), st->den_rate);
 #else
-      const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate;
+        const spx_word16_t frac = ((float)((samp_frac_num * st->oversample) % st->den_rate)) / st->den_rate;
 #endif
-      spx_word16_t interp[4];
+        spx_word16_t interp[4];
 
 
 #ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
-      int j;
-      spx_word32_t accum[4] = {0,0,0,0};
+        int j;
+        spx_word32_t accum[4] = { 0,0,0,0 };
 
-      for(j=0;j<N;j++) {
-        const spx_word16_t curr_in=iptr[j];
-        accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
-        accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
-        accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
-        accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
-      }
+        for (j = 0; j < N; j++) {
+            const spx_word16_t curr_in = iptr[j];
+            accum[0] += MULT16_16(curr_in, st->sinc_table[4 + (j + 1) * st->oversample - offset - 2]);
+            accum[1] += MULT16_16(curr_in, st->sinc_table[4 + (j + 1) * st->oversample - offset - 1]);
+            accum[2] += MULT16_16(curr_in, st->sinc_table[4 + (j + 1) * st->oversample - offset]);
+            accum[3] += MULT16_16(curr_in, st->sinc_table[4 + (j + 1) * st->oversample - offset + 1]);
+        }
 
-      cubic_coef(frac, interp);
-      sum = MULT16_32_Q15(interp[0],SHR32(accum[0], 1)) + MULT16_32_Q15(interp[1],SHR32(accum[1], 1)) + MULT16_32_Q15(interp[2],SHR32(accum[2], 1)) + MULT16_32_Q15(interp[3],SHR32(accum[3], 1));
-      sum = SATURATE32PSHR(sum, 15, 32767);
+        cubic_coef(frac, interp);
+        sum = MULT16_32_Q15(interp[0], accum[0]) + MULT16_32_Q15(interp[1], accum[1]) + MULT16_32_Q15(interp[2], accum[2]) + MULT16_32_Q15(interp[3], accum[3]);
+        sum = SATURATE32PSHR(sum, 15, 32767);
 #else
-      cubic_coef(frac, interp);
-      sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
+        cubic_coef(frac, interp);
+        sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
 #endif
 
-      out[out_stride * out_sample++] = sum;
-      last_sample += int_advance;
-      samp_frac_num += frac_advance;
-      if (samp_frac_num >= den_rate)
-      {
-         samp_frac_num -= den_rate;
-         last_sample++;
-      }
-   }
+        out[out_stride * out_sample++] = sum;
+        last_sample += int_advance;
+        samp_frac_num += frac_advance;
+        if (samp_frac_num >= den_rate)
+        {
+            samp_frac_num -= den_rate;
+            last_sample++;
+        }
+    }
 
-   st->last_sample[channel_index] = last_sample;
-   st->samp_frac_num[channel_index] = samp_frac_num;
-   return out_sample;
+    st->last_sample[channel_index] = last_sample;
+    st->samp_frac_num[channel_index] = samp_frac_num;
+    return out_sample;
 }
 
 #ifdef FIXED_POINT
 #else
 /* This is the same as the previous function, except with a double-precision accumulator */
-static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len)
+static int resampler_basic_interpolate_double(SpeexResamplerState* st, spx_uint32_t channel_index, const spx_word16_t* in, spx_uint32_t* in_len, spx_word16_t* out, spx_uint32_t* out_len)
 {
-   const int N = st->filt_len;
-   int out_sample = 0;
-   int last_sample = st->last_sample[channel_index];
-   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
-   const int out_stride = st->out_stride;
-   const int int_advance = st->int_advance;
-   const int frac_advance = st->frac_advance;
-   const spx_uint32_t den_rate = st->den_rate;
-   spx_word32_t sum;
+    const int N = st->filt_len;
+    int out_sample = 0;
+    int last_sample = st->last_sample[channel_index];
+    spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
+    const int out_stride = st->out_stride;
+    const int int_advance = st->int_advance;
+    const int frac_advance = st->frac_advance;
+    const spx_uint32_t den_rate = st->den_rate;
+    spx_word32_t sum;
 
-   while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
-   {
-      const spx_word16_t *iptr = & in[last_sample];
+    while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
+    {
+        const spx_word16_t* iptr = &in[last_sample];
 
-      const int offset = samp_frac_num*st->oversample/st->den_rate;
+        const int offset = samp_frac_num * st->oversample / st->den_rate;
 #ifdef FIXED_POINT
-      const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate);
+        const spx_word16_t frac = PDIV32(SHL32((samp_frac_num * st->oversample) % st->den_rate, 15), st->den_rate);
 #else
-      const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate;
+        const spx_word16_t frac = ((float)((samp_frac_num * st->oversample) % st->den_rate)) / st->den_rate;
 #endif
-      spx_word16_t interp[4];
+        spx_word16_t interp[4];
 
 
 #ifndef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
-      int j;
-      double accum[4] = {0,0,0,0};
+        int j;
+        double accum[4] = { 0,0,0,0 };
 
-      for(j=0;j<N;j++) {
-        const double curr_in=iptr[j];
-        accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]);
-        accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]);
-        accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]);
-        accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]);
-      }
+        for (j = 0; j < N; j++) {
+            const double curr_in = iptr[j];
+            accum[0] += MULT16_16(curr_in, st->sinc_table[4 + (j + 1) * st->oversample - offset - 2]);
+            accum[1] += MULT16_16(curr_in, st->sinc_table[4 + (j + 1) * st->oversample - offset - 1]);
+            accum[2] += MULT16_16(curr_in, st->sinc_table[4 + (j + 1) * st->oversample - offset]);
+            accum[3] += MULT16_16(curr_in, st->sinc_table[4 + (j + 1) * st->oversample - offset + 1]);
+        }
 
-      cubic_coef(frac, interp);
-      sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]);
+        cubic_coef(frac, interp);
+        sum = MULT16_32_Q15(interp[0], accum[0]) + MULT16_32_Q15(interp[1], accum[1]) + MULT16_32_Q15(interp[2], accum[2]) + MULT16_32_Q15(interp[3], accum[3]);
 #else
-      cubic_coef(frac, interp);
-      sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
+        cubic_coef(frac, interp);
+        sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp);
 #endif
 
-      out[out_stride * out_sample++] = PSHR32(sum,15);
-      last_sample += int_advance;
-      samp_frac_num += frac_advance;
-      if (samp_frac_num >= den_rate)
-      {
-         samp_frac_num -= den_rate;
-         last_sample++;
-      }
-   }
+        out[out_stride * out_sample++] = PSHR32(sum, 15);
+        last_sample += int_advance;
+        samp_frac_num += frac_advance;
+        if (samp_frac_num >= den_rate)
+        {
+            samp_frac_num -= den_rate;
+            last_sample++;
+        }
+    }
 
-   st->last_sample[channel_index] = last_sample;
-   st->samp_frac_num[channel_index] = samp_frac_num;
-   return out_sample;
+    st->last_sample[channel_index] = last_sample;
+    st->samp_frac_num[channel_index] = samp_frac_num;
+    return out_sample;
 }
 #endif
 
@@ -563,678 +562,688 @@ static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint3
    for the filter could not be allocated.  The expected numbers of input and
    output samples are still processed so that callers failing to check error
    codes are not surprised, possibly getting into infinite loops. */
-static int resampler_basic_zero(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len)
+static int resampler_basic_zero(SpeexResamplerState* st, spx_uint32_t channel_index, const spx_word16_t* in, spx_uint32_t* in_len, spx_word16_t* out, spx_uint32_t* out_len)
 {
-   int out_sample = 0;
-   int last_sample = st->last_sample[channel_index];
-   spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
-   const int out_stride = st->out_stride;
-   const int int_advance = st->int_advance;
-   const int frac_advance = st->frac_advance;
-   const spx_uint32_t den_rate = st->den_rate;
+    int out_sample = 0;
+    int last_sample = st->last_sample[channel_index];
+    spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index];
+    const int out_stride = st->out_stride;
+    const int int_advance = st->int_advance;
+    const int frac_advance = st->frac_advance;
+    const spx_uint32_t den_rate = st->den_rate;
 
-   (void)in;
-   while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
-   {
-      out[out_stride * out_sample++] = 0;
-      last_sample += int_advance;
-      samp_frac_num += frac_advance;
-      if (samp_frac_num >= den_rate)
-      {
-         samp_frac_num -= den_rate;
-         last_sample++;
-      }
-   }
+    (void)in;
+    while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len))
+    {
+        out[out_stride * out_sample++] = 0;
+        last_sample += int_advance;
+        samp_frac_num += frac_advance;
+        if (samp_frac_num >= den_rate)
+        {
+            samp_frac_num -= den_rate;
+            last_sample++;
+        }
+    }
 
-   st->last_sample[channel_index] = last_sample;
-   st->samp_frac_num[channel_index] = samp_frac_num;
-   return out_sample;
+    st->last_sample[channel_index] = last_sample;
+    st->samp_frac_num[channel_index] = samp_frac_num;
+    return out_sample;
 }
 
-static int multiply_frac(spx_uint32_t *result, spx_uint32_t value, spx_uint32_t num, spx_uint32_t den)
+static int multiply_frac(spx_uint32_t* result, spx_uint32_t value, spx_uint32_t num, spx_uint32_t den)
 {
-   spx_uint32_t major = value / den;
-   spx_uint32_t remain = value % den;
-   /* TODO: Could use 64 bits operation to check for overflow. But only guaranteed in C99+ */
-   if (remain > UINT32_MAX / num || major > UINT32_MAX / num
-       || major * num > UINT32_MAX - remain * num / den)
-      return RESAMPLER_ERR_OVERFLOW;
-   *result = remain * num / den + major * num;
-   return RESAMPLER_ERR_SUCCESS;
+    spx_uint32_t major = value / den;
+    spx_uint32_t remain = value % den;
+    /* TODO: Could use 64-bit operations to check for overflow, but those are only guaranteed in C99+ */
+    if (remain > UINT32_MAX / num || major > UINT32_MAX / num
+        || major * num > UINT32_MAX - remain * num / den)
+        return RESAMPLER_ERR_OVERFLOW;
+    *result = remain * num / den + major * num;
+    return RESAMPLER_ERR_SUCCESS;
 }
 
-static int update_filter(SpeexResamplerState *st)
+static int update_filter(SpeexResamplerState* st)
 {
-   spx_uint32_t old_length = st->filt_len;
-   spx_uint32_t old_alloc_size = st->mem_alloc_size;
-   int use_direct;
-   spx_uint32_t min_sinc_table_length;
-   spx_uint32_t min_alloc_size;
+    spx_uint32_t old_length = st->filt_len;
+    spx_uint32_t old_alloc_size = st->mem_alloc_size;
+    int use_direct;
+    spx_uint32_t min_sinc_table_length;
+    spx_uint32_t min_alloc_size;
 
-   st->int_advance = st->num_rate/st->den_rate;
-   st->frac_advance = st->num_rate%st->den_rate;
-   st->oversample = quality_map[st->quality].oversample;
-   st->filt_len = quality_map[st->quality].base_length;
+    st->int_advance = st->num_rate / st->den_rate;
+    st->frac_advance = st->num_rate % st->den_rate;
+    st->oversample = quality_map[st->quality].oversample;
+    st->filt_len = quality_map[st->quality].base_length;
 
-   if (st->num_rate > st->den_rate)
-   {
-      /* down-sampling */
-      st->cutoff = quality_map[st->quality].downsample_bandwidth * st->den_rate / st->num_rate;
-      if (multiply_frac(&st->filt_len,st->filt_len,st->num_rate,st->den_rate) != RESAMPLER_ERR_SUCCESS)
-         goto fail;
-      /* Round up to make sure we have a multiple of 8 for SSE */
-      st->filt_len = ((st->filt_len-1)&(~0x7))+8;
-      if (2*st->den_rate < st->num_rate)
-         st->oversample >>= 1;
-      if (4*st->den_rate < st->num_rate)
-         st->oversample >>= 1;
-      if (8*st->den_rate < st->num_rate)
-         st->oversample >>= 1;
-      if (16*st->den_rate < st->num_rate)
-         st->oversample >>= 1;
-      if (st->oversample < 1)
-         st->oversample = 1;
-   } else {
-      /* up-sampling */
-      st->cutoff = quality_map[st->quality].upsample_bandwidth;
-   }
+    if (st->num_rate > st->den_rate)
+    {
+        /* down-sampling */
+        st->cutoff = quality_map[st->quality].downsample_bandwidth * st->den_rate / st->num_rate;
+        if (multiply_frac(&st->filt_len, st->filt_len, st->num_rate, st->den_rate) != RESAMPLER_ERR_SUCCESS)
+            goto fail;
+        /* Round up to make sure we have a multiple of 8 for SSE */
+        st->filt_len = ((st->filt_len - 1) & (~0x7)) + 8;
+        if (2 * st->den_rate < st->num_rate)
+            st->oversample >>= 1;
+        if (4 * st->den_rate < st->num_rate)
+            st->oversample >>= 1;
+        if (8 * st->den_rate < st->num_rate)
+            st->oversample >>= 1;
+        if (16 * st->den_rate < st->num_rate)
+            st->oversample >>= 1;
+        if (st->oversample < 1)
+            st->oversample = 1;
+    }
+    else {
+        /* up-sampling */
+        st->cutoff = quality_map[st->quality].upsample_bandwidth;
+    }
 
 #ifdef RESAMPLE_FULL_SINC_TABLE
-   use_direct = 1;
-   if (INT_MAX/sizeof(spx_word16_t)/st->den_rate < st->filt_len)
-      goto fail;
+    use_direct = 1;
+    if (INT_MAX / sizeof(spx_word16_t) / st->den_rate < st->filt_len)
+        goto fail;
 #else
-   /* Choose the resampling type that requires the least amount of memory */
-   use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8
-                && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len;
+    /* Choose the resampling type that requires the least amount of memory */
+    use_direct = st->filt_len * st->den_rate <= st->filt_len * st->oversample + 8
+        && INT_MAX / sizeof(spx_word16_t) / st->den_rate >= st->filt_len;
 #endif
-   if (use_direct)
-   {
-      min_sinc_table_length = st->filt_len*st->den_rate;
-   } else {
-      if ((INT_MAX/sizeof(spx_word16_t)-8)/st->oversample < st->filt_len)
-         goto fail;
+    if (use_direct)
+    {
+        min_sinc_table_length = st->filt_len * st->den_rate;
+    }
+    else {
+        if ((INT_MAX / sizeof(spx_word16_t) - 8) / st->oversample < st->filt_len)
+            goto fail;
 
-      min_sinc_table_length = st->filt_len*st->oversample+8;
-   }
-   if (st->sinc_table_length < min_sinc_table_length)
-   {
-      spx_word16_t *sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,min_sinc_table_length*sizeof(spx_word16_t));
-      if (!sinc_table)
-         goto fail;
+        min_sinc_table_length = st->filt_len * st->oversample + 8;
+    }
+    if (st->sinc_table_length < min_sinc_table_length)
+    {
+        spx_word16_t* sinc_table = (spx_word16_t*)speex_realloc(st->sinc_table, min_sinc_table_length * sizeof(spx_word16_t));
+        if (!sinc_table)
+            goto fail;
 
-      st->sinc_table = sinc_table;
-      st->sinc_table_length = min_sinc_table_length;
-   }
-   if (use_direct)
-   {
-      spx_uint32_t i;
-      for (i=0;i<st->den_rate;i++)
-      {
-         spx_int32_t j;
-         for (j=0;j<st->filt_len;j++)
-         {
-            st->sinc_table[i*st->filt_len+j] = sinc(st->cutoff,((j-(spx_int32_t)st->filt_len/2+1)-((float)i)/st->den_rate), st->filt_len, quality_map[st->quality].window_func);
-         }
-      }
-#ifdef FIXED_POINT
-      st->resampler_ptr = resampler_basic_direct_single;
-#else
-      if (st->quality>8)
-         st->resampler_ptr = resampler_basic_direct_double;
-      else
-         st->resampler_ptr = resampler_basic_direct_single;
-#endif
-      /*fprintf (stderr, "resampler uses direct sinc table and normalised cutoff %f\n", cutoff);*/
-   } else {
-      spx_int32_t i;
-      for (i=-4;i<(spx_int32_t)(st->oversample*st->filt_len+4);i++)
-         st->sinc_table[i+4] = sinc(st->cutoff,(i/(float)st->oversample - st->filt_len/2), st->filt_len, quality_map[st->quality].window_func);
-#ifdef FIXED_POINT
-      st->resampler_ptr = resampler_basic_interpolate_single;
-#else
-      if (st->quality>8)
-         st->resampler_ptr = resampler_basic_interpolate_double;
-      else
-         st->resampler_ptr = resampler_basic_interpolate_single;
-#endif
-      /*fprintf (stderr, "resampler uses interpolated sinc table and normalised cutoff %f\n", cutoff);*/
-   }
-
-   /* Here's the place where we update the filter memory to take into account
-      the change in filter length. It's probably the messiest part of the code
-      due to handling of lots of corner cases. */
-
-   /* Adding buffer_size to filt_len won't overflow here because filt_len
-      could be multiplied by sizeof(spx_word16_t) above. */
-   min_alloc_size = st->filt_len-1 + st->buffer_size;
-   if (min_alloc_size > st->mem_alloc_size)
-   {
-      spx_word16_t *mem;
-      if (INT_MAX/sizeof(spx_word16_t)/st->nb_channels < min_alloc_size)
-          goto fail;
-      else if (!(mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*min_alloc_size * sizeof(*mem))))
-          goto fail;
-
-      st->mem = mem;
-      st->mem_alloc_size = min_alloc_size;
-   }
-   if (!st->started)
-   {
-      spx_uint32_t i;
-      for (i=0;i<st->nb_channels*st->mem_alloc_size;i++)
-         st->mem[i] = 0;
-      /*speex_warning("reinit filter");*/
-   } else if (st->filt_len > old_length)
-   {
-      spx_uint32_t i;
-      /* Increase the filter length */
-      /*speex_warning("increase filter size");*/
-      for (i=st->nb_channels;i--;)
-      {
-         spx_uint32_t j;
-         spx_uint32_t olen = old_length;
-         /*if (st->magic_samples[i])*/
-         {
-            /* Try and remove the magic samples as if nothing had happened */
-
-            /* FIXME: This is wrong but for now we need it to avoid going over the array bounds */
-            olen = old_length + 2*st->magic_samples[i];
-            for (j=old_length-1+st->magic_samples[i];j--;)
-               st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]] = st->mem[i*old_alloc_size+j];
-            for (j=0;j<st->magic_samples[i];j++)
-               st->mem[i*st->mem_alloc_size+j] = 0;
-            st->magic_samples[i] = 0;
-         }
-         if (st->filt_len > olen)
-         {
-            /* If the new filter length is still bigger than the "augmented" length */
-            /* Copy data going backward */
-            for (j=0;j<olen-1;j++)
-               st->mem[i*st->mem_alloc_size+(st->filt_len-2-j)] = st->mem[i*st->mem_alloc_size+(olen-2-j)];
-            /* Then put zeros for lack of anything better */
-            for (;j<st->filt_len-1;j++)
-               st->mem[i*st->mem_alloc_size+(st->filt_len-2-j)] = 0;
-            /* Adjust last_sample */
-            st->last_sample[i] += (st->filt_len - olen)/2;
-         } else {
-            /* Put back some of the magic! */
-            st->magic_samples[i] = (olen - st->filt_len)/2;
-            for (j=0;j<st->filt_len-1+st->magic_samples[i];j++)
-               st->mem[i*st->mem_alloc_size+j] = st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]];
-         }
-      }
-   } else if (st->filt_len < old_length)
-   {
-      spx_uint32_t i;
-      /* Reduce filter length, this a bit tricky. We need to store some of the memory as "magic"
-         samples so they can be used directly as input the next time(s) */
-      for (i=0;i<st->nb_channels;i++)
-      {
-         spx_uint32_t j;
-         spx_uint32_t old_magic = st->magic_samples[i];
-         st->magic_samples[i] = (old_length - st->filt_len)/2;
-         /* We must copy some of the memory that's no longer used */
-         /* Copy data going backward */
-         for (j=0;j<st->filt_len-1+st->magic_samples[i]+old_magic;j++)
-            st->mem[i*st->mem_alloc_size+j] = st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]];
-         st->magic_samples[i] += old_magic;
-      }
-   }
-   return RESAMPLER_ERR_SUCCESS;
-
-fail:
-   st->resampler_ptr = resampler_basic_zero;
-   /* st->mem may still contain consumed input samples for the filter.
-      Restore filt_len so that filt_len - 1 still points to the position after
-      the last of these samples. */
-   st->filt_len = old_length;
-   return RESAMPLER_ERR_ALLOC_FAILED;
-}
-
-EXPORT SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
-{
-   return speex_resampler_init_frac(nb_channels, in_rate, out_rate, in_rate, out_rate, quality, err);
-}
-
-EXPORT SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err)
-{
-   SpeexResamplerState *st;
-   int filter_err;
-
-   if (nb_channels == 0 || ratio_num == 0 || ratio_den == 0 || quality > 10 || quality < 0)
-   {
-      if (err)
-         *err = RESAMPLER_ERR_INVALID_ARG;
-      return NULL;
-   }
-   st = (SpeexResamplerState *)speex_alloc(sizeof(SpeexResamplerState));
-   if (!st)
-   {
-      if (err)
-         *err = RESAMPLER_ERR_ALLOC_FAILED;
-      return NULL;
-   }
-   st->initialised = 0;
-   st->started = 0;
-   st->in_rate = 0;
-   st->out_rate = 0;
-   st->num_rate = 0;
-   st->den_rate = 0;
-   st->quality = -1;
-   st->sinc_table_length = 0;
-   st->mem_alloc_size = 0;
-   st->filt_len = 0;
-   st->mem = 0;
-   st->resampler_ptr = 0;
-
-   st->cutoff = 1.f;
-   st->nb_channels = nb_channels;
-   st->in_stride = 1;
-   st->out_stride = 1;
-
-   st->buffer_size = 160;
-
-   /* Per channel data */
-   if (!(st->last_sample = (spx_int32_t*)speex_alloc(nb_channels*sizeof(spx_int32_t))))
-      goto fail;
-   if (!(st->magic_samples = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t))))
-      goto fail;
-   if (!(st->samp_frac_num = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t))))
-      goto fail;
-
-   speex_resampler_set_quality(st, quality);
-   speex_resampler_set_rate_frac(st, ratio_num, ratio_den, in_rate, out_rate);
-
-   filter_err = update_filter(st);
-   if (filter_err == RESAMPLER_ERR_SUCCESS)
-   {
-      st->initialised = 1;
-   } else {
-      speex_resampler_destroy(st);
-      st = NULL;
-   }
-   if (err)
-      *err = filter_err;
-
-   return st;
-
-fail:
-   if (err)
-      *err = RESAMPLER_ERR_ALLOC_FAILED;
-   speex_resampler_destroy(st);
-   return NULL;
-}
-
-EXPORT void speex_resampler_destroy(SpeexResamplerState *st)
-{
-   speex_free(st->mem);
-   speex_free(st->sinc_table);
-   speex_free(st->last_sample);
-   speex_free(st->magic_samples);
-   speex_free(st->samp_frac_num);
-   speex_free(st);
-}
-
-static int speex_resampler_process_native(SpeexResamplerState *st, spx_uint32_t channel_index, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len)
-{
-   int j=0;
-   const int N = st->filt_len;
-   int out_sample = 0;
-   spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
-   spx_uint32_t ilen;
-
-   st->started = 1;
-
-   /* Call the right resampler through the function ptr */
-   out_sample = st->resampler_ptr(st, channel_index, mem, in_len, out, out_len);
-
-   if (st->last_sample[channel_index] < (spx_int32_t)*in_len)
-      *in_len = st->last_sample[channel_index];
-   *out_len = out_sample;
-   st->last_sample[channel_index] -= *in_len;
-
-   ilen = *in_len;
-
-   for(j=0;j<N-1;++j)
-     mem[j] = mem[j+ilen];
-
-   return RESAMPLER_ERR_SUCCESS;
-}
-
-static int speex_resampler_magic(SpeexResamplerState *st, spx_uint32_t channel_index, spx_word16_t **out, spx_uint32_t out_len) {
-   spx_uint32_t tmp_in_len = st->magic_samples[channel_index];
-   spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size;
-   const int N = st->filt_len;
-
-   speex_resampler_process_native(st, channel_index, &tmp_in_len, *out, &out_len);
-
-   st->magic_samples[channel_index] -= tmp_in_len;
-
-   /* If we couldn't process all "magic" input samples, save the rest for next time */
-   if (st->magic_samples[channel_index])
-   {
-      spx_uint32_t i;
-      for (i=0;i<st->magic_samples[channel_index];i++)
-         mem[N-1+i]=mem[N-1+i+tmp_in_len];
-   }
-   *out += out_len*st->out_stride;
-   return out_len;
-}
-
-#ifdef FIXED_POINT
-EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
-#else
-EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
-#endif
-{
-   unsigned int j;
-   spx_uint32_t ilen = *in_len;
-   spx_uint32_t olen = *out_len;
-   spx_word16_t *x = st->mem + channel_index * st->mem_alloc_size;
-   const int filt_offs = st->filt_len - 1;
-   const spx_uint32_t xlen = st->mem_alloc_size - filt_offs;
-   const int istride = st->in_stride;
-
-   if (st->magic_samples[channel_index])
-      olen -= speex_resampler_magic(st, channel_index, &out, olen);
-   if (! st->magic_samples[channel_index]) {
-      while (ilen && olen) {
-        spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
-        spx_uint32_t ochunk = olen;
-
-        if (in) {
-           for(j=0;j<ichunk;++j)
-              x[j+filt_offs]=in[j*istride];
-        } else {
-          for(j=0;j<ichunk;++j)
-            x[j+filt_offs]=0;
+        st->sinc_table = sinc_table;
+        st->sinc_table_length = min_sinc_table_length;
+    }
+    if (use_direct)
+    {
+        spx_uint32_t i;
+        for (i = 0; i < st->den_rate; i++)
+        {
+            spx_int32_t j;
+            for (j = 0; j < st->filt_len; j++)
+            {
+                st->sinc_table[i * st->filt_len + j] = sinc(st->cutoff, ((j - (spx_int32_t)st->filt_len / 2 + 1) - ((float)i) / st->den_rate), st->filt_len, quality_map[st->quality].window_func);
+            }
         }
-        speex_resampler_process_native(st, channel_index, &ichunk, out, &ochunk);
+#ifdef FIXED_POINT
+        st->resampler_ptr = resampler_basic_direct_single;
+#else
+        if (st->quality > 8)
+            st->resampler_ptr = resampler_basic_direct_double;
+        else
+            st->resampler_ptr = resampler_basic_direct_single;
+#endif
+        /*fprintf (stderr, "resampler uses direct sinc table and normalised cutoff %f\n", cutoff);*/
+    }
+    else {
+        spx_int32_t i;
+        for (i = -4; i < (spx_int32_t)(st->oversample * st->filt_len + 4); i++)
+            st->sinc_table[i + 4] = sinc(st->cutoff, (i / (float)st->oversample - st->filt_len / 2), st->filt_len, quality_map[st->quality].window_func);
+#ifdef FIXED_POINT
+        st->resampler_ptr = resampler_basic_interpolate_single;
+#else
+        if (st->quality > 8)
+            st->resampler_ptr = resampler_basic_interpolate_double;
+        else
+            st->resampler_ptr = resampler_basic_interpolate_single;
+#endif
+        /*fprintf (stderr, "resampler uses interpolated sinc table and normalised cutoff %f\n", cutoff);*/
+    }
+
+    /* Here's the place where we update the filter memory to take into account
+       the change in filter length. It's probably the messiest part of the code
+       due to handling of lots of corner cases. */
+
+    /* Adding buffer_size to filt_len won't overflow here because filt_len
+       could be multiplied by sizeof(spx_word16_t) above. */
+    min_alloc_size = st->filt_len - 1 + st->buffer_size;
+    if (min_alloc_size > st->mem_alloc_size)
+    {
+        spx_word16_t* mem;
+        if (INT_MAX / sizeof(spx_word16_t) / st->nb_channels < min_alloc_size)
+            goto fail;
+        else if (!(mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels * min_alloc_size * sizeof(*mem))))
+            goto fail;
+
+        st->mem = mem;
+        st->mem_alloc_size = min_alloc_size;
+    }
+    if (!st->started)
+    {
+        spx_uint32_t i;
+        for (i = 0; i < st->nb_channels * st->mem_alloc_size; i++)
+            st->mem[i] = 0;
+        /*speex_warning("reinit filter");*/
+    }
+    else if (st->filt_len > old_length)
+    {
+        spx_uint32_t i;
+        /* Increase the filter length */
+        /*speex_warning("increase filter size");*/
+        for (i = st->nb_channels; i--;)
+        {
+            spx_uint32_t j;
+            spx_uint32_t olen = old_length;
+            /*if (st->magic_samples[i])*/
+            {
+                /* Try and remove the magic samples as if nothing had happened */
+
+                /* FIXME: This is wrong but for now we need it to avoid going over the array bounds */
+                olen = old_length + 2 * st->magic_samples[i];
+                for (j = old_length - 1 + st->magic_samples[i]; j--;)
+                    st->mem[i * st->mem_alloc_size + j + st->magic_samples[i]] = st->mem[i * old_alloc_size + j];
+                for (j = 0; j < st->magic_samples[i]; j++)
+                    st->mem[i * st->mem_alloc_size + j] = 0;
+                st->magic_samples[i] = 0;
+            }
+            if (st->filt_len > olen)
+            {
+                /* If the new filter length is still bigger than the "augmented" length */
+                /* Copy data going backward */
+                for (j = 0; j < olen - 1; j++)
+                    st->mem[i * st->mem_alloc_size + (st->filt_len - 2 - j)] = st->mem[i * st->mem_alloc_size + (olen - 2 - j)];
+                /* Then put zeros for lack of anything better */
+                for (; j < st->filt_len - 1; j++)
+                    st->mem[i * st->mem_alloc_size + (st->filt_len - 2 - j)] = 0;
+                /* Adjust last_sample */
+                st->last_sample[i] += (st->filt_len - olen) / 2;
+            }
+            else {
+                /* Put back some of the magic! */
+                st->magic_samples[i] = (olen - st->filt_len) / 2;
+                for (j = 0; j < st->filt_len - 1 + st->magic_samples[i]; j++)
+                    st->mem[i * st->mem_alloc_size + j] = st->mem[i * st->mem_alloc_size + j + st->magic_samples[i]];
+            }
+        }
+    }
+    else if (st->filt_len < old_length)
+    {
+        spx_uint32_t i;
+        /* Reduce filter length, this is a bit tricky. We need to store some of the memory as "magic"
+           samples so they can be used directly as input the next time(s) */
+        for (i = 0; i < st->nb_channels; i++)
+        {
+            spx_uint32_t j;
+            spx_uint32_t old_magic = st->magic_samples[i];
+            st->magic_samples[i] = (old_length - st->filt_len) / 2;
+            /* We must copy some of the memory that's no longer used */
+            /* Shift the retained samples toward the start of the buffer */
+            for (j = 0; j < st->filt_len - 1 + st->magic_samples[i] + old_magic; j++)
+                st->mem[i * st->mem_alloc_size + j] = st->mem[i * st->mem_alloc_size + j + st->magic_samples[i]];
+            st->magic_samples[i] += old_magic;
+        }
+    }
+    return RESAMPLER_ERR_SUCCESS;
+
+fail:
+    st->resampler_ptr = resampler_basic_zero;
+    /* st->mem may still contain consumed input samples for the filter.
+       Restore filt_len so that filt_len - 1 still points to the position after
+       the last of these samples. */
+    st->filt_len = old_length;
+    return RESAMPLER_ERR_ALLOC_FAILED;
+}
+
+EXPORT SpeexResamplerState* speex_resampler_init(spx_uint32_t nb_channels, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int* err)
+{
+    return speex_resampler_init_frac(nb_channels, in_rate, out_rate, in_rate, out_rate, quality, err);
+}
+
+EXPORT SpeexResamplerState* speex_resampler_init_frac(spx_uint32_t nb_channels, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int* err)
+{
+    SpeexResamplerState* st;
+    int filter_err;
+
+    if (nb_channels == 0 || ratio_num == 0 || ratio_den == 0 || quality > 10 || quality < 0)
+    {
+        if (err)
+            *err = RESAMPLER_ERR_INVALID_ARG;
+        return NULL;
+    }
+    st = (SpeexResamplerState*)speex_alloc(sizeof(SpeexResamplerState));
+    if (!st)
+    {
+        if (err)
+            *err = RESAMPLER_ERR_ALLOC_FAILED;
+        return NULL;
+    }
+    st->initialised = 0;
+    st->started = 0;
+    st->in_rate = 0;
+    st->out_rate = 0;
+    st->num_rate = 0;
+    st->den_rate = 0;
+    st->quality = -1;
+    st->sinc_table_length = 0;
+    st->mem_alloc_size = 0;
+    st->filt_len = 0;
+    st->mem = 0;
+    st->resampler_ptr = 0;
+
+    st->cutoff = 1.f;
+    st->nb_channels = nb_channels;
+    st->in_stride = 1;
+    st->out_stride = 1;
+
+    st->buffer_size = 160;
+
+    /* Per channel data */
+    if (!(st->last_sample = (spx_int32_t*)speex_alloc(nb_channels * sizeof(spx_int32_t))))
+        goto fail;
+    if (!(st->magic_samples = (spx_uint32_t*)speex_alloc(nb_channels * sizeof(spx_uint32_t))))
+        goto fail;
+    if (!(st->samp_frac_num = (spx_uint32_t*)speex_alloc(nb_channels * sizeof(spx_uint32_t))))
+        goto fail;
+
+    speex_resampler_set_quality(st, quality);
+    speex_resampler_set_rate_frac(st, ratio_num, ratio_den, in_rate, out_rate);
+
+    filter_err = update_filter(st);
+    if (filter_err == RESAMPLER_ERR_SUCCESS)
+    {
+        st->initialised = 1;
+    }
+    else {
+        speex_resampler_destroy(st);
+        st = NULL;
+    }
+    if (err)
+        *err = filter_err;
+
+    return st;
+
+fail:
+    if (err)
+        *err = RESAMPLER_ERR_ALLOC_FAILED;
+    speex_resampler_destroy(st);
+    return NULL;
+}
+
+EXPORT void speex_resampler_destroy(SpeexResamplerState* st)
+{
+    speex_free(st->mem);
+    speex_free(st->sinc_table);
+    speex_free(st->last_sample);
+    speex_free(st->magic_samples);
+    speex_free(st->samp_frac_num);
+    speex_free(st);
+}
+
+static int speex_resampler_process_native(SpeexResamplerState* st, spx_uint32_t channel_index, spx_uint32_t* in_len, spx_word16_t* out, spx_uint32_t* out_len)
+{
+    int j = 0;
+    const int N = st->filt_len;
+    int out_sample = 0;
+    spx_word16_t* mem = st->mem + channel_index * st->mem_alloc_size;
+    spx_uint32_t ilen;
+
+    st->started = 1;
+
+    /* Call the right resampler through the function ptr */
+    out_sample = st->resampler_ptr(st, channel_index, mem, in_len, out, out_len);
+
+    if (st->last_sample[channel_index] < (spx_int32_t)*in_len)
+        *in_len = st->last_sample[channel_index];
+    *out_len = out_sample;
+    st->last_sample[channel_index] -= *in_len;
+
+    ilen = *in_len;
+
+    for (j = 0; j < N - 1; ++j)
+        mem[j] = mem[j + ilen];
+
+    return RESAMPLER_ERR_SUCCESS;
+}
+
+static int speex_resampler_magic(SpeexResamplerState* st, spx_uint32_t channel_index, spx_word16_t** out, spx_uint32_t out_len) {
+    spx_uint32_t tmp_in_len = st->magic_samples[channel_index];
+    spx_word16_t* mem = st->mem + channel_index * st->mem_alloc_size;
+    const int N = st->filt_len;
+
+    speex_resampler_process_native(st, channel_index, &tmp_in_len, *out, &out_len);
+
+    st->magic_samples[channel_index] -= tmp_in_len;
+
+    /* If we couldn't process all "magic" input samples, save the rest for next time */
+    if (st->magic_samples[channel_index])
+    {
+        spx_uint32_t i;
+        for (i = 0; i < st->magic_samples[channel_index]; i++)
+            mem[N - 1 + i] = mem[N - 1 + i + tmp_in_len];
+    }
+    *out += out_len * st->out_stride;
+    return out_len;
+}
+
+#ifdef FIXED_POINT
+EXPORT int speex_resampler_process_int(SpeexResamplerState* st, spx_uint32_t channel_index, const spx_int16_t* in, spx_uint32_t* in_len, spx_int16_t* out, spx_uint32_t* out_len)
+#else
+EXPORT int speex_resampler_process_float(SpeexResamplerState* st, spx_uint32_t channel_index, const float* in, spx_uint32_t* in_len, float* out, spx_uint32_t* out_len)
+#endif
+{
+    int j;
+    spx_uint32_t ilen = *in_len;
+    spx_uint32_t olen = *out_len;
+    spx_word16_t* x = st->mem + channel_index * st->mem_alloc_size;
+    const int filt_offs = st->filt_len - 1;
+    const spx_uint32_t xlen = st->mem_alloc_size - filt_offs;
+    const int istride = st->in_stride;
+
+    if (st->magic_samples[channel_index])
+        olen -= speex_resampler_magic(st, channel_index, &out, olen);
+    if (!st->magic_samples[channel_index]) {
+        while (ilen && olen) {
+            spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
+            spx_uint32_t ochunk = olen;
+
+            if (in) {
+                for (j = 0; j < ichunk; ++j)
+                    x[j + filt_offs] = in[j * istride];
+            }
+            else {
+                for (j = 0; j < ichunk; ++j)
+                    x[j + filt_offs] = 0;
+            }
+            speex_resampler_process_native(st, channel_index, &ichunk, out, &ochunk);
+            ilen -= ichunk;
+            olen -= ochunk;
+            out += ochunk * st->out_stride;
+            if (in)
+                in += ichunk * istride;
+        }
+    }
+    *in_len -= ilen;
+    *out_len -= olen;
+    return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
+}
+
+#ifdef FIXED_POINT /* Converting entry point: the float API in fixed-point builds, the int16 API otherwise. */
+EXPORT int speex_resampler_process_float(SpeexResamplerState* st, spx_uint32_t channel_index, const float* in, spx_uint32_t* in_len, float* out, spx_uint32_t* out_len)
+#else
+EXPORT int speex_resampler_process_int(SpeexResamplerState* st, spx_uint32_t channel_index, const spx_int16_t* in, spx_uint32_t* in_len, spx_int16_t* out, spx_uint32_t* out_len)
+#endif
+{
+    int j;
+    const int istride_save = st->in_stride;
+    const int ostride_save = st->out_stride;
+    spx_uint32_t ilen = *in_len; /* input samples still to consume */
+    spx_uint32_t olen = *out_len; /* output slots still free */
+    spx_word16_t* x = st->mem + channel_index * st->mem_alloc_size; /* this channel's slice of st->mem */
+    const spx_uint32_t xlen = st->mem_alloc_size - (st->filt_len - 1); /* room left after the (filt_len - 1)-sample prefix */
+#ifdef VAR_ARRAYS
+    const unsigned int ylen = (olen < FIXED_STACK_ALLOC) ? olen : FIXED_STACK_ALLOC;
+    spx_word16_t ystack[ylen ? ylen : 1]; /* a VLA size must be > 0, so keep one element when *out_len == 0 */
+#else
+    const unsigned int ylen = FIXED_STACK_ALLOC;
+    spx_word16_t ystack[FIXED_STACK_ALLOC];
+#endif
+
+    st->out_stride = 1; /* ystack is filled densely (presumably process_native honours st->out_stride - confirm); the caller's stride is applied in the copy-out loop */
+
+    while (ilen && olen) {
+        spx_word16_t* y = ystack;
+        spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen; /* clamp to the channel buffer */
+        spx_uint32_t ochunk = (olen > ylen) ? ylen : olen; /* clamp to the scratch buffer */
+        spx_uint32_t omagic = 0;
+
+        if (st->magic_samples[channel_index]) {
+            omagic = speex_resampler_magic(st, channel_index, &y, ochunk); /* pending "magic" samples go first; the result is counted as output below */
+            ochunk -= omagic;
+            olen -= omagic;
+        }
+        if (!st->magic_samples[channel_index]) {
+            if (in) {
+                for (j = 0; j < ichunk; ++j) /* stage the strided input after the (filt_len - 1)-sample prefix */
+#ifdef FIXED_POINT
+                    x[j + st->filt_len - 1] = WORD2INT(in[j * istride_save]);
+#else
+                    x[j + st->filt_len - 1] = in[j * istride_save];
+#endif
+            }
+            else {
+                for (j = 0; j < ichunk; ++j) /* a NULL input is fed as zeros */
+                    x[j + st->filt_len - 1] = 0;
+            }
+
+            speex_resampler_process_native(st, channel_index, &ichunk, y, &ochunk);
+        }
+        else {
+            ichunk = 0; /* magic samples are still pending: consume and produce nothing more this pass */
+            ochunk = 0;
+        }
+
+        for (j = 0; j < ochunk + omagic; ++j) /* copy the scratch buffer out using the caller's stride */
+#ifdef FIXED_POINT
+            out[j * ostride_save] = ystack[j];
+#else
+            out[j * ostride_save] = WORD2INT(ystack[j]);
+#endif
+
         ilen -= ichunk;
         olen -= ochunk;
-        out += ochunk * st->out_stride;
+        out += (ochunk + omagic) * ostride_save; /* advance past the magic and the freshly resampled samples */
         if (in)
-           in += ichunk * istride;
-      }
-   }
-   *in_len -= ilen;
-   *out_len -= olen;
-   return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
+            in += ichunk * istride_save;
+    }
+    st->out_stride = ostride_save; /* restore the caller-visible stride */
+    *in_len -= ilen; /* report the number of input samples consumed */
+    *out_len -= olen; /* report the number of output samples written */
+
+    return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
 }
 
-#ifdef FIXED_POINT
-EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
-#else
-EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
-#endif
+EXPORT int speex_resampler_process_interleaved_float(SpeexResamplerState* st, const float* in, spx_uint32_t* in_len, float* out, spx_uint32_t* out_len)
 {
-   unsigned int j;
-   const int istride_save = st->in_stride;
-   const int ostride_save = st->out_stride;
-   spx_uint32_t ilen = *in_len;
-   spx_uint32_t olen = *out_len;
-   spx_word16_t *x = st->mem + channel_index * st->mem_alloc_size;
-   const spx_uint32_t xlen = st->mem_alloc_size - (st->filt_len - 1);
-#ifdef VAR_ARRAYS
-   const unsigned int ylen = (olen < FIXED_STACK_ALLOC) ? olen : FIXED_STACK_ALLOC;
-   spx_word16_t ystack[ylen];
-#else
-   const unsigned int ylen = FIXED_STACK_ALLOC;
-   spx_word16_t ystack[FIXED_STACK_ALLOC];
-#endif
-
-   st->out_stride = 1;
-
-   while (ilen && olen) {
-     spx_word16_t *y = ystack;
-     spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen;
-     spx_uint32_t ochunk = (olen > ylen) ? ylen : olen;
-     spx_uint32_t omagic = 0;
-
-     if (st->magic_samples[channel_index]) {
-       omagic = speex_resampler_magic(st, channel_index, &y, ochunk);
-       ochunk -= omagic;
-       olen -= omagic;
-     }
-     if (! st->magic_samples[channel_index]) {
-       if (in) {
-         for(j=0;j<ichunk;++j)
-#ifdef FIXED_POINT
-           x[j+st->filt_len-1]=WORD2INT(in[j*istride_save]);
-#else
-           x[j+st->filt_len-1]=in[j*istride_save];
-#endif
-       } else {
-         for(j=0;j<ichunk;++j)
-           x[j+st->filt_len-1]=0;
-       }
-
-       speex_resampler_process_native(st, channel_index, &ichunk, y, &ochunk);
-     } else {
-       ichunk = 0;
-       ochunk = 0;
-     }
-
-     for (j=0;j<ochunk+omagic;++j)
-#ifdef FIXED_POINT
-        out[j*ostride_save] = ystack[j];
-#else
-        out[j*ostride_save] = WORD2INT(ystack[j]);
-#endif
-
-     ilen -= ichunk;
-     olen -= ochunk;
-     out += (ochunk+omagic) * ostride_save;
-     if (in)
-       in += ichunk * istride_save;
-   }
-   st->out_stride = ostride_save;
-   *in_len -= ilen;
-   *out_len -= olen;
-
-   return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
+    spx_uint32_t i;
+    int istride_save, ostride_save;
+    spx_uint32_t bak_out_len = *out_len; /* every channel gets the same budgets, so keep the caller's values */
+    spx_uint32_t bak_in_len = *in_len;
+    istride_save = st->in_stride;
+    ostride_save = st->out_stride;
+    st->in_stride = st->out_stride = st->nb_channels; /* interleaved layout: consecutive samples of one channel are nb_channels apart */
+    for (i = 0; i < st->nb_channels; i++)
+    {
+        *out_len = bak_out_len; /* reset both lengths: the per-channel call overwrites them */
+        *in_len = bak_in_len;
+        if (in != NULL)
+            speex_resampler_process_float(st, i, in + i, in_len, out + i, out_len); /* channel i starts at offset i */
+        else
+            speex_resampler_process_float(st, i, NULL, in_len, out + i, out_len); /* a NULL input is forwarded as NULL */
+    }
+    st->in_stride = istride_save; /* restore the caller-visible strides */
+    st->out_stride = ostride_save;
+    return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS; /* per-channel return values are not inspected; the lengths hold the last channel's counts */
 }
 
-EXPORT int speex_resampler_process_interleaved_float(SpeexResamplerState *st, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len)
+EXPORT int speex_resampler_process_interleaved_int(SpeexResamplerState* st, const spx_int16_t* in, spx_uint32_t* in_len, spx_int16_t* out, spx_uint32_t* out_len)
 {
-   spx_uint32_t i;
-   int istride_save, ostride_save;
-   spx_uint32_t bak_out_len = *out_len;
-   spx_uint32_t bak_in_len = *in_len;
-   istride_save = st->in_stride;
-   ostride_save = st->out_stride;
-   st->in_stride = st->out_stride = st->nb_channels;
-   for (i=0;i<st->nb_channels;i++)
-   {
-      *out_len = bak_out_len;
-      *in_len = bak_in_len;
-      if (in != NULL)
-         speex_resampler_process_float(st, i, in+i, in_len, out+i, out_len);
-      else
-         speex_resampler_process_float(st, i, NULL, in_len, out+i, out_len);
-   }
-   st->in_stride = istride_save;
-   st->out_stride = ostride_save;
-   return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
+    spx_uint32_t i;
+    int istride_save, ostride_save;
+    spx_uint32_t bak_out_len = *out_len; /* every channel gets the same budgets, so keep the caller's values */
+    spx_uint32_t bak_in_len = *in_len;
+    istride_save = st->in_stride;
+    ostride_save = st->out_stride;
+    st->in_stride = st->out_stride = st->nb_channels; /* interleaved layout: consecutive samples of one channel are nb_channels apart */
+    for (i = 0; i < st->nb_channels; i++)
+    {
+        *out_len = bak_out_len; /* reset both lengths: the per-channel call overwrites them */
+        *in_len = bak_in_len;
+        if (in != NULL)
+            speex_resampler_process_int(st, i, in + i, in_len, out + i, out_len); /* channel i starts at offset i */
+        else
+            speex_resampler_process_int(st, i, NULL, in_len, out + i, out_len); /* a NULL input is forwarded as NULL */
+    }
+    st->in_stride = istride_save; /* restore the caller-visible strides */
+    st->out_stride = ostride_save;
+    return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS; /* per-channel return values are not inspected; the lengths hold the last channel's counts */
 }
 
-EXPORT int speex_resampler_process_interleaved_int(SpeexResamplerState *st, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len)
+EXPORT int speex_resampler_set_rate(SpeexResamplerState* st, spx_uint32_t in_rate, spx_uint32_t out_rate)
 {
-   spx_uint32_t i;
-   int istride_save, ostride_save;
-   spx_uint32_t bak_out_len = *out_len;
-   spx_uint32_t bak_in_len = *in_len;
-   istride_save = st->in_stride;
-   ostride_save = st->out_stride;
-   st->in_stride = st->out_stride = st->nb_channels;
-   for (i=0;i<st->nb_channels;i++)
-   {
-      *out_len = bak_out_len;
-      *in_len = bak_in_len;
-      if (in != NULL)
-         speex_resampler_process_int(st, i, in+i, in_len, out+i, out_len);
-      else
-         speex_resampler_process_int(st, i, NULL, in_len, out+i, out_len);
-   }
-   st->in_stride = istride_save;
-   st->out_stride = ostride_save;
-   return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS;
+    return speex_resampler_set_rate_frac(st, in_rate, out_rate, in_rate, out_rate); /* the two rates double as the ratio's numerator and denominator */
 }
 
-EXPORT int speex_resampler_set_rate(SpeexResamplerState *st, spx_uint32_t in_rate, spx_uint32_t out_rate)
+EXPORT void speex_resampler_get_rate(SpeexResamplerState* st, spx_uint32_t* in_rate, spx_uint32_t* out_rate) /* Copy the stored input/output rates into *in_rate and *out_rate. */
 {
-   return speex_resampler_set_rate_frac(st, in_rate, out_rate, in_rate, out_rate);
-}
-
-EXPORT void speex_resampler_get_rate(SpeexResamplerState *st, spx_uint32_t *in_rate, spx_uint32_t *out_rate)
-{
-   *in_rate = st->in_rate;
-   *out_rate = st->out_rate;
+    *in_rate = st->in_rate;
+    *out_rate = st->out_rate;
 }
 
 static inline spx_uint32_t compute_gcd(spx_uint32_t a, spx_uint32_t b)
 {
-   while (b != 0)
-   {
-      spx_uint32_t temp = a;
+    while (b != 0) /* Euclid's algorithm: replace (a, b) with (b, a mod b) until b reaches 0 */
+    {
+        spx_uint32_t temp = a;
 
-      a = b;
-      b = temp % b;
-   }
-   return a;
+        a = b;
+        b = temp % b;
+    }
+    return a; /* the greatest common divisor; the original a is returned unchanged when b started as 0 */
 }
 
-EXPORT int speex_resampler_set_rate_frac(SpeexResamplerState *st, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate)
+EXPORT int speex_resampler_set_rate_frac(SpeexResamplerState* st, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate) /* Store the rates and the ratio (reduced by its GCD), rescale per-channel fractions, and refresh the filter once initialised. */
 {
-   spx_uint32_t fact;
-   spx_uint32_t old_den;
-   spx_uint32_t i;
+    spx_uint32_t fact;
+    spx_uint32_t old_den;
+    spx_uint32_t i;
 
-   if (ratio_num == 0 || ratio_den == 0)
-      return RESAMPLER_ERR_INVALID_ARG;
+    if (ratio_num == 0 || ratio_den == 0) /* a zero term is rejected up front; it also keeps the GCD below non-zero */
+        return RESAMPLER_ERR_INVALID_ARG;
 
-   if (st->in_rate == in_rate && st->out_rate == out_rate && st->num_rate == ratio_num && st->den_rate == ratio_den)
-      return RESAMPLER_ERR_SUCCESS;
+    if (st->in_rate == in_rate && st->out_rate == out_rate && st->num_rate == ratio_num && st->den_rate == ratio_den) /* fast path; the stored ratio is reduced, so this matches only a ratio passed in lowest terms */
+        return RESAMPLER_ERR_SUCCESS;
 
-   old_den = st->den_rate;
-   st->in_rate = in_rate;
-   st->out_rate = out_rate;
-   st->num_rate = ratio_num;
-   st->den_rate = ratio_den;
+    old_den = st->den_rate; /* kept to rescale the per-channel fractional positions below */
+    st->in_rate = in_rate;
+    st->out_rate = out_rate;
+    st->num_rate = ratio_num;
+    st->den_rate = ratio_den;
 
-   fact = compute_gcd(st->num_rate, st->den_rate);
+    fact = compute_gcd(st->num_rate, st->den_rate);
 
-   st->num_rate /= fact;
-   st->den_rate /= fact;
+    st->num_rate /= fact; /* keep the ratio in lowest terms */
+    st->den_rate /= fact;
 
-   if (old_den > 0)
-   {
-      for (i=0;i<st->nb_channels;i++)
-      {
-         if (multiply_frac(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS)
-            return RESAMPLER_ERR_OVERFLOW;
-         /* Safety net */
-         if (st->samp_frac_num[i] >= st->den_rate)
-            st->samp_frac_num[i] = st->den_rate-1;
-      }
-   }
+    if (old_den > 0) /* skipped while no previous denominator is stored */
+    {
+        for (i = 0; i < st->nb_channels; i++)
+        {
+            if (multiply_frac(&st->samp_frac_num[i], st->samp_frac_num[i], st->den_rate, old_den) != RESAMPLER_ERR_SUCCESS) /* presumably samp_frac_num * den_rate / old_den with overflow detection - confirm against multiply_frac */
+                return RESAMPLER_ERR_OVERFLOW; /* NOTE(review): the rates stored above are not rolled back on this path */
+            /* Safety net */
+            if (st->samp_frac_num[i] >= st->den_rate)
+                st->samp_frac_num[i] = st->den_rate - 1;
+        }
+    }
 
-   if (st->initialised)
-      return update_filter(st);
-   return RESAMPLER_ERR_SUCCESS;
+    if (st->initialised) /* before initialisation only the settings are recorded */
+        return update_filter(st);
+    return RESAMPLER_ERR_SUCCESS;
 }
 
-EXPORT void speex_resampler_get_ratio(SpeexResamplerState *st, spx_uint32_t *ratio_num, spx_uint32_t *ratio_den)
+EXPORT void speex_resampler_get_ratio(SpeexResamplerState* st, spx_uint32_t* ratio_num, spx_uint32_t* ratio_den) /* Copy the stored num_rate/den_rate pair into *ratio_num and *ratio_den. */
 {
-   *ratio_num = st->num_rate;
-   *ratio_den = st->den_rate;
+    *ratio_num = st->num_rate;
+    *ratio_den = st->den_rate;
 }
 
-EXPORT int speex_resampler_set_quality(SpeexResamplerState *st, int quality)
+EXPORT int speex_resampler_set_quality(SpeexResamplerState* st, int quality) /* Change the quality setting and refresh the filter when the state is already initialised. */
 {
-   if (quality > 10 || quality < 0)
-      return RESAMPLER_ERR_INVALID_ARG;
-   if (st->quality == quality)
-      return RESAMPLER_ERR_SUCCESS;
-   st->quality = quality;
-   if (st->initialised)
-      return update_filter(st);
-   return RESAMPLER_ERR_SUCCESS;
+    if (quality > 10 || quality < 0) /* the accepted range is 0..10 inclusive */
+        return RESAMPLER_ERR_INVALID_ARG;
+    if (st->quality == quality) /* unchanged: skip the filter update */
+        return RESAMPLER_ERR_SUCCESS;
+    st->quality = quality;
+    if (st->initialised) /* before initialisation only the setting is recorded */
+        return update_filter(st);
+    return RESAMPLER_ERR_SUCCESS;
 }
 
-EXPORT void speex_resampler_get_quality(SpeexResamplerState *st, int *quality)
+EXPORT void speex_resampler_get_quality(SpeexResamplerState* st, int* quality) /* Copy the stored quality setting into *quality. */
 {
-   *quality = st->quality;
+    *quality = st->quality;
 }
 
-EXPORT void speex_resampler_set_input_stride(SpeexResamplerState *st, spx_uint32_t stride)
+EXPORT void speex_resampler_set_input_stride(SpeexResamplerState* st, spx_uint32_t stride) /* Record the input stride in st->in_stride. */
 {
-   st->in_stride = stride;
+    st->in_stride = stride; /* the process functions read this back as an int and multiply the input sample index by it */
 }
 
-EXPORT void speex_resampler_get_input_stride(SpeexResamplerState *st, spx_uint32_t *stride)
+EXPORT void speex_resampler_get_input_stride(SpeexResamplerState* st, spx_uint32_t* stride) /* Copy the stored input stride into *stride. */
 {
-   *stride = st->in_stride;
+    *stride = st->in_stride;
 }
 
-EXPORT void speex_resampler_set_output_stride(SpeexResamplerState *st, spx_uint32_t stride)
+EXPORT void speex_resampler_set_output_stride(SpeexResamplerState* st, spx_uint32_t stride) /* Record the output stride in st->out_stride. */
 {
-   st->out_stride = stride;
+    st->out_stride = stride; /* the process functions read this back as an int and multiply the output sample index by it */
 }
 
-EXPORT void speex_resampler_get_output_stride(SpeexResamplerState *st, spx_uint32_t *stride)
+EXPORT void speex_resampler_get_output_stride(SpeexResamplerState* st, spx_uint32_t* stride) /* Copy the stored output stride into *stride. */
 {
-   *stride = st->out_stride;
+    *stride = st->out_stride;
 }
 
-EXPORT int speex_resampler_get_input_latency(SpeexResamplerState *st)
+EXPORT int speex_resampler_get_input_latency(SpeexResamplerState* st) /* Report the latency as half the current filter length. */
 {
-  return st->filt_len / 2;
+    return st->filt_len / 2; /* integer division */
 }
 
-EXPORT int speex_resampler_get_output_latency(SpeexResamplerState *st)
+EXPORT int speex_resampler_get_output_latency(SpeexResamplerState* st) /* Report half the filter length rescaled by den_rate / num_rate. */
 {
-  return ((st->filt_len / 2) * st->den_rate + (st->num_rate >> 1)) / st->num_rate;
+    return ((st->filt_len / 2) * st->den_rate + (st->num_rate >> 1)) / st->num_rate; /* adding num_rate / 2 before dividing rounds to the nearest integer */
 }
 
-EXPORT int speex_resampler_skip_zeros(SpeexResamplerState *st)
+EXPORT int speex_resampler_skip_zeros(SpeexResamplerState* st) /* Set every channel's last_sample to half the filter length; always returns RESAMPLER_ERR_SUCCESS. */
 {
-   spx_uint32_t i;
-   for (i=0;i<st->nb_channels;i++)
-      st->last_sample[i] = st->filt_len/2;
-   return RESAMPLER_ERR_SUCCESS;
+    spx_uint32_t i;
+    for (i = 0; i < st->nb_channels; i++)
+        st->last_sample[i] = st->filt_len / 2; /* the same value speex_resampler_get_input_latency() reports */
+    return RESAMPLER_ERR_SUCCESS;
 }
 
-EXPORT int speex_resampler_reset_mem(SpeexResamplerState *st)
+EXPORT int speex_resampler_reset_mem(SpeexResamplerState* st) /* Zero every channel's position counters and the whole sample memory; always returns RESAMPLER_ERR_SUCCESS. */
 {
-   spx_uint32_t i;
-   for (i=0;i<st->nb_channels;i++)
-   {
-      st->last_sample[i] = 0;
-      st->magic_samples[i] = 0;
-      st->samp_frac_num[i] = 0;
-   }
-   for (i=0;i<st->nb_channels*(st->filt_len-1);i++)
-      st->mem[i] = 0;
-   return RESAMPLER_ERR_SUCCESS;
+    spx_uint32_t i;
+    for (i = 0; i < st->nb_channels; i++) /* per-channel counters */
+    {
+        st->last_sample[i] = 0;
+        st->magic_samples[i] = 0;
+        st->samp_frac_num[i] = 0;
+    }
+    for (i = 0; i < st->nb_channels * st->mem_alloc_size; i++) /* channel c's samples start at st->mem + c * mem_alloc_size, so clear every slice in full */
+        st->mem[i] = 0;
+    return RESAMPLER_ERR_SUCCESS;
 }
 
-EXPORT const char *speex_resampler_strerror(int err)
+EXPORT const char* speex_resampler_strerror(int err)
 {
-   switch (err)
-   {
-      case RESAMPLER_ERR_SUCCESS:
-         return "Success.";
-      case RESAMPLER_ERR_ALLOC_FAILED:
-         return "Memory allocation failed.";
-      case RESAMPLER_ERR_BAD_STATE:
-         return "Bad resampler state.";
-      case RESAMPLER_ERR_INVALID_ARG:
-         return "Invalid argument.";
-      case RESAMPLER_ERR_PTR_OVERLAP:
-         return "Input and output buffers overlap.";
-      default:
-         return "Unknown error. Bad error code or strange version mismatch.";
-   }
-}
+    switch (err) { /* map each RESAMPLER_ERR_* code to a static, human-readable string */
+    case RESAMPLER_ERR_SUCCESS:
+        return "Success.";
+    case RESAMPLER_ERR_ALLOC_FAILED:
+        return "Memory allocation failed.";
+    case RESAMPLER_ERR_BAD_STATE:
+        return "Bad resampler state.";
+    case RESAMPLER_ERR_INVALID_ARG:
+        return "Invalid argument.";
+    case RESAMPLER_ERR_PTR_OVERLAP:
+        return "Input and output buffers overlap.";
+    case RESAMPLER_ERR_OVERFLOW: return "Integer overflow."; /* returned by speex_resampler_set_rate_frac() */
+    default: /* any code without a dedicated message */
+        return "Unknown error. Bad error code or strange version mismatch.";
+    }
+}
\ No newline at end of file
diff --git a/resampler/resample_neon.h b/resampler/resample_neon.h
new file mode 100644
index 0000000..b5f2c42
--- /dev/null
+++ b/resampler/resample_neon.h
@@ -0,0 +1,339 @@
+/* Copyright (C) 2007-2008 Jean-Marc Valin
+ * Copyright (C) 2008 Thorvald Natvig
+ * Copyright (C) 2011 Texas Instruments
+ *               author Jyri Sarha
+ */
+/**
+   @file resample_neon.h
+   @brief Resampler functions (NEON version)
+*/
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   - Neither the name of the Xiph.org Foundation nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef FIXED_POINT
+#if defined(__aarch64__)
+static inline int32_t saturate_32bit_to_16bit(int32_t a) { /* AArch64: clamp a to the int16 range; the result is returned sign-extended to 32 bits. */
+    int32_t ret;
+    asm ("fmov s0, %w[a]\n" /* move a into SIMD register s0 */
+         "sqxtn h0, s0\n" /* signed saturating narrow, 32 -> 16 bits */
+         "sxtl v0.4s, v0.4h\n" /* sign-extend back to 32-bit lanes */
+         "fmov %w[ret], s0\n" /* lane 0 back to a general-purpose register */
+         : [ret] "=r" (ret)
+         : [a] "r" (a)
+         : "v0" );
+    return ret;
+}
+#elif defined(__thumb2__)
+static inline int32_t saturate_32bit_to_16bit(int32_t a) { /* Thumb-2: clamp a to the int16 range with a single SSAT instruction. */
+    int32_t ret;
+    asm ("ssat %[ret], #16, %[a]" /* signed saturate to 16 bits */
+         : [ret] "=r" (ret)
+         : [a] "r" (a)
+         : );
+    return ret;
+}
+#else
+static inline int32_t saturate_32bit_to_16bit(int32_t a) { /* 32-bit ARM NEON: clamp a to the int16 range; the result is returned sign-extended. */
+    int32_t ret;
+    asm ("vmov.s32 d0[0], %[a]\n" /* a into lane 0 of d0; only that lane is read back */
+         "vqmovn.s32 d0, q0\n" /* signed saturating narrow, 32 -> 16 bits */
+         "vmov.s16 %[ret], d0[0]\n" /* sign-extending move of the 16-bit lane 0 */
+         : [ret] "=r" (ret)
+         : [a] "r" (a)
+         : "q0");
+    return ret;
+}
+#endif
+#undef WORD2INT
+#define WORD2INT(x) (saturate_32bit_to_16bit(x))
+
+#define OVERRIDE_INNER_PRODUCT_SINGLE
+/* Only works when len % 4 == 0 and len >= 4 */
+#if defined(__aarch64__)
+static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len) /* AArch64 fixed-point dot product of a[0..len) and b[0..len), shifted right by 15 with rounding and saturated to 16 bits. */
+{
+    int32_t ret;
+    uint32_t remainder = len % 16; /* tail, consumed 4 samples at a time */
+    len = len - remainder; /* the main loop consumes 16 samples per iteration */
+
+    asm volatile ("	 cmp %w[len], #0\n" /* fewer than 16 samples? */
+		  "	 b.ne 1f\n"
+		  "	 ld1 {v16.4h}, [%[b]], #8\n"
+		  "	 ld1 {v20.4h}, [%[a]], #8\n"
+		  "	 subs %w[remainder], %w[remainder], #4\n"
+		  "	 smull v0.4s, v16.4h, v20.4h\n" /* short input: the first 4 products initialise the accumulator */
+		  "      b.ne 4f\n"
+		  "	 b 5f\n"
+		  "1:" /* first block of 16: smull starts the accumulator, smlal adds to it */
+		  "	 ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [%[b]], #32\n"
+		  "	 ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [%[a]], #32\n"
+		  "	 subs %w[len], %w[len], #16\n"
+		  "	 smull v0.4s, v16.4h, v20.4h\n"
+		  "	 smlal v0.4s, v17.4h, v21.4h\n"
+		  "	 smlal v0.4s, v18.4h, v22.4h\n"
+		  "	 smlal v0.4s, v19.4h, v23.4h\n"
+		  "	 b.eq 3f\n"
+		  "2:" /* remaining blocks of 16 */
+		  "	 ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [%[b]], #32\n"
+		  "	 ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [%[a]], #32\n"
+		  "	 subs %w[len], %w[len], #16\n"
+		  "	 smlal v0.4s, v16.4h, v20.4h\n"
+		  "	 smlal v0.4s, v17.4h, v21.4h\n"
+		  "	 smlal v0.4s, v18.4h, v22.4h\n"
+		  "	 smlal v0.4s, v19.4h, v23.4h\n"
+		  "	 b.ne 2b\n"
+		  "3:" /* is there a tail? */
+		  "	 cmp %w[remainder], #0\n"
+		  "	 b.eq 5f\n"
+		  "4:" /* tail loop: 4 samples per pass */
+		  "	 ld1 {v18.4h}, [%[b]], #8\n"
+		  "	 ld1 {v22.4h}, [%[a]], #8\n"
+		  "	 subs %w[remainder], %w[remainder], #4\n"
+		  "	 smlal v0.4s, v18.4h, v22.4h\n"
+		  "	 b.ne 4b\n"
+		  "5:" /* reduce the accumulator to the scalar result */
+		  "	 saddlv d0, v0.4s\n" /* add the four 32-bit lanes into a 64-bit sum */
+		  "	 sqxtn s0, d0\n" /* saturate to 32 bits */
+		  "	 sqrshrn h0, s0, #15\n" /* rounding shift right by 15, saturate to 16 bits */
+		  "	 sxtl v0.4s, v0.4h\n" /* sign-extend the 16-bit result */
+		  "	 fmov %w[ret], s0\n"
+		  : [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
+		    [len] "+r" (len), [remainder] "+r" (remainder) /* the pointers and both counters are modified by the asm */
+		  :
+		  : "cc", "v0",
+		    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
+    return ret;
+}
+#else
+static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len) /* 32-bit ARM NEON fixed-point dot product of a[0..len) and b[0..len), shifted right by 15 with rounding and saturated to 16 bits. */
+{
+    int32_t ret;
+    uint32_t remainder = len % 16; /* tail, consumed 4 samples at a time */
+    len = len - remainder; /* the main loop consumes 16 samples per iteration */
+
+    asm volatile ("	 cmp %[len], #0\n" /* fewer than 16 samples? */
+		  "	 bne 1f\n"
+		  "	 vld1.16 {d16}, [%[b]]!\n"
+		  "	 vld1.16 {d20}, [%[a]]!\n"
+		  "	 subs %[remainder], %[remainder], #4\n"
+		  "	 vmull.s16 q0, d16, d20\n" /* short input: the first 4 products initialise the accumulator */
+		  "      beq 5f\n"
+		  "	 b 4f\n"
+		  "1:" /* first block of 16: vmull starts the accumulator, vmlal adds to it */
+		  "	 vld1.16 {d16, d17, d18, d19}, [%[b]]!\n"
+		  "	 vld1.16 {d20, d21, d22, d23}, [%[a]]!\n"
+		  "	 subs %[len], %[len], #16\n"
+		  "	 vmull.s16 q0, d16, d20\n"
+		  "	 vmlal.s16 q0, d17, d21\n"
+		  "	 vmlal.s16 q0, d18, d22\n"
+		  "	 vmlal.s16 q0, d19, d23\n"
+		  "	 beq 3f\n"
+		  "2:" /* remaining blocks of 16 */
+		  "	 vld1.16 {d16, d17, d18, d19}, [%[b]]!\n"
+		  "	 vld1.16 {d20, d21, d22, d23}, [%[a]]!\n"
+		  "	 subs %[len], %[len], #16\n"
+		  "	 vmlal.s16 q0, d16, d20\n"
+		  "	 vmlal.s16 q0, d17, d21\n"
+		  "	 vmlal.s16 q0, d18, d22\n"
+		  "	 vmlal.s16 q0, d19, d23\n"
+		  "	 bne 2b\n"
+		  "3:" /* is there a tail? */
+		  "	 cmp %[remainder], #0\n"
+		  "	 beq 5f\n"
+		  "4:" /* tail loop: 4 samples per pass */
+		  "	 vld1.16 {d16}, [%[b]]!\n"
+		  "	 vld1.16 {d20}, [%[a]]!\n"
+		  "	 subs %[remainder], %[remainder], #4\n"
+		  "	 vmlal.s16 q0, d16, d20\n"
+		  "	 bne 4b\n"
+		  "5:" /* reduce the accumulator to the scalar result */
+		  "	 vaddl.s32 q0, d0, d1\n" /* add the low and high halves, widening to 64 bits */
+		  "	 vadd.s64 d0, d0, d1\n" /* total of all four lanes in d0 */
+		  "	 vqmovn.s64 d0, q0\n" /* saturate to 32 bits */
+		  "	 vqrshrn.s32 d0, q0, #15\n" /* rounding shift right by 15, saturate to 16 bits */
+		  "	 vmov.s16 %[ret], d0[0]\n" /* sign-extending move of lane 0 */
+		  : [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
+		    [len] "+r" (len), [remainder] "+r" (remainder) /* the pointers and both counters are modified by the asm */
+		  :
+		  : "cc", "q0",
+		    "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23");
+
+    return ret;
+}
+#endif  // !defined(__aarch64__)
+
+#elif defined(FLOATING_POINT)
+#if defined(__aarch64__)
+static inline int32_t saturate_float_to_16bit(float a) { /* AArch64: round a to the nearest integer and clamp it to the int16 range. */
+    int32_t ret;
+    asm ("fcvtas s1, %s[a]\n" /* float -> signed 32-bit integer, rounding to nearest with ties away from zero */
+         "sqxtn h1, s1\n" /* signed saturating narrow, 32 -> 16 bits */
+         "sxtl v1.4s, v1.4h\n" /* sign-extend back to 32-bit lanes */
+         "fmov %w[ret], s1\n"
+         : [ret] "=r" (ret)
+         : [a] "w" (a)
+         : "v1");
+    return ret;
+}
+#else
+static inline int32_t saturate_float_to_16bit(float a) { /* 32-bit ARM NEON: convert a to an integer clamped to the int16 range. */
+    int32_t ret;
+    asm ("vmov.f32 d0[0], %[a]\n" /* a into lane 0 of d0; only that lane is read back */
+         "vcvt.s32.f32 d0, d0, #15\n" /* float -> signed fixed point with 15 fraction bits */
+         "vqrshrn.s32 d0, q0, #15\n" /* rounding shift right by 15 drops the fraction bits; saturating narrow to 16 bits */
+         "vmov.s16 %[ret], d0[0]\n" /* sign-extending move of lane 0 */
+         : [ret] "=r" (ret)
+         : [a] "r" (a)
+         : "q0");
+    return ret;
+}
+#endif
+
+#undef WORD2INT
+#define WORD2INT(x) (saturate_float_to_16bit(x))
+
+#define OVERRIDE_INNER_PRODUCT_SINGLE
+/* Only works when len % 4 == 0 and len >= 4 */
+#if defined(__aarch64__)
+static inline float inner_product_single(const float *a, const float *b, unsigned int len) /* AArch64 single-precision dot product of a[0..len) and b[0..len). */
+{
+    float ret;
+    uint32_t remainder = len % 16; /* tail, consumed 4 floats at a time */
+    len = len - remainder; /* the main loop consumes 16 floats per iteration */
+
+    asm volatile ("	 cmp %w[len], #0\n" /* fewer than 16 floats? */
+		  "	 b.ne 1f\n"
+		  "	 ld1 {v16.4s}, [%[b]], #16\n"
+		  "	 ld1 {v20.4s}, [%[a]], #16\n"
+		  "	 subs %w[remainder], %w[remainder], #4\n"
+		  "	 fmul v1.4s, v16.4s, v20.4s\n" /* short input: the first 4 products initialise v1 */
+		  "      b.ne 4f\n"
+		  "	 b 5f\n"
+		  "1:" /* first block of 16: fmul initialises four independent accumulators v1..v4 */
+		  "	 ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[b]], #64\n"
+		  "	 ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [%[a]], #64\n"
+		  "	 subs %w[len], %w[len], #16\n"
+		  "	 fmul v1.4s, v16.4s, v20.4s\n"
+		  "	 fmul v2.4s, v17.4s, v21.4s\n"
+		  "	 fmul v3.4s, v18.4s, v22.4s\n"
+		  "	 fmul v4.4s, v19.4s, v23.4s\n"
+		  "	 b.eq 3f\n"
+		  "2:" /* remaining blocks of 16: multiply-accumulate into v1..v4 */
+		  "	 ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[b]], #64\n"
+		  "	 ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [%[a]], #64\n"
+		  "	 subs %w[len], %w[len], #16\n"
+		  "	 fmla v1.4s, v16.4s, v20.4s\n"
+		  "	 fmla v2.4s, v17.4s, v21.4s\n"
+		  "	 fmla v3.4s, v18.4s, v22.4s\n"
+		  "	 fmla v4.4s, v19.4s, v23.4s\n"
+		  "	 b.ne 2b\n"
+		  "3:" /* fold the four accumulators into v1, then check for a tail */
+		  "	 fadd v16.4s, v1.4s, v2.4s\n"
+		  "	 fadd v17.4s, v3.4s, v4.4s\n"
+		  "	 cmp %w[remainder], #0\n"
+		  "	 fadd v1.4s, v16.4s, v17.4s\n"
+		  "	 b.eq 5f\n"
+		  "4:" /* tail loop: 4 floats per pass, accumulated into v1 */
+		  "	 ld1 {v18.4s}, [%[b]], #16\n"
+		  "	 ld1 {v22.4s}, [%[a]], #16\n"
+		  "	 subs %w[remainder], %w[remainder], #4\n"
+		  "	 fmla v1.4s, v18.4s, v22.4s\n"
+		  "	 b.ne 4b\n"
+		  "5:" /* two pairwise adds leave the sum of all four lanes in lane 0 */
+		  "	 faddp v1.4s, v1.4s, v1.4s\n"
+		  "	 faddp %[ret].4s, v1.4s, v1.4s\n"
+		  : [ret] "=w" (ret), [a] "+r" (a), [b] "+r" (b),
+		    [len] "+r" (len), [remainder] "+r" (remainder) /* the pointers and both counters are modified by the asm */
+		  :
+		  : "cc", "v1", "v2", "v3", "v4",
+		    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23");
+    return ret;
+}
+#else
+static inline float inner_product_single(const float *a, const float *b, unsigned int len) /* 32-bit ARM NEON single-precision dot product of a[0..len) and b[0..len). */
+{
+    float ret;
+    uint32_t remainder = len % 16; /* tail, consumed 4 floats at a time */
+    len = len - remainder; /* the main loop consumes 16 floats per iteration */
+
+    asm volatile ("	 cmp %[len], #0\n" /* fewer than 16 floats? */
+		  "	 bne 1f\n"
+		  "	 vld1.32 {q4}, [%[b]]!\n"
+		  "	 vld1.32 {q8}, [%[a]]!\n"
+		  "	 subs %[remainder], %[remainder], #4\n"
+		  "	 vmul.f32 q0, q4, q8\n" /* short input: the first 4 products initialise q0 */
+		  "      bne 4f\n"
+		  "	 b 5f\n"
+		  "1:" /* first block of 16: vmul initialises four independent accumulators q0..q3 */
+		  "	 vld1.32 {q4, q5}, [%[b]]!\n"
+		  "	 vld1.32 {q8, q9}, [%[a]]!\n"
+		  "	 vld1.32 {q6, q7}, [%[b]]!\n"
+		  "	 vld1.32 {q10, q11}, [%[a]]!\n"
+		  "	 subs %[len], %[len], #16\n"
+		  "	 vmul.f32 q0, q4, q8\n"
+		  "	 vmul.f32 q1, q5, q9\n"
+		  "	 vmul.f32 q2, q6, q10\n"
+		  "	 vmul.f32 q3, q7, q11\n"
+		  "	 beq 3f\n"
+		  "2:" /* remaining blocks of 16: multiply-accumulate into q0..q3 */
+		  "	 vld1.32 {q4, q5}, [%[b]]!\n"
+		  "	 vld1.32 {q8, q9}, [%[a]]!\n"
+		  "	 vld1.32 {q6, q7}, [%[b]]!\n"
+		  "	 vld1.32 {q10, q11}, [%[a]]!\n"
+		  "	 subs %[len], %[len], #16\n"
+		  "	 vmla.f32 q0, q4, q8\n"
+		  "	 vmla.f32 q1, q5, q9\n"
+		  "	 vmla.f32 q2, q6, q10\n"
+		  "	 vmla.f32 q3, q7, q11\n"
+		  "	 bne 2b\n"
+		  "3:" /* fold the four accumulators into q0, then check for a tail */
+		  "	 vadd.f32 q4, q0, q1\n"
+		  "	 vadd.f32 q5, q2, q3\n"
+		  "	 cmp %[remainder], #0\n"
+		  "	 vadd.f32 q0, q4, q5\n"
+		  "	 beq 5f\n"
+		  "4:" /* tail loop: 4 floats per pass, accumulated into q0 */
+		  "	 vld1.32 {q6}, [%[b]]!\n"
+		  "	 vld1.32 {q10}, [%[a]]!\n"
+		  "	 subs %[remainder], %[remainder], #4\n"
+		  "	 vmla.f32 q0, q6, q10\n"
+		  "	 bne 4b\n"
+		  "5:" /* horizontal sum of the four lanes of q0 */
+		  "	 vadd.f32 d0, d0, d1\n" /* low half + high half */
+		  "	 vpadd.f32 d0, d0, d0\n" /* pairwise add: lane 0 now holds the total */
+		  "	 vmov.f32 %[ret], d0[0]\n"
+		  : [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b),
+		    [len] "+l" (len), [remainder] "+l" (remainder) /* the pointers and both counters are modified by the asm */
+		  :
+		  : "cc", "q0", "q1", "q2", "q3",
+		    "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11");
+    return ret;
+}
+#endif  // defined(__aarch64__)
+#endif
\ No newline at end of file
diff --git a/resampler/resample_sse.h b/resampler/resample_sse.h
index 632bb48..dcab23e 100644
--- a/resampler/resample_sse.h
+++ b/resampler/resample_sse.h
@@ -2,127 +2,127 @@
  * Copyright (C) 2008 Thorvald Natvig
  */
 /**
-   @file resample_sse.h
-   @brief Resampler functions (SSE version)
+    @file resample_sse.h
+    @brief Resampler functions (SSE version)
 */
 /*
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions
-   are met:
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
 
-   - Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
+    - Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
 
-   - Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
+    - Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
 
-   - Neither the name of the Xiph.org Foundation nor the names of its
-   contributors may be used to endorse or promote products derived from
-   this software without specific prior written permission.
+    - Neither the name of the Xiph.org Foundation nor the names of its
+    contributors may be used to endorse or promote products derived from
+    this software without specific prior written permission.
 
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
-   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
 
 #include <xmmintrin.h>
 
 #define OVERRIDE_INNER_PRODUCT_SINGLE
-static inline float inner_product_single(const float *a, const float *b, unsigned int len)
+static inline float inner_product_single(const float* a, const float* b, unsigned int len) /* SSE dot product of a and b; reads 8 floats per iteration, i.e. up to the next multiple of 8 at or above len. */
 {
-   unsigned int i;
-   float ret;
-   __m128 sum = _mm_setzero_ps();
-   for (i=0;i<len;i+=8)
-   {
-      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
-      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)));
-   }
-   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
-   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
-   _mm_store_ss(&ret, sum);
-   return ret;
+    unsigned int i; /* same type as len: no signed/unsigned comparison and no signed overflow on i += 8 */
+    float ret;
+    __m128 sum = _mm_setzero_ps(); /* four running partial sums */
+    for (i = 0; i < len; i += 8)
+    {
+        sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a + i), _mm_loadu_ps(b + i)));
+        sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a + i + 4), _mm_loadu_ps(b + i + 4)));
+    }
+    sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); /* add the upper two lanes onto the lower two */
+    sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); /* add lane 1 onto lane 0 */
+    _mm_store_ss(&ret, sum);
+    return ret;
 }
 
 #define OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
-static inline float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
-  unsigned int i;
-  float ret;
-  __m128 sum = _mm_setzero_ps();
-  __m128 f = _mm_loadu_ps(frac);
-  for(i=0;i<len;i+=2)
-  {
-    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample)));
-    sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample)));
-  }
-   sum = _mm_mul_ps(f, sum);
-   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
-   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
-   _mm_store_ss(&ret, sum);
-   return ret;
+static inline float interpolate_product_single(const float* a, const float* b, unsigned int len, const spx_uint32_t oversample, float* frac) { /* SSE: sum of a[i] times four consecutive floats at b[i * oversample], weighted by frac[0..3]. */
+    unsigned int i; /* same type as len: no signed/unsigned comparison and no signed overflow on i += 2 */
+    float ret;
+    __m128 sum = _mm_setzero_ps(); /* one running sum per interpolation tap */
+    __m128 f = _mm_loadu_ps(frac); /* the four interpolation weights */
+    for (i = 0; i < len; i += 2) /* two samples of a per iteration */
+    {
+        sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a + i), _mm_loadu_ps(b + i * oversample)));
+        sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a + i + 1), _mm_loadu_ps(b + (i + 1) * oversample)));
+    }
+    sum = _mm_mul_ps(f, sum); /* apply the weights lane by lane */
+    sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); /* add the upper two lanes onto the lower two */
+    sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); /* add lane 1 onto lane 0 */
+    _mm_store_ss(&ret, sum);
+    return ret;
 }
 
-#ifdef __SSE2__
+#ifdef USE_SSE2
 #include <emmintrin.h>
 #define OVERRIDE_INNER_PRODUCT_DOUBLE
 
-static inline double inner_product_double(const float *a, const float *b, unsigned int len)
+static inline double inner_product_double(const float* a, const float* b, unsigned int len)
 {
-   unsigned int i;
-   double ret;
-   __m128d sum = _mm_setzero_pd();
-   __m128 t;
-   for (i=0;i<len;i+=8)
-   {
-      t = _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i));
-      sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
-      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
+    int i;
+    double ret;
+    __m128d sum = _mm_setzero_pd();
+    __m128 t;
+    for (i = 0; i < len; i += 8)
+    {
+        t = _mm_mul_ps(_mm_loadu_ps(a + i), _mm_loadu_ps(b + i));
+        sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
+        sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
 
-      t = _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4));
-      sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
-      sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
-   }
-   sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
-   _mm_store_sd(&ret, sum);
-   return ret;
+        t = _mm_mul_ps(_mm_loadu_ps(a + i + 4), _mm_loadu_ps(b + i + 4));
+        sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
+        sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
+    }
+    sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
+    _mm_store_sd(&ret, sum);
+    return ret;
 }
 
 #define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
-static inline double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
-  unsigned int i;
-  double ret;
-  __m128d sum;
-  __m128d sum1 = _mm_setzero_pd();
-  __m128d sum2 = _mm_setzero_pd();
-  __m128 f = _mm_loadu_ps(frac);
-  __m128d f1 = _mm_cvtps_pd(f);
-  __m128d f2 = _mm_cvtps_pd(_mm_movehl_ps(f,f));
-  __m128 t;
-  for(i=0;i<len;i+=2)
-  {
-    t = _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample));
-    sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
-    sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
+static inline double interpolate_product_double(const float* a, const float* b, unsigned int len, const spx_uint32_t oversample, float* frac) {
+    int i;
+    double ret;
+    __m128d sum;
+    __m128d sum1 = _mm_setzero_pd();
+    __m128d sum2 = _mm_setzero_pd();
+    __m128 f = _mm_loadu_ps(frac);
+    __m128d f1 = _mm_cvtps_pd(f);
+    __m128d f2 = _mm_cvtps_pd(_mm_movehl_ps(f, f));
+    __m128 t;
+    for (i = 0; i < len; i += 2)
+    {
+        t = _mm_mul_ps(_mm_load1_ps(a + i), _mm_loadu_ps(b + i * oversample));
+        sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
+        sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
 
-    t = _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample));
-    sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
-    sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
-  }
-  sum1 = _mm_mul_pd(f1, sum1);
-  sum2 = _mm_mul_pd(f2, sum2);
-  sum = _mm_add_pd(sum1, sum2);
-  sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
-  _mm_store_sd(&ret, sum);
-  return ret;
+        t = _mm_mul_ps(_mm_load1_ps(a + i + 1), _mm_loadu_ps(b + (i + 1) * oversample));
+        sum1 = _mm_add_pd(sum1, _mm_cvtps_pd(t));
+        sum2 = _mm_add_pd(sum2, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
+    }
+    sum1 = _mm_mul_pd(f1, sum1);
+    sum2 = _mm_mul_pd(f2, sum2);
+    sum = _mm_add_pd(sum1, sum2);
+    sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
+    _mm_store_sd(&ret, sum);
+    return ret;
 }
 
-#endif
+#endif
\ No newline at end of file
diff --git a/wfview.pro b/wfview.pro
index 9aaf9f8..1e0aed9 100644
--- a/wfview.pro
+++ b/wfview.pro
@@ -31,7 +31,10 @@ linux:QMAKE_LFLAGS += -O2 -s
 DEFINES += QT_DEPRECATED_WARNINGS
 DEFINES += QCUSTOMPLOT_COMPILE_LIBRARY
 
+
 # These defines are used for the resampler
+equals(QT_ARCH, i386): DEFINES += USE_SSE
+equals(QT_ARCH, arm): DEFINES += USE_NEON
 DEFINES += OUTSIDE_SPEEX
 DEFINES += RANDOM_PREFIX=wf
 
diff --git a/wfview.vcxproj b/wfview.vcxproj
index 93967c1..2502415 100644
--- a/wfview.vcxproj
+++ b/wfview.vcxproj
@@ -48,7 +48,7 @@
   
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
-      <AdditionalIncludeDirectories>.;..\qcustomplot;opus-tools\src;rtaudio;release;/include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>.;..\qcustomplot;resampler;rtaudio;release;/include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <AdditionalOptions>-Zc:rvalueCast -Zc:inline -Zc:strictStrings -Zc:throwingNew -Zc:referenceBinding -Zc:__cplusplus -w34100 -w34189 -w44996 -w44456 -w44457 -w44458 %(AdditionalOptions)</AdditionalOptions>
       <AssemblerListingLocation>release\</AssemblerListingLocation>
       <BrowseInformation>false</BrowseInformation>
@@ -57,7 +57,7 @@
       <ExceptionHandling>Sync</ExceptionHandling>
       <ObjectFileName>release\</ObjectFileName>
       <Optimization>MaxSpeed</Optimization>
-      <PreprocessorDefinitions>_WINDOWS;UNICODE;_UNICODE;WIN32;_ENABLE_EXTENDED_ALIGNED_STORAGE;QT_DEPRECATED_WARNINGS;QCUSTOMPLOT_COMPILE_LIBRARY;OUTSIDE_SPEEX;RANDOM_PREFIX=wf;__WINDOWS_WASAPI__;GITSHORT="141955b";HOST="wfview.org";UNAME="build";QT_NO_DEBUG;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>_WINDOWS;UNICODE;_UNICODE;WIN32;_ENABLE_EXTENDED_ALIGNED_STORAGE;QT_DEPRECATED_WARNINGS;QCUSTOMPLOT_COMPILE_LIBRARY;USE_SSE;OUTSIDE_SPEEX;RANDOM_PREFIX=wf;__WINDOWS_WASAPI__;GITSHORT="ea09e1f";HOST="wfview.org";UNAME="build";QT_NO_DEBUG;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <PreprocessToFile>false</PreprocessToFile>
       <ProgramDataBaseFileName></ProgramDataBaseFileName>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
@@ -84,12 +84,12 @@
       <WarningLevel>0</WarningLevel>
     </Midl>
     <ResourceCompile>
-      <PreprocessorDefinitions>_WINDOWS;UNICODE;_UNICODE;WIN32;_ENABLE_EXTENDED_ALIGNED_STORAGE;QT_DEPRECATED_WARNINGS;QCUSTOMPLOT_COMPILE_LIBRARY;OUTSIDE_SPEEX;RANDOM_PREFIX=wf;__WINDOWS_WASAPI__;GITSHORT=\"141955b\";HOST=\"wfview.org\";UNAME=\"build\";QT_NO_DEBUG;QT_MULTIMEDIA_LIB;QT_PRINTSUPPORT_LIB;QT_WIDGETS_LIB;QT_GUI_LIB;QT_SERIALPORT_LIB;QT_NETWORK_LIB;QT_CORE_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>_WINDOWS;UNICODE;_UNICODE;WIN32;_ENABLE_EXTENDED_ALIGNED_STORAGE;QT_DEPRECATED_WARNINGS;QCUSTOMPLOT_COMPILE_LIBRARY;USE_SSE;OUTSIDE_SPEEX;RANDOM_PREFIX=wf;__WINDOWS_WASAPI__;GITSHORT=\"ea09e1f\";HOST=\"wfview.org\";UNAME=\"build\";QT_NO_DEBUG;QT_MULTIMEDIA_LIB;QT_PRINTSUPPORT_LIB;QT_WIDGETS_LIB;QT_GUI_LIB;QT_SERIALPORT_LIB;QT_NETWORK_LIB;QT_CORE_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
     </ResourceCompile>
   <QtMoc><CompilerFlavor>msvc</CompilerFlavor><Include>./$(Configuration)/moc_predefs.h</Include><ExecutionDescription>Moc'ing %(Identity)...</ExecutionDescription><DynamicSource>output</DynamicSource><QtMocDir>$(Configuration)</QtMocDir><QtMocFileName>moc_%(Filename).cpp</QtMocFileName></QtMoc><QtRcc><Compression>default</Compression><ExecutionDescription>Rcc'ing %(Identity)...</ExecutionDescription><QtRccDir>$(Configuration)</QtRccDir><QtRccFileName>qrc_%(Filename).cpp</QtRccFileName></QtRcc><QtUic><ExecutionDescription>Uic'ing %(Identity)...</ExecutionDescription><QtUicDir>$(ProjectDir)</QtUicDir><QtUicFileName>ui_%(Filename).h</QtUicFileName></QtUic></ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
-      <AdditionalIncludeDirectories>.;..\qcustomplot;opus-tools\src;rtaudio;debug;/include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>.;..\qcustomplot;resampler;rtaudio;debug;/include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <AdditionalOptions>-Zc:rvalueCast -Zc:inline -Zc:strictStrings -Zc:throwingNew -Zc:referenceBinding -Zc:__cplusplus -w34100 -w34189 -w44996 -w44456 -w44457 -w44458 %(AdditionalOptions)</AdditionalOptions>
       <AssemblerListingLocation>debug\</AssemblerListingLocation>
       <BrowseInformation>false</BrowseInformation>
@@ -98,7 +98,7 @@
       <ExceptionHandling>Sync</ExceptionHandling>
       <ObjectFileName>debug\</ObjectFileName>
       <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>_WINDOWS;UNICODE;_UNICODE;WIN32;_ENABLE_EXTENDED_ALIGNED_STORAGE;QT_DEPRECATED_WARNINGS;QCUSTOMPLOT_COMPILE_LIBRARY;OUTSIDE_SPEEX;RANDOM_PREFIX=wf;__WINDOWS_WASAPI__;GITSHORT="141955b";HOST="wfview.org";UNAME="build";%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>_WINDOWS;UNICODE;_UNICODE;WIN32;_ENABLE_EXTENDED_ALIGNED_STORAGE;QT_DEPRECATED_WARNINGS;QCUSTOMPLOT_COMPILE_LIBRARY;USE_SSE;OUTSIDE_SPEEX;RANDOM_PREFIX=wf;__WINDOWS_WASAPI__;GITSHORT="ea09e1f";HOST="wfview.org";UNAME="build";%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <PreprocessToFile>false</PreprocessToFile>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
       <SuppressStartupBanner>true</SuppressStartupBanner>
@@ -123,7 +123,7 @@
       <WarningLevel>0</WarningLevel>
     </Midl>
     <ResourceCompile>
-      <PreprocessorDefinitions>_WINDOWS;UNICODE;_UNICODE;WIN32;_ENABLE_EXTENDED_ALIGNED_STORAGE;QT_DEPRECATED_WARNINGS;QCUSTOMPLOT_COMPILE_LIBRARY;OUTSIDE_SPEEX;RANDOM_PREFIX=wf;__WINDOWS_WASAPI__;GITSHORT=\"141955b\";HOST=\"wfview.org\";UNAME=\"build\";QT_MULTIMEDIA_LIB;QT_PRINTSUPPORT_LIB;QT_WIDGETS_LIB;QT_GUI_LIB;QT_SERIALPORT_LIB;QT_NETWORK_LIB;QT_CORE_LIB;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>_WINDOWS;UNICODE;_UNICODE;WIN32;_ENABLE_EXTENDED_ALIGNED_STORAGE;QT_DEPRECATED_WARNINGS;QCUSTOMPLOT_COMPILE_LIBRARY;USE_SSE;OUTSIDE_SPEEX;RANDOM_PREFIX=wf;__WINDOWS_WASAPI__;GITSHORT=\"ea09e1f\";HOST=\"wfview.org\";UNAME=\"build\";QT_MULTIMEDIA_LIB;QT_PRINTSUPPORT_LIB;QT_WIDGETS_LIB;QT_GUI_LIB;QT_SERIALPORT_LIB;QT_NETWORK_LIB;QT_CORE_LIB;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
     </ResourceCompile>
   <QtMoc><CompilerFlavor>msvc</CompilerFlavor><Include>./$(Configuration)/moc_predefs.h</Include><ExecutionDescription>Moc'ing %(Identity)...</ExecutionDescription><DynamicSource>output</DynamicSource><QtMocDir>$(Configuration)</QtMocDir><QtMocFileName>moc_%(Filename).cpp</QtMocFileName></QtMoc><QtRcc><Compression>default</Compression><ExecutionDescription>Rcc'ing %(Identity)...</ExecutionDescription><QtRccDir>$(Configuration)</QtRccDir><QtRccFileName>qrc_%(Filename).cpp</QtRccFileName></QtRcc><QtUic><ExecutionDescription>Uic'ing %(Identity)...</ExecutionDescription><QtUicDir>$(ProjectDir)</QtUicDir><QtUicFileName>ui_%(Filename).h</QtUicFileName></QtUic></ItemDefinitionGroup>
   <ItemGroup>
@@ -138,7 +138,7 @@
     <ClCompile Include="..\qcustomplot\qcustomplot.cpp" />
     <ClCompile Include="qledlabel.cpp" />
     <ClCompile Include="repeatersetup.cpp" />
-    <ClCompile Include="opus-tools\src\resample.c" />
+    <ClCompile Include="resampler\resample.c" />
     <ClCompile Include="rigcommander.cpp" />
     <ClCompile Include="rigctld.cpp" />
     <ClCompile Include="rigidentities.cpp" />
@@ -150,7 +150,7 @@
     <ClCompile Include="wfmain.cpp" />
   </ItemGroup>
   <ItemGroup>
-    <ClInclude Include="opus-tools\src\arch.h" />
+    <ClInclude Include="resampler\arch.h" />
     <QtMoc Include="audiohandler.h">
       
       
@@ -235,7 +235,7 @@
       
       
     </QtMoc>
-    <ClInclude Include="opus-tools\src\resample_sse.h" />
+    <ClInclude Include="resampler\resample_sse.h" />
     <QtMoc Include="rigcommander.h">
       
       
@@ -268,7 +268,7 @@
       
       
     </QtMoc>
-    <ClInclude Include="opus-tools\src\speex_resampler.h" />
+    <ClInclude Include="resampler\speex_resampler.h" />
     <QtMoc Include="udphandler.h">
       
       
diff --git a/wfview.vcxproj.filters b/wfview.vcxproj.filters
index 3e0705b..bc7845e 100644
--- a/wfview.vcxproj.filters
+++ b/wfview.vcxproj.filters
@@ -90,7 +90,7 @@
     <ClCompile Include="repeatersetup.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
-    <ClCompile Include="opus-tools\src\resample.c">
+    <ClCompile Include="resampler\resample.c">
       <Filter>Source Files</Filter>
     </ClCompile>
     <ClCompile Include="rigcommander.cpp">
@@ -122,7 +122,7 @@
     </ClCompile>
   </ItemGroup>
   <ItemGroup>
-    <ClInclude Include="opus-tools\src\arch.h">
+    <ClInclude Include="resampler\arch.h">
       <Filter>Header Files</Filter>
     </ClInclude>
     <QtMoc Include="audiohandler.h">
@@ -161,7 +161,7 @@
     <QtMoc Include="repeatersetup.h">
       <Filter>Header Files</Filter>
     </QtMoc>
-    <ClInclude Include="opus-tools\src\resample_sse.h">
+    <ClInclude Include="resampler\resample_sse.h">
       <Filter>Header Files</Filter>
     </ClInclude>
     <QtMoc Include="rigcommander.h">
@@ -179,7 +179,7 @@
     <QtMoc Include="satellitesetup.h">
       <Filter>Header Files</Filter>
     </QtMoc>
-    <ClInclude Include="opus-tools\src\speex_resampler.h">
+    <ClInclude Include="resampler\speex_resampler.h">
       <Filter>Header Files</Filter>
     </ClInclude>
     <QtMoc Include="udphandler.h">