diff --git a/src/arduinoFFT.cpp b/src/arduinoFFT.cpp index b720132..54e92e3 100644 --- a/src/arduinoFFT.cpp +++ b/src/arduinoFFT.cpp @@ -35,6 +35,9 @@ ArduinoFFT::ArduinoFFT(T *vReal, T *vImag, uint_fast16_t samples, #ifdef FFT_SPEED_OVER_PRECISION _oneOverSamples = 1.0 / samples; #endif +#ifdef SWAP_TABLE + generateSwapIndexes(); +#endif } template ArduinoFFT::~ArduinoFFT(void) { @@ -42,6 +45,12 @@ template ArduinoFFT::~ArduinoFFT(void) { if (_precompiledWindowingFactors) { delete[] _precompiledWindowingFactors; } +#ifdef SWAP_TABLE + if (_swapIndexes != nullptr) { + delete[] _swapIndexes; + _swapIndexes = nullptr; + } +#endif } template void ArduinoFFT::complexToMagnitude(void) const { @@ -57,9 +66,16 @@ void ArduinoFFT::complexToMagnitude(T *vReal, T *vImag, } } +// Computes in-place complex-to-complex FFT with optional optimizations template void ArduinoFFT::compute(FFTDirection dir) const { - compute(this->_vReal, this->_vImag, this->_samples, exponent(this->_samples), - dir); + // Reverse bits +#ifdef SWAP_TABLE + swapWithIndexes(dir); +#else + swapData(this->_vReal, this->_vImag, this->_samples, dir); +#endif + // Compute the FFT + Computation(this->_vReal, this->_vImag, this->_samples, this->_power, dir); } template @@ -72,80 +88,13 @@ void ArduinoFFT::compute(T *vReal, T *vImag, uint_fast16_t samples, template void ArduinoFFT::compute(T *vReal, T *vImag, uint_fast16_t samples, uint_fast8_t power, FFTDirection dir) const { -#ifdef FFT_SPEED_OVER_PRECISION - T oneOverSamples = this->_oneOverSamples; - if (!this->_oneOverSamples) - oneOverSamples = 1.0 / samples; +#ifdef SWAP_TABLE +#warning "SWAP_TABLE has no effect when using the legacy API." #endif // Reverse bits - uint_fast16_t j = 0; - for (uint_fast16_t i = 0; i < (samples - 1); i++) { - if (i < j) { - swap(&vReal[i], &vReal[j]); -#ifdef COMPLEX_INPUT - swap(&vImag[i], &vImag[j]); - #else - if (dir == FFTDirection::Reverse) - swap(&vImag[i], &vImag[j]); -#endif - } - uint_fast16_t k = (samples >> 1); - - while (k <= j) { - j -= k; - k >>= 1; - } - j += k; - } + swapData(vReal, vImag, samples, dir); // Compute the FFT - T c1 = -1.0; - T c2 = 0.0; - uint_fast16_t l2 = 1; - for (uint_fast8_t l = 0; (l < power); l++) { - uint_fast16_t l1 = l2; - l2 <<= 1; - T u1 = 1.0; - T u2 = 0.0; - for (j = 0; j < l1; j++) { - for (uint_fast16_t i = j; i < samples; i += l2) { - uint_fast16_t i1 = i + l1; - T t1 = u1 * vReal[i1] - u2 * vImag[i1]; - T t2 = u1 * vImag[i1] + u2 * vReal[i1]; - vReal[i1] = vReal[i] - t1; - vImag[i1] = vImag[i] - t2; - vReal[i] += t1; - vImag[i] += t2; - } - T z = ((u1 * c1) - (u2 * c2)); - u2 = ((u1 * c2) + (u2 * c1)); - u1 = z; - } - -#if defined(__AVR__) && defined(USE_AVR_PROGMEM) - c2 = pgm_read_float_near(&(_c2[l])); - c1 = pgm_read_float_near(&(_c1[l])); -#else - T cTemp = 0.5 * c1; - c2 = sqrt_internal(0.5 - cTemp); - c1 = sqrt_internal(0.5 + cTemp); -#endif - - if (dir == FFTDirection::Forward) { - c2 = -c2; - } - } - // Scaling for reverse transform - if (dir == FFTDirection::Reverse) { - for (uint_fast16_t i = 0; i < samples; i++) { -#ifdef FFT_SPEED_OVER_PRECISION - vReal[i] *= oneOverSamples; - vImag[i] *= oneOverSamples; -#else - vReal[i] /= samples; - vImag[i] /= samples; -#endif - } - } + Computation(vReal, vImag, samples, power, dir); } template void ArduinoFFT::dcRemoval(void) const { @@ -524,5 +473,134 @@ const T ArduinoFFT::_WindowCompensationFactors[11] = { 1.0 // Custom, precompiled value. }; +#ifdef SWAP_TABLE +template void ArduinoFFT::generateSwapIndexes(void) { + if (this->_power == 0) { + this->_power = exponent(this->_samples); + } + if (this->_swapIndexes) { + delete[] this->_swapIndexes; + } + uint16_t minLength = this->_samples * 3 / 4; + this->_swapIndexes = new uint_fast16_t[minLength]; + for (uint_fast16_t i = 0; i < minLength; ++i) { + this->_swapIndexes[i] = reverseBits(i); + } +} + +template +uint_fast16_t ArduinoFFT::reverseBits(uint_fast16_t index) { + uint_fast16_t reversed = 0; + for (uint_fast16_t i = 0; i < this->_power; ++i) { + reversed <<= 1; + reversed |= index & 1; + index >>= 1; + } + return reversed; +} + +template void ArduinoFFT::swapWithIndexes(FFTDirection dir) { + uint_fast16_t j = 0; + // More efficient than samples * 0.75 + uint_fast16_t minLen = (samples >> 2) + (samples >> 1); + for (uint_fast16_t i = 1; i < minLen; i++) { + j = this->_swapIndexes[i]; + if (i < j) { + swap(&vReal[i], &vReal[j]); +#ifdef COMPLEX_INPUT + swap(&vImag[i], &vImag[j]); +#else + if (dir == FFTDirection::Reverse) + swap(&vImag[i], &vImag[j]); +#endif + } + } +} +#endif +template +void ArduinoFFT::swapData(T *vReal, T *vImag, uint_fast16_t samples, + FFTDirection dir) const { + uint_fast16_t j = 0; + for (uint_fast16_t i = 0; i < (samples - 1); i++) { + if (i < j) { + swap(&vReal[i], &vReal[j]); +#ifdef COMPLEX_INPUT + swap(&vImag[i], &vImag[j]); +#else + if (dir == FFTDirection::Reverse) + swap(&vImag[i], &vImag[j]); +#endif + } + uint_fast16_t k = (samples >> 1); + + while (k <= j) { + j -= k; + k >>= 1; + } + j += k; + } +} + +template +void ArduinoFFT::Computation(T *vReal, T *vImag, uint_fast16_t samples, + uint_fast8_t power, FFTDirection dir) const { +#ifdef FFT_SPEED_OVER_PRECISION + T oneOverSamples = this->_oneOverSamples; + if (!this->_oneOverSamples) { + oneOverSamples = 1.0 / samples; + this->_oneOverSamples = oneOverSamples; + } +#endif + uint_fast16_t j = 0; + T c1 = -1.0; + T c2 = 0.0; + uint_fast16_t l2 = 1; + for (uint_fast8_t l = 0; (l < power); l++) { + uint_fast16_t l1 = l2; + l2 <<= 1; + T u1 = 1.0; + T u2 = 0.0; + for (j = 0; j < l1; j++) { + for (uint_fast16_t i = j; i < samples; i += l2) { + uint_fast16_t i1 = i + l1; + T t1 = u1 * vReal[i1] - u2 * vImag[i1]; + T t2 = u1 * vImag[i1] + u2 * vReal[i1]; + vReal[i1] = vReal[i] - t1; + vImag[i1] = vImag[i] - t2; + vReal[i] += t1; + vImag[i] += t2; + } + T z = ((u1 * c1) - (u2 * c2)); + u2 = ((u1 * c2) + (u2 * c1)); + u1 = z; + } + +#if defined(__AVR__) && defined(USE_AVR_PROGMEM) + c2 = pgm_read_float_near(&(_c2[l])); + c1 = pgm_read_float_near(&(_c1[l])); +#else + T cTemp = 0.5 * c1; + c2 = sqrt_internal(0.5 - cTemp); + c1 = sqrt_internal(0.5 + cTemp); +#endif + + if (dir == FFTDirection::Forward) { + c2 = -c2; + } + } + // Scaling for reverse transform + if (dir == FFTDirection::Reverse) { + for (uint_fast16_t i = 0; i < samples; i++) { +#ifdef FFT_SPEED_OVER_PRECISION + vReal[i] *= oneOverSamples; + vImag[i] *= oneOverSamples; +#else + vReal[i] /= samples; + vImag[i] /= samples; +#endif + } + } +} + template class ArduinoFFT; template class ArduinoFFT; diff --git a/src/arduinoFFT.h b/src/arduinoFFT.h index 2099291..614f872 100644 --- a/src/arduinoFFT.h +++ b/src/arduinoFFT.h @@ -48,9 +48,9 @@ // This might only work for specific use cases, but is significantly faster. #ifndef FFT_SQRT_APPROXIMATION - #ifndef sqrt_internal - #define sqrt_internal sqrt - #endif +#ifndef sqrt_internal +#define sqrt_internal sqrt +#endif #endif #define FFT_LIB_REV 0x20 @@ -110,6 +110,9 @@ private: T *_precompiledWindowingFactors = nullptr; uint_fast16_t _samples; T _samplingFrequency; +#ifdef SWAP_TABLE + uint_fast16_t *_swapIndexes = nullptr; +#endif T *_vImag; T *_vReal; FFTWindow _windowFunction; @@ -120,6 +123,16 @@ private: void parabola(T x1, T y1, T x2, T y2, T x3, T y3, T *a, T *b, T *c) const; void swap(T *a, T *b) const; +#ifdef SWAP_TABLE + void generateSwapIndexes(void); + uint_fast16_t reverseBits(uint_fast16_t index); + void swapWithIndexes(FFTDirection dir); +#endif + void swapData(T *vReal, T *vImag, uint_fast16_t samples, + FFTDirection dir) const; + void Computation(T *vReal, T *vImag, uint_fast16_t samples, + uint_fast8_t power, FFTDirection dir) const; + #ifdef FFT_SQRT_APPROXIMATION float sqrt_internal(float x) const; double sqrt_internal(double x) const;