Implement swap table optimization for compute function

develop
Enrique Condes 2024-12-31 11:56:43 +08:00
rodzic 0913ec2f87
commit dcfbcd7182
2 zmienionych plików z 167 dodań i 76 usunięć

Wyświetl plik

@ -35,6 +35,9 @@ ArduinoFFT<T>::ArduinoFFT(T *vReal, T *vImag, uint_fast16_t samples,
#ifdef FFT_SPEED_OVER_PRECISION
_oneOverSamples = 1.0 / samples;
#endif
#ifdef SWAP_TABLE
generateSwapIndexes();
#endif
}
template <typename T> ArduinoFFT<T>::~ArduinoFFT(void) {
@ -42,6 +45,12 @@ template <typename T> ArduinoFFT<T>::~ArduinoFFT(void) {
if (_precompiledWindowingFactors) {
delete[] _precompiledWindowingFactors;
}
#ifdef SWAP_TABLE
if (_swapIndexes != nullptr) {
delete[] _swapIndexes;
_swapIndexes = nullptr;
}
#endif
}
template <typename T> void ArduinoFFT<T>::complexToMagnitude(void) const {
@ -57,9 +66,16 @@ void ArduinoFFT<T>::complexToMagnitude(T *vReal, T *vImag,
}
}
// Computes in-place complex-to-complex FFT with optional optimizations
template <typename T> void ArduinoFFT<T>::compute(FFTDirection dir) const {
compute(this->_vReal, this->_vImag, this->_samples, exponent(this->_samples),
dir);
// Reverse bits
#ifdef SWAP_TABLE
swapWithIndexes(dir);
#else
swapData(this->_vReal, this->_vImag, this->_samples, dir);
#endif
// Compute the FFT
Computation(this->_vReal, this->_vImag, this->_samples, this->_power, dir);
}
template <typename T>
@ -72,80 +88,13 @@ void ArduinoFFT<T>::compute(T *vReal, T *vImag, uint_fast16_t samples,
template <typename T>
void ArduinoFFT<T>::compute(T *vReal, T *vImag, uint_fast16_t samples,
uint_fast8_t power, FFTDirection dir) const {
#ifdef FFT_SPEED_OVER_PRECISION
T oneOverSamples = this->_oneOverSamples;
if (!this->_oneOverSamples)
oneOverSamples = 1.0 / samples;
#ifdef SWAP_TABLE
#warning "SWAP_TABLE has no effect when using the legacy API."
#endif
// Reverse bits
uint_fast16_t j = 0;
for (uint_fast16_t i = 0; i < (samples - 1); i++) {
if (i < j) {
swap(&vReal[i], &vReal[j]);
#ifdef COMPLEX_INPUT
swap(&vImag[i], &vImag[j]);
#else
if (dir == FFTDirection::Reverse)
swap(&vImag[i], &vImag[j]);
#endif
}
uint_fast16_t k = (samples >> 1);
while (k <= j) {
j -= k;
k >>= 1;
}
j += k;
}
swapData(vReal, vImag, samples, dir);
// Compute the FFT
T c1 = -1.0;
T c2 = 0.0;
uint_fast16_t l2 = 1;
for (uint_fast8_t l = 0; (l < power); l++) {
uint_fast16_t l1 = l2;
l2 <<= 1;
T u1 = 1.0;
T u2 = 0.0;
for (j = 0; j < l1; j++) {
for (uint_fast16_t i = j; i < samples; i += l2) {
uint_fast16_t i1 = i + l1;
T t1 = u1 * vReal[i1] - u2 * vImag[i1];
T t2 = u1 * vImag[i1] + u2 * vReal[i1];
vReal[i1] = vReal[i] - t1;
vImag[i1] = vImag[i] - t2;
vReal[i] += t1;
vImag[i] += t2;
}
T z = ((u1 * c1) - (u2 * c2));
u2 = ((u1 * c2) + (u2 * c1));
u1 = z;
}
#if defined(__AVR__) && defined(USE_AVR_PROGMEM)
c2 = pgm_read_float_near(&(_c2[l]));
c1 = pgm_read_float_near(&(_c1[l]));
#else
T cTemp = 0.5 * c1;
c2 = sqrt_internal(0.5 - cTemp);
c1 = sqrt_internal(0.5 + cTemp);
#endif
if (dir == FFTDirection::Forward) {
c2 = -c2;
}
}
// Scaling for reverse transform
if (dir == FFTDirection::Reverse) {
for (uint_fast16_t i = 0; i < samples; i++) {
#ifdef FFT_SPEED_OVER_PRECISION
vReal[i] *= oneOverSamples;
vImag[i] *= oneOverSamples;
#else
vReal[i] /= samples;
vImag[i] /= samples;
#endif
}
}
Computation(vReal, vImag, samples, power, dir);
}
template <typename T> void ArduinoFFT<T>::dcRemoval(void) const {
@ -524,5 +473,134 @@ const T ArduinoFFT<T>::_WindowCompensationFactors[11] = {
1.0 // Custom, precompiled value.
};
#ifdef SWAP_TABLE
template <typename T> void ArduinoFFT<T>::generateSwapIndexes(void) {
if (this->_power == 0) {
this->_power = exponent(this->_samples);
}
if (this->_swapIndexes) {
delete[] this->_swapIndexes;
}
uint16_t minLength = this->_samples * 3 / 4;
this->_swapIndexes = new uint_fast16_t[minLength];
for (uint_fast16_t i = 0; i < minLength; ++i) {
this->_swapIndexes[i] = reverseBits(i);
}
}
template <typename T>
uint_fast16_t ArduinoFFT<T>::reverseBits(uint_fast16_t index) {
uint_fast16_t reversed = 0;
for (uint_fast16_t i = 0; i < this->_power; ++i) {
reversed <<= 1;
reversed |= index & 1;
index >>= 1;
}
return reversed;
}
template <typename T> void ArduinoFFT<T>::swapWithIndexes(FFTDirection dir) {
uint_fast16_t j = 0;
// More efficient than samples * 0.75
uint_fast16_t minLen = (samples >> 2) + (samples >> 1);
for (uint_fast16_t i = 1; i < minLen; i++) {
j = this->_swapIndexes[i];
if (i < j) {
swap(&vReal[i], &vReal[j]);
#ifdef COMPLEX_INPUT
swap(&vImag[i], &vImag[j]);
#else
if (dir == FFTDirection::Reverse)
swap(&vImag[i], &vImag[j]);
#endif
}
}
}
#endif
template <typename T>
void ArduinoFFT<T>::swapData(T *vReal, T *vImag, uint_fast16_t samples,
FFTDirection dir) const {
uint_fast16_t j = 0;
for (uint_fast16_t i = 0; i < (samples - 1); i++) {
if (i < j) {
swap(&vReal[i], &vReal[j]);
#ifdef COMPLEX_INPUT
swap(&vImag[i], &vImag[j]);
#else
if (dir == FFTDirection::Reverse)
swap(&vImag[i], &vImag[j]);
#endif
}
uint_fast16_t k = (samples >> 1);
while (k <= j) {
j -= k;
k >>= 1;
}
j += k;
}
}
template <typename T>
void ArduinoFFT<T>::Computation(T *vReal, T *vImag, uint_fast16_t samples,
uint_fast8_t power, FFTDirection dir) const {
#ifdef FFT_SPEED_OVER_PRECISION
T oneOverSamples = this->_oneOverSamples;
if (!this->_oneOverSamples) {
oneOverSamples = 1.0 / samples;
this->_oneOverSamples = oneOverSamples;
}
#endif
uint_fast16_t j = 0;
T c1 = -1.0;
T c2 = 0.0;
uint_fast16_t l2 = 1;
for (uint_fast8_t l = 0; (l < power); l++) {
uint_fast16_t l1 = l2;
l2 <<= 1;
T u1 = 1.0;
T u2 = 0.0;
for (j = 0; j < l1; j++) {
for (uint_fast16_t i = j; i < samples; i += l2) {
uint_fast16_t i1 = i + l1;
T t1 = u1 * vReal[i1] - u2 * vImag[i1];
T t2 = u1 * vImag[i1] + u2 * vReal[i1];
vReal[i1] = vReal[i] - t1;
vImag[i1] = vImag[i] - t2;
vReal[i] += t1;
vImag[i] += t2;
}
T z = ((u1 * c1) - (u2 * c2));
u2 = ((u1 * c2) + (u2 * c1));
u1 = z;
}
#if defined(__AVR__) && defined(USE_AVR_PROGMEM)
c2 = pgm_read_float_near(&(_c2[l]));
c1 = pgm_read_float_near(&(_c1[l]));
#else
T cTemp = 0.5 * c1;
c2 = sqrt_internal(0.5 - cTemp);
c1 = sqrt_internal(0.5 + cTemp);
#endif
if (dir == FFTDirection::Forward) {
c2 = -c2;
}
}
// Scaling for reverse transform
if (dir == FFTDirection::Reverse) {
for (uint_fast16_t i = 0; i < samples; i++) {
#ifdef FFT_SPEED_OVER_PRECISION
vReal[i] *= oneOverSamples;
vImag[i] *= oneOverSamples;
#else
vReal[i] /= samples;
vImag[i] /= samples;
#endif
}
}
}
template class ArduinoFFT<double>;
template class ArduinoFFT<float>;

Wyświetl plik

@ -48,9 +48,9 @@
// This might only work for specific use cases, but is significantly faster.
#ifndef FFT_SQRT_APPROXIMATION
#ifndef sqrt_internal
#define sqrt_internal sqrt
#endif
#ifndef sqrt_internal
#define sqrt_internal sqrt
#endif
#endif
#define FFT_LIB_REV 0x20
@ -110,6 +110,9 @@ private:
T *_precompiledWindowingFactors = nullptr;
uint_fast16_t _samples;
T _samplingFrequency;
#ifdef SWAP_TABLE
uint_fast16_t *_swapIndexes = nullptr;
#endif
T *_vImag;
T *_vReal;
FFTWindow _windowFunction;
@ -120,6 +123,16 @@ private:
void parabola(T x1, T y1, T x2, T y2, T x3, T y3, T *a, T *b, T *c) const;
void swap(T *a, T *b) const;
#ifdef SWAP_TABLE
void generateSwapIndexes(void);
uint_fast16_t reverseBits(uint_fast16_t index);
void swapWithIndexes(FFTDirection dir);
#endif
void swapData(T *vReal, T *vImag, uint_fast16_t samples,
FFTDirection dir) const;
void Computation(T *vReal, T *vImag, uint_fast16_t samples,
uint_fast8_t power, FFTDirection dir) const;
#ifdef FFT_SQRT_APPROXIMATION
float sqrt_internal(float x) const;
double sqrt_internal(double x) const;