diff --git a/dspl/src/fft.c b/dspl/src/fft.c index e67c983..59c2c13 100755 --- a/dspl/src/fft.c +++ b/dspl/src/fft.c @@ -19,6 +19,7 @@ */ #include +#include #include #include "dspl.h" #include "dspl_internal.h" @@ -252,28 +253,27 @@ composite FFT kernel *******************************************************************************/ int DSPL_API fftn_krn(complex_t* t0, complex_t* t1, fft_t* p, int n, int addr) { - int n1, n2, s, k; - s = n; - addr = 0; + int n1, n2, k; + complex_t *pw = p->w+addr; - n2 = 1; - if(s%2 == 0) + n1 = 1; + if(n%2 == 0) { - n2 = 2; + n1 = 2; goto label_size; } label_size: - if(n2 == 1) + if(n1 == 1) return ERROR_FFT_SIZE; - n1 = s / n2; + n2 = n / n1; memcpy(t1, t0, n*sizeof(complex_t)); - transpose_cmplx(t1, n1, n2, t0); + transpose_cmplx(t1, n2, n1, t0); - if(n2 == 2) + if(n1 == 2) { - for(k = 0; k < n1; k++) + for(k = 0; k < n2; k++) { dft2(t0+2*k, t1+2*k); } @@ -281,18 +281,23 @@ label_size: if(n1 > 1) { + for(k =0; k < n; k++) { - RE(t0[k]) = CMRE(t1[k], p->w[addr+k]); - IM(t0[k]) = CMIM(t1[k], p->w[addr+k]); + RE(t0[k]) = CMRE(t1[k], pw[k]); + IM(t0[k]) = CMIM(t1[k], pw[k]); } - transpose_cmplx(t0, n2, n1, t1); - for(k = 0; k < n2; k++) - { - fftn_krn(t1, t0, p, s, addr+n); - } transpose_cmplx(t0, n1, n2, t1); + + + for(k = 0; k < n1; k++) + { + fftn_krn(t1+k*n2, t0+k*n2, p, n2, addr+n); + } + + + transpose_cmplx(t0, n2, n1, t1); } return RES_OK; @@ -330,9 +335,9 @@ label_size: pfft->w = pfft->w ? (complex_t*) realloc(pfft->w, nw*sizeof(complex_t)): (complex_t*) malloc( nw*sizeof(complex_t)); - for(k = 0; k < n2; k++) + for(k = 0; k < n1; k++) { - for(m = 0; m < n1; m++) + for(m = 0; m < n2; m++) { phi = - M_2PI * (double)(k*m) / (double)s; RE(pfft->w[addr]) = cos(phi); diff --git a/test/octave/fft_n1n2.m b/test/octave/fft_n1n2.m index e011bf9..5784164 100644 --- a/test/octave/fft_n1n2.m +++ b/test/octave/fft_n1n2.m @@ -2,16 +2,18 @@ clear all; close all; clc; -N = 6; -N1 = 3; -N2 = 2; +N = 32; +N1 = 2; +N2 = 16; % входной сигнал это вектор столбец размерности [N x 1] x = (0:N-1)'; +%x = [4;6;8;10]; + for n1 = 0 : N1-1 for k2 = 0 : N2-1 - W(n1 + 1, k2 + 1) = exp(-2i * pi * n1 * k2 / N); + W(n1 + 1, k2 + 1) = exp(-2i * pi * n1 * k2 / N); end end @@ -27,7 +29,3 @@ P = H.'; y = [reshape(P,N,1), fft(x)] - - - - diff --git a/test/octave/fft_performance.m b/test/octave/fft_performance.m new file mode 100644 index 0000000..ef07550 --- /dev/null +++ b/test/octave/fft_performance.m @@ -0,0 +1,20 @@ +clear all; close all; clc; + +n = 8388608; +m = 4; + +x0 = (0:n-1)+1i*(0:n-1); + +while(n > 4) + x = x0(1:n); + fprintf('n = %12d ', n); + t0 = tic(); + for k = 1:m + X = fft(x); + end + dt = toc(t0) / m; + fprintf('%12.6f\n', dt*1000); + n = n/2; + m = m*1.5; + +end \ No newline at end of file diff --git a/test/src/fftn_create_test.c b/test/src/fftn_create_test.c index 8838f09..48924f9 100644 --- a/test/src/fftn_create_test.c +++ b/test/src/fftn_create_test.c @@ -3,7 +3,7 @@ #include #include "dspl.h" -#define N 16 +#define N 32 int main() { void* handle; // DSPL handle @@ -26,9 +26,22 @@ int main() IM(x[n]) = 0.0; } + + for(n = 0; n < N*2; n++) + { + printf("W[%3d] = %12.4f%12.4f\n", n, RE(pfft.w[n]), IM(pfft.w[n])); + } + + fftn_krn(x, y, &pfft, N, 0); + for(n = 0; n < N; n++) + { + printf("y[%3d] = %12.4f%12.4f\n", n, RE(y[n]), IM(y[n])); + } + + fft_free(&pfft); // clear FFT object dspl_free(handle); // free dspl handle diff --git a/test/src/fftn_performance.c b/test/src/fftn_performance.c new file mode 100644 index 0000000..c7f9a63 --- /dev/null +++ b/test/src/fftn_performance.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include "dspl.h" +#define N 8388608 +int main() +{ + void* handle; // DSPL handle + handle = dspl_load(); // Load DSPL function + int n, k, m; + + complex_t *x = NULL; + complex_t *y = NULL; // DFT + fft_t pfft; // FFT object + + clock_t t0, t1; + double dt; + + x = (complex_t*)malloc(N*sizeof(complex_t)); + y = (complex_t*)malloc(N*sizeof(complex_t)); + + //clear fft object + memset(&pfft, 0, sizeof(fft_t)); + + for(n = 0; n < N; n++) + { + RE(x[n]) = (double)n; + IM(x[n]) = 0.0; + } + + n = N; + k = 4; + while(n > 4) + { + printf("n= %16d ", n); + fftn_create(&pfft, n); + + t0 = clock(); + for(m = 0; m < k; m++) + fftn_krn(x, y, &pfft, n, 0); + t1 = clock(); + dt = (1000.0 * (double) (t1 - t0)) / CLOCKS_PER_SEC / (double)m; + + printf("%12.6f ms ", dt); + + fft_free(&pfft); // clear FFT object + + + fft_create(&pfft, n); + t0 = clock(); + for(m = 0; m < k; m++) + fft_cmplx(x, n, &pfft, y); + t1 = clock(); + dt = (1000.0 * (double) (t1 - t0)) / CLOCKS_PER_SEC / (double)m; + + printf("%12.6f ms \n", dt); + + fft_free(&pfft); // clear FFT object + + + n/=2; + k = (int)(k*1.5); + } + dspl_free(handle); // free dspl handle + free(x); + free(y); + return 0; +} +