From d5586dfb9e36642aabb373c952c1017dcff04382 Mon Sep 17 00:00:00 2001 From: Dsplib Date: Tue, 10 Nov 2020 19:06:20 +0300 Subject: [PATCH] improved FFT performance and added verificator for radix-2 fft from 4 points to 65536 Changes to be committed: modified: _release/dspl.c modified: _release/dspl.h modified: dspl/dox/doxyfile_en modified: dspl/dox/footer_en.html modified: dspl/dox/header_en.html modified: dspl/src/dspl_internal.h modified: dspl/src/fft.c modified: dspl/src/fft_subkernel.c modified: dspl/src/filter_ap.c modified: include/dspl.h modified: performance/bin/octave/fft_cmplx_performance.m modified: performance/src/fft_cmplx_performance.c new file: verification/bin/octave/fft_radix2_verification.m new file: verification/src/fft_radix2_verification.c --- _release/dspl.c | 6 +- _release/dspl.h | 119 +-- dspl/dox/doxyfile_en | 60 +- dspl/dox/footer_en.html | 7 +- dspl/dox/header_en.html | 2 +- dspl/src/dspl_internal.h | 43 +- dspl/src/fft.c | 204 +++++- dspl/src/fft_subkernel.c | 679 +++++++++++++++++- dspl/src/filter_ap.c | 6 - include/dspl.h | 48 +- .../bin/octave/fft_cmplx_performance.m | 133 ++-- performance/src/fft_cmplx_performance.c | 8 +- .../bin/octave/fft_radix2_verification.m | 44 ++ verification/src/fft_radix2_verification.c | 156 ++++ 14 files changed, 1369 insertions(+), 146 deletions(-) create mode 100644 verification/bin/octave/fft_radix2_verification.m create mode 100644 verification/src/fft_radix2_verification.c diff --git a/_release/dspl.c b/_release/dspl.c index dfbf7b6..253d23b 100644 --- a/_release/dspl.c +++ b/_release/dspl.c @@ -137,8 +137,10 @@ p_matrix_eig_cmplx matrix_eig_cmplx ; p_matrix_eye matrix_eye ; p_matrix_eye_cmplx matrix_eye_cmplx ; p_matrix_mul matrix_mul ; +p_matrix_pinv matrix_pinv ; p_matrix_print matrix_print ; p_matrix_print_cmplx matrix_print_cmplx ; +p_matrix_svd matrix_svd ; p_matrix_transpose matrix_transpose ; p_matrix_transpose_cmplx matrix_transpose_cmplx ; p_matrix_transpose_hermite matrix_transpose_hermite ; @@ -349,8 +351,10 @@ void* dspl_load() LOAD_FUNC(matrix_eye); LOAD_FUNC(matrix_eye_cmplx); LOAD_FUNC(matrix_mul); + LOAD_FUNC(matrix_pinv); LOAD_FUNC(matrix_print); LOAD_FUNC(matrix_print_cmplx); + LOAD_FUNC(matrix_svd); LOAD_FUNC(matrix_transpose); LOAD_FUNC(matrix_transpose_cmplx); LOAD_FUNC(matrix_transpose_hermite); @@ -440,8 +444,6 @@ void* dspl_load() - - void dspl_free(void* handle) { #ifdef WIN_OS diff --git a/_release/dspl.h b/_release/dspl.h index 09c4d97..45a0f0f 100644 --- a/_release/dspl.h +++ b/_release/dspl.h @@ -225,10 +225,19 @@ www.dsplib.org #endif typedef struct { - complex_t* w; - complex_t* t0; - complex_t* t1; - int n; + complex_t* w; + complex_t* t0; + complex_t* t1; + + complex_t w32[ 32]; + complex_t w64[ 64]; + complex_t w128[128]; + complex_t w256[256]; + complex_t w512[512]; + complex_t* w1024; + complex_t* w2048; + complex_t* w4096; + int n; } fft_t; @@ -1216,6 +1225,13 @@ DECLARE_FUNC(int, matrix_mul, double* a COMMA int mb COMMA double* c); /*----------------------------------------------------------------------------*/ +DECLARE_FUNC(int, matrix_pinv, double* a + COMMA int n + COMMA int m + COMMA double* tol + COMMA double* inv + COMMA int* info); +/*----------------------------------------------------------------------------*/ DECLARE_FUNC(int, matrix_print, double* a COMMA int n COMMA int m @@ -1228,6 +1244,14 @@ DECLARE_FUNC(int, matrix_print_cmplx, complex_t* a COMMA const char* name COMMA const char* format); /*----------------------------------------------------------------------------*/ +DECLARE_FUNC(int, matrix_svd, double* a + COMMA int n + COMMA int m + COMMA double* u + COMMA double* s + COMMA double* vt + COMMA int* info); +/*----------------------------------------------------------------------------*/ DECLARE_FUNC(int, matrix_transpose, double* a COMMA int n COMMA int m @@ -1563,49 +1587,6 @@ DECLARE_FUNC(int, xcorr_cmplx, complex_t* x #endif - - - - -#ifdef DOXYGEN_ENGLISH -/*! **************************************************************************** -\ingroup SYS_LOADING_GROUP -\fn void dspl_free(void* handle) -\brief Cleans up the previously linked DSPL-2.0 dynamic library. - -This cross-platform function clears the library `libdspl.dll` in -Windows system and from the library `libdspl.so` on the Linux system. -After cleaning the library, all functions will become unavailable. - -\param [in] handle -Handle of the previously linked DSPL-2.0 library. \n -This pointer can be `NULL`, in this case no action -are being produced. - -\author Bakhurin Sergey. www.dsplib.org -***************************************************************************** */ -#endif -#ifdef DOXYGEN_RUSSIAN -/*! **************************************************************************** -\ingroup SYS_LOADING_GROUP -\fn void dspl_free(void* handle) -\brief Очищает связанную ранее динамическую библиотеку DSPL-2.0. - -Данная кроссплатформенная функция производит очистку библиотеки `libdspl.dll` в -системе Windows и с библиотеки `libdspl.so` в системе Linux. -После очистки библиотеки все функции станут недоступны. - -\param[in] handle -Хэндл прилинкованной ранее библиотеки DSPL-2.0. \n -Данный указатель может быть `NULL`, в этом случае никакие действия не -производятся.\n\n - -\author Бахурин Сергей. www.dsplib.org -**************************************************************************** */ -#endif -void* dspl_load(); - - #ifdef DOXYGEN_ENGLISH /*! **************************************************************************** \ingroup SYS_LOADING_GROUP @@ -1694,7 +1675,7 @@ int main(int argc, char* argv[]) void* hdspl; // DSPL хэндл hdspl = dspl_load(); // Динамическая линковка - // Проверяем указатель. Если `NULLL`, то линковка прошла неудачно + // Проверяем указатель. Если `NULL`, то линковка прошла неудачно if(!hdspl) { printf("libdspl loading error!\n"); @@ -1714,6 +1695,48 @@ int main(int argc, char* argv[]) \author Бахурин Сергей. www.dsplib.org ***************************************************************************** */ #endif +void* dspl_load(); + + + + + +#ifdef DOXYGEN_ENGLISH +/*! **************************************************************************** +\ingroup SYS_LOADING_GROUP +\fn void dspl_free(void* handle) +\brief Cleans up the previously linked DSPL-2.0 dynamic library. + +This cross-platform function clears the library `libdspl.dll` in +Windows system and from the library `libdspl.so` on the Linux system. +After cleaning the library, all functions will become unavailable. + +\param [in] handle +Handle of the previously linked DSPL-2.0 library. \n +This pointer can be `NULL`, in this case no action +are being produced. + +\author Bakhurin Sergey. www.dsplib.org +***************************************************************************** */ +#endif +#ifdef DOXYGEN_RUSSIAN +/*! **************************************************************************** +\ingroup SYS_LOADING_GROUP +\fn void dspl_free(void* handle) +\brief Очищает связанную ранее динамическую библиотеку DSPL-2.0. + +Данная кроссплатформенная функция производит очистку библиотеки `libdspl.dll` в +системе Windows и с библиотеки `libdspl.so` в системе Linux. +После очистки библиотеки все функции станут недоступны. + +\param[in] handle +Хэндл прилинкованной ранее библиотеки DSPL-2.0. \n +Данный указатель может быть `NULL`, в этом случае никакие действия не +производятся.\n\n + +\author Бахурин Сергей. www.dsplib.org +**************************************************************************** */ +#endif void dspl_free(void* handle); diff --git a/dspl/dox/doxyfile_en b/dspl/dox/doxyfile_en index 833bae0..ab367a9 100644 --- a/dspl/dox/doxyfile_en +++ b/dspl/dox/doxyfile_en @@ -1,4 +1,4 @@ -# Doxyfile 1.8.18 +# Doxyfile 1.8.20 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. @@ -227,6 +227,14 @@ QT_AUTOBRIEF = NO MULTILINE_CPP_IS_BRIEF = NO +# By default Python docstrings are displayed as preformatted text and doxygen's +# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the +# doxygen's special commands can be used and the contents of the docstring +# documentation blocks is shown as doxygen documentation. +# The default value is: YES. + +PYTHON_DOCSTRING = YES + # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. @@ -449,6 +457,19 @@ TYPEDEF_HIDES_STRUCT = NO LOOKUP_CACHE_SIZE = 0 +# The NUM_PROC_THREADS specifies the number threads doxygen is allowed to use +# during processing. When set to 0 doxygen will based this on the number of +# cores available in the system. You can set it explicitly to a value larger +# than 0 to get more control over the balance between CPU load and processing +# speed. At this moment only the input processing can be done using multiple +# threads. Since this is still an experimental feature the default is set to 1, +# which efficively disables parallel processing. Please report any issues you +# encounter. Generating dot graphs in parallel is controlled by the +# DOT_NUM_THREADS setting. +# Minimum value: 0, maximum value: 32, default value: 1. + +NUM_PROC_THREADS = 1 + #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- @@ -553,7 +574,7 @@ INTERNAL_DOCS = NO # names in lower-case letters. If set to YES, upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows -# (including Cygwin) ands Mac users are advised to set this option to NO. +# (including Cygwin) and Mac users are advised to set this option to NO. # The default value is: system dependent. CASE_SENSE_NAMES = NO @@ -1132,10 +1153,13 @@ CLANG_ASSISTED_PARSING = NO CLANG_OPTIONS = # If clang assisted parsing is enabled you can provide the clang parser with the -# path to the compilation database (see: -# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) used when the files -# were built. This is equivalent to specifying the "-p" option to a clang tool, -# such as clang-check. These options will then be passed to the parser. +# path to the directory containing a file called compile_commands.json. This +# file is the compilation database (see: +# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) containing the +# options used when the source files were built. This is equivalent to +# specifying the "-p" option to a clang tool, such as clang-check. These options +# will then be passed to the parser. Any options specified with CLANG_OPTIONS +# will be added as well. # Note: The availability of this option depends on whether or not doxygen was # generated with the -Duse_libclang=ON option for CMake. @@ -1405,7 +1429,7 @@ CHM_FILE = HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated -# (YES) or that it should be included in the master .chm file (NO). +# (YES) or that it should be included in the main .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. @@ -1571,8 +1595,8 @@ EXT_LINKS_IN_WINDOW = NO # tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see # https://inkscape.org) to generate formulas as SVG images instead of PNGs for # the HTML output. These images will generally look nicer at scaled resolutions. -# Possible values are: png The default and svg Looks nicer but requires the -# pdf2svg tool. +# Possible values are: png (the default) and svg (looks nicer but requires the +# pdf2svg or inkscape tool). # The default value is: png. # This tag requires that the tag GENERATE_HTML is set to YES. @@ -1613,7 +1637,7 @@ FORMULA_MACROFILE = # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. -USE_MATHJAX = NO +USE_MATHJAX = YES # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: @@ -1623,7 +1647,7 @@ USE_MATHJAX = NO # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. -MATHJAX_FORMAT = SVG +MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory @@ -1636,7 +1660,7 @@ MATHJAX_FORMAT = SVG # The default value is: https://cdn.jsdelivr.net/npm/mathjax@2. # This tag requires that the tag USE_MATHJAX is set to YES. -MATHJAX_RELPATH = http://dsplib.org/mathjax/latest +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example @@ -1872,9 +1896,11 @@ LATEX_EXTRA_FILES = PDF_HYPERLINKS = YES -# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate -# the PDF file directly from the LaTeX files. Set this option to YES, to get a -# higher quality PDF documentation. +# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as +# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX +# files. Set this option to YES, to get a higher quality PDF documentation. +# +# See also section LATEX_CMD_NAME for selecting the engine. # The default value is: YES. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -2113,6 +2139,10 @@ DOCBOOK_PROGRAMLISTING = NO GENERATE_AUTOGEN_DEF = NO +#--------------------------------------------------------------------------- +# Configuration options related to Sqlite3 output +#--------------------------------------------------------------------------- + #--------------------------------------------------------------------------- # Configuration options related to the Perl module output #--------------------------------------------------------------------------- diff --git a/dspl/dox/footer_en.html b/dspl/dox/footer_en.html index e9a0f17..f421ee1 100644 --- a/dspl/dox/footer_en.html +++ b/dspl/dox/footer_en.html @@ -27,7 +27,12 @@ $generatedby   Forum - +
+ +

diff --git a/dspl/dox/header_en.html b/dspl/dox/header_en.html index e007c29..a5846fe 100644 --- a/dspl/dox/header_en.html +++ b/dspl/dox/header_en.html @@ -102,7 +102,7 @@ screen.colorDepth:screen.pixelDepth))+";u"+escape(document.URL)+
  • DSPL–2.0
  • Forum
  • diff --git a/dspl/src/dspl_internal.h b/dspl/src/dspl_internal.h index 3787d64..b8fd1e1 100644 --- a/dspl/src/dspl_internal.h +++ b/dspl/src/dspl_internal.h @@ -30,16 +30,22 @@ /* sqrt(2^31) */ #define FFT_COMPOSITE_MAX 46340 + +/* FFT kernel */ int fft_krn(complex_t* t0, complex_t* t1, fft_t* p, int n, int addr); - +/* DFT 2 points */ void dft2 (complex_t *x, complex_t* y); +/* DFT 3 points */ #define DFT3_W 0.866025403784439 void dft3 (complex_t *x, complex_t* y); +/* DFT 4 points */ void dft4 (complex_t *x, complex_t* y); + +/* DFT 5 points */ #define DFT5_W1 -1.250000000000000 #define DFT5_W2 0.559016994374947 #define DFT5_W3 1.538841768587630 @@ -47,6 +53,8 @@ void dft4 (complex_t *x, complex_t* y); #define DFT5_W5 0.363271264002680 void dft5 (complex_t *x, complex_t* y); + +/* DFT 7 points */ #define DFT7_W1 -1.166666666666666518636930 #define DFT7_W2 0.790156468525400224045541 #define DFT7_W3 0.055854267289647742400494 @@ -57,18 +65,49 @@ void dft5 (complex_t *x, complex_t* y); #define DFT7_W8 -0.874842290961656665615465 void dft7 (complex_t *x, complex_t* y); + +/* DFT 8 points */ #define DFT8_W 0.707106781186548 void dft8 (complex_t *x, complex_t* y); void transpose2x4(complex_t *x, complex_t* y); void transpose4x2(complex_t *x, complex_t* y); - +/* DFT 16 points */ #define DFT16_W1 0.923879532511287 #define DFT16_W2 0.382683432365090 #define DFT16_W3 0.707106781186548 void dft16 (complex_t *x, complex_t* y); void transpose4x4(complex_t *x, complex_t* y); +/* DFT 32 points */ +void dft32(complex_t *x, complex_t* y, complex_t* w); +void transpose8x4(complex_t *x, complex_t* y); +void transpose4x8(complex_t *x, complex_t* y); + +/* DFT 64 points */ +void dft64(complex_t *x, complex_t* y, complex_t* w); +void transpose8x8(complex_t *x, complex_t* y); + +/* DFT 128 points */ +void dft128(complex_t *x, complex_t* y, complex_t* w); + +/* DFT 256 points */ +void dft256(complex_t *x, complex_t* y, complex_t* w); +void transpose16x16(complex_t* x, complex_t* y); + +/* DFT 512 points */ +void dft512(complex_t *x, complex_t* y, complex_t* w, complex_t* w32); + +/* DFT 1024 points */ +void dft1024(complex_t *x, complex_t* y, complex_t* w, complex_t* w32); + + +/* DFT 2048 points */ +void dft2048(complex_t *x, complex_t* y, complex_t* w, + complex_t* w32, complex_t* w64); + +/* DFT 4096 points */ +void dft4096(complex_t *x, complex_t* y, complex_t* w, complex_t* w256); /* Window functions */ int win_bartlett (double *w, int n, int win_type); diff --git a/dspl/src/fft.c b/dspl/src/fft.c index 2ee6bd7..c3b1ec5 100644 --- a/dspl/src/fft.c +++ b/dspl/src/fft.c @@ -675,13 +675,21 @@ int fft_krn(complex_t* t0, complex_t* t1, fft_t* p, int n, int addr) complex_t tmp; n1 = 1; - if(n%16== 0) { n1 = 16; goto label_size; } - if(n%7 == 0) { n1 = 7; goto label_size; } - if(n%8 == 0) { n1 = 8; goto label_size; } - if(n%5 == 0) { n1 = 5; goto label_size; } - if(n%4 == 0) { n1 = 4; goto label_size; } - if(n%3 == 0) { n1 = 3; goto label_size; } - if(n%2 == 0) { n1 = 2; goto label_size; } + if(n % 4096 == 0) { n1 = 4096; goto label_size; } + if(n % 2048 == 0) { n1 = 2048; goto label_size; } + if(n % 1024 == 0) { n1 = 1024; goto label_size; } + if(n % 512 == 0) { n1 = 512; goto label_size; } + if(n % 256 == 0) { n1 = 256; goto label_size; } + if(n % 128 == 0) { n1 = 128; goto label_size; } + if(n % 64 == 0) { n1 = 64; goto label_size; } + if(n % 32 == 0) { n1 = 32; goto label_size; } + if(n % 16 == 0) { n1 = 16; goto label_size; } + if(n % 7 == 0) { n1 = 7; goto label_size; } + if(n % 8 == 0) { n1 = 8; goto label_size; } + if(n % 5 == 0) { n1 = 5; goto label_size; } + if(n % 4 == 0) { n1 = 4; goto label_size; } + if(n % 3 == 0) { n1 = 3; goto label_size; } + if(n % 2 == 0) { n1 = 2; goto label_size; } label_size: if(n1 == 1) @@ -709,6 +717,38 @@ label_size: matrix_transpose_cmplx(t1, n2, n1, t0); } + if(n1 == 4096) + for(k = 0; k < n2; k++) + dft4096(t0+4096*k, t1+4096*k, p->w4096, p->w256); + + if(n1 == 2048) + for(k = 0; k < n2; k++) + dft2048(t0+2048*k, t1+2048*k, p->w2048, p->w32, p->w64); + + if(n1 == 1024) + for(k = 0; k < n2; k++) + dft1024(t0+1024*k, t1+1024*k, p->w1024, p->w32); + + if(n1 == 512) + for(k = 0; k < n2; k++) + dft512(t0+512*k, t1+512*k, p->w512, p->w32); + + if(n1 == 256) + for(k = 0; k < n2; k++) + dft256(t0+256*k, t1+256*k, p->w256); + + if(n1 == 128) + for(k = 0; k < n2; k++) + dft128(t0+128*k, t1+128*k, p->w128); + + if(n1 == 64) + for(k = 0; k < n2; k++) + dft64(t0+64*k, t1+64*k, p->w64); + + if(n1 == 32) + for(k = 0; k < n2; k++) + dft32(t0+32*k, t1+32*k, p->w32); + if(n1 == 16) for(k = 0; k < n2; k++) dft16(t0+16*k, t1+16*k); @@ -752,6 +792,7 @@ label_size: { fft_krn(t1+k*n2, t0+k*n2, p, n2, addr+n); } + matrix_transpose_cmplx(t0, n2, n1, t1); } } @@ -903,13 +944,21 @@ int DSPL_API fft_create(fft_t* pfft, int n) while(s > 1) { n2 = 1; - if(s%16== 0) { n2 = 16; goto label_size; } - if(s%7 == 0) { n2 = 7; goto label_size; } - if(s%8 == 0) { n2 = 8; goto label_size; } - if(s%5 == 0) { n2 = 5; goto label_size; } - if(s%4 == 0) { n2 = 4; goto label_size; } - if(s%3 == 0) { n2 = 3; goto label_size; } - if(s%2 == 0) { n2 = 2; goto label_size; } + if(s%4096 == 0) { n2 = 4096; goto label_size; } + if(s%2048 == 0) { n2 = 2048; goto label_size; } + if(s%1024 == 0) { n2 = 1024; goto label_size; } + if(s%512 == 0) { n2 = 512; goto label_size; } + if(s%256 == 0) { n2 = 256; goto label_size; } + if(s%128 == 0) { n2 = 128; goto label_size; } + if(s% 64 == 0) { n2 = 64; goto label_size; } + if(s% 32 == 0) { n2 = 32; goto label_size; } + if(s% 16 == 0) { n2 = 16; goto label_size; } + if(s% 7 == 0) { n2 = 7; goto label_size; } + if(s% 8 == 0) { n2 = 8; goto label_size; } + if(s% 5 == 0) { n2 = 5; goto label_size; } + if(s% 4 == 0) { n2 = 4; goto label_size; } + if(s% 3 == 0) { n2 = 3; goto label_size; } + if(s% 2 == 0) { n2 = 2; goto label_size; } label_size: @@ -962,6 +1011,123 @@ label_size: pfft->t1 = pfft->t1 ? (complex_t*) realloc(pfft->t1, n*sizeof(complex_t)): (complex_t*) malloc( n*sizeof(complex_t)); pfft->n = n; + + /* w32 fill */ + addr = 0; + for(k = 0; k < 4; k++) + { + for(m = 0; m < 8; m++) + { + phi = - M_2PI * (double)(k*m) / 32.0; + RE(pfft->w32[addr]) = cos(phi); + IM(pfft->w32[addr]) = sin(phi); + addr++; + } + } + + + /* w64 fill */ + addr = 0; + for(k = 0; k < 8; k++) + { + for(m = 0; m < 8; m++) + { + phi = - M_2PI * (double)(k*m) / 64.0; + RE(pfft->w64[addr]) = cos(phi); + IM(pfft->w64[addr]) = sin(phi); + addr++; + } + } + + /* w128 fill */ + addr = 0; + for(k = 0; k < 8; k++) + { + for(m = 0; m < 16; m++) + { + phi = - M_2PI * (double)(k*m) / 128.0; + RE(pfft->w128[addr]) = cos(phi); + IM(pfft->w128[addr]) = sin(phi); + addr++; + } + } + + /* w256 fill */ + addr = 0; + for(k = 0; k < 16; k++) + { + for(m = 0; m < 16; m++) + { + phi = - M_2PI * (double)(k*m) / 256.0; + RE(pfft->w256[addr]) = cos(phi); + IM(pfft->w256[addr]) = sin(phi); + addr++; + } + } + + /* w512 fill */ + addr = 0; + for(k = 0; k < 16; k++) + { + for(m = 0; m < 32; m++) + { + phi = - M_2PI * (double)(k*m) / 512.0; + RE(pfft->w512[addr]) = cos(phi); + IM(pfft->w512[addr]) = sin(phi); + addr++; + } + } + + /* w1024 fill */ + if(pfft->w1024 == NULL) + { + pfft->w1024 = (complex_t*) malloc(1024 * sizeof(complex_t)); + addr = 0; + for(k = 0; k < 32; k++) + { + for(m = 0; m < 32; m++) + { + phi = - M_2PI * (double)(k*m) / 1024.0; + RE(pfft->w1024[addr]) = cos(phi); + IM(pfft->w1024[addr]) = sin(phi); + addr++; + } + } + } + + /* w2048 fill */ + if(pfft->w2048 == NULL) + { + pfft->w2048= (complex_t*) malloc(2048 * sizeof(complex_t)); + addr = 0; + for(k = 0; k < 32; k++) + { + for(m = 0; m < 64; m++) + { + phi = - M_2PI * (double)(k*m) / 2048.0; + RE(pfft->w2048[addr]) = cos(phi); + IM(pfft->w2048[addr]) = sin(phi); + addr++; + } + } + } + + /* w4096 fill */ + if(pfft->w4096 == NULL) + { + pfft->w4096= (complex_t*) malloc(4096 * sizeof(complex_t)); + addr = 0; + for(k = 0; k < 16; k++) + { + for(m = 0; m < 256; m++) + { + phi = - M_2PI * (double)(k*m) / 4096.0; + RE(pfft->w4096[addr]) = cos(phi); + IM(pfft->w4096[addr]) = sin(phi); + addr++; + } + } + } return RES_OK; error_proc: @@ -1016,6 +1182,16 @@ void DSPL_API fft_free(fft_t *pfft) free(pfft->t0); if(pfft->t1) free(pfft->t1); + + if(pfft->w1024) + free(pfft->w1024); + + if(pfft->w2048) + free(pfft->w2048); + + if(pfft->w4096) + free(pfft->w4096); + memset(pfft, 0, sizeof(fft_t)); } diff --git a/dspl/src/fft_subkernel.c b/dspl/src/fft_subkernel.c index 3d59f62..c51cc64 100644 --- a/dspl/src/fft_subkernel.c +++ b/dspl/src/fft_subkernel.c @@ -26,10 +26,6 @@ - - - - /******************************************************************************* 2 points DFT *******************************************************************************/ @@ -319,7 +315,6 @@ void dft7 (complex_t *x, complex_t* y) RE(y[6]) = RE(sum[20]) - RE(sum[26]); IM(y[6]) = IM(sum[20]) - IM(sum[26]); - } @@ -455,6 +450,263 @@ void dft16(complex_t *x, complex_t* y) } +/******************************************************************************* +32 points DFT (Winograd algorithm) +*******************************************************************************/ +void dft32(complex_t *x, complex_t* y, complex_t* w) +{ + complex_t t0[32]; + complex_t t1[32]; + + int i; + + transpose4x8(x, t0); + dft8(t0, t1); + dft8(t0+8, t1+8); + dft8(t0+16, t1+16); + dft8(t0+24, t1+24); + + + for(i = 0; i < 32; i++) + { + RE(t0[i]) = CMRE(t1[i], w[i]); + IM(t0[i]) = CMIM(t1[i], w[i]); + } + + transpose8x4(t0, t1); + + for(i = 0; i < 8; i++) + dft4(t1 + i*4, t0 + i*4); + + transpose4x8(t0, y); +} + + +/******************************************************************************* +64 points DFT (Winograd algorithm) +*******************************************************************************/ +void dft64(complex_t *x, complex_t* y, complex_t* w) +{ + complex_t t0[64]; + complex_t t1[64]; + + int i; + + transpose8x8(x, t0); + + for(i = 0; i < 8; i++) + dft8(t0 + i*8, t1 + i*8); + + for(i = 0; i < 64; i++) + { + RE(t0[i]) = CMRE(t1[i], w[i]); + IM(t0[i]) = CMIM(t1[i], w[i]); + } + + transpose8x8(t0, t1); + + for(i = 0; i < 8; i++) + dft8(t1 + i*8, t0 + i*8); + transpose8x8(t0, y); +} + + + +/******************************************************************************* +256 points DFT (Winograd algorithm) +*******************************************************************************/ +void dft128(complex_t *x, complex_t* y, complex_t* w) +{ + complex_t t0[128]; + complex_t t1[128]; + + int i; + + matrix_transpose_cmplx(x,8,16,t0); + + for(i = 0; i < 8; i++) + dft16(t0 + i*16, t1 + i*16); + + for(i = 0; i < 128; i++) + { + RE(t0[i]) = CMRE(t1[i], w[i]); + IM(t0[i]) = CMIM(t1[i], w[i]); + } + + matrix_transpose_cmplx(t0, 16, 8, t1); + + for(i = 0; i < 16; i++) + dft8(t1 + i*8, t0 + i*8); + + matrix_transpose_cmplx(t0, 8, 16, y); +} + + + +/******************************************************************************* +256 points DFT (Winograd algorithm) +*******************************************************************************/ +void dft256(complex_t *x, complex_t* y, complex_t* w) +{ + complex_t t0[256]; + complex_t t1[256]; + + int i; + + transpose16x16(x, t0); + + for(i = 0; i < 16; i++) + dft16(t0 + i*16, t1 + i*16); + + for(i = 0; i < 256; i++) + { + RE(t0[i]) = CMRE(t1[i], w[i]); + IM(t0[i]) = CMIM(t1[i], w[i]); + } + + transpose16x16(t0, t1); + + for(i = 0; i < 16; i++) + dft16(t1 + i*16, t0 + i*16); + + transpose16x16(t0, y); +} + + +/******************************************************************************* +512 points DFT (Winograd algorithm) +*******************************************************************************/ +void dft512(complex_t *x, complex_t* y, complex_t* w, complex_t* w32) +{ + complex_t t0[512]; + complex_t t1[512]; + + int i; + + matrix_transpose_cmplx(x,16,32,t0); + + for(i = 0; i < 16; i++) + dft32(t0 + i*32, t1 + i*32, w32); + + for(i = 0; i < 512; i++) + { + RE(t0[i]) = CMRE(t1[i], w[i]); + IM(t0[i]) = CMIM(t1[i], w[i]); + } + + matrix_transpose_cmplx(t0, 32, 16, t1); + + for(i = 0; i < 32; i++) + dft16(t1 + i*16, t0 + i*16); + + matrix_transpose_cmplx(t0, 16, 32, y); +} + + +/******************************************************************************* +1024 points DFT (Winograd algorithm) +*******************************************************************************/ +void dft1024(complex_t *x, complex_t* y, complex_t* w, complex_t* w32) +{ + complex_t t0[1024]; + complex_t t1[1024]; + + int i; + + matrix_transpose_cmplx(x,32,32,t0); + + for(i = 0; i < 32; i++) + dft32(t0 + i*32, t1 + i*32, w32); + + for(i = 0; i < 1024; i++) + { + RE(t0[i]) = CMRE(t1[i], w[i]); + IM(t0[i]) = CMIM(t1[i], w[i]); + } + + matrix_transpose_cmplx(t0, 32, 32, t1); + + for(i = 0; i < 32; i++) + dft32(t1 + i*32, t0 + i*32, w32); + + matrix_transpose_cmplx(t0, 32, 32, y); +} + +/******************************************************************************* +2048 points DFT (Winograd algorithm) +*******************************************************************************/ +void dft2048(complex_t *x, complex_t* y, complex_t* w, + complex_t* w32, complex_t* w64) +{ + complex_t *t0 = NULL; + complex_t *t1 = NULL; + + int i; + + t0 = (complex_t*)malloc(2048*sizeof(complex_t)); + t1 = (complex_t*)malloc(2048*sizeof(complex_t)); + + matrix_transpose_cmplx(x,32,64,t0); + + for(i = 0; i < 32; i++) + dft64(t0 + i*64, t1 + i*64, w64); + + for(i = 0; i < 2048; i++) + { + RE(t0[i]) = CMRE(t1[i], w[i]); + IM(t0[i]) = CMIM(t1[i], w[i]); + } + + matrix_transpose_cmplx(t0, 64, 32, t1); + + for(i = 0; i < 64; i++) + dft32(t1 + i*32, t0 + i*32, w32); + + matrix_transpose_cmplx(t0, 32, 64, y); + + free(t0); + free(t1); +} + + + +/******************************************************************************* +4096 points DFT (Winograd algorithm) +*******************************************************************************/ +void dft4096(complex_t *x, complex_t* y, complex_t* w, complex_t* w256) +{ + complex_t *t0 = NULL; + complex_t *t1 = NULL; + + int i; + + t0 = (complex_t*)malloc(4096*sizeof(complex_t)); + t1 = (complex_t*)malloc(4096*sizeof(complex_t)); + + matrix_transpose_cmplx(x,16,256,t0); + + for(i = 0; i < 16; i++) + dft256(t0 + i*256, t1 + i*256, w256); + + for(i = 0; i < 4096; i++) + { + RE(t0[i]) = CMRE(t1[i], w[i]); + IM(t0[i]) = CMIM(t1[i], w[i]); + } + + matrix_transpose_cmplx(t0, 256, 16, t1); + + for(i = 0; i < 256; i++) + dft16(t1 + i*16, t0 + i*16); + + matrix_transpose_cmplx(t0, 16, 256, y); + + free(t0); + free(t1); +} + + + /******************************************************************************* 4 x 2 matrix transpose *******************************************************************************/ @@ -512,3 +764,420 @@ void transpose4x4(complex_t *x, complex_t* y) } +/******************************************************************************* +8 x 4 matrix transpose +*******************************************************************************/ +void transpose8x4(complex_t *x, complex_t* y) +{ + RE(y[ 0]) = RE(x[ 0]); IM(y[ 0]) = IM(x[ 0]); + RE(y[ 1]) = RE(x[ 8]); IM(y[ 1]) = IM(x[ 8]); + RE(y[ 2]) = RE(x[ 16]); IM(y[ 2]) = IM(x[ 16]); + RE(y[ 3]) = RE(x[ 24]); IM(y[ 3]) = IM(x[ 24]); + RE(y[ 4]) = RE(x[ 1]); IM(y[ 4]) = IM(x[ 1]); + RE(y[ 5]) = RE(x[ 9]); IM(y[ 5]) = IM(x[ 9]); + RE(y[ 6]) = RE(x[ 17]); IM(y[ 6]) = IM(x[ 17]); + RE(y[ 7]) = RE(x[ 25]); IM(y[ 7]) = IM(x[ 25]); + RE(y[ 8]) = RE(x[ 2]); IM(y[ 8]) = IM(x[ 2]); + RE(y[ 9]) = RE(x[ 10]); IM(y[ 9]) = IM(x[ 10]); + RE(y[10]) = RE(x[ 18]); IM(y[10]) = IM(x[ 18]); + RE(y[11]) = RE(x[ 26]); IM(y[11]) = IM(x[ 26]); + RE(y[12]) = RE(x[ 3]); IM(y[12]) = IM(x[ 3]); + RE(y[13]) = RE(x[ 11]); IM(y[13]) = IM(x[ 11]); + RE(y[14]) = RE(x[ 19]); IM(y[14]) = IM(x[ 19]); + RE(y[15]) = RE(x[ 27]); IM(y[15]) = IM(x[ 27]); + RE(y[16]) = RE(x[ 4]); IM(y[16]) = IM(x[ 4]); + RE(y[17]) = RE(x[ 12]); IM(y[17]) = IM(x[ 12]); + RE(y[18]) = RE(x[ 20]); IM(y[18]) = IM(x[ 20]); + RE(y[19]) = RE(x[ 28]); IM(y[19]) = IM(x[ 28]); + RE(y[20]) = RE(x[ 5]); IM(y[20]) = IM(x[ 5]); + RE(y[21]) = RE(x[ 13]); IM(y[21]) = IM(x[ 13]); + RE(y[22]) = RE(x[ 21]); IM(y[22]) = IM(x[ 21]); + RE(y[23]) = RE(x[ 29]); IM(y[23]) = IM(x[ 29]); + RE(y[24]) = RE(x[ 6]); IM(y[24]) = IM(x[ 6]); + RE(y[25]) = RE(x[ 14]); IM(y[25]) = IM(x[ 14]); + RE(y[26]) = RE(x[ 22]); IM(y[26]) = IM(x[ 22]); + RE(y[27]) = RE(x[ 30]); IM(y[27]) = IM(x[ 30]); + RE(y[28]) = RE(x[ 7]); IM(y[28]) = IM(x[ 7]); + RE(y[29]) = RE(x[ 15]); IM(y[29]) = IM(x[ 15]); + RE(y[30]) = RE(x[ 23]); IM(y[30]) = IM(x[ 23]); + RE(y[31]) = RE(x[ 31]); IM(y[31]) = IM(x[ 31]); +} + + +/******************************************************************************* +4 x 8 matrix transpose +*******************************************************************************/ +void transpose4x8(complex_t *x, complex_t* y) +{ + RE(y[ 0]) = RE(x[ 0]); IM(y[ 0]) = IM(x[ 0]); + RE(y[ 1]) = RE(x[ 4]); IM(y[ 1]) = IM(x[ 4]); + RE(y[ 2]) = RE(x[ 8]); IM(y[ 2]) = IM(x[ 8]); + RE(y[ 3]) = RE(x[ 12]); IM(y[ 3]) = IM(x[ 12]); + RE(y[ 4]) = RE(x[ 16]); IM(y[ 4]) = IM(x[ 16]); + RE(y[ 5]) = RE(x[ 20]); IM(y[ 5]) = IM(x[ 20]); + RE(y[ 6]) = RE(x[ 24]); IM(y[ 6]) = IM(x[ 24]); + RE(y[ 7]) = RE(x[ 28]); IM(y[ 7]) = IM(x[ 28]); + RE(y[ 8]) = RE(x[ 1]); IM(y[ 8]) = IM(x[ 1]); + RE(y[ 9]) = RE(x[ 5]); IM(y[ 9]) = IM(x[ 5]); + RE(y[10]) = RE(x[ 9]); IM(y[10]) = IM(x[ 9]); + RE(y[11]) = RE(x[ 13]); IM(y[11]) = IM(x[ 13]); + RE(y[12]) = RE(x[ 17]); IM(y[12]) = IM(x[ 17]); + RE(y[13]) = RE(x[ 21]); IM(y[13]) = IM(x[ 21]); + RE(y[14]) = RE(x[ 25]); IM(y[14]) = IM(x[ 25]); + RE(y[15]) = RE(x[ 29]); IM(y[15]) = IM(x[ 29]); + RE(y[16]) = RE(x[ 2]); IM(y[16]) = IM(x[ 2]); + RE(y[17]) = RE(x[ 6]); IM(y[17]) = IM(x[ 6]); + RE(y[18]) = RE(x[ 10]); IM(y[18]) = IM(x[ 10]); + RE(y[19]) = RE(x[ 14]); IM(y[19]) = IM(x[ 14]); + RE(y[20]) = RE(x[ 18]); IM(y[20]) = IM(x[ 18]); + RE(y[21]) = RE(x[ 22]); IM(y[21]) = IM(x[ 22]); + RE(y[22]) = RE(x[ 26]); IM(y[22]) = IM(x[ 26]); + RE(y[23]) = RE(x[ 30]); IM(y[23]) = IM(x[ 30]); + RE(y[24]) = RE(x[ 3]); IM(y[24]) = IM(x[ 3]); + RE(y[25]) = RE(x[ 7]); IM(y[25]) = IM(x[ 7]); + RE(y[26]) = RE(x[ 11]); IM(y[26]) = IM(x[ 11]); + RE(y[27]) = RE(x[ 15]); IM(y[27]) = IM(x[ 15]); + RE(y[28]) = RE(x[ 19]); IM(y[28]) = IM(x[ 19]); + RE(y[29]) = RE(x[ 23]); IM(y[29]) = IM(x[ 23]); + RE(y[30]) = RE(x[ 27]); IM(y[30]) = IM(x[ 27]); + RE(y[31]) = RE(x[ 31]); IM(y[31]) = IM(x[ 31]); +} + + +/******************************************************************************* +4 x 8 matrix transpose +*******************************************************************************/ +void transpose8x8(complex_t *x, complex_t* y) +{ + RE(y[ 0]) = RE(x[ 0]); IM(y[ 0]) = IM(x[ 0]); + RE(y[ 1]) = RE(x[ 8]); IM(y[ 1]) = IM(x[ 8]); + RE(y[ 2]) = RE(x[16]); IM(y[ 2]) = IM(x[16]); + RE(y[ 3]) = RE(x[24]); IM(y[ 3]) = IM(x[24]); + RE(y[ 4]) = RE(x[32]); IM(y[ 4]) = IM(x[32]); + RE(y[ 5]) = RE(x[40]); IM(y[ 5]) = IM(x[40]); + RE(y[ 6]) = RE(x[48]); IM(y[ 6]) = IM(x[48]); + RE(y[ 7]) = RE(x[56]); IM(y[ 7]) = IM(x[56]); + RE(y[ 8]) = RE(x[ 1]); IM(y[ 8]) = IM(x[ 1]); + RE(y[ 9]) = RE(x[ 9]); IM(y[ 9]) = IM(x[ 9]); + RE(y[ 10]) = RE(x[17]); IM(y[ 10]) = IM(x[17]); + RE(y[ 11]) = RE(x[25]); IM(y[ 11]) = IM(x[25]); + RE(y[ 12]) = RE(x[33]); IM(y[ 12]) = IM(x[33]); + RE(y[ 13]) = RE(x[41]); IM(y[ 13]) = IM(x[41]); + RE(y[ 14]) = RE(x[49]); IM(y[ 14]) = IM(x[49]); + RE(y[ 15]) = RE(x[57]); IM(y[ 15]) = IM(x[57]); + RE(y[ 16]) = RE(x[ 2]); IM(y[ 16]) = IM(x[ 2]); + RE(y[ 17]) = RE(x[10]); IM(y[ 17]) = IM(x[10]); + RE(y[ 18]) = RE(x[18]); IM(y[ 18]) = IM(x[18]); + RE(y[ 19]) = RE(x[26]); IM(y[ 19]) = IM(x[26]); + RE(y[ 20]) = RE(x[34]); IM(y[ 20]) = IM(x[34]); + RE(y[ 21]) = RE(x[42]); IM(y[ 21]) = IM(x[42]); + RE(y[ 22]) = RE(x[50]); IM(y[ 22]) = IM(x[50]); + RE(y[ 23]) = RE(x[58]); IM(y[ 23]) = IM(x[58]); + RE(y[ 24]) = RE(x[ 3]); IM(y[ 24]) = IM(x[ 3]); + RE(y[ 25]) = RE(x[11]); IM(y[ 25]) = IM(x[11]); + RE(y[ 26]) = RE(x[19]); IM(y[ 26]) = IM(x[19]); + RE(y[ 27]) = RE(x[27]); IM(y[ 27]) = IM(x[27]); + RE(y[ 28]) = RE(x[35]); IM(y[ 28]) = IM(x[35]); + RE(y[ 29]) = RE(x[43]); IM(y[ 29]) = IM(x[43]); + RE(y[ 30]) = RE(x[51]); IM(y[ 30]) = IM(x[51]); + RE(y[ 31]) = RE(x[59]); IM(y[ 31]) = IM(x[59]); + RE(y[ 32]) = RE(x[ 4]); IM(y[ 32]) = IM(x[ 4]); + RE(y[ 33]) = RE(x[12]); IM(y[ 33]) = IM(x[12]); + RE(y[ 34]) = RE(x[20]); IM(y[ 34]) = IM(x[20]); + RE(y[ 35]) = RE(x[28]); IM(y[ 35]) = IM(x[28]); + RE(y[ 36]) = RE(x[36]); IM(y[ 36]) = IM(x[36]); + RE(y[ 37]) = RE(x[44]); IM(y[ 37]) = IM(x[44]); + RE(y[ 38]) = RE(x[52]); IM(y[ 38]) = IM(x[52]); + RE(y[ 39]) = RE(x[60]); IM(y[ 39]) = IM(x[60]); + RE(y[ 40]) = RE(x[ 5]); IM(y[ 40]) = IM(x[ 5]); + RE(y[ 41]) = RE(x[13]); IM(y[ 41]) = IM(x[13]); + RE(y[ 42]) = RE(x[21]); IM(y[ 42]) = IM(x[21]); + RE(y[ 43]) = RE(x[29]); IM(y[ 43]) = IM(x[29]); + RE(y[ 44]) = RE(x[37]); IM(y[ 44]) = IM(x[37]); + RE(y[ 45]) = RE(x[45]); IM(y[ 45]) = IM(x[45]); + RE(y[ 46]) = RE(x[53]); IM(y[ 46]) = IM(x[53]); + RE(y[ 47]) = RE(x[61]); IM(y[ 47]) = IM(x[61]); + RE(y[ 48]) = RE(x[ 6]); IM(y[ 48]) = IM(x[ 6]); + RE(y[ 49]) = RE(x[14]); IM(y[ 49]) = IM(x[14]); + RE(y[ 50]) = RE(x[22]); IM(y[ 50]) = IM(x[22]); + RE(y[ 51]) = RE(x[30]); IM(y[ 51]) = IM(x[30]); + RE(y[ 52]) = RE(x[38]); IM(y[ 52]) = IM(x[38]); + RE(y[ 53]) = RE(x[46]); IM(y[ 53]) = IM(x[46]); + RE(y[ 54]) = RE(x[54]); IM(y[ 54]) = IM(x[54]); + RE(y[ 55]) = RE(x[62]); IM(y[ 55]) = IM(x[62]); + RE(y[ 56]) = RE(x[ 7]); IM(y[ 56]) = IM(x[ 7]); + RE(y[ 57]) = RE(x[15]); IM(y[ 57]) = IM(x[15]); + RE(y[ 58]) = RE(x[23]); IM(y[ 58]) = IM(x[23]); + RE(y[ 59]) = RE(x[31]); IM(y[ 59]) = IM(x[31]); + RE(y[ 60]) = RE(x[39]); IM(y[ 60]) = IM(x[39]); + RE(y[ 61]) = RE(x[47]); IM(y[ 61]) = IM(x[47]); + RE(y[ 62]) = RE(x[55]); IM(y[ 62]) = IM(x[55]); + RE(y[ 63]) = RE(x[63]); IM(y[ 63]) = IM(x[63]); +} + + + +/******************************************************************************* +16 x 16 matrix transpose +*******************************************************************************/ +void transpose16x16(complex_t* x, complex_t* y) +{ + RE(y[ 0]) = RE(x[ 0]); IM(y[ 0]) = IM(x[ 0]); + RE(y[ 1]) = RE(x[ 16]); IM(y[ 1]) = IM(x[ 16]); + RE(y[ 2]) = RE(x[ 32]); IM(y[ 2]) = IM(x[ 32]); + RE(y[ 3]) = RE(x[ 48]); IM(y[ 3]) = IM(x[ 48]); + RE(y[ 4]) = RE(x[ 64]); IM(y[ 4]) = IM(x[ 64]); + RE(y[ 5]) = RE(x[ 80]); IM(y[ 5]) = IM(x[ 80]); + RE(y[ 6]) = RE(x[ 96]); IM(y[ 6]) = IM(x[ 96]); + RE(y[ 7]) = RE(x[112]); IM(y[ 7]) = IM(x[112]); + RE(y[ 8]) = RE(x[128]); IM(y[ 8]) = IM(x[128]); + RE(y[ 9]) = RE(x[144]); IM(y[ 9]) = IM(x[144]); + RE(y[ 10]) = RE(x[160]); IM(y[ 10]) = IM(x[160]); + RE(y[ 11]) = RE(x[176]); IM(y[ 11]) = IM(x[176]); + RE(y[ 12]) = RE(x[192]); IM(y[ 12]) = IM(x[192]); + RE(y[ 13]) = RE(x[208]); IM(y[ 13]) = IM(x[208]); + RE(y[ 14]) = RE(x[224]); IM(y[ 14]) = IM(x[224]); + RE(y[ 15]) = RE(x[240]); IM(y[ 15]) = IM(x[240]); + RE(y[ 16]) = RE(x[ 1]); IM(y[ 16]) = IM(x[ 1]); + RE(y[ 17]) = RE(x[ 17]); IM(y[ 17]) = IM(x[ 17]); + RE(y[ 18]) = RE(x[ 33]); IM(y[ 18]) = IM(x[ 33]); + RE(y[ 19]) = RE(x[ 49]); IM(y[ 19]) = IM(x[ 49]); + RE(y[ 20]) = RE(x[ 65]); IM(y[ 20]) = IM(x[ 65]); + RE(y[ 21]) = RE(x[ 81]); IM(y[ 21]) = IM(x[ 81]); + RE(y[ 22]) = RE(x[ 97]); IM(y[ 22]) = IM(x[ 97]); + RE(y[ 23]) = RE(x[113]); IM(y[ 23]) = IM(x[113]); + RE(y[ 24]) = RE(x[129]); IM(y[ 24]) = IM(x[129]); + RE(y[ 25]) = RE(x[145]); IM(y[ 25]) = IM(x[145]); + RE(y[ 26]) = RE(x[161]); IM(y[ 26]) = IM(x[161]); + RE(y[ 27]) = RE(x[177]); IM(y[ 27]) = IM(x[177]); + RE(y[ 28]) = RE(x[193]); IM(y[ 28]) = IM(x[193]); + RE(y[ 29]) = RE(x[209]); IM(y[ 29]) = IM(x[209]); + RE(y[ 30]) = RE(x[225]); IM(y[ 30]) = IM(x[225]); + RE(y[ 31]) = RE(x[241]); IM(y[ 31]) = IM(x[241]); + RE(y[ 32]) = RE(x[ 2]); IM(y[ 32]) = IM(x[ 2]); + RE(y[ 33]) = RE(x[ 18]); IM(y[ 33]) = IM(x[ 18]); + RE(y[ 34]) = RE(x[ 34]); IM(y[ 34]) = IM(x[ 34]); + RE(y[ 35]) = RE(x[ 50]); IM(y[ 35]) = IM(x[ 50]); + RE(y[ 36]) = RE(x[ 66]); IM(y[ 36]) = IM(x[ 66]); + RE(y[ 37]) = RE(x[ 82]); IM(y[ 37]) = IM(x[ 82]); + RE(y[ 38]) = RE(x[ 98]); IM(y[ 38]) = IM(x[ 98]); + RE(y[ 39]) = RE(x[114]); IM(y[ 39]) = IM(x[114]); + RE(y[ 40]) = RE(x[130]); IM(y[ 40]) = IM(x[130]); + RE(y[ 41]) = RE(x[146]); IM(y[ 41]) = IM(x[146]); + RE(y[ 42]) = RE(x[162]); IM(y[ 42]) = IM(x[162]); + RE(y[ 43]) = RE(x[178]); IM(y[ 43]) = IM(x[178]); + RE(y[ 44]) = RE(x[194]); IM(y[ 44]) = IM(x[194]); + RE(y[ 45]) = RE(x[210]); IM(y[ 45]) = IM(x[210]); + RE(y[ 46]) = RE(x[226]); IM(y[ 46]) = IM(x[226]); + RE(y[ 47]) = RE(x[242]); IM(y[ 47]) = IM(x[242]); + RE(y[ 48]) = RE(x[ 3]); IM(y[ 48]) = IM(x[ 3]); + RE(y[ 49]) = RE(x[ 19]); IM(y[ 49]) = IM(x[ 19]); + RE(y[ 50]) = RE(x[ 35]); IM(y[ 50]) = IM(x[ 35]); + RE(y[ 51]) = RE(x[ 51]); IM(y[ 51]) = IM(x[ 51]); + RE(y[ 52]) = RE(x[ 67]); IM(y[ 52]) = IM(x[ 67]); + RE(y[ 53]) = RE(x[ 83]); IM(y[ 53]) = IM(x[ 83]); + RE(y[ 54]) = RE(x[ 99]); IM(y[ 54]) = IM(x[ 99]); + RE(y[ 55]) = RE(x[115]); IM(y[ 55]) = IM(x[115]); + RE(y[ 56]) = RE(x[131]); IM(y[ 56]) = IM(x[131]); + RE(y[ 57]) = RE(x[147]); IM(y[ 57]) = IM(x[147]); + RE(y[ 58]) = RE(x[163]); IM(y[ 58]) = IM(x[163]); + RE(y[ 59]) = RE(x[179]); IM(y[ 59]) = IM(x[179]); + RE(y[ 60]) = RE(x[195]); IM(y[ 60]) = IM(x[195]); + RE(y[ 61]) = RE(x[211]); IM(y[ 61]) = IM(x[211]); + RE(y[ 62]) = RE(x[227]); IM(y[ 62]) = IM(x[227]); + RE(y[ 63]) = RE(x[243]); IM(y[ 63]) = IM(x[243]); + RE(y[ 64]) = RE(x[ 4]); IM(y[ 64]) = IM(x[ 4]); + RE(y[ 65]) = RE(x[ 20]); IM(y[ 65]) = IM(x[ 20]); + RE(y[ 66]) = RE(x[ 36]); IM(y[ 66]) = IM(x[ 36]); + RE(y[ 67]) = RE(x[ 52]); IM(y[ 67]) = IM(x[ 52]); + RE(y[ 68]) = RE(x[ 68]); IM(y[ 68]) = IM(x[ 68]); + RE(y[ 69]) = RE(x[ 84]); IM(y[ 69]) = IM(x[ 84]); + RE(y[ 70]) = RE(x[100]); IM(y[ 70]) = IM(x[100]); + RE(y[ 71]) = RE(x[116]); IM(y[ 71]) = IM(x[116]); + RE(y[ 72]) = RE(x[132]); IM(y[ 72]) = IM(x[132]); + RE(y[ 73]) = RE(x[148]); IM(y[ 73]) = IM(x[148]); + RE(y[ 74]) = RE(x[164]); IM(y[ 74]) = IM(x[164]); + RE(y[ 75]) = RE(x[180]); IM(y[ 75]) = IM(x[180]); + RE(y[ 76]) = RE(x[196]); IM(y[ 76]) = IM(x[196]); + RE(y[ 77]) = RE(x[212]); IM(y[ 77]) = IM(x[212]); + RE(y[ 78]) = RE(x[228]); IM(y[ 78]) = IM(x[228]); + RE(y[ 79]) = RE(x[244]); IM(y[ 79]) = IM(x[244]); + RE(y[ 80]) = RE(x[ 5]); IM(y[ 80]) = IM(x[ 5]); + RE(y[ 81]) = RE(x[ 21]); IM(y[ 81]) = IM(x[ 21]); + RE(y[ 82]) = RE(x[ 37]); IM(y[ 82]) = IM(x[ 37]); + RE(y[ 83]) = RE(x[ 53]); IM(y[ 83]) = IM(x[ 53]); + RE(y[ 84]) = RE(x[ 69]); IM(y[ 84]) = IM(x[ 69]); + RE(y[ 85]) = RE(x[ 85]); IM(y[ 85]) = IM(x[ 85]); + RE(y[ 86]) = RE(x[101]); IM(y[ 86]) = IM(x[101]); + RE(y[ 87]) = RE(x[117]); IM(y[ 87]) = IM(x[117]); + RE(y[ 88]) = RE(x[133]); IM(y[ 88]) = IM(x[133]); + RE(y[ 89]) = RE(x[149]); IM(y[ 89]) = IM(x[149]); + RE(y[ 90]) = RE(x[165]); IM(y[ 90]) = IM(x[165]); + RE(y[ 91]) = RE(x[181]); IM(y[ 91]) = IM(x[181]); + RE(y[ 92]) = RE(x[197]); IM(y[ 92]) = IM(x[197]); + RE(y[ 93]) = RE(x[213]); IM(y[ 93]) = IM(x[213]); + RE(y[ 94]) = RE(x[229]); IM(y[ 94]) = IM(x[229]); + RE(y[ 95]) = RE(x[245]); IM(y[ 95]) = IM(x[245]); + RE(y[ 96]) = RE(x[ 6]); IM(y[ 96]) = IM(x[ 6]); + RE(y[ 97]) = RE(x[ 22]); IM(y[ 97]) = IM(x[ 22]); + RE(y[ 98]) = RE(x[ 38]); IM(y[ 98]) = IM(x[ 38]); + RE(y[ 99]) = RE(x[ 54]); IM(y[ 99]) = IM(x[ 54]); + RE(y[100]) = RE(x[ 70]); IM(y[100]) = IM(x[ 70]); + RE(y[101]) = RE(x[ 86]); IM(y[101]) = IM(x[ 86]); + RE(y[102]) = RE(x[102]); IM(y[102]) = IM(x[102]); + RE(y[103]) = RE(x[118]); IM(y[103]) = IM(x[118]); + RE(y[104]) = RE(x[134]); IM(y[104]) = IM(x[134]); + RE(y[105]) = RE(x[150]); IM(y[105]) = IM(x[150]); + RE(y[106]) = RE(x[166]); IM(y[106]) = IM(x[166]); + RE(y[107]) = RE(x[182]); IM(y[107]) = IM(x[182]); + RE(y[108]) = RE(x[198]); IM(y[108]) = IM(x[198]); + RE(y[109]) = RE(x[214]); IM(y[109]) = IM(x[214]); + RE(y[110]) = RE(x[230]); IM(y[110]) = IM(x[230]); + RE(y[111]) = RE(x[246]); IM(y[111]) = IM(x[246]); + RE(y[112]) = RE(x[ 7]); IM(y[112]) = IM(x[ 7]); + RE(y[113]) = RE(x[ 23]); IM(y[113]) = IM(x[ 23]); + RE(y[114]) = RE(x[ 39]); IM(y[114]) = IM(x[ 39]); + RE(y[115]) = RE(x[ 55]); IM(y[115]) = IM(x[ 55]); + RE(y[116]) = RE(x[ 71]); IM(y[116]) = IM(x[ 71]); + RE(y[117]) = RE(x[ 87]); IM(y[117]) = IM(x[ 87]); + RE(y[118]) = RE(x[103]); IM(y[118]) = IM(x[103]); + RE(y[119]) = RE(x[119]); IM(y[119]) = IM(x[119]); + RE(y[120]) = RE(x[135]); IM(y[120]) = IM(x[135]); + RE(y[121]) = RE(x[151]); IM(y[121]) = IM(x[151]); + RE(y[122]) = RE(x[167]); IM(y[122]) = IM(x[167]); + RE(y[123]) = RE(x[183]); IM(y[123]) = IM(x[183]); + RE(y[124]) = RE(x[199]); IM(y[124]) = IM(x[199]); + RE(y[125]) = RE(x[215]); IM(y[125]) = IM(x[215]); + RE(y[126]) = RE(x[231]); IM(y[126]) = IM(x[231]); + RE(y[127]) = RE(x[247]); IM(y[127]) = IM(x[247]); + RE(y[128]) = RE(x[ 8]); IM(y[128]) = IM(x[ 8]); + RE(y[129]) = RE(x[ 24]); IM(y[129]) = IM(x[ 24]); + RE(y[130]) = RE(x[ 40]); IM(y[130]) = IM(x[ 40]); + RE(y[131]) = RE(x[ 56]); IM(y[131]) = IM(x[ 56]); + RE(y[132]) = RE(x[ 72]); IM(y[132]) = IM(x[ 72]); + RE(y[133]) = RE(x[ 88]); IM(y[133]) = IM(x[ 88]); + RE(y[134]) = RE(x[104]); IM(y[134]) = IM(x[104]); + RE(y[135]) = RE(x[120]); IM(y[135]) = IM(x[120]); + RE(y[136]) = RE(x[136]); IM(y[136]) = IM(x[136]); + RE(y[137]) = RE(x[152]); IM(y[137]) = IM(x[152]); + RE(y[138]) = RE(x[168]); IM(y[138]) = IM(x[168]); + RE(y[139]) = RE(x[184]); IM(y[139]) = IM(x[184]); + RE(y[140]) = RE(x[200]); IM(y[140]) = IM(x[200]); + RE(y[141]) = RE(x[216]); IM(y[141]) = IM(x[216]); + RE(y[142]) = RE(x[232]); IM(y[142]) = IM(x[232]); + RE(y[143]) = RE(x[248]); IM(y[143]) = IM(x[248]); + RE(y[144]) = RE(x[ 9]); IM(y[144]) = IM(x[ 9]); + RE(y[145]) = RE(x[ 25]); IM(y[145]) = IM(x[ 25]); + RE(y[146]) = RE(x[ 41]); IM(y[146]) = IM(x[ 41]); + RE(y[147]) = RE(x[ 57]); IM(y[147]) = IM(x[ 57]); + RE(y[148]) = RE(x[ 73]); IM(y[148]) = IM(x[ 73]); + RE(y[149]) = RE(x[ 89]); IM(y[149]) = IM(x[ 89]); + RE(y[150]) = RE(x[105]); IM(y[150]) = IM(x[105]); + RE(y[151]) = RE(x[121]); IM(y[151]) = IM(x[121]); + RE(y[152]) = RE(x[137]); IM(y[152]) = IM(x[137]); + RE(y[153]) = RE(x[153]); IM(y[153]) = IM(x[153]); + RE(y[154]) = RE(x[169]); IM(y[154]) = IM(x[169]); + RE(y[155]) = RE(x[185]); IM(y[155]) = IM(x[185]); + RE(y[156]) = RE(x[201]); IM(y[156]) = IM(x[201]); + RE(y[157]) = RE(x[217]); IM(y[157]) = IM(x[217]); + RE(y[158]) = RE(x[233]); IM(y[158]) = IM(x[233]); + RE(y[159]) = RE(x[249]); IM(y[159]) = IM(x[249]); + RE(y[160]) = RE(x[ 10]); IM(y[160]) = IM(x[ 10]); + RE(y[161]) = RE(x[ 26]); IM(y[161]) = IM(x[ 26]); + RE(y[162]) = RE(x[ 42]); IM(y[162]) = IM(x[ 42]); + RE(y[163]) = RE(x[ 58]); IM(y[163]) = IM(x[ 58]); + RE(y[164]) = RE(x[ 74]); IM(y[164]) = IM(x[ 74]); + RE(y[165]) = RE(x[ 90]); IM(y[165]) = IM(x[ 90]); + RE(y[166]) = RE(x[106]); IM(y[166]) = IM(x[106]); + RE(y[167]) = RE(x[122]); IM(y[167]) = IM(x[122]); + RE(y[168]) = RE(x[138]); IM(y[168]) = IM(x[138]); + RE(y[169]) = RE(x[154]); IM(y[169]) = IM(x[154]); + RE(y[170]) = RE(x[170]); IM(y[170]) = IM(x[170]); + RE(y[171]) = RE(x[186]); IM(y[171]) = IM(x[186]); + RE(y[172]) = RE(x[202]); IM(y[172]) = IM(x[202]); + RE(y[173]) = RE(x[218]); IM(y[173]) = IM(x[218]); + RE(y[174]) = RE(x[234]); IM(y[174]) = IM(x[234]); + RE(y[175]) = RE(x[250]); IM(y[175]) = IM(x[250]); + RE(y[176]) = RE(x[ 11]); IM(y[176]) = IM(x[ 11]); + RE(y[177]) = RE(x[ 27]); IM(y[177]) = IM(x[ 27]); + RE(y[178]) = RE(x[ 43]); IM(y[178]) = IM(x[ 43]); + RE(y[179]) = RE(x[ 59]); IM(y[179]) = IM(x[ 59]); + RE(y[180]) = RE(x[ 75]); IM(y[180]) = IM(x[ 75]); + RE(y[181]) = RE(x[ 91]); IM(y[181]) = IM(x[ 91]); + RE(y[182]) = RE(x[107]); IM(y[182]) = IM(x[107]); + RE(y[183]) = RE(x[123]); IM(y[183]) = IM(x[123]); + RE(y[184]) = RE(x[139]); IM(y[184]) = IM(x[139]); + RE(y[185]) = RE(x[155]); IM(y[185]) = IM(x[155]); + RE(y[186]) = RE(x[171]); IM(y[186]) = IM(x[171]); + RE(y[187]) = RE(x[187]); IM(y[187]) = IM(x[187]); + RE(y[188]) = RE(x[203]); IM(y[188]) = IM(x[203]); + RE(y[189]) = RE(x[219]); IM(y[189]) = IM(x[219]); + RE(y[190]) = RE(x[235]); IM(y[190]) = IM(x[235]); + RE(y[191]) = RE(x[251]); IM(y[191]) = IM(x[251]); + RE(y[192]) = RE(x[ 12]); IM(y[192]) = IM(x[ 12]); + RE(y[193]) = RE(x[ 28]); IM(y[193]) = IM(x[ 28]); + RE(y[194]) = RE(x[ 44]); IM(y[194]) = IM(x[ 44]); + RE(y[195]) = RE(x[ 60]); IM(y[195]) = IM(x[ 60]); + RE(y[196]) = RE(x[ 76]); IM(y[196]) = IM(x[ 76]); + RE(y[197]) = RE(x[ 92]); IM(y[197]) = IM(x[ 92]); + RE(y[198]) = RE(x[108]); IM(y[198]) = IM(x[108]); + RE(y[199]) = RE(x[124]); IM(y[199]) = IM(x[124]); + RE(y[200]) = RE(x[140]); IM(y[200]) = IM(x[140]); + RE(y[201]) = RE(x[156]); IM(y[201]) = IM(x[156]); + RE(y[202]) = RE(x[172]); IM(y[202]) = IM(x[172]); + RE(y[203]) = RE(x[188]); IM(y[203]) = IM(x[188]); + RE(y[204]) = RE(x[204]); IM(y[204]) = IM(x[204]); + RE(y[205]) = RE(x[220]); IM(y[205]) = IM(x[220]); + RE(y[206]) = RE(x[236]); IM(y[206]) = IM(x[236]); + RE(y[207]) = RE(x[252]); IM(y[207]) = IM(x[252]); + RE(y[208]) = RE(x[ 13]); IM(y[208]) = IM(x[ 13]); + RE(y[209]) = RE(x[ 29]); IM(y[209]) = IM(x[ 29]); + RE(y[210]) = RE(x[ 45]); IM(y[210]) = IM(x[ 45]); + RE(y[211]) = RE(x[ 61]); IM(y[211]) = IM(x[ 61]); + RE(y[212]) = RE(x[ 77]); IM(y[212]) = IM(x[ 77]); + RE(y[213]) = RE(x[ 93]); IM(y[213]) = IM(x[ 93]); + RE(y[214]) = RE(x[109]); IM(y[214]) = IM(x[109]); + RE(y[215]) = RE(x[125]); IM(y[215]) = IM(x[125]); + RE(y[216]) = RE(x[141]); IM(y[216]) = IM(x[141]); + RE(y[217]) = RE(x[157]); IM(y[217]) = IM(x[157]); + RE(y[218]) = RE(x[173]); IM(y[218]) = IM(x[173]); + RE(y[219]) = RE(x[189]); IM(y[219]) = IM(x[189]); + RE(y[220]) = RE(x[205]); IM(y[220]) = IM(x[205]); + RE(y[221]) = RE(x[221]); IM(y[221]) = IM(x[221]); + RE(y[222]) = RE(x[237]); IM(y[222]) = IM(x[237]); + RE(y[223]) = RE(x[253]); IM(y[223]) = IM(x[253]); + RE(y[224]) = RE(x[ 14]); IM(y[224]) = IM(x[ 14]); + RE(y[225]) = RE(x[ 30]); IM(y[225]) = IM(x[ 30]); + RE(y[226]) = RE(x[ 46]); IM(y[226]) = IM(x[ 46]); + RE(y[227]) = RE(x[ 62]); IM(y[227]) = IM(x[ 62]); + RE(y[228]) = RE(x[ 78]); IM(y[228]) = IM(x[ 78]); + RE(y[229]) = RE(x[ 94]); IM(y[229]) = IM(x[ 94]); + RE(y[230]) = RE(x[110]); IM(y[230]) = IM(x[110]); + RE(y[231]) = RE(x[126]); IM(y[231]) = IM(x[126]); + RE(y[232]) = RE(x[142]); IM(y[232]) = IM(x[142]); + RE(y[233]) = RE(x[158]); IM(y[233]) = IM(x[158]); + RE(y[234]) = RE(x[174]); IM(y[234]) = IM(x[174]); + RE(y[235]) = RE(x[190]); IM(y[235]) = IM(x[190]); + RE(y[236]) = RE(x[206]); IM(y[236]) = IM(x[206]); + RE(y[237]) = RE(x[222]); IM(y[237]) = IM(x[222]); + RE(y[238]) = RE(x[238]); IM(y[238]) = IM(x[238]); + RE(y[239]) = RE(x[254]); IM(y[239]) = IM(x[254]); + RE(y[240]) = RE(x[ 15]); IM(y[240]) = IM(x[ 15]); + RE(y[241]) = RE(x[ 31]); IM(y[241]) = IM(x[ 31]); + RE(y[242]) = RE(x[ 47]); IM(y[242]) = IM(x[ 47]); + RE(y[243]) = RE(x[ 63]); IM(y[243]) = IM(x[ 63]); + RE(y[244]) = RE(x[ 79]); IM(y[244]) = IM(x[ 79]); + RE(y[245]) = RE(x[ 95]); IM(y[245]) = IM(x[ 95]); + RE(y[246]) = RE(x[111]); IM(y[246]) = IM(x[111]); + RE(y[247]) = RE(x[127]); IM(y[247]) = IM(x[127]); + RE(y[248]) = RE(x[143]); IM(y[248]) = IM(x[143]); + RE(y[249]) = RE(x[159]); IM(y[249]) = IM(x[159]); + RE(y[250]) = RE(x[175]); IM(y[250]) = IM(x[175]); + RE(y[251]) = RE(x[191]); IM(y[251]) = IM(x[191]); + RE(y[252]) = RE(x[207]); IM(y[252]) = IM(x[207]); + RE(y[253]) = RE(x[223]); IM(y[253]) = IM(x[223]); + RE(y[254]) = RE(x[239]); IM(y[254]) = IM(x[239]); + RE(y[255]) = RE(x[255]); IM(y[255]) = IM(x[255]); +} + + diff --git a/dspl/src/filter_ap.c b/dspl/src/filter_ap.c index d7a2a06..ba6ed1a 100644 --- a/dspl/src/filter_ap.c +++ b/dspl/src/filter_ap.c @@ -96,7 +96,6 @@ In addition, GNUPLOT will build the following graphs from data stored in files: \author Sergey Bakhurin www.dsplib.org ***************************************************************************** */ - #endif #ifdef DOXYGEN_RUSSIAN /*! **************************************************************************** @@ -211,11 +210,6 @@ exit_label: - - - - - #ifdef DOXYGEN_ENGLISH /*! **************************************************************************** \ingroup IIR_FILTER_DESIGN_GROUP diff --git a/include/dspl.h b/include/dspl.h index 298262b..045dc5f 100644 --- a/include/dspl.h +++ b/include/dspl.h @@ -118,7 +118,10 @@ Memory must be allocated by \ref fft_create function. \n\n Pointer to the vector of intermediate results. \n The size of the vector is `[n x 1]`. \n The memory must be allocated with the \ref fft_create function. \n\n -The structure is populated with the \ref fft_create function once + + + +The structure is calculated with the \ref fft_create function once before using the FFT algorithm. \n A pointer to an object of this structure may be reused when calling FFT functions. \n @@ -186,6 +189,32 @@ then the structure arrays will be automatically recreated for the length `n`. Указатель на вектор промежуточных вычислений алгоритма БПФ. \n Размер вектора `[n x 1]`. \n Память должна быть выделена функцией \ref fft_create. \n \n + +\param w32 +Статический вектор поворотных коэффициентов 32-точечного БПФ. \n \n + +\param w64 +Статический вектор поворотных коэффициентов 64-точечного БПФ. \n \n + +\param w128 +Статический вектор поворотных коэффициентов 128-точечного БПФ. \n \n + +\param w256 +Статический вектор поворотных коэффициентов 256-точечного БПФ. \n \n + +\param w512 +Статический вектор поворотных коэффициентов 512-точечного БПФ. \n \n + +\param w1024 +Статический вектор поворотных коэффициентов 1024-точечного БПФ. \n \n + +\param w2048 +Статический вектор поворотных коэффициентов 2048-точечного БПФ. \n \n + +\param w4096 +Статический вектор поворотных коэффициентов 4096-точечного БПФ. \n \n + + Структура заполняется функцией \ref fft_create один раз до использования алгоритма БПФ. \n Указатель на объект данной структуры может быть @@ -225,10 +254,19 @@ www.dsplib.org #endif typedef struct { - complex_t* w; - complex_t* t0; - complex_t* t1; - int n; + complex_t* w; + complex_t* t0; + complex_t* t1; + + complex_t w32[ 32]; + complex_t w64[ 64]; + complex_t w128[128]; + complex_t w256[256]; + complex_t w512[512]; + complex_t* w1024; + complex_t* w2048; + complex_t* w4096; + int n; } fft_t; diff --git a/performance/bin/octave/fft_cmplx_performance.m b/performance/bin/octave/fft_cmplx_performance.m index 9bab254..acaf5b9 100644 --- a/performance/bin/octave/fft_cmplx_performance.m +++ b/performance/bin/octave/fft_cmplx_performance.m @@ -23,49 +23,96 @@ for j = 1:21 s = s / 2; endfor +dspl_size = [2 + 4 + 8 + 16 + 32 + 64 + 128 + 256 + 512 + 1024 + 2048 + 4096 + 8192 + 16384 + 32768 + 65536 + 131072 + 262144 + 524288 + 1048576]; + -dspl = [1204.630392 -1283.970612 -1586.347958 -1707.107097 -1866.109831 -1837.307509 -2366.785829 -2302.925874 -2388.456514 -2113.451546 -3090.904615 -2979.596190 -2685.155556 -2053.760000 -3723.946667 -3195.618462 -2328.221538 -1786.533333 -7288.960000 -4646.700000 -2633.120000]; +dspl_mflops = [597.7 + 1946.0 + 4455.5 + 5446.3 + 4490.7 + 4288.5 + 3524.1 + 5286.9 + 3995.3 + 3657.6 + 2953.2 + 2078.0 + 2565.7 + 2615.5 + 2361.8 + 2376.4 + 2169.8 + 2285.5 + 2172.4 + 1896.4]; -python = [2390.741 -2597.527 -2841.191 -3066.652 -3092.187 -3444.710 -3633.320 -4333.845 -5316.897 -5201.486 -4608.231 -4481.357 -3876.925 -2961.753 -2435.427 -1344.871 - 606.953 - 298.559 - 120.772 - 50.369 - 17.033]; +python_size = [4194304 + 2097152 + 1048576 + 524288 + 262144 + 131072 + 65536 + 32768 + 16384 + 8192 + 4096 + 2048 + 1024 + 512 + 256 + 128 + 64 + 32 + 16 + 8 + 4 + 2]; + + + + +python_mflops = [2119.626 + 2147.070 + 2362.656 + 2351.777 + 2408.621 + 2678.743 + 3194.574 + 3978.322 + 5220.731 + 4671.613 + 4240.982 + 3585.080 + 3876.999 + 2556.301 + 2333.780 + 1301.660 + 606.947 + 294.469 + 127.103 + 37.574 + 15.669 + 4.110]; - plot(log2(size), mflops,log2(size), dspl, log2(size), python) \ No newline at end of file + plot(log2(size), mflops,log2(dspl_size), dspl_mflops, log2(python_size), python_mflops) \ No newline at end of file diff --git a/performance/src/fft_cmplx_performance.c b/performance/src/fft_cmplx_performance.c index 71a6a00..f00aff0 100644 --- a/performance/src/fft_cmplx_performance.c +++ b/performance/src/fft_cmplx_performance.c @@ -5,7 +5,7 @@ #include "dspl.h" #define NMAX 4194304 -#define L 18 +#define L 20 #define SIZE_FACTOR 2.3 @@ -87,10 +87,10 @@ int main(int argc, char* argv[]) hdspl = dspl_load(); /* Load DSPL function */ int len_r2[L] = {2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, - 8192, 16384, 32768, 65536, 131072, 262144}; + 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576}; - int len_nr[L] = {6, 9, 12, 15, 18, 24, 36, 80, 108, 210, 504, 1000, - 1960, 4725, 10368, 27000, 75600, 165375}; + int len_nr[L] = {6, 9, 12, 15, 18, 24, 36, 80, 100, 108, 210, 504, 1000, + 1960, 4725, 8000, 10368, 27000, 75600, 165375}; int err; double mflops[L] = {0}; diff --git a/verification/bin/octave/fft_radix2_verification.m b/verification/bin/octave/fft_radix2_verification.m new file mode 100644 index 0000000..95b0e4d --- /dev/null +++ b/verification/bin/octave/fft_radix2_verification.m @@ -0,0 +1,44 @@ +clear all; close all; clc; +addpath('octave'); + + +fn_in = {'dat/x_fft_4.dat'; + 'dat/x_fft_8.dat'; + 'dat/x_fft_16.dat'; + 'dat/x_fft_32.dat'; + 'dat/x_fft_64.dat'; + 'dat/x_fft_128.dat'; + 'dat/x_fft_256.dat'; + 'dat/x_fft_512.dat'; + 'dat/x_fft_1024.dat'; + 'dat/x_fft_2048.dat'; + 'dat/x_fft_4096.dat'; + 'dat/x_fft_8192.dat'; + 'dat/x_fft_16384.dat'; + 'dat/x_fft_32768.dat'; + 'dat/x_fft_65536.dat'}; + +fn_out = {'dat/y_fft_4.dat'; + 'dat/y_fft_8.dat'; + 'dat/y_fft_16.dat'; + 'dat/y_fft_32.dat'; + 'dat/y_fft_64.dat'; + 'dat/y_fft_128.dat'; + 'dat/y_fft_256.dat'; + 'dat/y_fft_512.dat'; + 'dat/y_fft_1024.dat'; + 'dat/y_fft_2048.dat'; + 'dat/y_fft_4096.dat'; + 'dat/y_fft_8192.dat'; + 'dat/y_fft_16384.dat'; + 'dat/y_fft_32768.dat'; + 'dat/y_fft_65536.dat'}; + + +for i = 1:length(fn_in) + x = readbin(fn_in{i}); + y = fft(x); + writebin(y, 1, fn_out{i}); +end + + diff --git a/verification/src/fft_radix2_verification.c b/verification/src/fft_radix2_verification.c new file mode 100644 index 0000000..8faa8ea --- /dev/null +++ b/verification/src/fft_radix2_verification.c @@ -0,0 +1,156 @@ +#include +#include +#include +#include +#include "dspl.h" + +#define FFT_SIZE 65536 + + +int main(int argc, char* argv[]) +{ + void* hdspl; /* DSPL handle */ + fft_t pfft = {0}; + + int verr, nx, type; + double derr; + complex_t *yout = NULL; + complex_t *xc = NULL; + + hdspl = dspl_load(); /* Load DSPL function */ + + + verif_data_gen(4, DAT_COMPLEX, "dat/x_fft_4.dat"); + verif_data_gen(8, DAT_COMPLEX, "dat/x_fft_8.dat"); + verif_data_gen(16, DAT_COMPLEX, "dat/x_fft_16.dat"); + verif_data_gen(32, DAT_COMPLEX, "dat/x_fft_32.dat"); + verif_data_gen(64, DAT_COMPLEX, "dat/x_fft_64.dat"); + verif_data_gen(128, DAT_COMPLEX, "dat/x_fft_128.dat"); + verif_data_gen(256, DAT_COMPLEX, "dat/x_fft_256.dat"); + verif_data_gen(512, DAT_COMPLEX, "dat/x_fft_512.dat"); + verif_data_gen(1024, DAT_COMPLEX, "dat/x_fft_1024.dat"); + verif_data_gen(2048, DAT_COMPLEX, "dat/x_fft_2048.dat"); + verif_data_gen(4096, DAT_COMPLEX, "dat/x_fft_4096.dat"); + verif_data_gen(8192, DAT_COMPLEX, "dat/x_fft_8192.dat"); + verif_data_gen(16384, DAT_COMPLEX, "dat/x_fft_16384.dat"); + verif_data_gen(32768, DAT_COMPLEX, "dat/x_fft_32768.dat"); + verif_data_gen(65536, DAT_COMPLEX, "dat/x_fft_65536.dat"); + + yout = (complex_t*)malloc(FFT_SIZE * sizeof(complex_t )); + + system("octave octave/fft_radix2_verification.m"); + + + /*------------------------------------------------------------------------*/ + readbin("dat/x_fft_4.dat", (void**)(&xc), &nx, &type); + fft_cmplx(xc, 4, &pfft, yout); + verif_str_cmplx(yout, 4, "fft 4 for complex dat", + "dat/y_fft_4.dat", + "verification.log"); + /*------------------------------------------------------------------------*/ + readbin("dat/x_fft_8.dat", (void**)(&xc), &nx, &type); + fft_cmplx(xc, 8, &pfft, yout); + verif_str_cmplx(yout, 8, "fft 8 for complex dat", + "dat/y_fft_8.dat", + "verification.log"); + /*------------------------------------------------------------------------*/ + readbin("dat/x_fft_16.dat", (void**)(&xc), &nx, &type); + fft_cmplx(xc, 16, &pfft, yout); + verif_str_cmplx(yout, 16, "fft 16 for complex dat", + "dat/y_fft_16.dat", + "verification.log"); + + /*------------------------------------------------------------------------*/ + readbin("dat/x_fft_32.dat", (void**)(&xc), &nx, &type); + fft_cmplx(xc, 32, &pfft, yout); + verif_str_cmplx(yout, 32, "fft 32 for complex dat", + "dat/y_fft_32.dat", + "verification.log"); + + /*------------------------------------------------------------------------*/ + readbin("dat/x_fft_64.dat", (void**)(&xc), &nx, &type); + fft_cmplx(xc, 64, &pfft, yout); + verif_str_cmplx(yout, 64, "fft 64 for complex dat", + "dat/y_fft_64.dat", + "verification.log"); + /*------------------------------------------------------------------------*/ + readbin("dat/x_fft_128.dat", (void**)(&xc), &nx, &type); + fft_cmplx(xc, 128, &pfft, yout); + verif_str_cmplx(yout, 128, "fft 128 for complex dat", + "dat/y_fft_128.dat", + "verification.log"); + /*------------------------------------------------------------------------*/ + readbin("dat/x_fft_256.dat", (void**)(&xc), &nx, &type); + fft_cmplx(xc, 256, &pfft, yout); + verif_str_cmplx(yout, 256, "fft 256 for complex dat", + "dat/y_fft_256.dat", + "verification.log"); + + /*------------------------------------------------------------------------*/ + readbin("dat/x_fft_512.dat", (void**)(&xc), &nx, &type); + fft_cmplx(xc, 512, &pfft, yout); + verif_str_cmplx(yout, 512, "fft 512 for complex dat", + "dat/y_fft_512.dat", + "verification.log"); + + /*------------------------------------------------------------------------*/ + readbin("dat/x_fft_1024.dat", (void**)(&xc), &nx, &type); + fft_cmplx(xc, 1024, &pfft, yout); + verif_str_cmplx(yout, 1024, "fft 1024 for complex dat", + "dat/y_fft_1024.dat", + "verification.log"); + + /*------------------------------------------------------------------------*/ + readbin("dat/x_fft_2048.dat", (void**)(&xc), &nx, &type); + fft_cmplx(xc, 2048, &pfft, yout); + verif_str_cmplx(yout, 2048, "fft 2048 for complex dat", + "dat/y_fft_2048.dat", + "verification.log"); + + /*------------------------------------------------------------------------*/ + readbin("dat/x_fft_4096.dat", (void**)(&xc), &nx, &type); + fft_cmplx(xc, 4096, &pfft, yout); + verif_str_cmplx(yout, 4096, "fft 4096 for complex dat", + "dat/y_fft_4096.dat", + "verification.log"); + + /*------------------------------------------------------------------------*/ + readbin("dat/x_fft_8192.dat", (void**)(&xc), &nx, &type); + fft_cmplx(xc, 8192, &pfft, yout); + verif_str_cmplx(yout, 8192, "fft 8192 for complex dat", + "dat/y_fft_8192.dat", + "verification.log"); + + /*------------------------------------------------------------------------*/ + readbin("dat/x_fft_16384.dat", (void**)(&xc), &nx, &type); + fft_cmplx(xc, 16384, &pfft, yout); + verif_str_cmplx(yout, 16384, "fft 16384 for complex dat", + "dat/y_fft_16384.dat", + "verification.log"); + + /*------------------------------------------------------------------------*/ + readbin("dat/x_fft_32768.dat", (void**)(&xc), &nx, &type); + fft_cmplx(xc, 32768, &pfft, yout); + verif_str_cmplx(yout, 32768, "fft 32768 for complex dat", + "dat/y_fft_32768.dat", + "verification.log"); + + /*------------------------------------------------------------------------*/ + readbin("dat/x_fft_65536.dat", (void**)(&xc), &nx, &type); + fft_cmplx(xc, 65536, &pfft, yout); + verif_str_cmplx(yout, 65536, "fft 65536 for complex dat", + "dat/y_fft_65536.dat", + "verification.log"); + + + /* free dspl handle */ + dspl_free(hdspl); + + if(yout) + free(yout); + if(xc) + free(xc); + + return 0; +} +