improved FFT performance and added verification for radix-2 FFT sizes from 4 to 65536 points

Changes to be committed:
modified:   _release/dspl.c
modified:   _release/dspl.h
modified:   dspl/dox/doxyfile_en
modified:   dspl/dox/footer_en.html
modified:   dspl/dox/header_en.html
modified:   dspl/src/dspl_internal.h
modified:   dspl/src/fft.c
modified:   dspl/src/fft_subkernel.c
modified:   dspl/src/filter_ap.c
modified:   include/dspl.h
modified:   performance/bin/octave/fft_cmplx_performance.m
modified:   performance/src/fft_cmplx_performance.c
new file:   verification/bin/octave/fft_radix2_verification.m
new file:   verification/src/fft_radix2_verification.c
pull/6/merge
Dsplib 2020-11-10 19:06:20 +03:00
rodzic 3ed7cd21d4
commit d5586dfb9e
14 zmienionych plików z 1369 dodań i 146 usunięć

Wyświetl plik

@ -137,8 +137,10 @@ p_matrix_eig_cmplx matrix_eig_cmplx ;
p_matrix_eye matrix_eye ; p_matrix_eye matrix_eye ;
p_matrix_eye_cmplx matrix_eye_cmplx ; p_matrix_eye_cmplx matrix_eye_cmplx ;
p_matrix_mul matrix_mul ; p_matrix_mul matrix_mul ;
p_matrix_pinv matrix_pinv ;
p_matrix_print matrix_print ; p_matrix_print matrix_print ;
p_matrix_print_cmplx matrix_print_cmplx ; p_matrix_print_cmplx matrix_print_cmplx ;
p_matrix_svd matrix_svd ;
p_matrix_transpose matrix_transpose ; p_matrix_transpose matrix_transpose ;
p_matrix_transpose_cmplx matrix_transpose_cmplx ; p_matrix_transpose_cmplx matrix_transpose_cmplx ;
p_matrix_transpose_hermite matrix_transpose_hermite ; p_matrix_transpose_hermite matrix_transpose_hermite ;
@ -349,8 +351,10 @@ void* dspl_load()
LOAD_FUNC(matrix_eye); LOAD_FUNC(matrix_eye);
LOAD_FUNC(matrix_eye_cmplx); LOAD_FUNC(matrix_eye_cmplx);
LOAD_FUNC(matrix_mul); LOAD_FUNC(matrix_mul);
LOAD_FUNC(matrix_pinv);
LOAD_FUNC(matrix_print); LOAD_FUNC(matrix_print);
LOAD_FUNC(matrix_print_cmplx); LOAD_FUNC(matrix_print_cmplx);
LOAD_FUNC(matrix_svd);
LOAD_FUNC(matrix_transpose); LOAD_FUNC(matrix_transpose);
LOAD_FUNC(matrix_transpose_cmplx); LOAD_FUNC(matrix_transpose_cmplx);
LOAD_FUNC(matrix_transpose_hermite); LOAD_FUNC(matrix_transpose_hermite);
@ -440,8 +444,6 @@ void* dspl_load()
void dspl_free(void* handle) void dspl_free(void* handle)
{ {
#ifdef WIN_OS #ifdef WIN_OS

Wyświetl plik

@ -225,10 +225,19 @@ www.dsplib.org
#endif #endif
typedef struct typedef struct
{ {
complex_t* w; complex_t* w;
complex_t* t0; complex_t* t0;
complex_t* t1; complex_t* t1;
int n;
complex_t w32[ 32];
complex_t w64[ 64];
complex_t w128[128];
complex_t w256[256];
complex_t w512[512];
complex_t* w1024;
complex_t* w2048;
complex_t* w4096;
int n;
} fft_t; } fft_t;
@ -1216,6 +1225,13 @@ DECLARE_FUNC(int, matrix_mul, double* a
COMMA int mb COMMA int mb
COMMA double* c); COMMA double* c);
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
DECLARE_FUNC(int, matrix_pinv, double* a
COMMA int n
COMMA int m
COMMA double* tol
COMMA double* inv
COMMA int* info);
/*----------------------------------------------------------------------------*/
DECLARE_FUNC(int, matrix_print, double* a DECLARE_FUNC(int, matrix_print, double* a
COMMA int n COMMA int n
COMMA int m COMMA int m
@ -1228,6 +1244,14 @@ DECLARE_FUNC(int, matrix_print_cmplx, complex_t* a
COMMA const char* name COMMA const char* name
COMMA const char* format); COMMA const char* format);
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
DECLARE_FUNC(int, matrix_svd, double* a
COMMA int n
COMMA int m
COMMA double* u
COMMA double* s
COMMA double* vt
COMMA int* info);
/*----------------------------------------------------------------------------*/
DECLARE_FUNC(int, matrix_transpose, double* a DECLARE_FUNC(int, matrix_transpose, double* a
COMMA int n COMMA int n
COMMA int m COMMA int m
@ -1563,49 +1587,6 @@ DECLARE_FUNC(int, xcorr_cmplx, complex_t* x
#endif #endif
#ifdef DOXYGEN_ENGLISH
/*! ****************************************************************************
\ingroup SYS_LOADING_GROUP
\fn void dspl_free(void* handle)
\brief Cleans up the previously linked DSPL-2.0 dynamic library.
This cross-platform function clears the library `libdspl.dll` in
Windows system and from the library `libdspl.so` on the Linux system.
After cleaning the library, all functions will become unavailable.
\param [in] handle
Handle of the previously linked DSPL-2.0 library. \n
This pointer can be `NULL`, in this case no action
are being produced.
\author Bakhurin Sergey. www.dsplib.org
***************************************************************************** */
#endif
#ifdef DOXYGEN_RUSSIAN
/*! ****************************************************************************
\ingroup SYS_LOADING_GROUP
\fn void dspl_free(void* handle)
\brief Очищает связанную ранее динамическую библиотеку DSPL-2.0.
Данная кроссплатформенная функция производит очистку библиотеки `libdspl.dll` в
системе Windows и с библиотеки `libdspl.so` в системе Linux.
После очистки библиотеки все функции станут недоступны.
\param[in] handle
Хэндл прилинкованной ранее библиотеки DSPL-2.0. \n
Данный указатель может быть `NULL`, в этом случае никакие действия не
производятся.\n\n
\author Бахурин Сергей. www.dsplib.org
**************************************************************************** */
#endif
void* dspl_load();
#ifdef DOXYGEN_ENGLISH #ifdef DOXYGEN_ENGLISH
/*! **************************************************************************** /*! ****************************************************************************
\ingroup SYS_LOADING_GROUP \ingroup SYS_LOADING_GROUP
@ -1694,7 +1675,7 @@ int main(int argc, char* argv[])
void* hdspl; // DSPL хэндл void* hdspl; // DSPL хэндл
hdspl = dspl_load(); // Динамическая линковка hdspl = dspl_load(); // Динамическая линковка
// Проверяем указатель. Если `NULLL`, то линковка прошла неудачно // Проверяем указатель. Если `NULL`, то линковка прошла неудачно
if(!hdspl) if(!hdspl)
{ {
printf("libdspl loading error!\n"); printf("libdspl loading error!\n");
@ -1714,6 +1695,48 @@ int main(int argc, char* argv[])
\author Бахурин Сергей. www.dsplib.org \author Бахурин Сергей. www.dsplib.org
***************************************************************************** */ ***************************************************************************** */
#endif #endif
void* dspl_load();
#ifdef DOXYGEN_ENGLISH
/*! ****************************************************************************
\ingroup SYS_LOADING_GROUP
\fn void dspl_free(void* handle)
\brief Cleans up the previously linked DSPL-2.0 dynamic library.
This cross-platform function cleans up the library `libdspl.dll` on
Windows systems and the library `libdspl.so` on Linux systems.
After cleanup, all library functions become unavailable.
\param [in] handle
Handle of the previously linked DSPL-2.0 library. \n
This pointer can be `NULL`; in that case no action
is performed.
\author Bakhurin Sergey. www.dsplib.org
***************************************************************************** */
#endif
#ifdef DOXYGEN_RUSSIAN
/*! ****************************************************************************
\ingroup SYS_LOADING_GROUP
\fn void dspl_free(void* handle)
\brief Очищает связанную ранее динамическую библиотеку DSPL-2.0.
Данная кроссплатформенная функция производит очистку библиотеки `libdspl.dll` в
системе Windows и с библиотеки `libdspl.so` в системе Linux.
После очистки библиотеки все функции станут недоступны.
\param[in] handle
Хэндл прилинкованной ранее библиотеки DSPL-2.0. \n
Данный указатель может быть `NULL`, в этом случае никакие действия не
производятся.\n\n
\author Бахурин Сергей. www.dsplib.org
**************************************************************************** */
#endif
void dspl_free(void* handle); void dspl_free(void* handle);

Wyświetl plik

@ -1,4 +1,4 @@
# Doxyfile 1.8.18 # Doxyfile 1.8.20
# This file describes the settings to be used by the documentation system # This file describes the settings to be used by the documentation system
# doxygen (www.doxygen.org) for a project. # doxygen (www.doxygen.org) for a project.
@ -227,6 +227,14 @@ QT_AUTOBRIEF = NO
MULTILINE_CPP_IS_BRIEF = NO MULTILINE_CPP_IS_BRIEF = NO
# By default Python docstrings are displayed as preformatted text and doxygen's
# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the
# doxygen's special commands can be used and the contents of the docstring
# documentation blocks is shown as doxygen documentation.
# The default value is: YES.
PYTHON_DOCSTRING = YES
# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
# documentation from any documented member that it re-implements. # documentation from any documented member that it re-implements.
# The default value is: YES. # The default value is: YES.
@ -449,6 +457,19 @@ TYPEDEF_HIDES_STRUCT = NO
LOOKUP_CACHE_SIZE = 0 LOOKUP_CACHE_SIZE = 0
# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use
# during processing. When set to 0 doxygen will base this on the number of
# cores available in the system. You can set it explicitly to a value larger
# than 0 to get more control over the balance between CPU load and processing
# speed. At this moment only the input processing can be done using multiple
# threads. Since this is still an experimental feature the default is set to 1,
# which effectively disables parallel processing. Please report any issues you
# encounter. Generating dot graphs in parallel is controlled by the
# DOT_NUM_THREADS setting.
# Minimum value: 0, maximum value: 32, default value: 1.
NUM_PROC_THREADS = 1
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------
# Build related configuration options # Build related configuration options
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------
@ -553,7 +574,7 @@ INTERNAL_DOCS = NO
# names in lower-case letters. If set to YES, upper-case letters are also # names in lower-case letters. If set to YES, upper-case letters are also
# allowed. This is useful if you have classes or files whose names only differ # allowed. This is useful if you have classes or files whose names only differ
# in case and if your file system supports case sensitive file names. Windows # in case and if your file system supports case sensitive file names. Windows
# (including Cygwin) ands Mac users are advised to set this option to NO. # (including Cygwin) and Mac users are advised to set this option to NO.
# The default value is: system dependent. # The default value is: system dependent.
CASE_SENSE_NAMES = NO CASE_SENSE_NAMES = NO
@ -1132,10 +1153,13 @@ CLANG_ASSISTED_PARSING = NO
CLANG_OPTIONS = CLANG_OPTIONS =
# If clang assisted parsing is enabled you can provide the clang parser with the # If clang assisted parsing is enabled you can provide the clang parser with the
# path to the compilation database (see: # path to the directory containing a file called compile_commands.json. This
# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) used when the files # file is the compilation database (see:
# were built. This is equivalent to specifying the "-p" option to a clang tool, # http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) containing the
# such as clang-check. These options will then be passed to the parser. # options used when the source files were built. This is equivalent to
# specifying the "-p" option to a clang tool, such as clang-check. These options
# will then be passed to the parser. Any options specified with CLANG_OPTIONS
# will be added as well.
# Note: The availability of this option depends on whether or not doxygen was # Note: The availability of this option depends on whether or not doxygen was
# generated with the -Duse_libclang=ON option for CMake. # generated with the -Duse_libclang=ON option for CMake.
@ -1405,7 +1429,7 @@ CHM_FILE =
HHC_LOCATION = HHC_LOCATION =
# The GENERATE_CHI flag controls if a separate .chi index file is generated # The GENERATE_CHI flag controls if a separate .chi index file is generated
# (YES) or that it should be included in the master .chm file (NO). # (YES) or that it should be included in the main .chm file (NO).
# The default value is: NO. # The default value is: NO.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES. # This tag requires that the tag GENERATE_HTMLHELP is set to YES.
@ -1571,8 +1595,8 @@ EXT_LINKS_IN_WINDOW = NO
# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see # tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see
# https://inkscape.org) to generate formulas as SVG images instead of PNGs for # https://inkscape.org) to generate formulas as SVG images instead of PNGs for
# the HTML output. These images will generally look nicer at scaled resolutions. # the HTML output. These images will generally look nicer at scaled resolutions.
# Possible values are: png The default and svg Looks nicer but requires the # Possible values are: png (the default) and svg (looks nicer but requires the
# pdf2svg tool. # pdf2svg or inkscape tool).
# The default value is: png. # The default value is: png.
# This tag requires that the tag GENERATE_HTML is set to YES. # This tag requires that the tag GENERATE_HTML is set to YES.
@ -1613,7 +1637,7 @@ FORMULA_MACROFILE =
# The default value is: NO. # The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES. # This tag requires that the tag GENERATE_HTML is set to YES.
USE_MATHJAX = NO USE_MATHJAX = YES
# When MathJax is enabled you can set the default output format to be used for # When MathJax is enabled you can set the default output format to be used for
# the MathJax output. See the MathJax site (see: # the MathJax output. See the MathJax site (see:
@ -1623,7 +1647,7 @@ USE_MATHJAX = NO
# The default value is: HTML-CSS. # The default value is: HTML-CSS.
# This tag requires that the tag USE_MATHJAX is set to YES. # This tag requires that the tag USE_MATHJAX is set to YES.
MATHJAX_FORMAT = SVG MATHJAX_FORMAT = HTML-CSS
# When MathJax is enabled you need to specify the location relative to the HTML # When MathJax is enabled you need to specify the location relative to the HTML
# output directory using the MATHJAX_RELPATH option. The destination directory # output directory using the MATHJAX_RELPATH option. The destination directory
@ -1636,7 +1660,7 @@ MATHJAX_FORMAT = SVG
# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2. # The default value is: https://cdn.jsdelivr.net/npm/mathjax@2.
# This tag requires that the tag USE_MATHJAX is set to YES. # This tag requires that the tag USE_MATHJAX is set to YES.
MATHJAX_RELPATH = http://dsplib.org/mathjax/latest MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
# extension names that should be enabled during MathJax rendering. For example # extension names that should be enabled during MathJax rendering. For example
@ -1872,9 +1896,11 @@ LATEX_EXTRA_FILES =
PDF_HYPERLINKS = YES PDF_HYPERLINKS = YES
# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate # If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as
# the PDF file directly from the LaTeX files. Set this option to YES, to get a # specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX
# higher quality PDF documentation. # files. Set this option to YES, to get a higher quality PDF documentation.
#
# See also section LATEX_CMD_NAME for selecting the engine.
# The default value is: YES. # The default value is: YES.
# This tag requires that the tag GENERATE_LATEX is set to YES. # This tag requires that the tag GENERATE_LATEX is set to YES.
@ -2113,6 +2139,10 @@ DOCBOOK_PROGRAMLISTING = NO
GENERATE_AUTOGEN_DEF = NO GENERATE_AUTOGEN_DEF = NO
#---------------------------------------------------------------------------
# Configuration options related to Sqlite3 output
#---------------------------------------------------------------------------
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------
# Configuration options related to the Perl module output # Configuration options related to the Perl module output
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------

Wyświetl plik

@ -27,7 +27,12 @@ $generatedby &#160;<a href="http://www.doxygen.org/index.html">
<a href="http://en.dsplib.org/forum">Forum</a> <a href="http://en.dsplib.org/forum">Forum</a>
</li> </li>
<div style = "float: right;">
<!--LiveInternet logo--><a href="//www.liveinternet.ru/click"
target="_blank"><img src="//counter.yadro.ru/logo?16.2"
title="LiveInternet: Shows the number of views in 24 hours, visitors in 24 hours and today"
alt="" style="border:0" width="88" height="31"/></a><!--/LiveInternet-->
</div>
</ul> </ul>
<HR> <HR>

Wyświetl plik

@ -102,7 +102,7 @@ screen.colorDepth:screen.pixelDepth))+";u"+escape(document.URL)+
<li><a href="http://en.dsplib.org/dspl">DSPL&ndash;2.0</a></li> <li><a href="http://en.dsplib.org/dspl">DSPL&ndash;2.0</a></li>
<li><a href="http://en.dsplib.org/forum">Forum</a></li> <li><a href="http://en.dsplib.org/forum">Forum</a></li>
<li class = "lang-link"> <li class = "lang-link">
<div class = "lang-link-en"> <a href="http://ru.dsplib.org/dspl">Русский</a> </div> <div class = "lang-link-en"> <a href="https://ru.dsplib.org/dspl">Русский</a> </div>
</li> </li>
</ul> </ul>
</nav> </nav>

Wyświetl plik

@ -30,16 +30,22 @@
/* sqrt(2^31) */ /* sqrt(2^31) */
#define FFT_COMPOSITE_MAX 46340 #define FFT_COMPOSITE_MAX 46340
/* FFT kernel */
int fft_krn(complex_t* t0, complex_t* t1, fft_t* p, int n, int addr); int fft_krn(complex_t* t0, complex_t* t1, fft_t* p, int n, int addr);
/* DFT 2 points */
void dft2 (complex_t *x, complex_t* y); void dft2 (complex_t *x, complex_t* y);
/* DFT 3 points */
#define DFT3_W 0.866025403784439 #define DFT3_W 0.866025403784439
void dft3 (complex_t *x, complex_t* y); void dft3 (complex_t *x, complex_t* y);
/* DFT 4 points */
void dft4 (complex_t *x, complex_t* y); void dft4 (complex_t *x, complex_t* y);
/* DFT 5 points */
#define DFT5_W1 -1.250000000000000 #define DFT5_W1 -1.250000000000000
#define DFT5_W2 0.559016994374947 #define DFT5_W2 0.559016994374947
#define DFT5_W3 1.538841768587630 #define DFT5_W3 1.538841768587630
@ -47,6 +53,8 @@ void dft4 (complex_t *x, complex_t* y);
#define DFT5_W5 0.363271264002680 #define DFT5_W5 0.363271264002680
void dft5 (complex_t *x, complex_t* y); void dft5 (complex_t *x, complex_t* y);
/* DFT 7 points */
#define DFT7_W1 -1.166666666666666518636930 #define DFT7_W1 -1.166666666666666518636930
#define DFT7_W2 0.790156468525400224045541 #define DFT7_W2 0.790156468525400224045541
#define DFT7_W3 0.055854267289647742400494 #define DFT7_W3 0.055854267289647742400494
@ -57,18 +65,49 @@ void dft5 (complex_t *x, complex_t* y);
#define DFT7_W8 -0.874842290961656665615465 #define DFT7_W8 -0.874842290961656665615465
void dft7 (complex_t *x, complex_t* y); void dft7 (complex_t *x, complex_t* y);
/* DFT 8 points */
#define DFT8_W 0.707106781186548 #define DFT8_W 0.707106781186548
void dft8 (complex_t *x, complex_t* y); void dft8 (complex_t *x, complex_t* y);
void transpose2x4(complex_t *x, complex_t* y); void transpose2x4(complex_t *x, complex_t* y);
void transpose4x2(complex_t *x, complex_t* y); void transpose4x2(complex_t *x, complex_t* y);
/* DFT 16 points */
#define DFT16_W1 0.923879532511287 #define DFT16_W1 0.923879532511287
#define DFT16_W2 0.382683432365090 #define DFT16_W2 0.382683432365090
#define DFT16_W3 0.707106781186548 #define DFT16_W3 0.707106781186548
void dft16 (complex_t *x, complex_t* y); void dft16 (complex_t *x, complex_t* y);
void transpose4x4(complex_t *x, complex_t* y); void transpose4x4(complex_t *x, complex_t* y);
/* DFT 32 points */
void dft32(complex_t *x, complex_t* y, complex_t* w);
void transpose8x4(complex_t *x, complex_t* y);
void transpose4x8(complex_t *x, complex_t* y);
/* DFT 64 points */
void dft64(complex_t *x, complex_t* y, complex_t* w);
void transpose8x8(complex_t *x, complex_t* y);
/* DFT 128 points */
void dft128(complex_t *x, complex_t* y, complex_t* w);
/* DFT 256 points */
void dft256(complex_t *x, complex_t* y, complex_t* w);
void transpose16x16(complex_t* x, complex_t* y);
/* DFT 512 points */
void dft512(complex_t *x, complex_t* y, complex_t* w, complex_t* w32);
/* DFT 1024 points */
void dft1024(complex_t *x, complex_t* y, complex_t* w, complex_t* w32);
/* DFT 2048 points */
void dft2048(complex_t *x, complex_t* y, complex_t* w,
complex_t* w32, complex_t* w64);
/* DFT 4096 points */
void dft4096(complex_t *x, complex_t* y, complex_t* w, complex_t* w256);
/* Window functions */ /* Window functions */
int win_bartlett (double *w, int n, int win_type); int win_bartlett (double *w, int n, int win_type);

Wyświetl plik

@ -675,13 +675,21 @@ int fft_krn(complex_t* t0, complex_t* t1, fft_t* p, int n, int addr)
complex_t tmp; complex_t tmp;
n1 = 1; n1 = 1;
if(n%16== 0) { n1 = 16; goto label_size; } if(n % 4096 == 0) { n1 = 4096; goto label_size; }
if(n%7 == 0) { n1 = 7; goto label_size; } if(n % 2048 == 0) { n1 = 2048; goto label_size; }
if(n%8 == 0) { n1 = 8; goto label_size; } if(n % 1024 == 0) { n1 = 1024; goto label_size; }
if(n%5 == 0) { n1 = 5; goto label_size; } if(n % 512 == 0) { n1 = 512; goto label_size; }
if(n%4 == 0) { n1 = 4; goto label_size; } if(n % 256 == 0) { n1 = 256; goto label_size; }
if(n%3 == 0) { n1 = 3; goto label_size; } if(n % 128 == 0) { n1 = 128; goto label_size; }
if(n%2 == 0) { n1 = 2; goto label_size; } if(n % 64 == 0) { n1 = 64; goto label_size; }
if(n % 32 == 0) { n1 = 32; goto label_size; }
if(n % 16 == 0) { n1 = 16; goto label_size; }
if(n % 7 == 0) { n1 = 7; goto label_size; }
if(n % 8 == 0) { n1 = 8; goto label_size; }
if(n % 5 == 0) { n1 = 5; goto label_size; }
if(n % 4 == 0) { n1 = 4; goto label_size; }
if(n % 3 == 0) { n1 = 3; goto label_size; }
if(n % 2 == 0) { n1 = 2; goto label_size; }
label_size: label_size:
if(n1 == 1) if(n1 == 1)
@ -709,6 +717,38 @@ label_size:
matrix_transpose_cmplx(t1, n2, n1, t0); matrix_transpose_cmplx(t1, n2, n1, t0);
} }
if(n1 == 4096)
for(k = 0; k < n2; k++)
dft4096(t0+4096*k, t1+4096*k, p->w4096, p->w256);
if(n1 == 2048)
for(k = 0; k < n2; k++)
dft2048(t0+2048*k, t1+2048*k, p->w2048, p->w32, p->w64);
if(n1 == 1024)
for(k = 0; k < n2; k++)
dft1024(t0+1024*k, t1+1024*k, p->w1024, p->w32);
if(n1 == 512)
for(k = 0; k < n2; k++)
dft512(t0+512*k, t1+512*k, p->w512, p->w32);
if(n1 == 256)
for(k = 0; k < n2; k++)
dft256(t0+256*k, t1+256*k, p->w256);
if(n1 == 128)
for(k = 0; k < n2; k++)
dft128(t0+128*k, t1+128*k, p->w128);
if(n1 == 64)
for(k = 0; k < n2; k++)
dft64(t0+64*k, t1+64*k, p->w64);
if(n1 == 32)
for(k = 0; k < n2; k++)
dft32(t0+32*k, t1+32*k, p->w32);
if(n1 == 16) if(n1 == 16)
for(k = 0; k < n2; k++) for(k = 0; k < n2; k++)
dft16(t0+16*k, t1+16*k); dft16(t0+16*k, t1+16*k);
@ -752,6 +792,7 @@ label_size:
{ {
fft_krn(t1+k*n2, t0+k*n2, p, n2, addr+n); fft_krn(t1+k*n2, t0+k*n2, p, n2, addr+n);
} }
matrix_transpose_cmplx(t0, n2, n1, t1); matrix_transpose_cmplx(t0, n2, n1, t1);
} }
} }
@ -903,13 +944,21 @@ int DSPL_API fft_create(fft_t* pfft, int n)
while(s > 1) while(s > 1)
{ {
n2 = 1; n2 = 1;
if(s%16== 0) { n2 = 16; goto label_size; } if(s%4096 == 0) { n2 = 4096; goto label_size; }
if(s%7 == 0) { n2 = 7; goto label_size; } if(s%2048 == 0) { n2 = 2048; goto label_size; }
if(s%8 == 0) { n2 = 8; goto label_size; } if(s%1024 == 0) { n2 = 1024; goto label_size; }
if(s%5 == 0) { n2 = 5; goto label_size; } if(s%512 == 0) { n2 = 512; goto label_size; }
if(s%4 == 0) { n2 = 4; goto label_size; } if(s%256 == 0) { n2 = 256; goto label_size; }
if(s%3 == 0) { n2 = 3; goto label_size; } if(s%128 == 0) { n2 = 128; goto label_size; }
if(s%2 == 0) { n2 = 2; goto label_size; } if(s% 64 == 0) { n2 = 64; goto label_size; }
if(s% 32 == 0) { n2 = 32; goto label_size; }
if(s% 16 == 0) { n2 = 16; goto label_size; }
if(s% 7 == 0) { n2 = 7; goto label_size; }
if(s% 8 == 0) { n2 = 8; goto label_size; }
if(s% 5 == 0) { n2 = 5; goto label_size; }
if(s% 4 == 0) { n2 = 4; goto label_size; }
if(s% 3 == 0) { n2 = 3; goto label_size; }
if(s% 2 == 0) { n2 = 2; goto label_size; }
label_size: label_size:
@ -962,6 +1011,123 @@ label_size:
pfft->t1 = pfft->t1 ? (complex_t*) realloc(pfft->t1, n*sizeof(complex_t)): pfft->t1 = pfft->t1 ? (complex_t*) realloc(pfft->t1, n*sizeof(complex_t)):
(complex_t*) malloc( n*sizeof(complex_t)); (complex_t*) malloc( n*sizeof(complex_t));
pfft->n = n; pfft->n = n;
/* w32 fill */
addr = 0;
for(k = 0; k < 4; k++)
{
for(m = 0; m < 8; m++)
{
phi = - M_2PI * (double)(k*m) / 32.0;
RE(pfft->w32[addr]) = cos(phi);
IM(pfft->w32[addr]) = sin(phi);
addr++;
}
}
/* w64 fill */
addr = 0;
for(k = 0; k < 8; k++)
{
for(m = 0; m < 8; m++)
{
phi = - M_2PI * (double)(k*m) / 64.0;
RE(pfft->w64[addr]) = cos(phi);
IM(pfft->w64[addr]) = sin(phi);
addr++;
}
}
/* w128 fill */
addr = 0;
for(k = 0; k < 8; k++)
{
for(m = 0; m < 16; m++)
{
phi = - M_2PI * (double)(k*m) / 128.0;
RE(pfft->w128[addr]) = cos(phi);
IM(pfft->w128[addr]) = sin(phi);
addr++;
}
}
/* w256 fill */
addr = 0;
for(k = 0; k < 16; k++)
{
for(m = 0; m < 16; m++)
{
phi = - M_2PI * (double)(k*m) / 256.0;
RE(pfft->w256[addr]) = cos(phi);
IM(pfft->w256[addr]) = sin(phi);
addr++;
}
}
/* w512 fill */
addr = 0;
for(k = 0; k < 16; k++)
{
for(m = 0; m < 32; m++)
{
phi = - M_2PI * (double)(k*m) / 512.0;
RE(pfft->w512[addr]) = cos(phi);
IM(pfft->w512[addr]) = sin(phi);
addr++;
}
}
/* w1024 fill */
if(pfft->w1024 == NULL)
{
pfft->w1024 = (complex_t*) malloc(1024 * sizeof(complex_t));
addr = 0;
for(k = 0; k < 32; k++)
{
for(m = 0; m < 32; m++)
{
phi = - M_2PI * (double)(k*m) / 1024.0;
RE(pfft->w1024[addr]) = cos(phi);
IM(pfft->w1024[addr]) = sin(phi);
addr++;
}
}
}
/* w2048 fill */
if(pfft->w2048 == NULL)
{
pfft->w2048= (complex_t*) malloc(2048 * sizeof(complex_t));
addr = 0;
for(k = 0; k < 32; k++)
{
for(m = 0; m < 64; m++)
{
phi = - M_2PI * (double)(k*m) / 2048.0;
RE(pfft->w2048[addr]) = cos(phi);
IM(pfft->w2048[addr]) = sin(phi);
addr++;
}
}
}
/* w4096 fill */
if(pfft->w4096 == NULL)
{
pfft->w4096= (complex_t*) malloc(4096 * sizeof(complex_t));
addr = 0;
for(k = 0; k < 16; k++)
{
for(m = 0; m < 256; m++)
{
phi = - M_2PI * (double)(k*m) / 4096.0;
RE(pfft->w4096[addr]) = cos(phi);
IM(pfft->w4096[addr]) = sin(phi);
addr++;
}
}
}
return RES_OK; return RES_OK;
error_proc: error_proc:
@ -1016,6 +1182,16 @@ void DSPL_API fft_free(fft_t *pfft)
free(pfft->t0); free(pfft->t0);
if(pfft->t1) if(pfft->t1)
free(pfft->t1); free(pfft->t1);
if(pfft->w1024)
free(pfft->w1024);
if(pfft->w2048)
free(pfft->w2048);
if(pfft->w4096)
free(pfft->w4096);
memset(pfft, 0, sizeof(fft_t)); memset(pfft, 0, sizeof(fft_t));
} }

Wyświetl plik

@ -26,10 +26,6 @@
/******************************************************************************* /*******************************************************************************
2 points DFT 2 points DFT
*******************************************************************************/ *******************************************************************************/
@ -319,7 +315,6 @@ void dft7 (complex_t *x, complex_t* y)
RE(y[6]) = RE(sum[20]) - RE(sum[26]); RE(y[6]) = RE(sum[20]) - RE(sum[26]);
IM(y[6]) = IM(sum[20]) - IM(sum[26]); IM(y[6]) = IM(sum[20]) - IM(sum[26]);
} }
@ -455,6 +450,263 @@ void dft16(complex_t *x, complex_t* y)
} }
/*******************************************************************************
32 points DFT (Winograd algorithm)
*******************************************************************************/
void dft32(complex_t *x, complex_t* y, complex_t* w)
{
  /*
   * 32-point DFT assembled from 8-point and 4-point kernels.
   *   x - 32 input samples (only read here)
   *   y - 32 output samples
   *   w - 32-entry table combined element-wise with the intermediate
   *       result through CMRE/CMIM (presumably the real and imaginary
   *       parts of a complex product - confirm against the macro
   *       definitions)
   */
  complex_t stage_a[32];
  complex_t stage_b[32];
  int pos;

  transpose4x8(x, stage_a);

  /* four 8-point DFTs over consecutive groups of 8 samples */
  for(pos = 0; pos < 32; pos += 8)
    dft8(stage_a + pos, stage_b + pos);

  /* element-wise combination with the w table */
  pos = 0;
  while(pos < 32)
  {
    RE(stage_a[pos]) = CMRE(stage_b[pos], w[pos]);
    IM(stage_a[pos]) = CMIM(stage_b[pos], w[pos]);
    pos++;
  }

  transpose8x4(stage_a, stage_b);

  /* eight 4-point DFTs over consecutive groups of 4 samples */
  for(pos = 0; pos < 32; pos += 4)
    dft4(stage_b + pos, stage_a + pos);

  transpose4x8(stage_a, y);
}
/*******************************************************************************
64 points DFT (Winograd algorithm)
*******************************************************************************/
void dft64(complex_t *x, complex_t* y, complex_t* w)
{
  /*
   * 64-point DFT assembled from two passes of 8-point kernels.
   *   x - 64 input samples (only read here)
   *   y - 64 output samples
   *   w - 64-entry table combined element-wise with the intermediate
   *       result through CMRE/CMIM between the two passes
   */
  complex_t stage_a[64];
  complex_t stage_b[64];
  int pos;

  transpose8x8(x, stage_a);

  /* first pass: eight 8-point DFTs */
  for(pos = 0; pos < 64; pos += 8)
    dft8(stage_a + pos, stage_b + pos);

  /* element-wise combination with the w table */
  pos = 0;
  while(pos < 64)
  {
    RE(stage_a[pos]) = CMRE(stage_b[pos], w[pos]);
    IM(stage_a[pos]) = CMIM(stage_b[pos], w[pos]);
    pos++;
  }

  transpose8x8(stage_a, stage_b);

  /* second pass: eight 8-point DFTs */
  for(pos = 0; pos < 64; pos += 8)
    dft8(stage_b + pos, stage_a + pos);

  transpose8x8(stage_a, y);
}
/*******************************************************************************
128 points DFT (Winograd algorithm)
*******************************************************************************/
void dft128(complex_t *x, complex_t* y, complex_t* w)
{
  /*
   * 128-point DFT assembled from 16-point and 8-point kernels.
   *   x - 128 input samples (only read here)
   *   y - 128 output samples
   *   w - 128-entry table combined element-wise with the intermediate
   *       result through CMRE/CMIM between the two passes
   */
  complex_t stage_a[128];
  complex_t stage_b[128];
  int pos;

  matrix_transpose_cmplx(x, 8, 16, stage_a);

  /* first pass: eight 16-point DFTs */
  for(pos = 0; pos < 128; pos += 16)
    dft16(stage_a + pos, stage_b + pos);

  /* element-wise combination with the w table */
  pos = 0;
  while(pos < 128)
  {
    RE(stage_a[pos]) = CMRE(stage_b[pos], w[pos]);
    IM(stage_a[pos]) = CMIM(stage_b[pos], w[pos]);
    pos++;
  }

  matrix_transpose_cmplx(stage_a, 16, 8, stage_b);

  /* second pass: sixteen 8-point DFTs */
  for(pos = 0; pos < 128; pos += 8)
    dft8(stage_b + pos, stage_a + pos);

  matrix_transpose_cmplx(stage_a, 8, 16, y);
}
/*******************************************************************************
256 points DFT (Winograd algorithm)
*******************************************************************************/
void dft256(complex_t *x, complex_t* y, complex_t* w)
{
  /*
   * 256-point DFT assembled from two passes of 16-point kernels.
   *   x - 256 input samples (only read here)
   *   y - 256 output samples
   *   w - 256-entry table combined element-wise with the intermediate
   *       result through CMRE/CMIM between the two passes
   */
  complex_t stage_a[256];
  complex_t stage_b[256];
  int pos;

  transpose16x16(x, stage_a);

  /* first pass: sixteen 16-point DFTs */
  for(pos = 0; pos < 256; pos += 16)
    dft16(stage_a + pos, stage_b + pos);

  /* element-wise combination with the w table */
  pos = 0;
  while(pos < 256)
  {
    RE(stage_a[pos]) = CMRE(stage_b[pos], w[pos]);
    IM(stage_a[pos]) = CMIM(stage_b[pos], w[pos]);
    pos++;
  }

  transpose16x16(stage_a, stage_b);

  /* second pass: sixteen 16-point DFTs */
  for(pos = 0; pos < 256; pos += 16)
    dft16(stage_b + pos, stage_a + pos);

  transpose16x16(stage_a, y);
}
/*******************************************************************************
512 points DFT (Winograd algorithm)
*******************************************************************************/
void dft512(complex_t *x, complex_t* y, complex_t* w, complex_t* w32)
{
  /*
   * 512-point DFT assembled from 32-point and 16-point kernels.
   *   x   - 512 input samples (only read here)
   *   y   - 512 output samples
   *   w   - 512-entry table combined element-wise with the intermediate
   *         result through CMRE/CMIM between the two passes
   *   w32 - table forwarded unchanged to every dft32 call
   */
  complex_t stage_a[512];
  complex_t stage_b[512];
  int pos;

  matrix_transpose_cmplx(x, 16, 32, stage_a);

  /* first pass: sixteen 32-point DFTs */
  for(pos = 0; pos < 512; pos += 32)
    dft32(stage_a + pos, stage_b + pos, w32);

  /* element-wise combination with the w table */
  pos = 0;
  while(pos < 512)
  {
    RE(stage_a[pos]) = CMRE(stage_b[pos], w[pos]);
    IM(stage_a[pos]) = CMIM(stage_b[pos], w[pos]);
    pos++;
  }

  matrix_transpose_cmplx(stage_a, 32, 16, stage_b);

  /* second pass: thirty-two 16-point DFTs */
  for(pos = 0; pos < 512; pos += 16)
    dft16(stage_b + pos, stage_a + pos);

  matrix_transpose_cmplx(stage_a, 16, 32, y);
}
/*******************************************************************************
1024 points DFT (Winograd algorithm)
*******************************************************************************/
void dft1024(complex_t *x, complex_t* y, complex_t* w, complex_t* w32)
{
  /*
   * 1024-point DFT assembled from two passes of 32-point kernels.
   *   x   - 1024 input samples (only read here)
   *   y   - 1024 output samples
   *   w   - 1024-entry table combined element-wise with the intermediate
   *         result through CMRE/CMIM between the two passes
   *   w32 - table forwarded unchanged to every dft32 call
   */
  complex_t stage_a[1024];
  complex_t stage_b[1024];
  int pos;

  matrix_transpose_cmplx(x, 32, 32, stage_a);

  /* first pass: thirty-two 32-point DFTs */
  for(pos = 0; pos < 1024; pos += 32)
    dft32(stage_a + pos, stage_b + pos, w32);

  /* element-wise combination with the w table */
  pos = 0;
  while(pos < 1024)
  {
    RE(stage_a[pos]) = CMRE(stage_b[pos], w[pos]);
    IM(stage_a[pos]) = CMIM(stage_b[pos], w[pos]);
    pos++;
  }

  matrix_transpose_cmplx(stage_a, 32, 32, stage_b);

  /* second pass: thirty-two 32-point DFTs */
  for(pos = 0; pos < 1024; pos += 32)
    dft32(stage_b + pos, stage_a + pos, w32);

  matrix_transpose_cmplx(stage_a, 32, 32, y);
}
/*******************************************************************************
2048 points DFT (Winograd algorithm)
*******************************************************************************/
void dft2048(complex_t *x, complex_t* y, complex_t* w,
             complex_t* w32, complex_t* w64)
{
  /*
   * 2048-point DFT assembled from 64-point and 32-point kernels.
   *   x   - 2048 input samples (only read here)
   *   y   - 2048 output samples
   *   w   - 2048-entry table combined element-wise with the intermediate
   *         result through CMRE/CMIM between the two passes
   *   w32 - table forwarded unchanged to every dft32 call
   *   w64 - table forwarded unchanged to every dft64 call
   *
   * The scratch buffers are heap-allocated. If the allocation fails the
   * function returns without writing to y: the void signature offers no
   * way to report the error, and the previous code dereferenced NULL in
   * that situation.
   */
  complex_t *t0 = NULL;
  complex_t *t1 = NULL;
  int i;

  /* A single allocation backs both scratch buffers: one failure point,
     one free. */
  t0 = (complex_t*)malloc(2 * 2048 * sizeof(complex_t));
  if(t0 == NULL)
    return;
  t1 = t0 + 2048;

  matrix_transpose_cmplx(x, 32, 64, t0);

  /* first pass: thirty-two 64-point DFTs */
  for(i = 0; i < 32; i++)
    dft64(t0 + i*64, t1 + i*64, w64);

  /* element-wise combination with the w table */
  for(i = 0; i < 2048; i++)
  {
    RE(t0[i]) = CMRE(t1[i], w[i]);
    IM(t0[i]) = CMIM(t1[i], w[i]);
  }

  matrix_transpose_cmplx(t0, 64, 32, t1);

  /* second pass: sixty-four 32-point DFTs */
  for(i = 0; i < 64; i++)
    dft32(t1 + i*32, t0 + i*32, w32);

  matrix_transpose_cmplx(t0, 32, 64, y);

  /* t1 points into the same allocation, so only t0 is released */
  free(t0);
}
/*******************************************************************************
4096 points DFT (Winograd algorithm)

Pipeline: transpose -> sixteen dft256() calls -> per-element multiply by w
-> transpose -> 256 dft16() calls -> transpose into y.

  x    - input vector (handed to matrix_transpose_cmplx with sizes 16, 256)
  y    - output vector (filled by the final matrix_transpose_cmplx call)
  w    - per-element factors applied between the passes; elements 0..4095
         are read
  w256 - table forwarded unchanged to every dft256() call

Two 4096-element scratch buffers are taken from the heap and released
before returning. If either allocation fails, both are released and the
function returns WITHOUT writing to y; the void interface gives the caller
no way to detect this.
matrix_transpose_cmplx, CMRE and CMIM are defined elsewhere; CMRE/CMIM
presumably give the real/imaginary part of a complex product - confirm.
*******************************************************************************/
void dft4096(complex_t *x, complex_t* y, complex_t* w, complex_t* w256)
{
    complex_t *t0 = NULL;
    complex_t *t1 = NULL;
    int i;

    t0 = (complex_t*)malloc(4096 * sizeof(complex_t));
    t1 = (complex_t*)malloc(4096 * sizeof(complex_t));

    /* never touch the scratch buffers unless both allocations succeeded;
       free(NULL) is a no-op, so releasing both is always safe */
    if (t0 == NULL || t1 == NULL)
    {
        free(t0);
        free(t1);
        return;
    }

    matrix_transpose_cmplx(x, 16, 256, t0);

    /* first pass: sixteen 256-point transforms, t0 -> t1 */
    for (i = 0; i < 16; i++)
        dft256(t0 + i * 256, t1 + i * 256, w256);

    /* combine every intermediate value with its factor from w */
    for (i = 0; i < 4096; i++)
    {
        RE(t0[i]) = CMRE(t1[i], w[i]);
        IM(t0[i]) = CMIM(t1[i], w[i]);
    }

    matrix_transpose_cmplx(t0, 256, 16, t1);

    /* second pass: 256 16-point transforms, t1 -> t0 */
    for (i = 0; i < 256; i++)
        dft16(t1 + i * 16, t0 + i * 16);

    matrix_transpose_cmplx(t0, 16, 256, y);

    free(t0);
    free(t1);
}
/******************************************************************************* /*******************************************************************************
4 x 2 matrix transpose 4 x 2 matrix transpose
*******************************************************************************/ *******************************************************************************/
@ -512,3 +764,420 @@ void transpose4x4(complex_t *x, complex_t* y)
} }
/*******************************************************************************
8 x 4 matrix transpose

Copies 32 complex elements so that y[r*4 + c] = x[c*8 + r]
for r = 0..7 and c = 0..3.

  x - source, elements 0..31 are read
  y - destination, elements 0..31 are written

Elements are written in increasing order of the y index.
*******************************************************************************/
void transpose8x4(complex_t *x, complex_t* y)
{
    int r;
    int c;
    int src;
    int dst;

    dst = 0;
    for (r = 0; r < 8; r++)
    {
        for (c = 0; c < 4; c++)
        {
            src = c * 8 + r;
            RE(y[dst]) = RE(x[src]);
            IM(y[dst]) = IM(x[src]);
            dst++;
        }
    }
}
/*******************************************************************************
4 x 8 matrix transpose

Copies 32 complex elements so that y[r*8 + c] = x[c*4 + r]
for r = 0..3 and c = 0..7.

  x - source, elements 0..31 are read
  y - destination, elements 0..31 are written

Elements are written in increasing order of the y index.
*******************************************************************************/
void transpose4x8(complex_t *x, complex_t* y)
{
    int r;
    int c;
    int src;
    int dst;

    dst = 0;
    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 8; c++)
        {
            src = c * 4 + r;
            RE(y[dst]) = RE(x[src]);
            IM(y[dst]) = IM(x[src]);
            dst++;
        }
    }
}
/*******************************************************************************
8 x 8 matrix transpose

Copies 64 complex elements so that y[r*8 + c] = x[c*8 + r]
for r = 0..7 and c = 0..7.

  x - source, elements 0..63 are read
  y - destination, elements 0..63 are written

Elements are written in increasing order of the y index.
*******************************************************************************/
void transpose8x8(complex_t *x, complex_t* y)
{
    int r;
    int c;
    int src;
    int dst;

    dst = 0;
    for (r = 0; r < 8; r++)
    {
        for (c = 0; c < 8; c++)
        {
            src = c * 8 + r;
            RE(y[dst]) = RE(x[src]);
            IM(y[dst]) = IM(x[src]);
            dst++;
        }
    }
}
/*******************************************************************************
16 x 16 matrix transpose

Copies 256 complex elements so that y[r*16 + c] = x[c*16 + r]
for r = 0..15 and c = 0..15.

  x - source, elements 0..255 are read
  y - destination, elements 0..255 are written

Elements are written in increasing order of the y index.
*******************************************************************************/
void transpose16x16(complex_t* x, complex_t* y)
{
    int r;
    int c;
    int src;
    int dst;

    dst = 0;
    for (r = 0; r < 16; r++)
    {
        for (c = 0; c < 16; c++)
        {
            src = c * 16 + r;
            RE(y[dst]) = RE(x[src]);
            IM(y[dst]) = IM(x[src]);
            dst++;
        }
    }
}

Wyświetl plik

@ -96,7 +96,6 @@ In addition, GNUPLOT will build the following graphs from data stored in files:
\author Sergey Bakhurin www.dsplib.org \author Sergey Bakhurin www.dsplib.org
***************************************************************************** */ ***************************************************************************** */
#endif #endif
#ifdef DOXYGEN_RUSSIAN #ifdef DOXYGEN_RUSSIAN
/*! **************************************************************************** /*! ****************************************************************************
@ -211,11 +210,6 @@ exit_label:
#ifdef DOXYGEN_ENGLISH #ifdef DOXYGEN_ENGLISH
/*! **************************************************************************** /*! ****************************************************************************
\ingroup IIR_FILTER_DESIGN_GROUP \ingroup IIR_FILTER_DESIGN_GROUP

Wyświetl plik

@ -118,7 +118,10 @@ Memory must be allocated by \ref fft_create function. \n\n
Pointer to the vector of intermediate results. \n Pointer to the vector of intermediate results. \n
The size of the vector is `[n x 1]`. \n The size of the vector is `[n x 1]`. \n
The memory must be allocated with the \ref fft_create function. \n\n The memory must be allocated with the \ref fft_create function. \n\n
The structure is populated with the \ref fft_create function once
The structure is calculated with the \ref fft_create function once
before using the FFT algorithm. \n before using the FFT algorithm. \n
A pointer to an object of this structure may be A pointer to an object of this structure may be
reused when calling FFT functions. \n reused when calling FFT functions. \n
@ -186,6 +189,32 @@ then the structure arrays will be automatically recreated for the length `n`.
Указатель на вектор промежуточных вычислений алгоритма БПФ. \n Указатель на вектор промежуточных вычислений алгоритма БПФ. \n
Размер вектора `[n x 1]`. \n Размер вектора `[n x 1]`. \n
Память должна быть выделена функцией \ref fft_create. \n \n Память должна быть выделена функцией \ref fft_create. \n \n
\param w32
Статический вектор поворотных коэффициентов 32-точечного БПФ. \n \n
\param w64
Статический вектор поворотных коэффициентов 64-точечного БПФ. \n \n
\param w128
Статический вектор поворотных коэффициентов 128-точечного БПФ. \n \n
\param w256
Статический вектор поворотных коэффициентов 256-точечного БПФ. \n \n
\param w512
Статический вектор поворотных коэффициентов 512-точечного БПФ. \n \n
\param w1024
Статический вектор поворотных коэффициентов 1024-точечного БПФ. \n \n
\param w2048
Статический вектор поворотных коэффициентов 2048-точечного БПФ. \n \n
\param w4096
Статический вектор поворотных коэффициентов 4096-точечного БПФ. \n \n
Структура заполняется функцией \ref fft_create один раз Структура заполняется функцией \ref fft_create один раз
до использования алгоритма БПФ. \n до использования алгоритма БПФ. \n
Указатель на объект данной структуры может быть Указатель на объект данной структуры может быть
@ -225,10 +254,19 @@ www.dsplib.org
#endif #endif
typedef struct typedef struct
{ {
complex_t* w; complex_t* w;
complex_t* t0; complex_t* t0;
complex_t* t1; complex_t* t1;
int n;
complex_t w32[ 32];
complex_t w64[ 64];
complex_t w128[128];
complex_t w256[256];
complex_t w512[512];
complex_t* w1024;
complex_t* w2048;
complex_t* w4096;
int n;
} fft_t; } fft_t;

Wyświetl plik

@ -23,49 +23,96 @@ for j = 1:21
s = s / 2; s = s / 2;
endfor endfor
dspl_size = [2
4
8
16
32
64
128
256
512
1024
2048
4096
8192
16384
32768
65536
131072
262144
524288
1048576];
dspl = [1204.630392 dspl_mflops = [597.7
1283.970612 1946.0
1586.347958 4455.5
1707.107097 5446.3
1866.109831 4490.7
1837.307509 4288.5
2366.785829 3524.1
2302.925874 5286.9
2388.456514 3995.3
2113.451546 3657.6
3090.904615 2953.2
2979.596190 2078.0
2685.155556 2565.7
2053.760000 2615.5
3723.946667 2361.8
3195.618462 2376.4
2328.221538 2169.8
1786.533333 2285.5
7288.960000 2172.4
4646.700000 1896.4];
2633.120000];
python = [2390.741 python_size = [4194304
2597.527 2097152
2841.191 1048576
3066.652 524288
3092.187 262144
3444.710 131072
3633.320 65536
4333.845 32768
5316.897 16384
5201.486 8192
4608.231 4096
4481.357 2048
3876.925 1024
2961.753 512
2435.427 256
1344.871 128
606.953 64
298.559 32
120.772 16
50.369 8
17.033]; 4
2];
python_mflops = [2119.626
2147.070
2362.656
2351.777
2408.621
2678.743
3194.574
3978.322
5220.731
4671.613
4240.982
3585.080
3876.999
2556.301
2333.780
1301.660
606.947
294.469
127.103
37.574
15.669
4.110];
plot(log2(size), mflops,log2(size), dspl, log2(size), python) plot(log2(size), mflops,log2(dspl_size), dspl_mflops, log2(python_size), python_mflops)

Wyświetl plik

@ -5,7 +5,7 @@
#include "dspl.h" #include "dspl.h"
#define NMAX 4194304 #define NMAX 4194304
#define L 18 #define L 20
#define SIZE_FACTOR 2.3 #define SIZE_FACTOR 2.3
@ -87,10 +87,10 @@ int main(int argc, char* argv[])
hdspl = dspl_load(); /* Load DSPL function */ hdspl = dspl_load(); /* Load DSPL function */
int len_r2[L] = {2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, int len_r2[L] = {2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
8192, 16384, 32768, 65536, 131072, 262144}; 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576};
int len_nr[L] = {6, 9, 12, 15, 18, 24, 36, 80, 108, 210, 504, 1000, int len_nr[L] = {6, 9, 12, 15, 18, 24, 36, 80, 100, 108, 210, 504, 1000,
1960, 4725, 10368, 27000, 75600, 165375}; 1960, 4725, 8000, 10368, 27000, 75600, 165375};
int err; int err;
double mflops[L] = {0}; double mflops[L] = {0};

Wyświetl plik

@ -0,0 +1,44 @@
% Reference-output generator for the radix-2 FFT verification.
% For every listed input file: read the samples, run Octave's fft() on
% them and store the result in the matching output file.
% readbin/writebin are not defined here; they are expected to come from
% the directory added with addpath below.
clear all; close all; clc;
addpath('octave');

% Column 1: input data file, column 2: file receiving fft() of that data.
io_files = {'dat/x_fft_4.dat',     'dat/y_fft_4.dat';
            'dat/x_fft_8.dat',     'dat/y_fft_8.dat';
            'dat/x_fft_16.dat',    'dat/y_fft_16.dat';
            'dat/x_fft_32.dat',    'dat/y_fft_32.dat';
            'dat/x_fft_64.dat',    'dat/y_fft_64.dat';
            'dat/x_fft_128.dat',   'dat/y_fft_128.dat';
            'dat/x_fft_256.dat',   'dat/y_fft_256.dat';
            'dat/x_fft_512.dat',   'dat/y_fft_512.dat';
            'dat/x_fft_1024.dat',  'dat/y_fft_1024.dat';
            'dat/x_fft_2048.dat',  'dat/y_fft_2048.dat';
            'dat/x_fft_4096.dat',  'dat/y_fft_4096.dat';
            'dat/x_fft_8192.dat',  'dat/y_fft_8192.dat';
            'dat/x_fft_16384.dat', 'dat/y_fft_16384.dat';
            'dat/x_fft_32768.dat', 'dat/y_fft_32768.dat';
            'dat/x_fft_65536.dat', 'dat/y_fft_65536.dat'};

for k = 1:size(io_files, 1)
  samples  = readbin(io_files{k, 1});
  spectrum = fft(samples);
  % second argument kept as in the original call; its meaning is set by
  % writebin, which is defined elsewhere - confirm
  writebin(spectrum, 1, io_files{k, 2});
end

Wyświetl plik

@ -0,0 +1,156 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "dspl.h"
#define FFT_SIZE 65536
int main(int argc, char* argv[])
{
void* hdspl; /* DSPL handle */
fft_t pfft = {0};
int verr, nx, type;
double derr;
complex_t *yout = NULL;
complex_t *xc = NULL;
hdspl = dspl_load(); /* Load DSPL function */
verif_data_gen(4, DAT_COMPLEX, "dat/x_fft_4.dat");
verif_data_gen(8, DAT_COMPLEX, "dat/x_fft_8.dat");
verif_data_gen(16, DAT_COMPLEX, "dat/x_fft_16.dat");
verif_data_gen(32, DAT_COMPLEX, "dat/x_fft_32.dat");
verif_data_gen(64, DAT_COMPLEX, "dat/x_fft_64.dat");
verif_data_gen(128, DAT_COMPLEX, "dat/x_fft_128.dat");
verif_data_gen(256, DAT_COMPLEX, "dat/x_fft_256.dat");
verif_data_gen(512, DAT_COMPLEX, "dat/x_fft_512.dat");
verif_data_gen(1024, DAT_COMPLEX, "dat/x_fft_1024.dat");
verif_data_gen(2048, DAT_COMPLEX, "dat/x_fft_2048.dat");
verif_data_gen(4096, DAT_COMPLEX, "dat/x_fft_4096.dat");
verif_data_gen(8192, DAT_COMPLEX, "dat/x_fft_8192.dat");
verif_data_gen(16384, DAT_COMPLEX, "dat/x_fft_16384.dat");
verif_data_gen(32768, DAT_COMPLEX, "dat/x_fft_32768.dat");
verif_data_gen(65536, DAT_COMPLEX, "dat/x_fft_65536.dat");
yout = (complex_t*)malloc(FFT_SIZE * sizeof(complex_t ));
system("octave octave/fft_radix2_verification.m");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_4.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 4, &pfft, yout);
verif_str_cmplx(yout, 4, "fft 4 for complex dat",
"dat/y_fft_4.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_8.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 8, &pfft, yout);
verif_str_cmplx(yout, 8, "fft 8 for complex dat",
"dat/y_fft_8.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_16.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 16, &pfft, yout);
verif_str_cmplx(yout, 16, "fft 16 for complex dat",
"dat/y_fft_16.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_32.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 32, &pfft, yout);
verif_str_cmplx(yout, 32, "fft 32 for complex dat",
"dat/y_fft_32.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_64.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 64, &pfft, yout);
verif_str_cmplx(yout, 64, "fft 64 for complex dat",
"dat/y_fft_64.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_128.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 128, &pfft, yout);
verif_str_cmplx(yout, 128, "fft 128 for complex dat",
"dat/y_fft_128.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_256.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 256, &pfft, yout);
verif_str_cmplx(yout, 256, "fft 256 for complex dat",
"dat/y_fft_256.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_512.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 512, &pfft, yout);
verif_str_cmplx(yout, 512, "fft 512 for complex dat",
"dat/y_fft_512.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_1024.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 1024, &pfft, yout);
verif_str_cmplx(yout, 1024, "fft 1024 for complex dat",
"dat/y_fft_1024.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_2048.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 2048, &pfft, yout);
verif_str_cmplx(yout, 2048, "fft 2048 for complex dat",
"dat/y_fft_2048.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_4096.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 4096, &pfft, yout);
verif_str_cmplx(yout, 4096, "fft 4096 for complex dat",
"dat/y_fft_4096.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_8192.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 8192, &pfft, yout);
verif_str_cmplx(yout, 8192, "fft 8192 for complex dat",
"dat/y_fft_8192.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_16384.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 16384, &pfft, yout);
verif_str_cmplx(yout, 16384, "fft 16384 for complex dat",
"dat/y_fft_16384.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_32768.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 32768, &pfft, yout);
verif_str_cmplx(yout, 32768, "fft 32768 for complex dat",
"dat/y_fft_32768.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_65536.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 65536, &pfft, yout);
verif_str_cmplx(yout, 65536, "fft 65536 for complex dat",
"dat/y_fft_65536.dat",
"verification.log");
/* free dspl handle */
dspl_free(hdspl);
if(yout)
free(yout);
if(xc)
free(xc);
return 0;
}