Improved FFT performance and added a verifier for radix-2 FFT sizes from 4 to 65536 points

Changes to be committed:
modified:   _release/dspl.c
modified:   _release/dspl.h
modified:   dspl/dox/doxyfile_en
modified:   dspl/dox/footer_en.html
modified:   dspl/dox/header_en.html
modified:   dspl/src/dspl_internal.h
modified:   dspl/src/fft.c
modified:   dspl/src/fft_subkernel.c
modified:   dspl/src/filter_ap.c
modified:   include/dspl.h
modified:   performance/bin/octave/fft_cmplx_performance.m
modified:   performance/src/fft_cmplx_performance.c
new file:   verification/bin/octave/fft_radix2_verification.m
new file:   verification/src/fft_radix2_verification.c
pull/6/merge
Dsplib 2020-11-10 19:06:20 +03:00
rodzic 3ed7cd21d4
commit d5586dfb9e
14 zmienionych plików z 1369 dodań i 146 usunięć

Wyświetl plik

@ -137,8 +137,10 @@ p_matrix_eig_cmplx matrix_eig_cmplx ;
p_matrix_eye matrix_eye ;
p_matrix_eye_cmplx matrix_eye_cmplx ;
p_matrix_mul matrix_mul ;
p_matrix_pinv matrix_pinv ;
p_matrix_print matrix_print ;
p_matrix_print_cmplx matrix_print_cmplx ;
p_matrix_svd matrix_svd ;
p_matrix_transpose matrix_transpose ;
p_matrix_transpose_cmplx matrix_transpose_cmplx ;
p_matrix_transpose_hermite matrix_transpose_hermite ;
@ -349,8 +351,10 @@ void* dspl_load()
LOAD_FUNC(matrix_eye);
LOAD_FUNC(matrix_eye_cmplx);
LOAD_FUNC(matrix_mul);
LOAD_FUNC(matrix_pinv);
LOAD_FUNC(matrix_print);
LOAD_FUNC(matrix_print_cmplx);
LOAD_FUNC(matrix_svd);
LOAD_FUNC(matrix_transpose);
LOAD_FUNC(matrix_transpose_cmplx);
LOAD_FUNC(matrix_transpose_hermite);
@ -440,8 +444,6 @@ void* dspl_load()
void dspl_free(void* handle)
{
#ifdef WIN_OS

Wyświetl plik

@ -225,10 +225,19 @@ www.dsplib.org
#endif
typedef struct
{
complex_t* w;
complex_t* t0;
complex_t* t1;
int n;
complex_t* w;
complex_t* t0;
complex_t* t1;
complex_t w32[ 32];
complex_t w64[ 64];
complex_t w128[128];
complex_t w256[256];
complex_t w512[512];
complex_t* w1024;
complex_t* w2048;
complex_t* w4096;
int n;
} fft_t;
@ -1216,6 +1225,13 @@ DECLARE_FUNC(int, matrix_mul, double* a
COMMA int mb
COMMA double* c);
/*----------------------------------------------------------------------------*/
DECLARE_FUNC(int, matrix_pinv, double* a
COMMA int n
COMMA int m
COMMA double* tol
COMMA double* inv
COMMA int* info);
/*----------------------------------------------------------------------------*/
DECLARE_FUNC(int, matrix_print, double* a
COMMA int n
COMMA int m
@ -1228,6 +1244,14 @@ DECLARE_FUNC(int, matrix_print_cmplx, complex_t* a
COMMA const char* name
COMMA const char* format);
/*----------------------------------------------------------------------------*/
DECLARE_FUNC(int, matrix_svd, double* a
COMMA int n
COMMA int m
COMMA double* u
COMMA double* s
COMMA double* vt
COMMA int* info);
/*----------------------------------------------------------------------------*/
DECLARE_FUNC(int, matrix_transpose, double* a
COMMA int n
COMMA int m
@ -1563,49 +1587,6 @@ DECLARE_FUNC(int, xcorr_cmplx, complex_t* x
#endif
#ifdef DOXYGEN_ENGLISH
/*! ****************************************************************************
\ingroup SYS_LOADING_GROUP
\fn void dspl_free(void* handle)
\brief Cleans up the previously linked DSPL-2.0 dynamic library.
This cross-platform function clears the library `libdspl.dll` in
Windows system and from the library `libdspl.so` on the Linux system.
After cleaning the library, all functions will become unavailable.
\param [in] handle
Handle of the previously linked DSPL-2.0 library. \n
This pointer can be `NULL`, in this case no action
are being produced.
\author Bakhurin Sergey. www.dsplib.org
***************************************************************************** */
#endif
#ifdef DOXYGEN_RUSSIAN
/*! ****************************************************************************
\ingroup SYS_LOADING_GROUP
\fn void dspl_free(void* handle)
\brief Очищает связанную ранее динамическую библиотеку DSPL-2.0.
Данная кроссплатформенная функция производит очистку библиотеки `libdspl.dll` в
системе Windows и с библиотеки `libdspl.so` в системе Linux.
После очистки библиотеки все функции станут недоступны.
\param[in] handle
Хэндл прилинкованной ранее библиотеки DSPL-2.0. \n
Данный указатель может быть `NULL`, в этом случае никакие действия не
производятся.\n\n
\author Бахурин Сергей. www.dsplib.org
**************************************************************************** */
#endif
void* dspl_load();
#ifdef DOXYGEN_ENGLISH
/*! ****************************************************************************
\ingroup SYS_LOADING_GROUP
@ -1694,7 +1675,7 @@ int main(int argc, char* argv[])
void* hdspl; // DSPL хэндл
hdspl = dspl_load(); // Динамическая линковка
// Проверяем указатель. Если `NULLL`, то линковка прошла неудачно
// Проверяем указатель. Если `NULL`, то линковка прошла неудачно
if(!hdspl)
{
printf("libdspl loading error!\n");
@ -1714,6 +1695,48 @@ int main(int argc, char* argv[])
\author Бахурин Сергей. www.dsplib.org
***************************************************************************** */
#endif
void* dspl_load();
#ifdef DOXYGEN_ENGLISH
/*! ****************************************************************************
\ingroup SYS_LOADING_GROUP
\fn void dspl_free(void* handle)
\brief Cleans up the previously linked DSPL-2.0 dynamic library.
This cross-platform function cleans up the library `libdspl.dll` on
Windows and the library `libdspl.so` on Linux.
After the library is cleaned up, all of its functions become unavailable.
\param [in] handle
Handle of the previously linked DSPL-2.0 library. \n
This pointer can be `NULL`; in that case no action
is performed.
\author Bakhurin Sergey. www.dsplib.org
***************************************************************************** */
#endif
#ifdef DOXYGEN_RUSSIAN
/*! ****************************************************************************
\ingroup SYS_LOADING_GROUP
\fn void dspl_free(void* handle)
\brief Очищает связанную ранее динамическую библиотеку DSPL-2.0.
Данная кроссплатформенная функция производит очистку библиотеки `libdspl.dll` в
системе Windows и с библиотеки `libdspl.so` в системе Linux.
После очистки библиотеки все функции станут недоступны.
\param[in] handle
Хэндл прилинкованной ранее библиотеки DSPL-2.0. \n
Данный указатель может быть `NULL`, в этом случае никакие действия не
производятся.\n\n
\author Бахурин Сергей. www.dsplib.org
**************************************************************************** */
#endif
void dspl_free(void* handle);

Wyświetl plik

@ -1,4 +1,4 @@
# Doxyfile 1.8.18
# Doxyfile 1.8.20
# This file describes the settings to be used by the documentation system
# doxygen (www.doxygen.org) for a project.
@ -227,6 +227,14 @@ QT_AUTOBRIEF = NO
MULTILINE_CPP_IS_BRIEF = NO
# By default Python docstrings are displayed as preformatted text and doxygen's
# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the
# doxygen's special commands can be used and the contents of the docstring
# documentation blocks is shown as doxygen documentation.
# The default value is: YES.
PYTHON_DOCSTRING = YES
# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
# documentation from any documented member that it re-implements.
# The default value is: YES.
@ -449,6 +457,19 @@ TYPEDEF_HIDES_STRUCT = NO
LOOKUP_CACHE_SIZE = 0
# The NUM_PROC_THREADS specifies the number threads doxygen is allowed to use
# during processing. When set to 0 doxygen will base this on the number of
# cores available in the system. You can set it explicitly to a value larger
# than 0 to get more control over the balance between CPU load and processing
# speed. At this moment only the input processing can be done using multiple
# threads. Since this is still an experimental feature the default is set to 1,
# which effectively disables parallel processing. Please report any issues you
# encounter. Generating dot graphs in parallel is controlled by the
# DOT_NUM_THREADS setting.
# Minimum value: 0, maximum value: 32, default value: 1.
NUM_PROC_THREADS = 1
#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------
@ -553,7 +574,7 @@ INTERNAL_DOCS = NO
# names in lower-case letters. If set to YES, upper-case letters are also
# allowed. This is useful if you have classes or files whose names only differ
# in case and if your file system supports case sensitive file names. Windows
# (including Cygwin) ands Mac users are advised to set this option to NO.
# (including Cygwin) and Mac users are advised to set this option to NO.
# The default value is: system dependent.
CASE_SENSE_NAMES = NO
@ -1132,10 +1153,13 @@ CLANG_ASSISTED_PARSING = NO
CLANG_OPTIONS =
# If clang assisted parsing is enabled you can provide the clang parser with the
# path to the compilation database (see:
# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) used when the files
# were built. This is equivalent to specifying the "-p" option to a clang tool,
# such as clang-check. These options will then be passed to the parser.
# path to the directory containing a file called compile_commands.json. This
# file is the compilation database (see:
# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) containing the
# options used when the source files were built. This is equivalent to
# specifying the "-p" option to a clang tool, such as clang-check. These options
# will then be passed to the parser. Any options specified with CLANG_OPTIONS
# will be added as well.
# Note: The availability of this option depends on whether or not doxygen was
# generated with the -Duse_libclang=ON option for CMake.
@ -1405,7 +1429,7 @@ CHM_FILE =
HHC_LOCATION =
# The GENERATE_CHI flag controls if a separate .chi index file is generated
# (YES) or that it should be included in the master .chm file (NO).
# (YES) or that it should be included in the main .chm file (NO).
# The default value is: NO.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
@ -1571,8 +1595,8 @@ EXT_LINKS_IN_WINDOW = NO
# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see
# https://inkscape.org) to generate formulas as SVG images instead of PNGs for
# the HTML output. These images will generally look nicer at scaled resolutions.
# Possible values are: png The default and svg Looks nicer but requires the
# pdf2svg tool.
# Possible values are: png (the default) and svg (looks nicer but requires the
# pdf2svg or inkscape tool).
# The default value is: png.
# This tag requires that the tag GENERATE_HTML is set to YES.
@ -1613,7 +1637,7 @@ FORMULA_MACROFILE =
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.
USE_MATHJAX = NO
USE_MATHJAX = YES
# When MathJax is enabled you can set the default output format to be used for
# the MathJax output. See the MathJax site (see:
@ -1623,7 +1647,7 @@ USE_MATHJAX = NO
# The default value is: HTML-CSS.
# This tag requires that the tag USE_MATHJAX is set to YES.
MATHJAX_FORMAT = SVG
MATHJAX_FORMAT = HTML-CSS
# When MathJax is enabled you need to specify the location relative to the HTML
# output directory using the MATHJAX_RELPATH option. The destination directory
@ -1636,7 +1660,7 @@ MATHJAX_FORMAT = SVG
# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2.
# This tag requires that the tag USE_MATHJAX is set to YES.
MATHJAX_RELPATH = http://dsplib.org/mathjax/latest
MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
# extension names that should be enabled during MathJax rendering. For example
@ -1872,9 +1896,11 @@ LATEX_EXTRA_FILES =
PDF_HYPERLINKS = YES
# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
# the PDF file directly from the LaTeX files. Set this option to YES, to get a
# higher quality PDF documentation.
# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as
# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX
# files. Set this option to YES, to get a higher quality PDF documentation.
#
# See also section LATEX_CMD_NAME for selecting the engine.
# The default value is: YES.
# This tag requires that the tag GENERATE_LATEX is set to YES.
@ -2113,6 +2139,10 @@ DOCBOOK_PROGRAMLISTING = NO
GENERATE_AUTOGEN_DEF = NO
#---------------------------------------------------------------------------
# Configuration options related to Sqlite3 output
#---------------------------------------------------------------------------
#---------------------------------------------------------------------------
# Configuration options related to the Perl module output
#---------------------------------------------------------------------------

Wyświetl plik

@ -27,7 +27,12 @@ $generatedby &#160;<a href="http://www.doxygen.org/index.html">
<a href="http://en.dsplib.org/forum">Forum</a>
</li>
<div style = "float: right;">
<!--LiveInternet logo--><a href="//www.liveinternet.ru/click"
target="_blank"><img src="//counter.yadro.ru/logo?16.2"
title="LiveInternet: Shows the number of views in 24 hours, visitors in 24 hours and today"
alt="" style="border:0" width="88" height="31"/></a><!--/LiveInternet-->
</div>
</ul>
<HR>

Wyświetl plik

@ -102,7 +102,7 @@ screen.colorDepth:screen.pixelDepth))+";u"+escape(document.URL)+
<li><a href="http://en.dsplib.org/dspl">DSPL&ndash;2.0</a></li>
<li><a href="http://en.dsplib.org/forum">Forum</a></li>
<li class = "lang-link">
<div class = "lang-link-en"> <a href="http://ru.dsplib.org/dspl">Русский</a> </div>
<div class = "lang-link-en"> <a href="https://ru.dsplib.org/dspl">Русский</a> </div>
</li>
</ul>
</nav>

Wyświetl plik

@ -30,16 +30,22 @@
/* sqrt(2^31) */
#define FFT_COMPOSITE_MAX 46340
/* FFT kernel */
int fft_krn(complex_t* t0, complex_t* t1, fft_t* p, int n, int addr);
/* DFT 2 points */
void dft2 (complex_t *x, complex_t* y);
/* DFT 3 points */
#define DFT3_W 0.866025403784439
void dft3 (complex_t *x, complex_t* y);
/* DFT 4 points */
void dft4 (complex_t *x, complex_t* y);
/* DFT 5 points */
#define DFT5_W1 -1.250000000000000
#define DFT5_W2 0.559016994374947
#define DFT5_W3 1.538841768587630
@ -47,6 +53,8 @@ void dft4 (complex_t *x, complex_t* y);
#define DFT5_W5 0.363271264002680
void dft5 (complex_t *x, complex_t* y);
/* DFT 7 points */
#define DFT7_W1 -1.166666666666666518636930
#define DFT7_W2 0.790156468525400224045541
#define DFT7_W3 0.055854267289647742400494
@ -57,18 +65,49 @@ void dft5 (complex_t *x, complex_t* y);
#define DFT7_W8 -0.874842290961656665615465
void dft7 (complex_t *x, complex_t* y);
/* DFT 8 points */
#define DFT8_W 0.707106781186548
void dft8 (complex_t *x, complex_t* y);
void transpose2x4(complex_t *x, complex_t* y);
void transpose4x2(complex_t *x, complex_t* y);
/* DFT 16 points */
#define DFT16_W1 0.923879532511287
#define DFT16_W2 0.382683432365090
#define DFT16_W3 0.707106781186548
void dft16 (complex_t *x, complex_t* y);
void transpose4x4(complex_t *x, complex_t* y);
/* DFT 32 points */
void dft32(complex_t *x, complex_t* y, complex_t* w);
void transpose8x4(complex_t *x, complex_t* y);
void transpose4x8(complex_t *x, complex_t* y);
/* DFT 64 points */
void dft64(complex_t *x, complex_t* y, complex_t* w);
void transpose8x8(complex_t *x, complex_t* y);
/* DFT 128 points */
void dft128(complex_t *x, complex_t* y, complex_t* w);
/* DFT 256 points */
void dft256(complex_t *x, complex_t* y, complex_t* w);
void transpose16x16(complex_t* x, complex_t* y);
/* DFT 512 points */
void dft512(complex_t *x, complex_t* y, complex_t* w, complex_t* w32);
/* DFT 1024 points */
void dft1024(complex_t *x, complex_t* y, complex_t* w, complex_t* w32);
/* DFT 2048 points */
void dft2048(complex_t *x, complex_t* y, complex_t* w,
complex_t* w32, complex_t* w64);
/* DFT 4096 points */
void dft4096(complex_t *x, complex_t* y, complex_t* w, complex_t* w256);
/* Window functions */
int win_bartlett (double *w, int n, int win_type);

Wyświetl plik

@ -675,13 +675,21 @@ int fft_krn(complex_t* t0, complex_t* t1, fft_t* p, int n, int addr)
complex_t tmp;
n1 = 1;
if(n%16== 0) { n1 = 16; goto label_size; }
if(n%7 == 0) { n1 = 7; goto label_size; }
if(n%8 == 0) { n1 = 8; goto label_size; }
if(n%5 == 0) { n1 = 5; goto label_size; }
if(n%4 == 0) { n1 = 4; goto label_size; }
if(n%3 == 0) { n1 = 3; goto label_size; }
if(n%2 == 0) { n1 = 2; goto label_size; }
if(n % 4096 == 0) { n1 = 4096; goto label_size; }
if(n % 2048 == 0) { n1 = 2048; goto label_size; }
if(n % 1024 == 0) { n1 = 1024; goto label_size; }
if(n % 512 == 0) { n1 = 512; goto label_size; }
if(n % 256 == 0) { n1 = 256; goto label_size; }
if(n % 128 == 0) { n1 = 128; goto label_size; }
if(n % 64 == 0) { n1 = 64; goto label_size; }
if(n % 32 == 0) { n1 = 32; goto label_size; }
if(n % 16 == 0) { n1 = 16; goto label_size; }
if(n % 7 == 0) { n1 = 7; goto label_size; }
if(n % 8 == 0) { n1 = 8; goto label_size; }
if(n % 5 == 0) { n1 = 5; goto label_size; }
if(n % 4 == 0) { n1 = 4; goto label_size; }
if(n % 3 == 0) { n1 = 3; goto label_size; }
if(n % 2 == 0) { n1 = 2; goto label_size; }
label_size:
if(n1 == 1)
@ -709,6 +717,38 @@ label_size:
matrix_transpose_cmplx(t1, n2, n1, t0);
}
if(n1 == 4096)
for(k = 0; k < n2; k++)
dft4096(t0+4096*k, t1+4096*k, p->w4096, p->w256);
if(n1 == 2048)
for(k = 0; k < n2; k++)
dft2048(t0+2048*k, t1+2048*k, p->w2048, p->w32, p->w64);
if(n1 == 1024)
for(k = 0; k < n2; k++)
dft1024(t0+1024*k, t1+1024*k, p->w1024, p->w32);
if(n1 == 512)
for(k = 0; k < n2; k++)
dft512(t0+512*k, t1+512*k, p->w512, p->w32);
if(n1 == 256)
for(k = 0; k < n2; k++)
dft256(t0+256*k, t1+256*k, p->w256);
if(n1 == 128)
for(k = 0; k < n2; k++)
dft128(t0+128*k, t1+128*k, p->w128);
if(n1 == 64)
for(k = 0; k < n2; k++)
dft64(t0+64*k, t1+64*k, p->w64);
if(n1 == 32)
for(k = 0; k < n2; k++)
dft32(t0+32*k, t1+32*k, p->w32);
if(n1 == 16)
for(k = 0; k < n2; k++)
dft16(t0+16*k, t1+16*k);
@ -752,6 +792,7 @@ label_size:
{
fft_krn(t1+k*n2, t0+k*n2, p, n2, addr+n);
}
matrix_transpose_cmplx(t0, n2, n1, t1);
}
}
@ -903,13 +944,21 @@ int DSPL_API fft_create(fft_t* pfft, int n)
while(s > 1)
{
n2 = 1;
if(s%16== 0) { n2 = 16; goto label_size; }
if(s%7 == 0) { n2 = 7; goto label_size; }
if(s%8 == 0) { n2 = 8; goto label_size; }
if(s%5 == 0) { n2 = 5; goto label_size; }
if(s%4 == 0) { n2 = 4; goto label_size; }
if(s%3 == 0) { n2 = 3; goto label_size; }
if(s%2 == 0) { n2 = 2; goto label_size; }
if(s%4096 == 0) { n2 = 4096; goto label_size; }
if(s%2048 == 0) { n2 = 2048; goto label_size; }
if(s%1024 == 0) { n2 = 1024; goto label_size; }
if(s%512 == 0) { n2 = 512; goto label_size; }
if(s%256 == 0) { n2 = 256; goto label_size; }
if(s%128 == 0) { n2 = 128; goto label_size; }
if(s% 64 == 0) { n2 = 64; goto label_size; }
if(s% 32 == 0) { n2 = 32; goto label_size; }
if(s% 16 == 0) { n2 = 16; goto label_size; }
if(s% 7 == 0) { n2 = 7; goto label_size; }
if(s% 8 == 0) { n2 = 8; goto label_size; }
if(s% 5 == 0) { n2 = 5; goto label_size; }
if(s% 4 == 0) { n2 = 4; goto label_size; }
if(s% 3 == 0) { n2 = 3; goto label_size; }
if(s% 2 == 0) { n2 = 2; goto label_size; }
label_size:
@ -962,6 +1011,123 @@ label_size:
pfft->t1 = pfft->t1 ? (complex_t*) realloc(pfft->t1, n*sizeof(complex_t)):
(complex_t*) malloc( n*sizeof(complex_t));
pfft->n = n;
/* w32 fill */
addr = 0;
for(k = 0; k < 4; k++)
{
for(m = 0; m < 8; m++)
{
phi = - M_2PI * (double)(k*m) / 32.0;
RE(pfft->w32[addr]) = cos(phi);
IM(pfft->w32[addr]) = sin(phi);
addr++;
}
}
/* w64 fill */
addr = 0;
for(k = 0; k < 8; k++)
{
for(m = 0; m < 8; m++)
{
phi = - M_2PI * (double)(k*m) / 64.0;
RE(pfft->w64[addr]) = cos(phi);
IM(pfft->w64[addr]) = sin(phi);
addr++;
}
}
/* w128 fill */
addr = 0;
for(k = 0; k < 8; k++)
{
for(m = 0; m < 16; m++)
{
phi = - M_2PI * (double)(k*m) / 128.0;
RE(pfft->w128[addr]) = cos(phi);
IM(pfft->w128[addr]) = sin(phi);
addr++;
}
}
/* w256 fill */
addr = 0;
for(k = 0; k < 16; k++)
{
for(m = 0; m < 16; m++)
{
phi = - M_2PI * (double)(k*m) / 256.0;
RE(pfft->w256[addr]) = cos(phi);
IM(pfft->w256[addr]) = sin(phi);
addr++;
}
}
/* w512 fill */
addr = 0;
for(k = 0; k < 16; k++)
{
for(m = 0; m < 32; m++)
{
phi = - M_2PI * (double)(k*m) / 512.0;
RE(pfft->w512[addr]) = cos(phi);
IM(pfft->w512[addr]) = sin(phi);
addr++;
}
}
/* w1024 fill */
if(pfft->w1024 == NULL)
{
pfft->w1024 = (complex_t*) malloc(1024 * sizeof(complex_t));
addr = 0;
for(k = 0; k < 32; k++)
{
for(m = 0; m < 32; m++)
{
phi = - M_2PI * (double)(k*m) / 1024.0;
RE(pfft->w1024[addr]) = cos(phi);
IM(pfft->w1024[addr]) = sin(phi);
addr++;
}
}
}
/* w2048 fill */
if(pfft->w2048 == NULL)
{
pfft->w2048= (complex_t*) malloc(2048 * sizeof(complex_t));
addr = 0;
for(k = 0; k < 32; k++)
{
for(m = 0; m < 64; m++)
{
phi = - M_2PI * (double)(k*m) / 2048.0;
RE(pfft->w2048[addr]) = cos(phi);
IM(pfft->w2048[addr]) = sin(phi);
addr++;
}
}
}
/* w4096 fill */
if(pfft->w4096 == NULL)
{
pfft->w4096= (complex_t*) malloc(4096 * sizeof(complex_t));
addr = 0;
for(k = 0; k < 16; k++)
{
for(m = 0; m < 256; m++)
{
phi = - M_2PI * (double)(k*m) / 4096.0;
RE(pfft->w4096[addr]) = cos(phi);
IM(pfft->w4096[addr]) = sin(phi);
addr++;
}
}
}
return RES_OK;
error_proc:
@ -1016,6 +1182,16 @@ void DSPL_API fft_free(fft_t *pfft)
free(pfft->t0);
if(pfft->t1)
free(pfft->t1);
if(pfft->w1024)
free(pfft->w1024);
if(pfft->w2048)
free(pfft->w2048);
if(pfft->w4096)
free(pfft->w4096);
memset(pfft, 0, sizeof(fft_t));
}

Wyświetl plik

@ -26,10 +26,6 @@
/*******************************************************************************
2 points DFT
*******************************************************************************/
@ -319,7 +315,6 @@ void dft7 (complex_t *x, complex_t* y)
RE(y[6]) = RE(sum[20]) - RE(sum[26]);
IM(y[6]) = IM(sum[20]) - IM(sum[26]);
}
@ -455,6 +450,263 @@ void dft16(complex_t *x, complex_t* y)
}
/*******************************************************************************
32 points DFT (Winograd algorithm)
*******************************************************************************/
/*
 * 32-point DFT built from four 8-point DFTs followed by eight 4-point DFTs.
 *
 *   x - input, 32 complex samples (read only through transpose4x8)
 *   y - output, 32 complex samples
 *   w - 32 complex multipliers applied element-wise between the two
 *       DFT passes (the caller in fft_krn passes the w32 table)
 */
void dft32(complex_t *x, complex_t* y, complex_t* w)
{
    complex_t stage_a[32];
    complex_t stage_b[32];
    int off;
    int idx;

    /* Regroup the input so that each run of 8 values feeds one dft8. */
    transpose4x8(x, stage_a);

    for(off = 0; off < 32; off += 8)
        dft8(stage_a + off, stage_b + off);

    /* Element-wise multiplication of the intermediate result by w. */
    for(idx = 0; idx < 32; idx++)
    {
        RE(stage_a[idx]) = CMRE(stage_b[idx], w[idx]);
        IM(stage_a[idx]) = CMIM(stage_b[idx], w[idx]);
    }

    /* Regroup again so that each run of 4 values feeds one dft4. */
    transpose8x4(stage_a, stage_b);

    for(off = 0; off < 32; off += 4)
        dft4(stage_b + off, stage_a + off);

    /* Final reordering straight into the caller's output buffer. */
    transpose4x8(stage_a, y);
}
/*******************************************************************************
64 points DFT (Winograd algorithm)
*******************************************************************************/
/*
 * 64-point DFT built from two passes of eight 8-point DFTs.
 *
 *   x - input, 64 complex samples
 *   y - output, 64 complex samples
 *   w - 64 complex multipliers applied element-wise between the two
 *       DFT passes (the caller in fft_krn passes the w64 table)
 */
void dft64(complex_t *x, complex_t* y, complex_t* w)
{
    complex_t stage_a[64];
    complex_t stage_b[64];
    int off;
    int idx;

    transpose8x8(x, stage_a);

    /* First pass: one dft8 per run of 8 values. */
    for(off = 0; off < 64; off += 8)
        dft8(stage_a + off, stage_b + off);

    /* Element-wise multiplication of the intermediate result by w. */
    for(idx = 0; idx < 64; idx++)
    {
        RE(stage_a[idx]) = CMRE(stage_b[idx], w[idx]);
        IM(stage_a[idx]) = CMIM(stage_b[idx], w[idx]);
    }

    transpose8x8(stage_a, stage_b);

    /* Second pass: again one dft8 per run of 8 values. */
    for(off = 0; off < 64; off += 8)
        dft8(stage_b + off, stage_a + off);

    /* Final reordering straight into the caller's output buffer. */
    transpose8x8(stage_a, y);
}
/*******************************************************************************
128 points DFT (Winograd algorithm)
*******************************************************************************/
/*
 * 128-point DFT built from eight 16-point DFTs followed by sixteen
 * 8-point DFTs.
 *
 *   x - input, 128 complex samples
 *   y - output, 128 complex samples
 *   w - 128 complex multipliers applied element-wise between the two
 *       DFT passes (the caller in fft_krn passes the w128 table)
 */
void dft128(complex_t *x, complex_t* y, complex_t* w)
{
    complex_t stage_a[128];
    complex_t stage_b[128];
    int off;
    int idx;

    matrix_transpose_cmplx(x, 8, 16, stage_a);

    /* First pass: one dft16 per run of 16 values. */
    for(off = 0; off < 128; off += 16)
        dft16(stage_a + off, stage_b + off);

    /* Element-wise multiplication of the intermediate result by w. */
    for(idx = 0; idx < 128; idx++)
    {
        RE(stage_a[idx]) = CMRE(stage_b[idx], w[idx]);
        IM(stage_a[idx]) = CMIM(stage_b[idx], w[idx]);
    }

    matrix_transpose_cmplx(stage_a, 16, 8, stage_b);

    /* Second pass: one dft8 per run of 8 values. */
    for(off = 0; off < 128; off += 8)
        dft8(stage_b + off, stage_a + off);

    /* Final reordering straight into the caller's output buffer. */
    matrix_transpose_cmplx(stage_a, 8, 16, y);
}
/*******************************************************************************
256 points DFT (Winograd algorithm)
*******************************************************************************/
/*
 * 256-point DFT built from two passes of sixteen 16-point DFTs.
 *
 *   x - input, 256 complex samples
 *   y - output, 256 complex samples
 *   w - 256 complex multipliers applied element-wise between the two
 *       DFT passes (the caller in fft_krn passes the w256 table)
 */
void dft256(complex_t *x, complex_t* y, complex_t* w)
{
    complex_t stage_a[256];
    complex_t stage_b[256];
    int off;
    int idx;

    transpose16x16(x, stage_a);

    /* First pass: one dft16 per run of 16 values. */
    for(off = 0; off < 256; off += 16)
        dft16(stage_a + off, stage_b + off);

    /* Element-wise multiplication of the intermediate result by w. */
    for(idx = 0; idx < 256; idx++)
    {
        RE(stage_a[idx]) = CMRE(stage_b[idx], w[idx]);
        IM(stage_a[idx]) = CMIM(stage_b[idx], w[idx]);
    }

    transpose16x16(stage_a, stage_b);

    /* Second pass: again one dft16 per run of 16 values. */
    for(off = 0; off < 256; off += 16)
        dft16(stage_b + off, stage_a + off);

    /* Final reordering straight into the caller's output buffer. */
    transpose16x16(stage_a, y);
}
/*******************************************************************************
512 points DFT (Winograd algorithm)
*******************************************************************************/
/*
 * 512-point DFT built from sixteen 32-point DFTs followed by thirty-two
 * 16-point DFTs.
 *
 *   x   - input, 512 complex samples
 *   y   - output, 512 complex samples
 *   w   - 512 complex multipliers applied element-wise between the two
 *         DFT passes (the caller in fft_krn passes the w512 table)
 *   w32 - multiplier table forwarded unchanged to every dft32 call
 */
void dft512(complex_t *x, complex_t* y, complex_t* w, complex_t* w32)
{
    complex_t stage_a[512];
    complex_t stage_b[512];
    int off;
    int idx;

    matrix_transpose_cmplx(x, 16, 32, stage_a);

    /* First pass: one dft32 per run of 32 values. */
    for(off = 0; off < 512; off += 32)
        dft32(stage_a + off, stage_b + off, w32);

    /* Element-wise multiplication of the intermediate result by w. */
    for(idx = 0; idx < 512; idx++)
    {
        RE(stage_a[idx]) = CMRE(stage_b[idx], w[idx]);
        IM(stage_a[idx]) = CMIM(stage_b[idx], w[idx]);
    }

    matrix_transpose_cmplx(stage_a, 32, 16, stage_b);

    /* Second pass: one dft16 per run of 16 values. */
    for(off = 0; off < 512; off += 16)
        dft16(stage_b + off, stage_a + off);

    /* Final reordering straight into the caller's output buffer. */
    matrix_transpose_cmplx(stage_a, 16, 32, y);
}
/*******************************************************************************
1024 points DFT (Winograd algorithm)
*******************************************************************************/
/*
 * 1024-point DFT built from two passes of thirty-two 32-point DFTs.
 *
 *   x   - input, 1024 complex samples
 *   y   - output, 1024 complex samples
 *   w   - 1024 complex multipliers applied element-wise between the two
 *         DFT passes (the caller in fft_krn passes the w1024 table)
 *   w32 - multiplier table forwarded unchanged to every dft32 call
 *
 * Both scratch buffers (2 x 1024 complex_t) live on the stack.
 */
void dft1024(complex_t *x, complex_t* y, complex_t* w, complex_t* w32)
{
    complex_t stage_a[1024];
    complex_t stage_b[1024];
    int off;
    int idx;

    matrix_transpose_cmplx(x, 32, 32, stage_a);

    /* First pass: one dft32 per run of 32 values. */
    for(off = 0; off < 1024; off += 32)
        dft32(stage_a + off, stage_b + off, w32);

    /* Element-wise multiplication of the intermediate result by w. */
    for(idx = 0; idx < 1024; idx++)
    {
        RE(stage_a[idx]) = CMRE(stage_b[idx], w[idx]);
        IM(stage_a[idx]) = CMIM(stage_b[idx], w[idx]);
    }

    matrix_transpose_cmplx(stage_a, 32, 32, stage_b);

    /* Second pass: again one dft32 per run of 32 values. */
    for(off = 0; off < 1024; off += 32)
        dft32(stage_b + off, stage_a + off, w32);

    /* Final reordering straight into the caller's output buffer. */
    matrix_transpose_cmplx(stage_a, 32, 32, y);
}
/*******************************************************************************
2048 points DFT (Winograd algorithm)
*******************************************************************************/
/*
 * 2048-point DFT built from thirty-two 64-point DFTs followed by sixty-four
 * 32-point DFTs.
 *
 *   x   - input, 2048 complex samples
 *   y   - output, 2048 complex samples
 *   w   - 2048 complex multipliers applied element-wise between the two
 *         DFT passes (the caller in fft_krn passes the w2048 table)
 *   w32 - multiplier table forwarded unchanged to every dft32 call
 *   w64 - multiplier table forwarded unchanged to every dft64 call
 *
 * Two scratch buffers of 2048 complex_t are taken from the heap on every
 * call. The function returns void, so an allocation failure cannot be
 * reported: in that case both buffers are released and the function
 * returns early, leaving y unmodified, instead of dereferencing NULL.
 */
void dft2048(complex_t *x, complex_t* y, complex_t* w,
             complex_t* w32, complex_t* w64)
{
    complex_t *t0 = NULL;
    complex_t *t1 = NULL;
    int i;

    t0 = (complex_t*)malloc(2048*sizeof(complex_t));
    t1 = (complex_t*)malloc(2048*sizeof(complex_t));

    /* Bail out before touching the buffers if either allocation failed. */
    if(t0 == NULL || t1 == NULL)
    {
        free(t0);   /* free(NULL) is a no-op */
        free(t1);
        return;
    }

    matrix_transpose_cmplx(x, 32, 64, t0);

    /* First pass: one dft64 per run of 64 values. */
    for(i = 0; i < 32; i++)
        dft64(t0 + i*64, t1 + i*64, w64);

    /* Element-wise multiplication of the intermediate result by w. */
    for(i = 0; i < 2048; i++)
    {
        RE(t0[i]) = CMRE(t1[i], w[i]);
        IM(t0[i]) = CMIM(t1[i], w[i]);
    }

    matrix_transpose_cmplx(t0, 64, 32, t1);

    /* Second pass: one dft32 per run of 32 values. */
    for(i = 0; i < 64; i++)
        dft32(t1 + i*32, t0 + i*32, w32);

    /* Final reordering straight into the caller's output buffer. */
    matrix_transpose_cmplx(t0, 32, 64, y);

    free(t0);
    free(t1);
}
/*******************************************************************************
4096 points DFT (Winograd algorithm)
*******************************************************************************/
/*
 * 4096-point DFT built from sixteen 256-point DFTs followed by 256
 * 16-point DFTs.
 *
 *   x    - input, 4096 complex samples
 *   y    - output, 4096 complex samples
 *   w    - 4096 complex multipliers applied element-wise between the two
 *          DFT passes (the caller in fft_krn passes the w4096 table)
 *   w256 - multiplier table forwarded unchanged to every dft256 call
 *
 * Two scratch buffers of 4096 complex_t are taken from the heap on every
 * call. The function returns void, so an allocation failure cannot be
 * reported: in that case both buffers are released and the function
 * returns early, leaving y unmodified, instead of dereferencing NULL.
 */
void dft4096(complex_t *x, complex_t* y, complex_t* w, complex_t* w256)
{
    complex_t *t0 = NULL;
    complex_t *t1 = NULL;
    int i;

    t0 = (complex_t*)malloc(4096*sizeof(complex_t));
    t1 = (complex_t*)malloc(4096*sizeof(complex_t));

    /* Bail out before touching the buffers if either allocation failed. */
    if(t0 == NULL || t1 == NULL)
    {
        free(t0);   /* free(NULL) is a no-op */
        free(t1);
        return;
    }

    matrix_transpose_cmplx(x, 16, 256, t0);

    /* First pass: one dft256 per run of 256 values. */
    for(i = 0; i < 16; i++)
        dft256(t0 + i*256, t1 + i*256, w256);

    /* Element-wise multiplication of the intermediate result by w. */
    for(i = 0; i < 4096; i++)
    {
        RE(t0[i]) = CMRE(t1[i], w[i]);
        IM(t0[i]) = CMIM(t1[i], w[i]);
    }

    matrix_transpose_cmplx(t0, 256, 16, t1);

    /* Second pass: one dft16 per run of 16 values. */
    for(i = 0; i < 256; i++)
        dft16(t1 + i*16, t0 + i*16);

    /* Final reordering straight into the caller's output buffer. */
    matrix_transpose_cmplx(t0, 16, 256, y);

    free(t0);
    free(t1);
}
/*******************************************************************************
4 x 2 matrix transpose
*******************************************************************************/
@ -512,3 +764,420 @@ void transpose4x4(complex_t *x, complex_t* y)
}
/*******************************************************************************
8 x 4 matrix transpose
*******************************************************************************/
/*
 * Copies 32 complex values from x to y with the index mapping
 *     y[4*r + c] = x[8*c + r],   r = 0..7, c = 0..3.
 *
 * Elements of y are written in ascending index order, real part first
 * and imaginary part second, matching the fully unrolled form.
 */
void transpose8x4(complex_t *x, complex_t* y)
{
    int r;
    int c;
    int src;
    int dst;

    for(r = 0; r < 8; r++)
    {
        for(c = 0; c < 4; c++)
        {
            dst = 4 * r + c;
            src = 8 * c + r;
            RE(y[dst]) = RE(x[src]);
            IM(y[dst]) = IM(x[src]);
        }
    }
}
/*******************************************************************************
4 x 8 matrix transpose
*******************************************************************************/
/*
 * Copies 32 complex values from x to y with the index mapping
 *     y[8*r + c] = x[4*c + r],   r = 0..3, c = 0..7.
 *
 * Elements of y are written in ascending index order, real part first
 * and imaginary part second, matching the fully unrolled form.
 */
void transpose4x8(complex_t *x, complex_t* y)
{
    int r;
    int c;
    int src;
    int dst;

    for(r = 0; r < 4; r++)
    {
        for(c = 0; c < 8; c++)
        {
            dst = 8 * r + c;
            src = 4 * c + r;
            RE(y[dst]) = RE(x[src]);
            IM(y[dst]) = IM(x[src]);
        }
    }
}
/*******************************************************************************
8 x 8 matrix transpose
*******************************************************************************/
void transpose8x8(complex_t *x, complex_t* y)
{
    /*
     * Copies 64 complex samples from x to y with the index mapping
     *     y[8*r + c] = x[8*c + r],   r = 0..7,  c = 0..7,
     * i.e. an 8 x 8 transpose of the data stored contiguously in x.
     *
     * x - source vector, 64 complex elements are read;
     * y - destination vector, 64 complex elements are written.
     *
     * Destination elements are written in increasing index order
     * (y[0], y[1], ..., y[63]); for each element the real part is
     * copied first, then the imaginary part.
     */
    int r;      /* index of the 8-element group inside y     */
    int c;      /* position inside the current group         */
    int src;    /* running read index in x (step 8)          */
    int dst;    /* running write index in y (step 1)         */

    dst = 0;
    for(r = 0; r < 8; r++)
    {
        src = r;
        for(c = 0; c < 8; c++)
        {
            RE(y[dst]) = RE(x[src]);
            IM(y[dst]) = IM(x[src]);
            src += 8;
            dst++;
        }
    }
}
/*******************************************************************************
16 x 16 matrix transpose
*******************************************************************************/
void transpose16x16(complex_t* x, complex_t* y)
{
    /*
     * Copies 256 complex samples from x to y with the index mapping
     *     y[16*r + c] = x[16*c + r],   r = 0..15,  c = 0..15,
     * i.e. a 16 x 16 transpose of the data stored contiguously in x.
     *
     * x - source vector, 256 complex elements are read;
     * y - destination vector, 256 complex elements are written.
     *
     * Destination elements are written in increasing index order
     * (y[0], y[1], ..., y[255]); for each element the real part is
     * copied first, then the imaginary part.
     */
    int r;      /* index of the 16-element group inside y    */
    int c;      /* position inside the current group         */
    int src;    /* running read index in x (step 16)         */
    int dst;    /* running write index in y (step 1)         */

    dst = 0;
    for(r = 0; r < 16; r++)
    {
        src = r;
        for(c = 0; c < 16; c++)
        {
            RE(y[dst]) = RE(x[src]);
            IM(y[dst]) = IM(x[src]);
            src += 16;
            dst++;
        }
    }
}

Wyświetl plik

@ -96,7 +96,6 @@ In addition, GNUPLOT will build the following graphs from data stored in files:
\author Sergey Bakhurin www.dsplib.org
***************************************************************************** */
#endif
#ifdef DOXYGEN_RUSSIAN
/*! ****************************************************************************
@ -211,11 +210,6 @@ exit_label:
#ifdef DOXYGEN_ENGLISH
/*! ****************************************************************************
\ingroup IIR_FILTER_DESIGN_GROUP

Wyświetl plik

@ -118,7 +118,10 @@ Memory must be allocated by \ref fft_create function. \n\n
Pointer to the vector of intermediate results. \n
The size of the vector is `[n x 1]`. \n
The memory must be allocated with the \ref fft_create function. \n\n
The structure is populated with the \ref fft_create function once
The structure is calculated with the \ref fft_create function once
before using the FFT algorithm. \n
A pointer to an object of this structure may be
reused when calling FFT functions. \n
@ -186,6 +189,32 @@ then the structure arrays will be automatically recreated for the length `n`.
Указатель на вектор промежуточных вычислений алгоритма БПФ. \n
Размер вектора `[n x 1]`. \n
Память должна быть выделена функцией \ref fft_create. \n \n
\param w32
Статический вектор поворотных коэффициентов 32-точечного БПФ. \n \n
\param w64
Статический вектор поворотных коэффициентов 64-точечного БПФ. \n \n
\param w128
Статический вектор поворотных коэффициентов 128-точечного БПФ. \n \n
\param w256
Статический вектор поворотных коэффициентов 256-точечного БПФ. \n \n
\param w512
Статический вектор поворотных коэффициентов 512-точечного БПФ. \n \n
\param w1024
Статический вектор поворотных коэффициентов 1024-точечного БПФ. \n \n
\param w2048
Статический вектор поворотных коэффициентов 2048-точечного БПФ. \n \n
\param w4096
Статический вектор поворотных коэффициентов 4096-точечного БПФ. \n \n
Структура заполняется функцией \ref fft_create один раз
до использования алгоритма БПФ. \n
Указатель на объект данной структуры может быть
@ -225,10 +254,19 @@ www.dsplib.org
#endif
typedef struct
{
complex_t* w;
complex_t* t0;
complex_t* t1;
int n;
complex_t* w;
complex_t* t0;
complex_t* t1;
complex_t w32[ 32];
complex_t w64[ 64];
complex_t w128[128];
complex_t w256[256];
complex_t w512[512];
complex_t* w1024;
complex_t* w2048;
complex_t* w4096;
int n;
} fft_t;

Wyświetl plik

@ -23,49 +23,96 @@ for j = 1:21
s = s / 2;
endfor
dspl_size = [2
4
8
16
32
64
128
256
512
1024
2048
4096
8192
16384
32768
65536
131072
262144
524288
1048576];
dspl = [1204.630392
1283.970612
1586.347958
1707.107097
1866.109831
1837.307509
2366.785829
2302.925874
2388.456514
2113.451546
3090.904615
2979.596190
2685.155556
2053.760000
3723.946667
3195.618462
2328.221538
1786.533333
7288.960000
4646.700000
2633.120000];
dspl_mflops = [597.7
1946.0
4455.5
5446.3
4490.7
4288.5
3524.1
5286.9
3995.3
3657.6
2953.2
2078.0
2565.7
2615.5
2361.8
2376.4
2169.8
2285.5
2172.4
1896.4];
python = [2390.741
2597.527
2841.191
3066.652
3092.187
3444.710
3633.320
4333.845
5316.897
5201.486
4608.231
4481.357
3876.925
2961.753
2435.427
1344.871
606.953
298.559
120.772
50.369
17.033];
python_size = [4194304
2097152
1048576
524288
262144
131072
65536
32768
16384
8192
4096
2048
1024
512
256
128
64
32
16
8
4
2];
python_mflops = [2119.626
2147.070
2362.656
2351.777
2408.621
2678.743
3194.574
3978.322
5220.731
4671.613
4240.982
3585.080
3876.999
2556.301
2333.780
1301.660
606.947
294.469
127.103
37.574
15.669
4.110];
plot(log2(size), mflops,log2(size), dspl, log2(size), python)
plot(log2(size), mflops,log2(dspl_size), dspl_mflops, log2(python_size), python_mflops)

Wyświetl plik

@ -5,7 +5,7 @@
#include "dspl.h"
#define NMAX 4194304
#define L 18
#define L 20
#define SIZE_FACTOR 2.3
@ -87,10 +87,10 @@ int main(int argc, char* argv[])
hdspl = dspl_load(); /* Load DSPL function */
int len_r2[L] = {2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
8192, 16384, 32768, 65536, 131072, 262144};
8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576};
int len_nr[L] = {6, 9, 12, 15, 18, 24, 36, 80, 108, 210, 504, 1000,
1960, 4725, 10368, 27000, 75600, 165375};
int len_nr[L] = {6, 9, 12, 15, 18, 24, 36, 80, 100, 108, 210, 504, 1000,
1960, 4725, 8000, 10368, 27000, 75600, 165375};
int err;
double mflops[L] = {0};

Wyświetl plik

@ -0,0 +1,44 @@
% Reference generator for the radix-2 FFT verification.
% For every FFT length 4, 8, ..., 65536 the input vector is read from
% dat/x_fft_<N>.dat, transformed with the built-in fft() and the result
% is stored in dat/y_fft_<N>.dat.
clear all; close all; clc;
addpath('octave');

% FFT lengths under test: 2^2 ... 2^16
fft_len = 2 .^ (2:16);

for k = 1:numel(fft_len)
  src_name = sprintf('dat/x_fft_%d.dat', fft_len(k));
  dst_name = sprintf('dat/y_fft_%d.dat', fft_len(k));

  x = readbin(src_name);
  y = fft(x);
  % second argument is passed as 1, presumably the "complex" data type
  % code expected by writebin -- TODO confirm against writebin.m
  writebin(y, 1, dst_name);
end

Wyświetl plik

@ -0,0 +1,156 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "dspl.h"
#define FFT_SIZE 65536
int main(int argc, char* argv[])
{
void* hdspl; /* DSPL handle */
fft_t pfft = {0};
int verr, nx, type;
double derr;
complex_t *yout = NULL;
complex_t *xc = NULL;
hdspl = dspl_load(); /* Load DSPL function */
verif_data_gen(4, DAT_COMPLEX, "dat/x_fft_4.dat");
verif_data_gen(8, DAT_COMPLEX, "dat/x_fft_8.dat");
verif_data_gen(16, DAT_COMPLEX, "dat/x_fft_16.dat");
verif_data_gen(32, DAT_COMPLEX, "dat/x_fft_32.dat");
verif_data_gen(64, DAT_COMPLEX, "dat/x_fft_64.dat");
verif_data_gen(128, DAT_COMPLEX, "dat/x_fft_128.dat");
verif_data_gen(256, DAT_COMPLEX, "dat/x_fft_256.dat");
verif_data_gen(512, DAT_COMPLEX, "dat/x_fft_512.dat");
verif_data_gen(1024, DAT_COMPLEX, "dat/x_fft_1024.dat");
verif_data_gen(2048, DAT_COMPLEX, "dat/x_fft_2048.dat");
verif_data_gen(4096, DAT_COMPLEX, "dat/x_fft_4096.dat");
verif_data_gen(8192, DAT_COMPLEX, "dat/x_fft_8192.dat");
verif_data_gen(16384, DAT_COMPLEX, "dat/x_fft_16384.dat");
verif_data_gen(32768, DAT_COMPLEX, "dat/x_fft_32768.dat");
verif_data_gen(65536, DAT_COMPLEX, "dat/x_fft_65536.dat");
yout = (complex_t*)malloc(FFT_SIZE * sizeof(complex_t ));
system("octave octave/fft_radix2_verification.m");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_4.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 4, &pfft, yout);
verif_str_cmplx(yout, 4, "fft 4 for complex dat",
"dat/y_fft_4.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_8.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 8, &pfft, yout);
verif_str_cmplx(yout, 8, "fft 8 for complex dat",
"dat/y_fft_8.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_16.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 16, &pfft, yout);
verif_str_cmplx(yout, 16, "fft 16 for complex dat",
"dat/y_fft_16.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_32.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 32, &pfft, yout);
verif_str_cmplx(yout, 32, "fft 32 for complex dat",
"dat/y_fft_32.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_64.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 64, &pfft, yout);
verif_str_cmplx(yout, 64, "fft 64 for complex dat",
"dat/y_fft_64.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_128.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 128, &pfft, yout);
verif_str_cmplx(yout, 128, "fft 128 for complex dat",
"dat/y_fft_128.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_256.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 256, &pfft, yout);
verif_str_cmplx(yout, 256, "fft 256 for complex dat",
"dat/y_fft_256.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_512.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 512, &pfft, yout);
verif_str_cmplx(yout, 512, "fft 512 for complex dat",
"dat/y_fft_512.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_1024.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 1024, &pfft, yout);
verif_str_cmplx(yout, 1024, "fft 1024 for complex dat",
"dat/y_fft_1024.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_2048.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 2048, &pfft, yout);
verif_str_cmplx(yout, 2048, "fft 2048 for complex dat",
"dat/y_fft_2048.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_4096.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 4096, &pfft, yout);
verif_str_cmplx(yout, 4096, "fft 4096 for complex dat",
"dat/y_fft_4096.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_8192.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 8192, &pfft, yout);
verif_str_cmplx(yout, 8192, "fft 8192 for complex dat",
"dat/y_fft_8192.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_16384.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 16384, &pfft, yout);
verif_str_cmplx(yout, 16384, "fft 16384 for complex dat",
"dat/y_fft_16384.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_32768.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 32768, &pfft, yout);
verif_str_cmplx(yout, 32768, "fft 32768 for complex dat",
"dat/y_fft_32768.dat",
"verification.log");
/*------------------------------------------------------------------------*/
readbin("dat/x_fft_65536.dat", (void**)(&xc), &nx, &type);
fft_cmplx(xc, 65536, &pfft, yout);
verif_str_cmplx(yout, 65536, "fft 65536 for complex dat",
"dat/y_fft_65536.dat",
"verification.log");
/* free dspl handle */
dspl_free(hdspl);
if(yout)
free(yout);
if(xc)
free(xc);
return 0;
}