* Speedup of sfft::run
pull/2/head
Remi Chateauneu 2012-05-10 23:48:05 +01:00 zatwierdzone przez David Freese
rodzic 25fcf8d1b5
commit 967cc68a75
6 zmienionych plików z 59 dodań i 59 usunięć

Wyświetl plik

@ -157,8 +157,9 @@ int BLANK::rx_process(const double *buf, int len)
// with required bandwidth
bandpass->run ( z, z );
// binsfft->run(z) returns pointer to first frequency of interest
bins = sliding->run (z);
// sliding->run(z, dest, stride) copies the frequencies of interest into dest
complex dummy ;
sliding->run (z, &dummy, 0 );
// etc
decodesymbol();
update_syncscope();

Wyświetl plik

@ -540,7 +540,7 @@ void dominoex::eval_s2n()
int dominoex::rx_process(const double *buf, int len)
{
complex zref, z, *zp, *bins = 0;
complex zref, z, *zp;
complex zarray[1];
int n;
@ -573,11 +573,8 @@ int dominoex::rx_process(const double *buf, int len)
for (int j = 0; j < paths; j++) {
// shift in frequency to base band for the sliding DFTs
z = mixer(j + 1, zp[i]);
bins = binsfft[j]->run(z);
// copy current vector to the pipe interleaving the FFT vectors
for (int k = 0; k < numbins; k++) {
pipe[pipeptr].vector[j + paths * k] = bins[k];
}
binsfft[j]->run(z, pipe[pipeptr].vector + j, paths );
}
if (--synccounter <= 0) {
synccounter = symlen;

Wyświetl plik

@ -165,15 +165,16 @@ void C_FIR_filter::init_hilbert (int len, int dec) {
// returns 1 when stable and decimated complex output value is valid
//=====================================================================
int C_FIR_filter::run (complex &in, complex &out) {
ibuffer[pointer] = in.real();
qbuffer[pointer] = in.imag();
int C_FIR_filter::run (const complex &in, complex &out) {
ibuffer[pointer] = in.re;
qbuffer[pointer] = in.im;
counter++;
if (counter == decimateratio)
out = complex ( mac(&ibuffer[pointer - length], ifilter, length),
mac(&qbuffer[pointer - length], qfilter, length) );
pointer++;
if (pointer == FIRBufferLen) {
/// memmove (rather than memcpy) is required: the source and destination ranges overlap when length exceeds FIRBufferLen/2, which is theoretically possible.
memmove (ibuffer, ibuffer + FIRBufferLen - length, length * sizeof (double) );
memmove (qbuffer, qbuffer + FIRBufferLen - length, length * sizeof (double) );
pointer = length;
@ -189,7 +190,7 @@ int C_FIR_filter::run (complex &in, complex &out) {
// Run the filter for the Real part of the complex variable
//=====================================================================
int C_FIR_filter::Irun (double &in, double &out) {
int C_FIR_filter::Irun (const double &in, double &out) {
double *iptr = ibuffer + pointer;
pointer++;
@ -219,7 +220,7 @@ int C_FIR_filter::Irun (double &in, double &out) {
// Run the filter for the Imaginary part of the complex variable
//=====================================================================
int C_FIR_filter::Qrun (double &in, double &out) {
int C_FIR_filter::Qrun (const double &in, double &out) {
double *qptr = ibuffer + pointer;
pointer++;
@ -397,11 +398,15 @@ void Cmovavg::reset()
//
//=====================================================================
// One element per FFT bin, pairing the bin's rotation factor with its running
// value so that the per-bin update in sfft::run reads both from the same element.
struct sfft::vrot_bins_pair {
complex vrot; // per-bin rotation factor, set once in the constructor to K1 * (cos(phi), sin(phi))
complex bins; // running sliding-FFT value for this bin; initialised to 0 in the constructor
} ;
sfft::sfft(int len, int _first, int _last)
{
vrot = new complex[len];
vrot_bins = new vrot_bins_pair[len];
delay = new complex[len];
bins = new complex[len];
fftlen = len;
first = _first;
last = _last;
@ -409,41 +414,40 @@ sfft::sfft(int len, int _first, int _last)
double phi = 0.0, tau = 2.0 * M_PI/ len;
k2 = 1.0;
for (int i = 0; i < len; i++) {
vrot[i].re = K1 * cos (phi);
vrot[i].im = K1 * sin (phi);
vrot_bins[i].vrot = complex( cos (phi), sin (phi) ) * K1 ;
phi += tau;
delay[i] = bins[i] = 0.0;
delay[i] = vrot_bins[i].bins = 0.0;
k2 *= K1;
}
}
sfft::~sfft()
{
delete [] vrot;
delete [] vrot_bins;
delete [] delay;
delete [] bins;
}
// Sliding FFT, complex input, complex output
// FFT is computed for each value from first to last
// Values are not stable until more than "len" samples have been processed.
// returns address of first component in array
complex *sfft::run(const complex& input)
// Copies the frequencies to a pointer with a given stride.
void sfft::run(const complex& input, complex * __restrict__ result, int stride )
{
complex & de = delay[ptr];
complex z(
input.re - k2 * de.re,
input.im - k2 * de.im);
const complex z( input.re - k2 * de.re, input.im - k2 * de.im);
de = input;
++ptr ;
if( ptr >= fftlen ) ptr = 0 ;
for (int i = first; i < last; i++) {
bins[i] = ( bins[i] + z ) * vrot[i];
// It is more efficient to have vrot and bins very close to each other.
for( vrot_bins_pair
* __restrict__ itr = vrot_bins + first,
* __restrict__ end = vrot_bins + last ;
itr != end ;
++itr, result += stride ) {
*result = itr->bins = itr->bins * itr->vrot + z * itr->vrot;
}
return &bins[first];
}
// ============================================================================

Wyświetl plik

@ -98,9 +98,9 @@ public:
void init_hilbert (int len, int dec);
double *bp_FIR(int len, int hilbert, double f1, double f2);
void dump();
int run (complex &in, complex &out);
int Irun (double &in, double &out);
int Qrun (double &in, double &out);
int run (const complex &in, complex &out);
int Irun (const double &in, double &out);
int Qrun (const double &in, double &out);
};
//=====================================================================
@ -134,14 +134,14 @@ private:
int first;
int last;
int ptr;
complex *vrot;
complex *bins;
complex *delay;
struct vrot_bins_pair ;
vrot_bins_pair * __restrict__ vrot_bins ;
complex * __restrict__ delay;
double k2;
public:
sfft(int len, int first, int last);
~sfft();
complex *run(const complex& input);
void run(const complex& input, complex * __restrict__ result, int stride );
};

Wyświetl plik

@ -685,8 +685,8 @@ void mfsk::eval_s2n()
int mfsk::rx_process(const double *buf, int len)
{
complex z, *bins = 0;
int i;
complex z;
complex* bins;
while (len-- > 0) {
// create analytic signal...
@ -724,13 +724,11 @@ int mfsk::rx_process(const double *buf, int len)
continue;
}
// binsfft->run(z) returns pointer to first frequency of interest
bins = binsfft->run (z);
// copy current vector to the pipe
// binsfft->run(z, dest, stride) copies the frequencies of interest into dest.
binsfft->run (z, pipe[pipeptr].vector, 1);
bins = pipe[pipeptr].vector;
// copy current vector to the pipe
for (i = 0; i < numtones; i++)
pipe[pipeptr].vector[i] = bins[i];
if (--synccounter <= 0) {
synccounter = symlen;

Wyświetl plik

@ -258,7 +258,6 @@ thor::thor(trx_mode md)
complex thor::mixer(int n, const complex& in)
{
complex z;
double f;
// first IF mixer (n == 0) plus
// THORMAXFFTS mixers are supported each separated by 1/THORMAXFFTS bin size
@ -266,15 +265,17 @@ complex thor::mixer(int n, const complex& in)
if (n == 0)
f = frequency - THORFIRSTIF;
else
f = THORFIRSTIF - THORBASEFREQ - bandwidth/2 + (samplerate / symlen) * (1.0 * n / paths);
z.re = cos(phase[n]);
z.im = sin(phase[n]);
f = THORFIRSTIF - THORBASEFREQ - bandwidth*0.5 + (samplerate / symlen) * ( (double)n / paths);
double phase_n = phase[n];
complex z( cos(phase_n), sin(phase_n) );
z *= in;
phase[n] -= TWOPI * f / samplerate;
if (phase[n] > M_PI)
phase[n] -= TWOPI;
else if (phase[n] < M_PI)
phase[n] += TWOPI;
phase_n -= TWOPI * f / samplerate;
if (phase_n > M_PI)
phase_n -= TWOPI;
else if (phase_n < M_PI)
phase_n += TWOPI;
phase[n] = phase_n;
return z;
}
@ -519,7 +520,7 @@ void thor::eval_s2n()
int thor::rx_process(const double *buf, int len)
{
complex zref, z, *zp, *bins = 0;
complex zref, *zp;
complex zarray[1];
int n;
@ -547,21 +548,20 @@ int thor::rx_process(const double *buf, int len)
if (n) {
for (int i = 0; i < n; i++) {
complex * pipe_pipeptr_vector = pipe[pipeptr].vector ;
const complex zp_i = zp[i];
// process THORMAXFFTS sets of sliding FFTs spaced at 1/THORMAXFFTS bin intervals each of which
// is a matched filter for the current symbol length
for (int k = 0; k < paths; k++) {
// shift in frequency to base band for the sliding DFTs
z = mixer(k + 1, zp[i]);
bins = binsfft[k]->run(z);
const complex z = mixer(k + 1, zp_i );
// copy current vector to the pipe interleaving the FFT vectors
for (int j = 0; j < numbins; j++) {
pipe[pipeptr].vector[k + paths * j] = bins[j];
}
binsfft[k]->run(z, pipe_pipeptr_vector + k, paths );
}
if (--synccounter <= 0) {
synccounter = symlen;
currsymbol = harddecode();
currmag = pipe[pipeptr].vector[currsymbol].mag();
currmag = pipe_pipeptr_vector[currsymbol].mag();
eval_s2n();
decodesymbol();
synchronize();