fir_sampler dot product: Even simpler form that triggers NEON vectorization with gcc-4.9.2

pull/4/head
pabr 2017-03-25 10:34:11 +01:00
rodzic b4668c71bc
commit f054ce9dad
1 zmienionych plików z 8 dodań i 7 usunięć

Wyświetl plik

@@ -485,14 +485,15 @@ namespace leansdr {
 if ( subsampling == 1 ) {
 // Special case for heavily oversampled signals,
 // where filtering is expensive.
-while ( pc+16 < pcend ) {
-// gcc-4.9.2 can vectorize this form with NEON on ARM
-for ( int i=0; i<16; ++i)
-acc += (*pc++)*(*pin++);
-}
+// gcc-4.9.2 can vectorize this form with NEON on ARM.
+while ( pc < pcend )
+acc += (*pc++)*(*pin++);
+} else {
+// Not vectorized because the coefficients are not
+// guaranteed to be contiguous in memory.
+for ( ; pc<pcend; pc+=subsampling,++pin )
+acc += (*pc)*(*pin);
 }
-for ( ; pc<pcend; pc+=subsampling,++pin )
-acc += (*pc)*(*pin);
 // Derotate
 return trig.expi(-phase) * acc;
 }