From 9cec3782e202cc91764de624b9ed86f83ee37417 Mon Sep 17 00:00:00 2001 From: ha7ilm Date: Wed, 30 Sep 2015 13:52:43 +0000 Subject: [PATCH] Added NEON optimization for DDC. Buffer size can now automatically adjust to sampling rate changes between csdr processes. --- Makefile | 10 +- csdr.c | 536 +++++++++++++++++++++++++++++++++-------- libcsdr.c | 102 +++++++- libcsdr_gpl.c | 5 +- sdr.js/sdrjs-test.html | 38 +++ 5 files changed, 582 insertions(+), 109 deletions(-) create mode 100644 sdr.js/sdrjs-test.html diff --git a/Makefile b/Makefile index 1752fc9..4bcd3f8 100644 --- a/Makefile +++ b/Makefile @@ -32,7 +32,7 @@ LIBSOURCES = fft_fftw.c libcsdr_wrapper.c #SOURCES = csdr.c $(LIBSOURCES) cpufeature = $(if $(findstring $(1),$(shell cat /proc/cpuinfo)),$(2)) PARAMS_SSE = $(call cpufeature,sse,-msse) $(call cpufeature,sse2,-msse2) $(call cpufeature,sse3,-msse3) $(call cpufeature,sse4,-msse4) $(call cpufeature,sse4_1,-msse4.1) $(call cpufeature,sse4_2,-msse4.2) -mfpmath=sse -PARAMS_NEON = -mfloat-abi=hard -march=armv7-a -mtune=cortex-a8 -mfpu=neon -mvectorize-with-neon-quad -funsafe-math-optimizations -Wformat=0 +PARAMS_NEON = -mfloat-abi=hard -march=armv7-a -mtune=cortex-a8 -mfpu=neon -mvectorize-with-neon-quad -funsafe-math-optimizations -Wformat=0 -DNEON_OPTS #tnx Jan Szumiec for the Raspberry Pi support PARAMS_RASPI = -mfloat-abi=hard -mcpu=arm1176jzf-s -mfpu=vfp -funsafe-math-optimizations -Wformat=0 PARAMS_ARM = $(if $(call cpufeature,BCM2708,dummy-text),$(PARAMS_RASPI),$(PARAMS_NEON)) @@ -47,12 +47,12 @@ all: clean-vect @echo NOTE: you may have to manually edit Makefile to optimize for your CPU \(especially if you compile on ARM, please edit PARAMS_NEON\). @echo Auto-detected optimization parameters: $(PARAMS_SIMD) @echo - c99 $(PARAMS_LOOPVECT) $(PARAMS_SIMD) $(LIBSOURCES) $(PARAMS_LIBS) $(PARAMS_MISC) -fpic -shared -o libcsdr.so + gcc -std=gnu99 $(PARAMS_LOOPVECT) $(PARAMS_SIMD) $(LIBSOURCES) $(PARAMS_LIBS) $(PARAMS_MISC) -fpic -shared -o libcsdr.so -./parsevect dumpvect*.vect - c99 $(PARAMS_LOOPVECT) $(PARAMS_SIMD) csdr.c $(PARAMS_LIBS) -L. -lcsdr $(PARAMS_MISC) -o csdr + gcc -std=gnu99 $(PARAMS_LOOPVECT) $(PARAMS_SIMD) csdr.c $(PARAMS_LIBS) -L. -lcsdr $(PARAMS_MISC) -o csdr arm-cross: clean-vect #note: this doesn't work since having added FFTW - arm-linux-gnueabihf-gcc -std=c99 -O3 -fshort-double -ffast-math -dumpbase dumpvect-arm -fdump-tree-vect-details -mfloat-abi=softfp -march=armv7-a -mtune=cortex-a9 -mfpu=neon -mvectorize-with-neon-quad -Wno-unused-result -Wformat=0 $(SOURCES) -lm -o ./csdr + arm-linux-gnueabihf-gcc -std=gnu99 -O3 -fshort-double -ffast-math -dumpbase dumpvect-arm -fdump-tree-vect-details -mfloat-abi=softfp -march=armv7-a -mtune=cortex-a9 -mfpu=neon -mvectorize-with-neon-quad -Wno-unused-result -Wformat=0 $(SOURCES) -lm -o ./csdr clean-vect: rm -f dumpvect*.vect clean: clean-vect @@ -65,6 +65,8 @@ install: uninstall: rm /usr/lib/libcsdr.so /usr/bin/csdr /usr/bin/csdr-fm ldconfig +disasm: + objdump -S libcsdr.so > libcsdr.disasm emcc-clean: -rm sdr.js/sdr.js -rm sdr.js/sdrjs-compiled.js diff --git a/csdr.c b/csdr.c index 0b64829..5f68865 100644 --- a/csdr.c +++ b/csdr.c @@ -30,6 +30,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define _POSIX_C_SOURCE 199309L #define _BSD_SOURCE +#define _GNU_SOURCE #include #include #include @@ -66,7 +67,7 @@ char usage[]= " yes_f [buf_times]\n" " detect_nan_ff\n" " floatdump_f\n" -" flowcontrol \n" +" flowcontrol [prebuffer_sec] [thrust]\n" " shift_math_cc \n" " shift_addition_cc \n" " shift_addition_cc_test\n" @@ -99,19 +100,25 @@ char usage[]= " \n" ; -#define BUFSIZE (1024) -#define BIG_BUFSIZE (1024*16) +//change on 2015-08-29: we rather dynamically determine the bufsize +//#define BUFSIZE (1024) +//#define BIG_BUFSIZE (1024*16) //should be multiple of 16! (size of double complex) //also, keep in mind that shift_addition_cc works better the smaller this buffer is. +int env_csdr_fixed_bufsize = 1024; +int env_csdr_dynamic_bufsize_on = 0; + +//change on on 2015-08-29: we don't yield at all. fread() will do it if it blocks #define YIELD_EVERY_N_TIMES 3 -#define TRY_YIELD if(++yield_counter%YIELD_EVERY_N_TIMES==0) sched_yield() -unsigned yield_counter=0; +//#define TRY_YIELD if(++yield_counter%YIELD_EVERY_N_TIMES==0) sched_yield() +#define TRY_YIELD +//unsigned yield_counter=0; int badsyntax(char* why) { if(why==0) fprintf(stderr, "%s", usage); - else fprintf(stderr, "%s\n\n", why); + else fprintf(stderr, "csdr: %s\n\n", why); return -1; } @@ -119,30 +126,31 @@ int clipdetect_ff(float* input, int input_size) { for(int i=0;i1.0) { fprintf(stderr, "clipdetect_ff: Signal value above 1.0!\n"); return 1; } + if(input[i]<-1.0) { fprintf(stderr, "csdr clipdetect_ff: Signal value below -1.0!\n"); return -1; } + if(input[i]>1.0) { fprintf(stderr, "csdr clipdetect_ff: Signal value above 1.0!\n"); return 1; } } return 0; } -int clone_() +int clone_(int bufsize_param) { - static unsigned char clone_buffer[BUFSIZE]; + unsigned char* clone_buffer; + clone_buffer = (unsigned char*)malloc(bufsize_param*sizeof(unsigned char)); for(;;) { - fread(clone_buffer, sizeof(unsigned char), BUFSIZE, stdin); - fwrite(clone_buffer, sizeof(unsigned char), BUFSIZE, stdout); + fread(clone_buffer, sizeof(unsigned char), bufsize_param, stdin); + fwrite(clone_buffer, sizeof(unsigned char), bufsize_param, stdout); TRY_YIELD; } } -#define FREAD_R fread(input_buffer, sizeof(float), BUFSIZE, stdin) -#define FREAD_C fread(input_buffer, sizeof(float)*2, BUFSIZE, stdin) -#define FWRITE_R fwrite(output_buffer, sizeof(float), BUFSIZE, stdout) -#define FWRITE_C fwrite(output_buffer, sizeof(float)*2, BUFSIZE, stdout) -#define FEOF_CHECK if(feof(stdin)) return 0 -#define BIG_FREAD_C fread(input_buffer, sizeof(float)*2, BIG_BUFSIZE, stdin) -#define BIG_FWRITE_C fwrite(output_buffer, sizeof(float)*2, BIG_BUFSIZE, stdout) +#define FREAD_R fread (input_buffer, sizeof(float), the_bufsize, stdin) +#define FREAD_C fread (input_buffer, sizeof(float)*2, the_bufsize, stdin) +#define FWRITE_R fwrite (output_buffer, sizeof(float), the_bufsize, stdout) +#define FWRITE_C fwrite (output_buffer, sizeof(float)*2, the_bufsize, stdout) +#define FEOF_CHECK if(feof(stdin)) return 0 +//#define BIG_FREAD_C fread(input_buffer, sizeof(float)*2, BIG_BUFSIZE, stdin) +//#define BIG_FWRITE_C fwrite(output_buffer, sizeof(float)*2, BIG_BUFSIZE, stdout) int init_fifo(int argc, char *argv[]) { @@ -200,78 +208,181 @@ int read_fifo_ctl(int fd, char* format, ...) } } +#define SETBUF_PREAMBLE "csdr" +#define SETBUF_DEFAULT_BUFSIZE 1024 +#define STRINGIFY_VALUE(x) STRINGIFY_NAME(x) +#define STRINGIFY_NAME(x) #x + +int getbufsize() +{ + if(!env_csdr_dynamic_bufsize_on) return env_csdr_fixed_bufsize; + int recv_first[2]; + fread(recv_first, sizeof(int), 2, stdin); + if(memcmp(recv_first, SETBUF_PREAMBLE, sizeof(char)*4)!=0) + { badsyntax("warning! Did not match preamble on the beginning of the stream. You should put \"csdr setbuf \" at the beginning of the chain! Falling back to default buffer size: " STRINGIFY_VALUE(SETBUF_DEFAULT_BUFSIZE)); return SETBUF_DEFAULT_BUFSIZE; } + if(recv_first[1]<=0) { badsyntax("warning! Invalid buffer size." ); return 0; } + return recv_first[1]; +} + + +float* input_buffer; +unsigned char* buffer_u8; +float *output_buffer; +short *buffer_i16; +float *temp_f; +int the_bufsize = 0; +char **argv_global; + + +#define UNITROUND_UNIT 128 + +int unitround(int what) +{ + if(what<=0) return UNITROUND_UNIT; + return ((what-1)&~(UNITROUND_UNIT-1))+UNITROUND_UNIT; +} + +int initialize_buffers() +{ + if(!(the_bufsize=getbufsize())) return 0; + the_bufsize=unitround(the_bufsize); + fprintf(stderr,"%s %s: buffer size set to %d\n",argv_global[0], argv_global[1], the_bufsize); + input_buffer = (float*) malloc(the_bufsize*sizeof(float) * 2); //need the 2× because we might also put complex floats into it + output_buffer = (float*) malloc(the_bufsize*sizeof(float) * 2); + buffer_u8 = (unsigned char*)malloc(the_bufsize*sizeof(unsigned char)); + buffer_i16 = (short*) malloc(the_bufsize*sizeof(short)); + temp_f = (float*) malloc(the_bufsize*sizeof(float) * 4); + return the_bufsize; +} + +int sendbufsize(int size) +{ + //The first word is a preamble, "csdr". + //If the next csdr process detects it, sets the buffer size according to the second word + if(!env_csdr_dynamic_bufsize_on) return env_csdr_fixed_bufsize; + fprintf(stderr,"%s %s: next process proposed input buffer size is %d\n",argv_global[0], argv_global[1], size); + int send_first[2]; + memcpy((char*)send_first, SETBUF_PREAMBLE, 4*sizeof(char)); + send_first[1] = size; + fwrite(send_first, sizeof(int), 2, stdout); + return size; +} + +int parse_env() +{ + char* envtmp; + envtmp=getenv("CSDR_DYNAMIC_BUFSIZE_ON"); + //fprintf(stderr, "envtmp: %s\n",envtmp); + if(envtmp) + { + env_csdr_dynamic_bufsize_on = !!atoi(envtmp); + env_csdr_fixed_bufsize = 0; + } + else + { + envtmp=getenv("CSDR_FIXED_BUFSIZE"); + if(envtmp) + { + env_csdr_fixed_bufsize = atoi(envtmp); + } + } +} + int main(int argc, char *argv[]) { - static float input_buffer[BIG_BUFSIZE*2]; - static unsigned char buffer_u8[BIG_BUFSIZE*2]; - static float output_buffer[BIG_BUFSIZE*2]; - static short buffer_i16[BIG_BUFSIZE*2]; - static float temp_f[BIG_BUFSIZE*4]; + parse_env(); + argv_global=argv; if(argc<=1) return badsyntax(0); if(!strcmp(argv[1],"--help")) return badsyntax(0); + + fcntl(STDIN_FILENO, F_SETPIPE_SZ, 65536*32); + fcntl(STDOUT_FILENO, F_SETPIPE_SZ, 65536*32); + fprintf(stderr, "csdr: F_SETPIPE_SZ\n"); + + if(!strcmp(argv[1],"setbuf")) + { + if(argc<=2) return badsyntax("need required parameter (buffer size)"); + sscanf(argv[2],"%d",&the_bufsize); + if(the_bufsize<=0) return badsyntax("buffer size <= 0 is invalid"); + sendbufsize(the_bufsize); + clone_(the_bufsize); //After sending the buffer size out, just copy stdin to stdout + } + + if(!strcmp(argv[1],"clone")) + { + if(!sendbufsize(initialize_buffers())) return -2; + clone_(the_bufsize); + } + if(!strcmp(argv[1],"convert_u8_f")) { + if(!sendbufsize(initialize_buffers())) return -2; for(;;) { FEOF_CHECK; - fread(buffer_u8, sizeof(unsigned char), BUFSIZE, stdin); - convert_u8_f(buffer_u8, output_buffer, BUFSIZE); + fread(buffer_u8, sizeof(unsigned char), the_bufsize, stdin); + convert_u8_f(buffer_u8, output_buffer, the_bufsize); FWRITE_R; TRY_YIELD; } } if(!strcmp(argv[1],"convert_f_u8")) //not tested { + if(!sendbufsize(initialize_buffers())) return -2; for(;;) { FEOF_CHECK; FREAD_R; - convert_f_u8(input_buffer, buffer_u8, BUFSIZE); - fwrite(buffer_u8, sizeof(unsigned char), BUFSIZE, stdout); + convert_f_u8(input_buffer, buffer_u8, the_bufsize); + fwrite(buffer_u8, sizeof(unsigned char), the_bufsize, stdout); TRY_YIELD; } } if(!strcmp(argv[1],"convert_f_i16")) { + if(!sendbufsize(initialize_buffers())) return -2; for(;;) { FEOF_CHECK; FREAD_R; - convert_f_i16(input_buffer, buffer_i16, BUFSIZE); - fwrite(buffer_i16, sizeof(short), BUFSIZE, stdout); + convert_f_i16(input_buffer, buffer_i16, the_bufsize); + fwrite(buffer_i16, sizeof(short), the_bufsize, stdout); TRY_YIELD; } } if(!strcmp(argv[1],"convert_i16_f")) //not tested { + if(!sendbufsize(initialize_buffers())) return -2; for(;;) { FEOF_CHECK; - fread(buffer_i16, sizeof(short), BUFSIZE, stdin); - convert_i16_f(buffer_i16, output_buffer, BUFSIZE); + fread(buffer_i16, sizeof(short), the_bufsize, stdin); + convert_i16_f(buffer_i16, output_buffer, the_bufsize); FWRITE_R; TRY_YIELD; } } if(!strcmp(argv[1],"realpart_cf")) { + if(!sendbufsize(initialize_buffers())) return -2; for(;;) { FEOF_CHECK; FREAD_C; - for(int i=0;i=3) sscanf(argv[2],"%g",&max_amplitude); + if(!sendbufsize(initialize_buffers())) return -2; for(;;) { FEOF_CHECK; FREAD_R; - limit_ff(input_buffer, output_buffer, BUFSIZE, max_amplitude); + limit_ff(input_buffer, output_buffer, the_bufsize, max_amplitude); FWRITE_R; TRY_YIELD; } @@ -313,10 +422,11 @@ int main(int argc, char *argv[]) sscanf(argv[2],"%g",&to_repeat); int buf_times = 0; if(argc>=4) sscanf(argv[3],"%d",&buf_times); - for(int i=0;i3) sscanf(argv[3],"%d",&table_size); + if(!sendbufsize(initialize_buffers())) return -2; shift_table_data_t table_data=shift_table_init(table_size); fprintf(stderr,"shift_table_cc: LUT initialized\n"); for(;;) { FEOF_CHECK; - if(!BIG_FREAD_C) break; - starting_phase=shift_table_cc((complexf*)input_buffer, (complexf*)output_buffer, BIG_BUFSIZE, rate, table_data, starting_phase); - BIG_FWRITE_C; + if(!FREAD_C) break; + starting_phase=shift_table_cc((complexf*)input_buffer, (complexf*)output_buffer, the_bufsize, rate, table_data, starting_phase); + FWRITE_C; TRY_YIELD; } return 0; @@ -372,6 +484,8 @@ int main(int argc, char *argv[]) int decimation=1; sscanf(argv[2],"%g",&rate); if(argc>3) sscanf(argv[3],"%d",&decimation); + if(!initialize_buffers()) return -2; + sendbufsize(the_bufsize/decimation); shift_addition_data_t d=decimating_shift_addition_init(rate, decimation); decimating_shift_addition_status_t s; s.decimation_remain=0; @@ -379,8 +493,8 @@ int main(int argc, char *argv[]) for(;;) { FEOF_CHECK; - if(!BIG_FREAD_C) break; - s=decimating_shift_addition_cc((complexf*)input_buffer, (complexf*)output_buffer, BIG_BUFSIZE, d, decimation, s); + if(!FREAD_C) break; + s=decimating_shift_addition_cc((complexf*)input_buffer, (complexf*)output_buffer, the_bufsize, d, decimation, s); fwrite(output_buffer, sizeof(float)*2, s.output_size, stdout); TRY_YIELD; } @@ -403,16 +517,30 @@ int main(int argc, char *argv[]) sscanf(argv[2],"%g",&rate); } + if(!sendbufsize(initialize_buffers())) return -2; for(;;) { shift_addition_data_t data=shift_addition_init(rate); fprintf(stderr,"shift_addition_cc: reinitialized to %g\n",rate); + int remain, current_size; + float* ibufptr; + float* obufptr; for(;;) { FEOF_CHECK; - if(!BIG_FREAD_C) break; - starting_phase=shift_addition_cc((complexf*)input_buffer, (complexf*)output_buffer, BIG_BUFSIZE, data, starting_phase); - BIG_FWRITE_C; + if(!FREAD_C) break; + remain=the_bufsize; + ibufptr=input_buffer; + obufptr=output_buffer; + while(remain) + { + current_size=(remain>1024)?1024:remain; + starting_phase=shift_addition_cc((complexf*)ibufptr, (complexf*)obufptr, current_size, data, starting_phase); + ibufptr+=current_size*2; + obufptr+=current_size*2; + remain-=current_size; + } + FWRITE_C; if(read_fifo_ctl(fd,"%g\n",&rate)) break; TRY_YIELD; } @@ -425,6 +553,7 @@ int main(int argc, char *argv[]) if(argc<=2) return badsyntax("need required parameter (rate)"); float rate; sscanf(argv[2],"%g",&rate); + //if(initialize_buffers()) return -2; //most likely we don't need this here shift_addition_data_t data=shift_addition_init(rate); shift_addition_cc_test(data); return 0; @@ -433,11 +562,12 @@ int main(int argc, char *argv[]) if(!strcmp(argv[1],"dcblock_ff")) { static dcblock_preserve_t dcp; //will be 0 as .bss is set to 0 + if(!sendbufsize(initialize_buffers())) return -2; for(;;) { FEOF_CHECK; FREAD_R; - dcp=dcblock_ff(input_buffer, output_buffer, BUFSIZE, 0, dcp); + dcp=dcblock_ff(input_buffer, output_buffer, the_bufsize, 0, dcp); FWRITE_R; TRY_YIELD; } @@ -445,10 +575,12 @@ int main(int argc, char *argv[]) if(!strcmp(argv[1],"fastdcblock_ff")) { - int dcblock_bufsize=BUFSIZE; + int dcblock_bufsize=SETBUF_DEFAULT_BUFSIZE; if(argc>=3) sscanf(argv[2],"%d",&dcblock_bufsize); float* dcblock_buffer=(float*)malloc(sizeof(float)*dcblock_bufsize); static float last_dc_level=0.0; + getbufsize(); //it is just dummy + sendbufsize(dcblock_bufsize); for(;;) { FEOF_CHECK; @@ -461,19 +593,21 @@ int main(int argc, char *argv[]) if(!strcmp(argv[1],"fmdemod_atan_cf")) { + if(!sendbufsize(initialize_buffers())) return -2; float last_phase=0; for(;;) { FEOF_CHECK; FREAD_C; if(feof(stdin)) return 0; - last_phase=fmdemod_atan_cf((complexf*)input_buffer, output_buffer, BUFSIZE, last_phase); + last_phase=fmdemod_atan_cf((complexf*)input_buffer, output_buffer, the_bufsize, last_phase); FWRITE_R; TRY_YIELD; } } if(!strcmp(argv[1],"fmdemod_quadri_cf")) { + if(!sendbufsize(initialize_buffers())) return -2; complexf last_sample; last_sample.i=0.; last_sample.q=0.; @@ -481,13 +615,14 @@ int main(int argc, char *argv[]) { FEOF_CHECK; FREAD_C; - last_sample=fmdemod_quadri_cf((complexf*)input_buffer, output_buffer, BUFSIZE, temp_f, last_sample); + last_sample=fmdemod_quadri_cf((complexf*)input_buffer, output_buffer, the_bufsize, temp_f, last_sample); FWRITE_R; TRY_YIELD; } } if(!strcmp(argv[1],"fmdemod_quadri_novect_cf")) { + if(!sendbufsize(initialize_buffers())) return -2; complexf last_sample; last_sample.i=0.; last_sample.q=0.; @@ -495,7 +630,7 @@ int main(int argc, char *argv[]) { FEOF_CHECK; FREAD_C; - last_sample=fmdemod_quadri_novect_cf((complexf*)input_buffer, output_buffer, BUFSIZE, last_sample); + last_sample=fmdemod_quadri_novect_cf((complexf*)input_buffer, output_buffer, the_bufsize, last_sample); FWRITE_R; TRY_YIELD; } @@ -503,6 +638,7 @@ int main(int argc, char *argv[]) if(!strcmp(argv[1],"deemphasis_wfm_ff")) { if(argc<=3) return badsyntax("need required parameters (sample rate, tau)"); + if(!sendbufsize(initialize_buffers())) return -2; int sample_rate; sscanf(argv[2],"%d",&sample_rate); float tau; @@ -513,7 +649,7 @@ int main(int argc, char *argv[]) { FEOF_CHECK; FREAD_R; - last_output=deemphasis_wfm_ff(input_buffer, output_buffer, BUFSIZE, tau, sample_rate, last_output); + last_output=deemphasis_wfm_ff(input_buffer, output_buffer, the_bufsize, tau, sample_rate, last_output); FWRITE_R; TRY_YIELD; } @@ -521,12 +657,13 @@ int main(int argc, char *argv[]) if(!strcmp(argv[1],"detect_nan_ff")) { + if(!sendbufsize(initialize_buffers())) return -2; for(;;) { FEOF_CHECK; FREAD_R; int nan_detect=0; - for(int i=0; i=3) sscanf(argv[2],"%hd",&hang_time); - float reference=0.5; + float reference=0.2; if(argc>=4) sscanf(argv[3],"%g",&reference); float attack_rate=0.01; @@ -755,13 +919,14 @@ int main(int argc, char *argv[]) float filter_alpha=0.999;//0.001; if(argc>=9) sscanf(argv[8],"%g",&filter_alpha); + if(!sendbufsize(initialize_buffers())) return -2; float last_gain=1.0; for(;;) { FEOF_CHECK; FREAD_R; - last_gain=agc_ff(input_buffer, output_buffer, BUFSIZE, reference, attack_rate, decay_rate, max_gain, hang_time, attack_wait, filter_alpha, last_gain); + last_gain=agc_ff(input_buffer, output_buffer, the_bufsize, reference, attack_rate, decay_rate, max_gain, hang_time, attack_wait, filter_alpha, last_gain); FWRITE_R; TRY_YIELD; } @@ -775,8 +940,15 @@ int main(int argc, char *argv[]) input.input_size=1024; if(argc>=3) sscanf(argv[2],"%d",&input.input_size); + + getbufsize(); //dummy + sendbufsize(input.input_size); + input.reference=1.0; if(argc>=4) sscanf(argv[3],"%g",&input.reference); + + //input.max_peak_ratio=12.0; + //if(argc>=5) sscanf(argv[3],"%g",&input.max_peak_ratio); input.buffer_1=(float*)calloc(input.input_size,sizeof(float)); input.buffer_2=(float*)calloc(input.input_size,sizeof(float)); @@ -817,12 +989,15 @@ int main(int argc, char *argv[]) if(suboptimal) fprintf(stderr,"note: suboptimal rational resampler chosen.\n"); - if(decimation==1&&interpolation==1) clone_(); //copy input to output in this special case (and stick in this function). + if(!initialize_buffers()) return -2; + + if(decimation==1&&interpolation==1) { sendbufsize(the_bufsize); clone_(the_bufsize); } //copy input to output in this special case (and stick in this function). //Alloc output buffer - int resampler_output_buffer_size=(BUFSIZE*interpolation)/decimation; + int resampler_output_buffer_size=(the_bufsize*interpolation)/decimation; + sendbufsize(resampler_output_buffer_size); float* resampler_output_buffer=(float*)malloc(sizeof(float)*resampler_output_buffer_size); - float* suboptimal_resampler_temp_buffer = (suboptimal)?(float*)malloc(sizeof(float)*BUFSIZE*interpolation):NULL; + float* suboptimal_resampler_temp_buffer = (suboptimal)?(float*)malloc(sizeof(float)*the_bufsize*interpolation):NULL; //Generate filter taps int taps_length = firdes_filter_len(transition_bw); @@ -834,11 +1009,11 @@ int main(int argc, char *argv[]) for(;;) { FEOF_CHECK; - if(d.input_processed==0) d.input_processed=BUFSIZE; - else memcpy(input_buffer, input_buffer+d.input_processed, sizeof(float)*(BUFSIZE-d.input_processed)); - fread(input_buffer+(BUFSIZE-d.input_processed), sizeof(float), d.input_processed, stdin); - //if(suboptimal) d=suboptimal_rational_resampler_ff(input_buffer, resampler_output_buffer, BUFSIZE, interpolation, decimation, taps, taps_length, suboptimal_resampler_temp_buffer); else - d=rational_resampler_ff(input_buffer, resampler_output_buffer, BUFSIZE, interpolation, decimation, taps, taps_length, d.last_taps_delay); + if(d.input_processed==0) d.input_processed=the_bufsize; + else memcpy(input_buffer, input_buffer+d.input_processed, sizeof(float)*(the_bufsize-d.input_processed)); + fread(input_buffer+(the_bufsize-d.input_processed), sizeof(float), d.input_processed, stdin); + //if(suboptimal) d=suboptimal_rational_resampler_ff(input_buffer, resampler_output_buffer, the_bufsize, interpolation, decimation, taps, taps_length, suboptimal_resampler_temp_buffer); else + d=rational_resampler_ff(input_buffer, resampler_output_buffer, the_bufsize, interpolation, decimation, taps, taps_length, d.last_taps_delay); //fprintf(stderr,"resampled %d %d, %d\n",d.output_size, d.input_processed, d.input_processed); fwrite(resampler_output_buffer, sizeof(float), d.output_size, stdout); TRY_YIELD; @@ -863,7 +1038,10 @@ int main(int argc, char *argv[]) } else fprintf(stderr,"fractional_decimator_ff: window = %s\n",firdes_get_string_from_window(window)); - if(rate==1) clone_(); //copy input to output in this special case (and stick in this function). + if(!initialize_buffers()) return -2; + sendbufsize(the_bufsize / rate); + + if(rate==1) clone_(the_bufsize); //copy input to output in this special case (and stick in this function). //Generate filter taps int taps_length = firdes_filter_len(transition_bw); @@ -876,10 +1054,10 @@ int main(int argc, char *argv[]) for(;;) { FEOF_CHECK; - if(d.input_processed==0) d.input_processed=BUFSIZE; - else memcpy(input_buffer, input_buffer+d.input_processed, sizeof(float)*(BUFSIZE-d.input_processed)); - fread(input_buffer+(BUFSIZE-d.input_processed), sizeof(float), d.input_processed, stdin); - d = fractional_decimator_ff(input_buffer, output_buffer, BUFSIZE, rate, taps, taps_length, d); + if(d.input_processed==0) d.input_processed=the_bufsize; + else memcpy(input_buffer, input_buffer+d.input_processed, sizeof(float)*(the_bufsize-d.input_processed)); + fread(input_buffer+(the_bufsize-d.input_processed), sizeof(float), d.input_processed, stdin); + d = fractional_decimator_ff(input_buffer, output_buffer, the_bufsize, rate, taps, taps_length, d); fwrite(output_buffer, sizeof(float), d.output_size, stdout); TRY_YIELD; } @@ -910,6 +1088,10 @@ int main(int argc, char *argv[]) benchmark|=!strcmp("--benchmark",argv[6]); octave|=!strcmp("--octave",argv[6]); } + + if(!initialize_buffers()) return -2; + sendbufsize(fft_size); + //make FFT plan complexf* input=(complexf*)fft_malloc(sizeof(complexf)*fft_size); complexf* windowed=(complexf*)fft_malloc(sizeof(complexf)*fft_size); @@ -925,9 +1107,9 @@ int main(int argc, char *argv[]) { fread(input, sizeof(complexf), fft_size, stdin); //skipping samples before next FFT (but fseek doesn't work for pipes) - for(int seek_remain=every_n_samples-fft_size;seek_remain>0;seek_remain-=BUFSIZE) + for(int seek_remain=every_n_samples-fft_size;seek_remain>0;seek_remain-=the_bufsize) { - fread(temp_f, sizeof(complexf), MIN_M(BUFSIZE,seek_remain), stdin); + fread(temp_f, sizeof(complexf), MIN_M(the_bufsize,seek_remain), stdin); } } else @@ -959,14 +1141,16 @@ int main(int argc, char *argv[]) { float add_db=0; if(argc>=3) sscanf(argv[2],"%g",&add_db); - + + if(!sendbufsize(initialize_buffers())) return -2; + for(;;) { FEOF_CHECK; - fread(input_buffer, sizeof(complexf), LOGPOWERCF_BUFSIZE, stdin); - logpower_cf((complexf*)input_buffer,output_buffer,LOGPOWERCF_BUFSIZE,add_db); - fwrite(output_buffer, sizeof(float), LOGPOWERCF_BUFSIZE, stdout); - //bufsize is so small, I don't dare to TRY_YIELD + fread(input_buffer, sizeof(complexf), the_bufsize, stdin); + logpower_cf((complexf*)input_buffer,output_buffer, the_bufsize, add_db); + fwrite(output_buffer, sizeof(float), the_bufsize, stdout); + TRY_YIELD; } } @@ -975,6 +1159,8 @@ int main(int argc, char *argv[]) if(argc<=2) return badsyntax("need required parameters (fft_size)"); int fft_size; sscanf(argv[2],"%d",&fft_size); + if(!getbufsize()) return -2; //dummy + sendbufsize(fft_size); float* input_buffer_s1 = (float*)malloc(sizeof(float)*fft_size/2); float* input_buffer_s2 = (float*)malloc(sizeof(float)*fft_size/2); for(;;) @@ -1003,6 +1189,8 @@ int main(int argc, char *argv[]) int fft_size; sscanf(argv[2],"%d",&fft_size); int real_data_size=fft_size+COMPRESS_FFT_PAD_N; + if(!getbufsize()) return -2; //dummy + sendbufsize(real_data_size); float* input_buffer_cwa = (float*)malloc(sizeof(float)*real_data_size); short* temp_buffer_cwa = (short*)malloc(sizeof(short)*real_data_size); unsigned char* output_buffer_cwa = (unsigned char*)malloc(sizeof(unsigned char)*(real_data_size/2)); @@ -1033,7 +1221,7 @@ int main(int argc, char *argv[]) int benchmark=(argc>=5)&&!strcmp(argv[4],"--benchmark"); fprintf(stderr,"fft_benchmark: FFT library used: %s\n",FFT_LIBRARY_USED); - + complexf* input=(complexf*)fft_malloc(sizeof(complexf)*fft_size); complexf* output=(complexf*)fft_malloc(sizeof(complexf)*fft_size); @@ -1068,7 +1256,7 @@ int main(int argc, char *argv[]) float high_cut; float transition_bw; window_t window = WINDOW_DEFAULT; - char window_string[100]; //TODO: nice buffer overflow opportunity + char window_string[256]; //TODO: nice buffer overflow opportunity int fd; if(fd=init_fifo(argc,argv)) @@ -1095,6 +1283,8 @@ int main(int argc, char *argv[]) int overlap_length = taps_length - 1; fprintf(stderr,"bandpass_fir_fft_cc: (fft_size = %d) = (taps_length = %d) + (input_size = %d) - 1\n(overlap_length = %d) = taps_length - 1\n", fft_size, taps_length, input_size, overlap_length); if (fft_size<=2) return badsyntax("FFT size error."); + + if(!sendbufsize(getbufsize())) return -2; //prepare making the filter and doing FFT on it complexf* taps=(complexf*)calloc(sizeof(complexf),fft_size); //initialize to zero @@ -1145,14 +1335,15 @@ int main(int argc, char *argv[]) if(!strcmp(argv[1],"encode_ima_adpcm_i16_u8")) { + if(!sendbufsize(initialize_buffers()/2)) return -2; ima_adpcm_state_t d; d.index=d.previousValue=0; for(;;) { FEOF_CHECK; - fread(buffer_i16, sizeof(short), IMA_ADPCM_BUFSIZE, stdin); - d=encode_ima_adpcm_i16_u8(buffer_i16, buffer_u8, IMA_ADPCM_BUFSIZE, d); - fwrite(buffer_u8, sizeof(unsigned char), IMA_ADPCM_BUFSIZE/2, stdout); + fread(buffer_i16, sizeof(short), the_bufsize, stdin); + d=encode_ima_adpcm_i16_u8(buffer_i16, buffer_u8, the_bufsize, d); + fwrite(buffer_u8, sizeof(unsigned char), the_bufsize/2, stdout); TRY_YIELD; } } @@ -1161,17 +1352,18 @@ int main(int argc, char *argv[]) { ima_adpcm_state_t d; d.index=d.previousValue=0; + if(!sendbufsize(initialize_buffers()*2)) return -2; for(;;) { FEOF_CHECK; - fread(buffer_u8, sizeof(unsigned char), IMA_ADPCM_BUFSIZE/2, stdin); - d=decode_ima_adpcm_u8_i16(buffer_u8, buffer_i16, IMA_ADPCM_BUFSIZE/2, d); - fwrite(buffer_i16, sizeof(short), IMA_ADPCM_BUFSIZE, stdout); + fread(buffer_u8, sizeof(unsigned char), the_bufsize, stdin); + d=decode_ima_adpcm_u8_i16(buffer_u8, buffer_i16, the_bufsize, d); + fwrite(buffer_i16, sizeof(short), the_bufsize*2, stdout); TRY_YIELD; } } #endif - + /* if(!strcmp(argv[1],"flowcontrol")) { if(argc<=3) return badsyntax("need required parameters (data_rate, reads_per_seconds)"); @@ -1180,6 +1372,8 @@ int main(int argc, char *argv[]) int reads_per_second; sscanf(argv[3],"%d",&reads_per_second); int flowcontrol_bufsize=ceil(1.*(double)data_rate/reads_per_second); + if(!getbufsize()) return -2; + sendbufsize(flowcontrol_bufsize); unsigned char* flowcontrol_buffer = (unsigned char*)malloc(sizeof(unsigned char)*flowcontrol_bufsize); int flowcontrol_sleep=floor(1000000./reads_per_second); fprintf(stderr, "flowcontrol: flowcontrol_bufsize = %d, flowcontrol_sleep = %d\n", flowcontrol_bufsize, flowcontrol_sleep); @@ -1191,6 +1385,144 @@ int main(int argc, char *argv[]) usleep(flowcontrol_sleep); TRY_YIELD; } + }*/ + + + if(!strcmp(argv[1],"flowcontrol")) + { + if(argc<=3) return badsyntax("need required parameters (data_rate, reads_per_seconds)"); + + int data_rate; + sscanf(argv[2],"%d",&data_rate); + + int reads_per_second=0; + if(strcmp(argv[3],"auto")) sscanf(argv[3],"%d",&reads_per_second); + + float prebuffer=2; + if(argc>4) sscanf(argv[4],"%g",&prebuffer); + + int thrust=10; + if(argc>5) sscanf(argv[5],"%d",&thrust); + + int flowcontrol_readsize, flowcontrol_bufsize, got_bufsize; + + if(!(got_bufsize=getbufsize())) return -2; + + if(reads_per_second) + { + flowcontrol_readsize=ceil(1.*(double)data_rate/reads_per_second); + } + else + { + flowcontrol_readsize=got_bufsize; + reads_per_second=data_rate/flowcontrol_readsize; + } + flowcontrol_bufsize=flowcontrol_readsize*floor(reads_per_second*prebuffer); + + int flowcontrol_bufindex=0; + unsigned char* flowcontrol_buffer = (unsigned char*)malloc(sizeof(unsigned char)*flowcontrol_bufsize); + int flowcontrol_sleep=floor(1000000./reads_per_second); + + fcntl(STDIN_FILENO, F_SETFL, fcntl(STDIN_FILENO, F_GETFL, 0) | O_NONBLOCK); + + sendbufsize(flowcontrol_readsize); + fflush(stdout); + + int flowcontrol_is_buffering = 1; + int read_return; + + struct timespec start_time, end_time; + + unsigned long long int all_bytes_written=0; + int test=0; + + fprintf(stderr, "flowcontrol: flowcontrol_readsize = %d, flowcontrol_bufsize = %d, flowcontrol_sleep = %d\n", flowcontrol_readsize, flowcontrol_bufsize, flowcontrol_sleep); + for (; ;) //my friend has told me that this is like two smileys ;) + { + FEOF_CHECK; + fprintf(stderr, "r"); + read_return=read(STDIN_FILENO, flowcontrol_buffer+flowcontrol_bufindex, sizeof(unsigned char) * (flowcontrol_bufsize-flowcontrol_bufindex) ); + fprintf(stderr, "t"); + if(read_return>0) flowcontrol_bufindex+=read_return; + + + if(flowcontrol_is_buffering) + { + fprintf(stderr, "flowcontrol: buffering, flowcontrol_bufindex = %d\n", flowcontrol_bufindex); + if(flowcontrol_bufindex==flowcontrol_bufsize) { flowcontrol_is_buffering = 0; clock_gettime(CLOCK_MONOTONIC_RAW, &start_time); } + else if(read_return<=0) continue; + } + else { + clock_gettime(CLOCK_MONOTONIC_RAW, &end_time); + int thrust_added=0; + while( (all_bytes_written+thrust*flowcontrol_readsize) / TIME_TAKEN(start_time,end_time) < data_rate ) + { + thrust_added |= thrust++; + } + //if(!(test++%10)) fprintf(stderr, "abw=%g\n", all_bytes_written / TIME_TAKEN(start_time,end_time)); + /*if(!thrust_added && TIME_TAKEN(start_time,end_time)>50) + { + clock_gettime(CLOCK_MONOTONIC_RAW, &start_time); + all_bytes_written=0; + }*/ + while(all_bytes_written>data_rate && TIME_TAKEN(start_time,end_time)>1) + { + all_bytes_written-=data_rate; + start_time.tv_sec++; + } + do + { + //if(thrust) fprintf(stderr, "flowcontrol: %d .. thrust\n", thrust); + write(STDOUT_FILENO, flowcontrol_buffer, flowcontrol_readsize); + fflush(stdout); + //fsync(STDOUT_FILENO); + memmove(flowcontrol_buffer, flowcontrol_buffer+flowcontrol_readsize, flowcontrol_bufindex-flowcontrol_readsize); + flowcontrol_bufindex -= flowcontrol_readsize; + all_bytes_written += flowcontrol_readsize; + } while(thrust && thrust-- && flowcontrol_bufindex>=flowcontrol_readsize); + } + + usleep(flowcontrol_sleep); + TRY_YIELD; + } + } + + if(!strcmp(argv[1],"through")) + { + struct timespec start_time, end_time; + if(!sendbufsize(initialize_buffers())) return -2; + + int time_now_sec=0; + int buffer_count=0; + + unsigned char* through_buffer; + through_buffer = (unsigned char*)malloc(the_bufsize*sizeof(float)); + + + for(;;) + { + FEOF_CHECK; + fread(through_buffer, sizeof(float), the_bufsize, stdin); + + if(!time_now_sec) + { + time_now_sec=1; + clock_gettime(CLOCK_MONOTONIC_RAW, &start_time); + } + else + { + clock_gettime(CLOCK_MONOTONIC_RAW, &end_time); + float timetaken; + if(time_now_sec<(timetaken=TIME_TAKEN(start_time,end_time))) + { + fprintf( stderr, "through: %lu bytes/s %d\n", (unsigned long)floor((float)buffer_count*the_bufsize*sizeof(float)/timetaken), buffer_count ); + time_now_sec=ceil(timetaken); + } + } + fwrite(through_buffer, sizeof(float), the_bufsize, stdout); + buffer_count++; + TRY_YIELD; + } } if(!strcmp(argv[1],"none")) @@ -1198,7 +1530,7 @@ int main(int argc, char *argv[]) return 0; } - return badsyntax("function name given in argument 1 does not exist."); + return badsyntax("function name given in argument 1 does not exist. Possible causes:\n- You mistyped the commandline.\n- You need to update csdr to a newer version (if available)."); } diff --git a/libcsdr.c b/libcsdr.c index 2cf5e9b..ed4a974 100644 --- a/libcsdr.c +++ b/libcsdr.c @@ -263,6 +263,75 @@ float shift_table_cc(complexf* input, complexf* output, int input_size, float ra return phase; } +#ifdef NEON_OPTS +#pragma message "We have a faster fir_decimate_cc now." + +//max help: http://community.arm.com/groups/android-community/blog/2015/03/27/arm-neon-programming-quick-reference + +int fir_decimate_cc(complexf *input, complexf *output, int input_size, int decimation, float *taps, int taps_length) +{ + //Theory: http://www.dspguru.com/dsp/faqs/multirate/decimation + //It uses real taps. It returns the number of output samples actually written. + //It needs overlapping input based on its returned value: + //number of processed input samples = returned value * decimation factor + //The output buffer should be at least input_length / 3. + // i: input index | ti: tap index | oi: output index + int oi=0; + for(int i=0; iinput_size) break; + register float acci=0; + register float accq=0; + + register int ti=0; + register float* pinput=(float*)&(input[i+ti]); + register float* ptaps=taps; + register float* ptaps_end=taps+taps_length; + float quad_acciq [8]; + + +/* +q0, q1: input signal I sample and Q sample +q2: taps +q4, q5: accumulator for I branch and Q branch (will be the output) +*/ + + //fprintf(stderr, "macska\n"); + + asm volatile( + //" vorr.f32 q4, #0\n\t" //null the accumulators + //" vorr.f32 q5, #0\n\t" + " vmov.f32 q4, #0.0\n\t" //another way to null the accumulators + " vmov.f32 q5, #0.0\n\t" + "for_fdccasm: vld2.32 {q0-q1}, [%[pinput]]!\n\t" //load q0 and q1 directly from the memory address stored in pinput, with interleaving (so that we get the I samples in q0 and the Q samples in q1), also increment the memory address in pinput (hence the "!" mark) //http://community.arm.com/groups/processors/blog/2010/03/17/coding-for-neon--part-1-load-and-stores + " vld1.32 {q2}, [%[ptaps]]!\n\t" + " vmla.f32 q4, q0, q2\n\t" //quad_acc_i += quad_input_i * quad_taps_1 //http://stackoverflow.com/questions/3240440/how-to-use-the-multiply-and-accumulate-intrinsics-in-arm-cortex-a8 //http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0489e/CIHEJBIE.html + " vmla.f32 q5, q1, q2\n\t" //quad_acc_q += quad_input_q * quad_taps_1 + " cmp %[ptaps], %[ptaps_end]\n\t" //if(ptaps == ptaps_end) + " bcc for_fdccasm\n\t" // then goto for_fdcasm + " vst1.32 {q4}, [%[quad_acci]]\n\t" //if the loop is finished, store the two accumulators in memory + " vst1.32 {q5}, [%[quad_accq]]\n\t" + : + [pinput]"+r"(pinput), [ptaps]"+r"(ptaps) //output operand list + : + [ptaps_end]"r"(ptaps_end), [quad_acci]"r"(quad_acciq), [quad_accq]"r"(quad_acciq+4) //input operand list + : + "memory", "q0", "q1", "q2", "q4", "q5", "cc" //clobber list + ); + //original for loops for reference: + //for(int ti=0; ti> [%d] %g \n", n, quad_acciq[n]); + iof(output,oi)=quad_acciq[0]+quad_acciq[1]+quad_acciq[2]+quad_acciq[3]; //we're still not ready, as we have to add up the contents of a quad accumulator register to get a single accumulated value + qof(output,oi)=quad_acciq[4]+quad_acciq[5]+quad_acciq[6]+quad_acciq[7]; + oi++; + } + return oi; +} + +#else + int fir_decimate_cc(complexf *input, complexf *output, int input_size, int decimation, float *taps, int taps_length) { //Theory: http://www.dspguru.com/dsp/faqs/multirate/decimation @@ -286,6 +355,34 @@ int fir_decimate_cc(complexf *input, complexf *output, int input_size, int decim return oi; } +#endif + +/* +int fir_decimate_cc(complexf *input, complexf *output, int input_size, int decimation, float *taps, int taps_length) +{ + //Theory: http://www.dspguru.com/dsp/faqs/multirate/decimation + //It uses real taps. It returns the number of output samples actually written. + //It needs overlapping input based on its returned value: + //number of processed input samples = returned value * decimation factor + //The output buffer should be at least input_length / 3. + // i: input index | ti: tap index | oi: output index + int oi=0; + for(int i=0; iinput_size) break; + float acci=0; + int taps_halflength = taps_length/2; + for(int ti=0; tireference/target_peak; if(target_gain>FASTAGC_MAX_GAIN) target_gain=FASTAGC_MAX_GAIN; + //fprintf(stderr, "target_gain: %g\n",target_gain); for(int i=0;iinput_size;i++) //@fastagc_ff: apply gain { @@ -572,7 +671,6 @@ void fastagc_ff(fastagc_ff_t* input, float* output) //fprintf(stderr,"target_gain=%g\n", target_gain); } - /* ______ __ __ _ _ _ _ | ____| \/ | | | | | | | | | diff --git a/libcsdr_gpl.c b/libcsdr_gpl.c index c17390d..738a3b5 100644 --- a/libcsdr_gpl.c +++ b/libcsdr_gpl.c @@ -189,12 +189,14 @@ float agc_ff(float* input, float* output, int input_size, float reference, float { if(last_peak0) { attack_wait_counter--; + //fprintf(stderr,"A"); dgain=0; } else @@ -203,6 +205,7 @@ float agc_ff(float* input, float* output, int input_size, float reference, float dgain=error*attack_rate; //Before starting to increase the gain next time, we will be waiting until hang_time for sure. hang_counter=hang_time; + } } else //DECREASE IN SIGNAL LEVEL @@ -222,7 +225,7 @@ float agc_ff(float* input, float* output, int input_size, float reference, float } //output[i]=gain*input[i]; //Here we do the actual scaling of the samples. //Here we do the actual scaling of the samples, but we run an IIR filter on the gain values: - output[i]=(gain+last_gain-gain_filter_alpha*last_gain)*input[i]; //dc-pass-filter: freqz([1 -1],[1 -0.99]) y[i]=x[i]+y[i-1]-alpha*x[i-1] + output[i]=(gain=gain+last_gain-gain_filter_alpha*last_gain)*input[i]; //dc-pass-filter: freqz([1 -1],[1 -0.99]) y[i]=x[i]+y[i-1]-alpha*x[i-1] //output[i]=input[i]*(last_gain+gain_filter_alpha*(gain-last_gain)); //LPF last_gain=gain; diff --git a/sdr.js/sdrjs-test.html b/sdr.js/sdrjs-test.html new file mode 100644 index 0000000..1642a10 --- /dev/null +++ b/sdr.js/sdrjs-test.html @@ -0,0 +1,38 @@ + + + + + + + + + +