Merged origin/master into nmux

2017-01-19 17:00:16 +01:00 · 2017-01-19 17:00:16 +01:00 · 3246e20864
commit 3246e20864
--- a/4
+++ b/4
@ -29,7 +29,7 @@
 LIBSOURCES =  fft_fftw.c libcsdr_wrapper.c 
 #SOURCES = csdr.c $(LIBSOURCES)
 cpufeature = $(if $(findstring $(1),$(shell cat /proc/cpuinfo)),$(2))
-PARAMS_SSE = $(call cpufeature,sse,-msse) $(call cpufeature,sse2,-msse2) $(call cpufeature,sse3,-msse3) $(call cpufeature,sse4,-msse4) $(call cpufeature,sse4_1,-msse4.1) $(call cpufeature,sse4_2,-msse4.2) -mfpmath=sse 
+PARAMS_SSE = $(call cpufeature,sse,-msse) $(call cpufeature,sse2,-msse2) $(call cpufeature,sse3,-msse3) $(call cpufeature,sse4a,-msse4a) $(call cpufeature,sse4_1,-msse4.1) $(call cpufeature,sse4_2,-msse4.2 -msse4) -mfpmath=sse 
 PARAMS_NEON = -mfloat-abi=hard -march=armv7-a -mtune=cortex-a8 -mfpu=neon -mvectorize-with-neon-quad -funsafe-math-optimizations -Wformat=0 -DNEON_OPTS
 #tnx Jan Szumiec for the Raspberry Pi support
 PARAMS_RASPI = -mfloat-abi=hard -mcpu=arm1176jzf-s -mfpu=vfp -funsafe-math-optimizations -Wformat=0
@ -92,7 +92,7 @@ emcc-get-deps:
 	emmake make; \
 	emmake make install
 emcc:
-	emcc -O3 -Isdr.js/$(FFTW_PACKAGE)/api -Lsdr.js/$(FFTW_PACKAGE)/emscripten-lib -o sdr.js/sdrjs-compiled.js fft_fftw.c libcsdr_wrapper.c -DLIBCSDR_GPL -DUSE_IMA_ADPCM -DUSE_FFTW -lfftw3f -s EXPORTED_FUNCTIONS="`python sdr.js/exported_functions.py`"
+	emcc -O3 -Isdr.js/$(FFTW_PACKAGE)/api -Lsdr.js/$(FFTW_PACKAGE)/emscripten-lib -o sdr.js/sdrjs-compiled.js fft_fftw.c libcsdr_wrapper.c -s TOTAL_MEMORY=67108864 -DLIBCSDR_GPL -DUSE_IMA_ADPCM -DUSE_FFTW -lfftw3f -s EXPORTED_FUNCTIONS="`python sdr.js/exported_functions.py`"
 	cat sdr.js/sdrjs-header.js sdr.js/sdrjs-compiled.js sdr.js/sdrjs-footer.js > sdr.js/sdr.js
 emcc-beautify:
 	bash -c 'type js-beautify >/dev/null 2>&1; if [ $$? -eq 0 ]; then js-beautify sdr.js/sdr.js >sdr.js/sdr.js.beautiful; mv sdr.js/sdr.js.beautiful sdr.js/sdr.js; fi'
--- a/README.md
+++ b/README.md
@ -12,14 +12,22 @@ Most of the code is available under the permissive BSD license, with some option

 How to compile
 --------------
-The project was only tested on Linux. It has the following dependencies: `libfftw3-dev`

 	make
 	sudo make install

+The project was only tested on Linux. It has the following dependencies: `libfftw3-dev`
+
 If you compile on ARM, please edit the Makefile and tailor `PARAMS_NEON` for your CPU.
+
 To run the examples, you will also need <a href="http://sdr.osmocom.org/trac/wiki/rtl-sdr">rtl_sdr</a> from Osmocom, and the following packages (at least on Debian): `mplayer octave gnuplot gnuplot-x11`

+If you compile *fftw3* from sources for use with *libcsdr*, you need to configure it with 32-bit float support enabled: 
+
+	./configure --enable-float
+
+(This is for *fftw3*, not *libcsdr*. You do not need to run the configure script before compiling *libcsdr*.)
+
 Credits
 -------
 The library was written by Andras Retzler, HA7ILM &lt;<randras@sdr.hu>&gt;.
@ -31,19 +39,19 @@ Usage by example

 ### Demodulate WFM

-	rtl_sdr -s 240000 -f 89500000 -g 20 - | csdr convert_u8_f | csdr fmdemod_quadri_cf | csdr fractional_decimator_ff 5 | csdr deemphasis_wfm_ff 48000 50e-6 | csdr convert_f_i16 | mplayer -cache 1024 -quiet -rawaudio samplesize=2:channels=1:rate=48000 -demuxer rawaudio -
+	rtl_sdr -s 240000 -f 89500000 -g 20 - | csdr convert_u8_f | csdr fmdemod_quadri_cf | csdr fractional_decimator_ff 5 | csdr deemphasis_wfm_ff 48000 50e-6 | csdr convert_f_s16 | mplayer -cache 1024 -quiet -rawaudio samplesize=2:channels=1:rate=48000 -demuxer rawaudio -

 - Baseband I/Q signal is coming from an RTL-SDR USB dongle, with a center frequency of `-f 104300000` Hz, a sampling rate of `-s 240000` samples per second.
 - The `rtl_sdr` tool outputs an unsigned 8-bit I/Q signal (one byte of I sample and one byte of Q coming after each other), but `libcsdr` DSP routines internally use floating point data type, so we convert the data stream of `unsigned char` to `float` by `csdr convert_u8_f`.
 - We want to listen one radio station at the frequency `-f 89500000` Hz (89.5 MHz).
- No other radio station is within the sampled bandwidth, so we send the signal directly to the demodulator. (This is an easy, but not perfect solution as the anti-aliasing filter at RTL-SDR DDC is too short.) 
+- No other radio station is within the sampled bandwidth, so we send the signal directly to the demodulator. (This is an easy, but not perfect solution as the anti-aliasing filter at RTL-SDR DDC is too short.)
 - After FM demodulation we decimate the signal by a factor of 5 to match the rate of the audio card (240000 / 5 = 48000).
 - A de-emphasis filter is used, because pre-emphasis is applied at the transmitter to compensate noise at higher frequencies. The time constant for de-emphasis for FM broadcasting in Europe is 50 microseconds (hence the `50e-6`).
 - Also, `mplayer` cannot play floating point audio, so we convert our signal to a stream of 16-bit integers.  

 ### Demodulate WFM: advanced

-	rtl_sdr -s 2400000 -f 89300000 -g 20 - | csdr convert_u8_f | csdr shift_addition_cc -0.085 | csdr fir_decimate_cc 10 0.05 HAMMING | csdr fmdemod_quadri_cf | csdr fractional_decimator_ff 5 | csdr deemphasis_wfm_ff 48000 50e-6 | csdr convert_f_i16 | mplayer -cache 1024 -quiet -rawaudio samplesize=2:channels=1:rate=48000 -demuxer rawaudio -
+	rtl_sdr -s 2400000 -f 89300000 -g 20 - | csdr convert_u8_f | csdr shift_addition_cc -0.085 | csdr fir_decimate_cc 10 0.05 HAMMING | csdr fmdemod_quadri_cf | csdr fractional_decimator_ff 5 | csdr deemphasis_wfm_ff 48000 50e-6 | csdr convert_f_s16 | mplayer -cache 1024 -quiet -rawaudio samplesize=2:channels=1:rate=48000 -demuxer rawaudio -

 - We want to listen to one radio station, but input signal contains multiple stations, and its bandwidth is too large for sending it directly to the FM demodulator.
 - We shift the signal to the center frequency of the station we want to receive: `-0.085*2400000 = -204000`, so basically we will listen to the radio station centered at 89504000 Hz.
@ -58,13 +66,13 @@ Sample rates look like this:

 *Note:* there is an example shell script that does this for you (without the unnecessary shift operation). If you just want to listen to FM radio, type:

-	csdr-fm 89.5 20 
+	csdr-fm 89.5 20

 The first parameter is the frequency in MHz, and the second optional parameter is the RTL-SDR tuner gain in dB.

 ### Demodulate NFM
-	
-	rtl_sdr -s 2400000 -f 145000000 -g 20 - | csdr convert_u8_f | csdr shift_addition_cc `python -c "print float(145000000-145350000)/2400000"` | csdr fir_decimate_cc 50 0.005 HAMMING | csdr fmdemod_quadri_cf | csdr limit_ff | csdr deemphasis_nfm_ff 48000 | csdr fastagc_ff | csdr convert_f_i16 | mplayer -cache 1024 -quiet -rawaudio samplesize=2:channels=1:rate=48000 -demuxer rawaudio -
+
+	rtl_sdr -s 2400000 -f 145000000 -g 20 - | csdr convert_u8_f | csdr shift_addition_cc `python -c "print float(145000000-145350000)/2400000"` | csdr fir_decimate_cc 50 0.005 HAMMING | csdr fmdemod_quadri_cf | csdr limit_ff | csdr deemphasis_nfm_ff 48000 | csdr fastagc_ff | csdr convert_f_s16 | mplayer -cache 1024 -quiet -rawaudio samplesize=2:channels=1:rate=48000 -demuxer rawaudio -

 - Note that the decimation factor is higher (we want to select a ~25 kHz channel).
 - Also there is a python hack to calculate the relative shift offset. The real receiver frequency is `145350000` Hz.
@ -72,9 +80,9 @@ The first parameter is the frequency in MHz, and the second optional parameter i

 ### Demodulate AM

-	rtl_sdr -s 2400000 -f 145000000 -g 20 - | csdr convert_u8_f | csdr shift_addition_cc `python -c "print float(145000000-144400000)/2400000"` | csdr fir_decimate_cc 50 0.005 HAMMING | csdr amdemod_cf | csdr fastdcblock_ff | csdr agc_ff | csdr limit_ff | csdr convert_f_i16 | mplayer -cache 1024 -quiet -rawaudio samplesize=2:channels=1:rate=48000 -demuxer rawaudio -
+	rtl_sdr -s 2400000 -f 145000000 -g 20 - | csdr convert_u8_f | csdr shift_addition_cc `python -c "print float(145000000-144400000)/2400000"` | csdr fir_decimate_cc 50 0.005 HAMMING | csdr amdemod_cf | csdr fastdcblock_ff | csdr agc_ff | csdr limit_ff | csdr convert_f_s16 | mplayer -cache 1024 -quiet -rawaudio samplesize=2:channels=1:rate=48000 -demuxer rawaudio -

- `amdemod_cf` is used as demodulator. 
+- `amdemod_cf` is used as demodulator.
 - `agc_ff` should be used for AM and SSB.

 ### Design FIR band-pass filter (with complex taps)
@ -87,16 +95,16 @@ The first parameter is the frequency in MHz, and the second optional parameter i

 ### Demodulate SSB

-	rtl_sdr -s 2400000 -f 145000000 -g 20 - | csdr convert_u8_f | csdr shift_addition_cc `python -c "print float(145000000-144400000)/2400000"` | csdr fir_decimate_cc 50 0.005 HAMMING | csdr bandpass_fir_fft_cc 0 0.1 0.05 | csdr realpart_cf | csdr agc_ff | csdr limit_ff | csdr convert_f_i16 | mplayer -cache 1024 -quiet -rawaudio samplesize=2:channels=1:rate=48000 -demuxer rawaudio -
+	rtl_sdr -s 2400000 -f 145000000 -g 20 - | csdr convert_u8_f | csdr shift_addition_cc `python -c "print float(145000000-144400000)/2400000"` | csdr fir_decimate_cc 50 0.005 HAMMING | csdr bandpass_fir_fft_cc 0 0.1 0.05 | csdr realpart_cf | csdr agc_ff | csdr limit_ff | csdr convert_f_s16 | mplayer -cache 1024 -quiet -rawaudio samplesize=2:channels=1:rate=48000 -demuxer rawaudio -

- It is a modified Weaver-demodulator. The complex FIR filter removes the lower sideband and lets only the upper pass (USB). If you want to demodulate LSB, change `bandpass_fir_fft_cc 0 0.05` to `bandpass_fir_fft_cc -0.05 0`. 
+- It is a modified Weaver-demodulator. The complex FIR filter removes the lower sideband and lets only the upper pass (USB). If you want to demodulate LSB, change `bandpass_fir_fft_cc 0 0.05` to `bandpass_fir_fft_cc -0.05 0`.

 ### Draw FFT

 	rtl_sdr -s 2400000 -f 104300000 -g 20 - | csdr convert_u8_f | csdr fft_cc 1024 1200000 HAMMING --octave | octave -i > /dev/null

 - We calculate the Fast Fourier Transform by `csdr fft_cc` on the first 1024 samples of every block of 1200000 complex samples coming after each other. (We calculate FFT from 1024 samples and then skip 1200000-1024=1198976 samples. This way we will calculate FFT two times every second.)
- The window used for FFT is the Hamming window, and the output consists of commands that can be directly interpreted by GNU Octave which plots us the spectrum. 
+- The window used for FFT is the Hamming window, and the output consists of commands that can be directly interpreted by GNU Octave which plots us the spectrum.

 Usage
 -----
@ -107,9 +115,9 @@ Function name endings found in *libcsdr* mean the input and output data types of
 Data types are noted as it follows:

 - `f` is `float` (single percision)
- `c` is `complexf` (two single precision floating point values in a struct) 
+- `c` is `complexf` (two single precision floating point values in a struct)
 - `u8` is `unsigned char` of 1 byte/8 bits (e. g. the output of `rtl_sdr` is of `u8`)
- `i16` is `signed short` of 2 bytes/16 bits (e. g. sound card input is usually `i16`)
+- `s16` is `signed short` of 2 bytes/16 bits (e. g. sound card input is usually `s16`)

 Functions usually end as:

@ -117,19 +125,21 @@ Functions usually end as:
 - `_cf` complex input, float output
 - `_cc` complex input, complex output

-Regarding *csdr*, it can convert a real/complex stream from one data format to another, to interface it with other SDR tools and the sound card. 
-The following commands are available: 
+Regarding *csdr*, it can convert a real/complex stream from one data format to another, to interface it with other SDR tools and the sound card.
+The following commands are available:

- `csdr convert_u8_f` 
- `csdr convert_f_u8` 
+- `csdr convert_u8_f`
+- `csdr convert_f_u8`
 - `csdr convert_s8_f`
 - `csdr convert_f_s8`
- `csdr convert_i16_f` 
- `csdr convert_f_i16`
+- `csdr convert_s16_f`
+- `csdr convert_f_s16`

 How to interpret: `csdr convert_<src>_<dst>`
 You can use these commands on complex streams, too, as they are only interleaved values (I,Q,I,Q,I,Q... coming after each other).

+> Note: The the functions with `i16` in their names have been renamed, but still work (e.g. `csdr convert_f_i16`).
+
 #### csdr commands

 `csdr` should be considered as a reference implementation on using `libcsdr`. For additional details on how to use the library, check `csdr.c` and `libcsdr.c`.
@ -157,13 +167,17 @@ It multiplies all samples by `gain`.

 It copies the input to the output.

+	through
+
+It copies the input to the output, while also displaying the speed of the data going through it.
+
 	none

 The `csdr` process just exits with 0.

 	yes_f <to_repeat> [buf_times]

-It outputs continously the `to_repeat` float number. 
+It outputs continously the `to_repeat` float number.
 If `buf_times` is not given, it never stops.
 Else, after outputing `buf_times` number of buffers (the size of which is stated in the `BUFSIZE` macro), it exits.

@ -173,7 +187,7 @@ Along with copying its input samples to the output, it prints a warning message

 	floatdump_f

-It prints any floating point input samples. 
+It prints any floating point input samples.
 The format string used is `"%g "`.

 	flowcontrol <data_rate> <reads_per_second>
@ -184,8 +198,8 @@ It copies `data_rate / reads_per_second` bytes from the input to the output, doi
 	shift_math_cc <rate>

 It shifts the signal in the frequency domain by `rate`.
-`rate` is a floating point number between -0.5 and 0.5. 
-`rate` is relative to the sampling rate. 
+`rate` is a floating point number between -0.5 and 0.5.
+`rate` is relative to the sampling rate.

 Internally, a sine and cosine wave is generated to perform this function, and this function uses `math.h` for this purpose, which is quite accurate, but not always very fast.

@ -257,7 +271,7 @@ It uses fixed filters so it works only on predefined sample rates, for the actua

 	amdemod_cf

-It is an AM demodulator that uses `sqrt`. On some architectures `sqrt` can be directly calculated by dedicated CPU instructions, but on others it may be slower. 
+It is an AM demodulator that uses `sqrt`. On some architectures `sqrt` can be directly calculated by dedicated CPU instructions, but on others it may be slower.

 	amdemod_estimator_cf

@ -286,7 +300,7 @@ Other parameters were explained above at `firdes_lowpass_f`.

 	fir_decimate_cc <decimation_factor> [transition_bw [window]]

-It is a decimator that keeps one sample out of `decimation_factor` samples. 
+It is a decimator that keeps one sample out of `decimation_factor` samples.
 To avoid aliasing, it runs a filter on the signal and removes spectral components above `0.5 × nyquist_frequency × decimation_factor`.

 `transition_bw` and `window` are the parameters of the filter.
@ -312,7 +326,7 @@ Parameters are described under `firdes_bandpass_c` and `firdes_lowpass_f`.

 	agc_ff [hang_time [reference [attack_rate [decay_rate [max_gain [attack_wait [filter_alpha]]]]]]]

-It is an automatic gain control function. 
+It is an automatic gain control function.

 - `hang_time` is the number of samples to wait before strating to increase the gain after a peak.
 - `reference` is the reference level for the AGC. It tries to keep the amplitude of the output signal close to that.
@ -330,7 +344,7 @@ It is a faster AGC that linearly changes the gain, taking the highest amplitude

 	fft_cc <fft_size> <out_of_every_n_samples> [window [--octave] [--benchmark]]

-It performs an FFT on the first `fft_size` samples out of `out_of_every_n_samples`, thus skipping `out_of_every_n_samples - fft_size` samples in the input. 
+It performs an FFT on the first `fft_size` samples out of `out_of_every_n_samples`, thus skipping `out_of_every_n_samples - fft_size` samples in the input.

 It can draw the spectrum by using `--octave`, for more information, look at the [Usage by example] section.

@ -363,11 +377,45 @@ The actual number of padding samples can be determined by running `cat csdr.c |

 It exchanges the first and second part of the FFT vector, to prepare it for the waterfall/spectrum display. It should operate on the data output from `logpower_cf`.

+	dsb_fc [q_value]
+
+It converts a real signal to a double sideband complex signal centered around DC.
+It does so by generating a complex signal:
+* the real part of which is the input real signal,
+* the imaginary part of which is `q_value` (0 by default).
+With `q_value = 0` it is an AM-DSB/SC modulator. If you want to get an AM-DSB signal, you will have to add a carrier to it.
+
+	add_dcoffset_cc
+
+It adds a DC offset to the complex signal: `i_output = 0.5 + i_input / 2, q_output = q_input / 2`
+
+	convert_f_samplerf <wait_for_this_sample>
+
+It converts a real signal to the `-mRF` input format of [https://github.com/F5OEO/rpitx](rpitx), so it allows you to generate frequency modulation. The input signal will be the modulating signal. The `<wait_for_this_sample>` parameter is the value for `rpitx` indicating the time to wait between samples. For a sampling rate of 48 ksps, this is 20833.
+
+	fmmod_fc
+
+It generates a complex FM modulated output from a real input signal.
+
+	fixed_amplitude_cc <new_amplitude>
+
+It changes the amplitude of every complex input sample to a fixed value. It does not change the phase information of the samples.
+
+	mono2stereo_s16
+
+It doubles every input sample.
+
 	setbuf <buffer_size>

-If the environment variable `CSDR_DYNAMIC_BUFSIZE_ON` is set to 1, then you can use this command to set the input buffer size for the next `csdr` process in the chain.
+See the [buffer sizes](#buffer_sizes) section.

+	squelch_and_smeter_cc --fifo <squelch_fifo> --outfifo <smeter_fifo> <use_every_nth> <report_every_nth>

+This is a controllable squelch, which reads the squelch level input from `<squelch_fifo>` and writes the power level output to `<smeter_fifo>`. Both input and output are in the format of `%g\n`. While calculating the power level, it takes only every `<use_every_nth>` sample into consideration. It writes the S-meter value for every `<report_every_nth>` buffer to `<smeter_fifo>`. If the squelch level is set to 0, it it forces the squelch to be open. If the squelch is closed, it fills the output with zero.
+
+	fifo <buffer_size> <number_of_buffers>
+
+It is similar to `clone`, but internally it uses a circular buffer. It reads as much as possible from the input. It discards input samples if the input buffer is full.

 #### Control via pipes

@ -388,9 +436,59 @@ Processing will only start after the first control command has been received by
 By writing to the given FIFO file with the syntax below, you can control the shift rate:

 	<low_cut> <high_cut>\n
-	
+
 E.g. you can send `-0.05 0.02\n`

+#### Buffer sizes
+
+*csdr* has three modes of determining the buffer sizes, which can be chosen by the appropriate environment variables:
+* *default:* 16k or 1k buffer is chosen based on function,
+* *dynamic buffer size determination:* input buffer size is recommended by the previous process, output buffer size is determined by the process,
+* *fixed buffer sizes*.
+
+*csdr* can choose from two different buffer sizes by **default**.
+* For operations handling the full-bandwidth I/Q data from the receiver, a buffer size of 16384 samples is used (see `env_csdr_fixed_big_bufsize` in the code).
+* For operations handling only a selected channel, a buffer size of 1024 samples is used (see `env_csdr_fixed_bufsize` in the code).
+
+*csdr* now has an experimental feature called **dynamic buffer size determination**, which is switched on by issuing `export CSDR_DYNAMIC_BUFSIZE_ON=1` in the shell before running `csdr`. If it is enabled:
+* All `csdr` processes in a DSP chain acquire their recommended input buffer size from the previous `csdr` process. This information is in the first 8 bytes of the input stream.
+* Each process can decide whether to use this or choose another input buffer size (if that's more practical).
+* Every process sends out its output buffer size to the next process. Then it startss processing data.
+* The DSP chain should start with a `csdr setbuf <buffer_size>` process, which only copies data from the input to the output, but also sends out the given buffer size information to the next process.
+* The 8 bytes of information included in the beginning of the stream is:
+  * a preamble of the bytes 'c','s','d','r' (4 bytes),
+  * the buffer size stored as `int` (4 bytes).
+* This size always counts as samples, as we expect that the user takes care of connecting the functions with right data types to each other.
+
+> I added this feature while researching how to decrease the latency of a DSP chain consisting of several multirate algorithms.<br />
+> For example, a `csdr fir_decimate_cc 10` would use an input buffer of 10240, and an output buffer of 1024. The next process in the chain, `csdr bandpass_fir_fft_cc` would automatically adjust to it, using a buffer of 1024 for both input and output.<br />
+> In contrast to original expectations, using dynamic buffer sizes didn't decrease the latency much.
+
+If dynamic buffer size determination is disabled, you can still set a **fixed buffer size** with `export CSDR_FIXED_BUFSIZE=<buffer_size>`.
+
+For debug purposes, buffer sizes of all processes can be printed using `export CSDR_PRINT_BUFSIZES=1`.
+
+If you add your own functions to `csdr`, you have to initialize the buffers before doing the processing. Buffer size will be stored in the global variable `the_bufsize`.
+
+Example of initialization if the process generates N output samples for N input samples:
+
+    if(!sendbufsize(initialize_buffers())) return -2;
+
+Example of initalization if the process generates N/D output samples for N input samples:
+
+    if(!initialize_buffers()) return -2;
+    sendbufsize(the_bufsize/D);
+
+Example of initialization if the process allocates memory for itself, and it doesn't want to use the global buffers:
+
+    getbufsize(); //dummy
+    sendbufsize(my_own_bufsize);
+
+Example of initialization if the process always works with a fixed output size, regardless of the input:
+
+    if(!initialize_buffers()) return -2;
+    sendbufsize(fft_size);
+
 #### Testbench

 `csdr` was tested with GNU Radio Companion flowgraphs. These flowgraphs are available under the directory `grc_tests`, and they require the <a href="https://github.com/simonyiszk/gr-ha5kfu">gr-ha5kfu</a> set of blocks for GNU Radio.  
--- a/csdr.c
+++ b/csdr.c
--- a/libcsdr.c
+++ b/libcsdr.c
@ -1,5 +1,5 @@
 /*
-This software is part of libcsdr, a set of simple DSP routines for 
+This software is part of libcsdr, a set of simple DSP routines for
 Software Defined Radio.

 Copyright (c) 2014, Andras Retzler <randras@sdr.hu>
@ -40,14 +40,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <assert.h>

 /*
-           _           _                   __                  _   _                 
-          (_)         | |                 / _|                | | (_)                
- __      ___ _ __   __| | _____      __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___ 
+           _           _                   __                  _   _
+          (_)         | |                 / _|                | | (_)
+ __      ___ _ __   __| | _____      __  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
 \ \ /\ / / | '_ \ / _` |/ _ \ \ /\ / /  |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
  \ V  V /| | | | | (_| | (_) \ V  V /   | | | |_| | | | | (__| |_| | (_) | | | \__ \
   \_/\_/ |_|_| |_|\__,_|\___/ \_/\_/    |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/
-                                                                                     
-                                                                                                                                                         
+
+
 */

 #define MFIRDES_GWS(NAME) \
@ -95,7 +95,7 @@ float firdes_wkernel_boxcar(float rate)
 }

 float (*firdes_get_window_kernel(window_t window))(float)
-{	
+{
 	if(window==WINDOW_HAMMING) return firdes_wkernel_hamming;
 	else if(window==WINDOW_BLACKMAN) return firdes_wkernel_blackman;
 	else if(window==WINDOW_BOXCAR) return firdes_wkernel_boxcar;
@ -103,14 +103,14 @@ float (*firdes_get_window_kernel(window_t window))(float)
 }

 /*
-  ______ _____ _____      __ _ _ _                   _           _             
- |  ____|_   _|  __ \    / _(_) | |                 | |         (_)            
- | |__    | | | |__) |  | |_ _| | |_ ___ _ __     __| | ___  ___ _  __ _ _ __  
- |  __|   | | |  _  /   |  _| | | __/ _ \ '__|   / _` |/ _ \/ __| |/ _` | '_ \ 
+  ______ _____ _____      __ _ _ _                   _           _
+ |  ____|_   _|  __ \    / _(_) | |                 | |         (_)
+ | |__    | | | |__) |  | |_ _| | |_ ___ _ __     __| | ___  ___ _  __ _ _ __
+ |  __|   | | |  _  /   |  _| | | __/ _ \ '__|   / _` |/ _ \/ __| |/ _` | '_ \
 | |     _| |_| | \ \   | | | | | ||  __/ |     | (_| |  __/\__ \ | (_| | | | |
 |_|    |_____|_|  \_\  |_| |_|_|\__\___|_|      \__,_|\___||___/_|\__, |_| |_|
-                                                                    __/ |      
-                                                                   |___/       
+                                                                    __/ |
+                                                                   |___/
 */

 void firdes_lowpass_f(float *output, int length, float cutoff_rate, window_t window)
@ -127,7 +127,7 @@ void firdes_lowpass_f(float *output, int length, float cutoff_rate, window_t win
 		output[middle-i]=output[middle+i]=(sin(2*PI*cutoff_rate*i)/i)*window_function((float)i/middle);
 		//printf("%g %d %d %d %d | %g\n",output[middle-i],i,middle,middle+i,middle-i,sin(2*PI*cutoff_rate*i));
 	}
-	
+
 	//Normalize filter kernel
 	float sum=0;
 	for(int i=0;i<length;i++) //@firdes_lowpass_f: normalize pass 1
@ -147,18 +147,18 @@ void firdes_bandpass_c(complexf *output, int length, float lowcut, float highcut
 	//	2. we shift the filter taps spectrally by multiplying with e^(j*w), so we get complex taps
 	//(tnx HA5FT)
 	float* realtaps = (float*)malloc(sizeof(float)*length);
-	
+
 	firdes_lowpass_f(realtaps, length, (highcut-lowcut)/2, window);
 	float filter_center=(highcut+lowcut)/2;
-	
+
 	float phase=0, sinval, cosval;
 	for(int i=0; i<length; i++) //@@firdes_bandpass_c
 	{
 		cosval=cos(phase);
-		sinval=sin(phase);		
+		sinval=sin(phase);
 		phase+=2*PI*filter_center;
 		while(phase>2*PI) phase-=2*PI; //@@firdes_bandpass_c
-		while(phase<0) phase+=2*PI; 
+		while(phase<0) phase+=2*PI;
 		iof(output,i)=cosval*realtaps[i];
 		qof(output,i)=sinval*realtaps[i];
 		//output[i] := realtaps[i] * e^j*w
@ -173,14 +173,14 @@ int firdes_filter_len(float transition_bw)
 }

 /*
-  _____   _____ _____      __                  _   _                 
- |  __ \ / ____|  __ \    / _|                | | (_)                
- | |  | | (___ | |__) |  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___ 
+  _____   _____ _____      __                  _   _
+ |  __ \ / ____|  __ \    / _|                | | (_)
+ | |  | | (___ | |__) |  | |_ _   _ _ __   ___| |_ _  ___  _ __  ___
 | |  | |\___ \|  ___/   |  _| | | | '_ \ / __| __| |/ _ \| '_ \/ __|
 | |__| |____) | |       | | | |_| | | | | (__| |_| | (_) | | | \__ \
 |_____/|_____/|_|       |_|  \__,_|_| |_|\___|\__|_|\___/|_| |_|___/

-*/                                                   
+*/

 float shift_math_cc(complexf *input, complexf* output, int input_size, float rate, float starting_phase)
 {
@ -239,7 +239,7 @@ float shift_table_cc(complexf* input, complexf* output, int input_size, float ra
 		//float vphase=fmodf(phase,PI/2); //between 0 and 90deg
 		int quadrant=phase/(PI/2); //between 0 and 3
 		float vphase=phase-quadrant*(PI/2);
-		sin_index=(vphase/(PI/2))*table_data.table_size; 
+		sin_index=(vphase/(PI/2))*table_data.table_size;
 		cos_index=table_data.table_size-1-sin_index;
 		if(quadrant&1) //in quadrant 1 and 3
 		{
@ -480,8 +480,8 @@ int fir_decimate_cc(complexf *input, complexf *output, int input_size, int decim
 	for(int i=0; i<input_size; i+=decimation) //@fir_decimate_cc: outer loop
 	{
 		if(i+taps_length>input_size) break;
-		register float acci=0; 
-		register float accq=0;		
+		register float acci=0;
+		register float accq=0;

 		register int ti=0;
 		register float* pinput=(float*)&(input[i+ti]);
@ -489,10 +489,10 @@ int fir_decimate_cc(complexf *input, complexf *output, int input_size, int decim
 		register float* ptaps_end=taps+taps_length;
 		float quad_acciq [8];

-	
+
 /*
 q0, q1:	input signal I sample and Q sample
-q2:		taps 
+q2:		taps
 q4, q5: accumulator for I branch and Q branch (will be the output)
 */

@ -500,7 +500,7 @@ q4, q5: accumulator for I branch and Q branch (will be the output)
 			"		vmov.f32 q4, #0.0\n\t" //another way to null the accumulators
 			"		vmov.f32 q5, #0.0\n\t"
 			"for_fdccasm: vld2.32	{q0-q1}, [%[pinput]]!\n\t" //load q0 and q1 directly from the memory address stored in pinput, with interleaving (so that we get the I samples in q0 and the Q samples in q1), also increment the memory address in pinput (hence the "!" mark) //http://community.arm.com/groups/processors/blog/2010/03/17/coding-for-neon--part-1-load-and-stores
-			"		vld1.32	{q2}, [%[ptaps]]!\n\t" 
+			"		vld1.32	{q2}, [%[ptaps]]!\n\t"
 			"		vmla.f32 q4, q0, q2\n\t" //quad_acc_i += quad_input_i * quad_taps_1 //http://stackoverflow.com/questions/3240440/how-to-use-the-multiply-and-accumulate-intrinsics-in-arm-cortex-a8 //http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0489e/CIHEJBIE.html
 			"		vmla.f32 q5, q1, q2\n\t" //quad_acc_q += quad_input_q * quad_taps_1
 			"		cmp %[ptaps], %[ptaps_end]\n\t" //if(ptaps != ptaps_end)
@ -511,13 +511,13 @@ q4, q5: accumulator for I branch and Q branch (will be the output)
 			[pinput]"+r"(pinput), [ptaps]"+r"(ptaps) //output operand list
 		:
 			[ptaps_end]"r"(ptaps_end), [quad_acci]"r"(quad_acciq), [quad_accq]"r"(quad_acciq+4) //input operand list
-		: 
+		:
 			"memory", "q0", "q1", "q2", "q4", "q5", "cc" //clobber list
 		);
 		//original for loops for reference:
-		//for(int ti=0; ti<taps_length; ti++) acci += (iof(input,i+ti)) * taps[ti]; //@fir_decimate_cc: i loop		
+		//for(int ti=0; ti<taps_length; ti++) acci += (iof(input,i+ti)) * taps[ti]; //@fir_decimate_cc: i loop
 		//for(int ti=0; ti<taps_length; ti++) accq += (qof(input,i+ti)) * taps[ti]; //@fir_decimate_cc: q loop
-		
+
 		//for(int n=0;n<8;n++) fprintf(stderr, "\n>> [%d] %g \n", n, quad_acciq[n]);
 		iof(output,oi)=quad_acciq[0]+quad_acciq[1]+quad_acciq[2]+quad_acciq[3]; //we're still not ready, as we have to add up the contents of a quad accumulator register to get a single accumulated value
 		qof(output,oi)=quad_acciq[4]+quad_acciq[5]+quad_acciq[6]+quad_acciq[7];
@ -581,7 +581,7 @@ int fir_decimate_cc(complexf *input, complexf *output, int input_size, int decim

 rational_resampler_ff_t rational_resampler_ff(float *input, float *output, int input_size, int interpolation, int decimation, float *taps, int taps_length, int last_taps_delay)
 {
-	
+
 	//Theory: http://www.dspguru.com/dsp/faqs/multirate/resampling
 	//oi: output index, i: tap index
 	int output_size=input_size*interpolation/decimation;
@ -612,7 +612,7 @@ rational_resampler_ff_t rational_resampler_ff(float *input, float *output, int i

 /*

-The greatest challenge in resampling is figuring out which tap should be applied to which sample. 
+The greatest challenge in resampling is figuring out which tap should be applied to which sample.

 Typical test patterns for rational_resampler_ff:

@ -657,13 +657,13 @@ float inline fir_one_pass_ff(float* input, float* taps, int taps_length)
 fractional_decimator_ff_t fractional_decimator_ff(float* input, float* output, int input_size, float rate, float *taps, int taps_length, fractional_decimator_ff_t d)
 {
 	if(rate<=1.0) return d; //sanity check, can't decimate <=1.0
-	//This routine can handle floating point decimation rates. 
-	//It linearly interpolates between two samples that are taken into consideration from the filtered input. 
+	//This routine can handle floating point decimation rates.
+	//It linearly interpolates between two samples that are taken into consideration from the filtered input.
 	int oi=0;
 	int index_high;
 	float where=d.remain;
 	float result_high, result_low;
-	if(where==0.0) //in the first iteration index_high may be zero (so using the item index_high-1 would lead to invalid memory access). 
+	if(where==0.0) //in the first iteration index_high may be zero (so using the item index_high-1 would lead to invalid memory access).
 	{
 		output[oi++]=fir_one_pass_ff(input,taps,taps_length);
 		where+=rate;
@ -674,7 +674,7 @@ fractional_decimator_ff_t fractional_decimator_ff(float* input, float* output, i
 	for(;(index_high=ceilf(where))+taps_length<input_size;where+=rate) //@fractional_decimator_ff
 	{
 		if(previous_index_high==index_high-1) result_low=result_high; //if we step less than 2.0 then we do already have the result for the FIR filter for that index
-		else result_low=fir_one_pass_ff(input+index_high-1,taps,taps_length); 
+		else result_low=fir_one_pass_ff(input+index_high-1,taps,taps_length);
 		result_high=fir_one_pass_ff(input+index_high,taps,taps_length);
 		float register rate_between_samples=where-index_high+1;
 		output[oi++]=result_low*(1-rate_between_samples)+result_high*rate_between_samples;
@ -698,16 +698,16 @@ void apply_fir_fft_cc(FFT_PLAN_T* plan, FFT_PLAN_T* plan_inverse, complexf* taps
 	//multiply the filter and the input
 	complexf* in = plan->output;
 	complexf* out = plan_inverse->input;
-	
+
 	for(int i=0;i<plan->size;i++) //@apply_fir_fft_cc: multiplication
 	{
 		iof(out,i)=iof(in,i)*iof(taps_fft,i)-qof(in,i)*qof(taps_fft,i);
 		qof(out,i)=iof(in,i)*qof(taps_fft,i)+qof(in,i)*iof(taps_fft,i);
 	}
-	
+
 	//calculate inverse FFT on multiplied buffer
 	fft_execute(plan_inverse);
-	
+
 	//add the overlap of the previous segment
 	complexf* result = plan_inverse->output;

@ -716,35 +716,35 @@ void apply_fir_fft_cc(FFT_PLAN_T* plan, FFT_PLAN_T* plan_inverse, complexf* taps
 		iof(result,i)/=plan->size;
 		qof(result,i)/=plan->size;
 	}
-	
+
 	for(int i=0;i<overlap_size;i++) //@apply_fir_fft_cc: add overlap
 	{
 		iof(result,i)=iof(result,i)+iof(last_overlap,i);
 		qof(result,i)=qof(result,i)+qof(last_overlap,i);
 	}
-	
+
 }

 /*
-           __  __       _                          _       _       _                 
-     /\   |  \/  |     | |                        | |     | |     | |                
-    /  \  | \  / |   __| | ___ _ __ ___   ___   __| |_   _| | __ _| |_ ___  _ __ ___ 
+           __  __       _                          _       _       _
+     /\   |  \/  |     | |                        | |     | |     | |
+    /  \  | \  / |   __| | ___ _ __ ___   ___   __| |_   _| | __ _| |_ ___  _ __ ___
   / /\ \ | |\/| |  / _` |/ _ \ '_ ` _ \ / _ \ / _` | | | | |/ _` | __/ _ \| '__/ __|
  / ____ \| |  | | | (_| |  __/ | | | | | (_) | (_| | |_| | | (_| | || (_) | |  \__ \
 /_/    \_\_|  |_|  \__,_|\___|_| |_| |_|\___/ \__,_|\__,_|_|\__,_|\__\___/|_|  |___/
-                                                                                                                                                                        
+
 */

 void amdemod_cf(complexf* input, float *output, int input_size)
 {
 	//@amdemod: i*i+q*q
 	for (int i=0; i<input_size; i++)
-	{ 
+	{
 		output[i]=iof(input,i)*iof(input,i)+qof(input,i)*qof(input,i);
 	}
 	//@amdemod: sqrt
 	for (int i=0; i<input_size; i++)
-	{ 
+	{
 		output[i]=sqrt(output[i]);
 	}
 }
@ -755,7 +755,7 @@ void amdemod_estimator_cf(complexf* input, float *output, int input_size, float
 	//http://www.dspguru.com/dsp/tricks/magnitude-estimator

 	//default: optimize for min RMS error
-	if(alpha==0) 
+	if(alpha==0)
 	{
 		alpha=0.947543636291;
 		beta=0.392485425092;
@ -763,7 +763,7 @@ void amdemod_estimator_cf(complexf* input, float *output, int input_size, float

 	//@amdemod_estimator
 	for (int i=0; i<input_size; i++)
-	{ 
+	{
 		float abs_i=iof(input,i);
 		if(abs_i<0) abs_i=-abs_i;
 		float abs_q=qof(input,i);
@ -806,8 +806,8 @@ float fastdcblock_ff(float* input, float* output, int input_size, float last_dc_
 		avg+=input[i];
 	}
 	avg/=input_size;
-	
-	float avgdiff=avg-last_dc_level; 
+
+	float avgdiff=avg-last_dc_level;
 	//DC removal level will change lineraly from last_dc_level to avg.
 	for(int i=0;i<input_size;i++) //@fastdcblock_ff: remove DC component
 	{
@ -826,7 +826,7 @@ void fastagc_ff(fastagc_ff_t* input, float* output)
 	//You have to supply three blocks of samples before the first block comes out.
 	//AGC reaction speed equals input_size*samp_rate*2

-	//The algorithm calculates target gain at the end of the first block out of the peak value of all the three blocks. 
+	//The algorithm calculates target gain at the end of the first block out of the peak value of all the three blocks.
 	//This way the gain change can easily react if there is any peak in the third block.
 	//Pros: can be easily speeded up with loop vectorization, easy to implement
 	//Cons: needs 3 buffers, dos not behave similarly to real AGC circuits
@ -843,7 +843,7 @@ void fastagc_ff(fastagc_ff_t* input, float* output)
 	float target_peak=peak_input;
 	if(target_peak<input->peak_2) target_peak=input->peak_2;
 	if(target_peak<input->peak_1) target_peak=input->peak_1;
-	
+
 	//we change the gain linearly on the apply_block from the last_gain to target_gain.
 	float target_gain=input->reference/target_peak;
 	if(target_gain>FASTAGC_MAX_GAIN) target_gain=FASTAGC_MAX_GAIN;
@ -868,9 +868,9 @@ void fastagc_ff(fastagc_ff_t* input, float* output)
 }

 /*
-  ______ __  __        _                          _       _       _                 
- |  ____|  \/  |      | |                        | |     | |     | |                
- | |__  | \  / |    __| | ___ _ __ ___   ___   __| |_   _| | __ _| |_ ___  _ __ ___ 
+  ______ __  __        _                          _       _       _
+ |  ____|  \/  |      | |                        | |     | |     | |
+ | |__  | \  / |    __| | ___ _ __ ___   ___   __| |_   _| | __ _| |_ ___  _ __ ___
 |  __| | |\/| |   / _` |/ _ \ '_ ` _ \ / _ \ / _` | | | | |/ _` | __/ _ \| '__/ __|
 | |    | |  | |  | (_| |  __/ | | | | | (_) | (_| | |_| | | (_| | || (_) | |  \__ \
 |_|    |_|  |_|   \__,_|\___|_| |_| |_|\___/ \__,_|\__,_|_|\__,_|\__\___/|_|  |___/
@ -921,33 +921,33 @@ complexf fmdemod_quadri_cf(complexf* input, float* output, int input_size, float

 	temp_dq[0]=qof(input,0)-last_sample.q;
 	for (int i=1; i<input_size; i++) //@fmdemod_quadri_cf: dq
-	{		
+	{
 		temp_dq[i]=qof(input,i)-qof(input,i-1);
 	}

 	temp_di[0]=iof(input,0)-last_sample.i;
 	for (int i=1; i<input_size; i++) //@fmdemod_quadri_cf: di
-	{		
+	{
 		temp_di[i]=iof(input,i)-iof(input,i-1);
 	}

 	for (int i=0; i<input_size; i++) //@fmdemod_quadri_cf: output numerator
-	{		
+	{
 		output[i]=(iof(input,i)*temp_dq[i]-qof(input,i)*temp_di[i]);
 	}
 	for (int i=0; i<input_size; i++) //@fmdemod_quadri_cf: output denomiator
-	{		
+	{
 		temp[i]=iof(input,i)*iof(input,i)+qof(input,i)*qof(input,i);
-	}	
-	for (int i=0; i<input_size; i++) //@fmdemod_quadri_cf: output division
-	{		
-		output[i]=fmdemod_quadri_K*output[i]/temp[i];
 	}
-	
+	for (int i=0; i<input_size; i++) //@fmdemod_quadri_cf: output division
+	{
+		output[i]=(temp[i])?fmdemod_quadri_K*output[i]/temp[i]:0;
+	}
+
 	return input[input_size-1];
 }

-inline int is_nan(float f) 
+inline int is_nan(float f)
 {
 	//http://stackoverflow.com/questions/570669/checking-if-a-double-or-float-is-nan-in-c
    unsigned u = *(unsigned*)&f;
@ -957,7 +957,7 @@ inline int is_nan(float f)

 float deemphasis_wfm_ff (float* input, float* output, int input_size, float tau, int sample_rate, float last_output)
 {
-	/* 
+	/*
 		typical time constant (tau) values:
 		WFM transmission in USA: 75 us -> tau = 75e-6
 		WFM transmission in EU:  50 us -> tau = 50e-6
@ -967,7 +967,7 @@ float deemphasis_wfm_ff (float* input, float* output, int input_size, float tau,
 	float dt = 1.0/sample_rate;
 	float alpha = dt/(tau+dt);
 	if(is_nan(last_output)) last_output=0.0; //if last_output is NaN
-	output[0]=alpha*input[0]+(1-alpha)*last_output; 
+	output[0]=alpha*input[0]+(1-alpha)*last_output;
 	for (int i=1;i<input_size;i++) //@deemphasis_wfm_ff
       output[i]=alpha*input[i]+(1-alpha)*output[i-1]; //this is the simplest IIR LPF
   	return output[input_size-1];
@ -980,7 +980,7 @@ int deemphasis_nfm_ff (float* input, float* output, int input_size, int sample_r
 	/*
 		Warning! This only works on predefined samplerates, as it uses fixed FIR coefficients defined in predefined.h
 		However, there are the octave commands to generate the taps for your custom (fixed) sample rate.
-		What it does: 
+		What it does:
 			- reject below 400 Hz
 			- passband between 400 HZ - 4 kHz, but with 20 dB/decade rolloff (for deemphasis)
 			- reject everything above 4 kHz
@ -1007,10 +1007,10 @@ int deemphasis_nfm_ff (float* input, float* output, int input_size, int sample_r
 void limit_ff(float* input, float* output, int input_size, float max_amplitude)
 {
 	for (int i=0; i<input_size; i++) //@limit_ff
-	{		
+	{
 		output[i]=(max_amplitude<input[i])?max_amplitude:input[i];
 		output[i]=(-max_amplitude>output[i])?-max_amplitude:output[i];
-	}	
+	}
 }

 void gain_ff(float* input, float* output, int input_size, float gain)
@ -1018,20 +1018,40 @@ void gain_ff(float* input, float* output, int input_size, float gain)
 	for(int i=0;i<input_size;i++) output[i]=gain*input[i]; //@gain_ff
 }

+float get_power_f(float* input, int input_size, int decimation)
+{
+  float acc = 0;
+  for(int i=0;i<input_size;i+=decimation)
+  {
+    acc += (input[i]*input[i])/input_size;
+  }
+  return acc;
+}
+
+float get_power_c(complexf* input, int input_size, int decimation)
+{
+  float acc = 0;
+  for(int i=0;i<input_size;i+=decimation)
+  {
+    acc += (iof(input,i)*iof(input,i)+qof(input,i)*qof(input,i))/input_size;
+  }
+  return acc;
+}
+
 /*
-  __  __           _       _       _                 
- |  \/  |         | |     | |     | |                
- | \  / | ___   __| |_   _| | __ _| |_ ___  _ __ ___ 
+  __  __           _       _       _
+ |  \/  |         | |     | |     | |
+ | \  / | ___   __| |_   _| | __ _| |_ ___  _ __ ___
 | |\/| |/ _ \ / _` | | | | |/ _` | __/ _ \| '__/ __|
 | |  | | (_) | (_| | |_| | | (_| | || (_) | |  \__ \
 |_|  |_|\___/ \__,_|\__,_|_|\__,_|\__\___/|_|  |___/
-                                                                                                        
+
 */

 void add_dcoffset_cc(complexf* input, complexf* output, int input_size)
 {
-	for(int i=0;i<input_size;i++) iof(output,i)=0.5+iof(input,i)/2; 
-	for(int i=0;i<input_size;i++) qof(output,i)=qof(input,i)/2; 
+	for(int i=0;i<input_size;i++) iof(output,i)=0.5+iof(input,i)/2;
+	for(int i=0;i<input_size;i++) qof(output,i)=qof(input,i)/2;
 }

 float fmmod_fc(float* input, complexf* output, int input_size, float last_phase)
@ -1054,30 +1074,30 @@ void fixed_amplitude_cc(complexf* input, complexf* output, int input_size, float
 	{
 		//float phase=atan2(iof(input,i),qof(input,i));
 		//iof(output,i)=cos(phase)*amp;
-		//qof(output,i)=sin(phase)*amp;		
+		//qof(output,i)=sin(phase)*amp;

 		//A faster solution:
 		float amplitude_now = sqrt(iof(input,i)*iof(input,i)+qof(input,i)*qof(input,i));
 		float gain = (amplitude_now > 0) ? new_amplitude / amplitude_now : 0;
 		iof(output,i)=iof(input,i)*gain;
 		qof(output,i)=qof(input,i)*gain;
-	}	
+	}
 }

 /*
-  ______        _     ______               _             _______                   __                     
- |  ____|      | |   |  ____|             (_)           |__   __|                 / _|                    
- | |__ __ _ ___| |_  | |__ ___  _   _ _ __ _  ___ _ __     | |_ __ __ _ _ __  ___| |_ ___  _ __ _ __ ___  
- |  __/ _` / __| __| |  __/ _ \| | | | '__| |/ _ \ '__|    | | '__/ _` | '_ \/ __|  _/ _ \| '__| '_ ` _ \ 
+  ______        _     ______               _             _______                   __
+ |  ____|      | |   |  ____|             (_)           |__   __|                 / _|
+ | |__ __ _ ___| |_  | |__ ___  _   _ _ __ _  ___ _ __     | |_ __ __ _ _ __  ___| |_ ___  _ __ _ __ ___
+ |  __/ _` / __| __| |  __/ _ \| | | | '__| |/ _ \ '__|    | | '__/ _` | '_ \/ __|  _/ _ \| '__| '_ ` _ \
 | | | (_| \__ \ |_  | | | (_) | |_| | |  | |  __/ |       | | | | (_| | | | \__ \ || (_) | |  | | | | | |
- |_|  \__,_|___/\__| |_|  \___/ \__,_|_|  |_|\___|_|       |_|_|  \__,_|_| |_|___/_| \___/|_|  |_| |_| |_|                                                                                                                                                                                                                   
+ |_|  \__,_|___/\__| |_|  \___/ \__,_|_|  |_|\___|_|       |_|_|  \__,_|_| |_|___/_| \___/|_|  |_| |_| |_|

 */

 int log2n(int x)
 {
 	int result=-1;
-	for(int i=0;i<31;i++) 
+	for(int i=0;i<31;i++)
 	{
 		if((x>>i)&1) //@@log2n
 		{
@ -1092,7 +1112,7 @@ int next_pow2(int x)
 {
 	int pow2;
 	//portability? (31 is the problem)
-	for(int i=0;i<31;i++) 
+	for(int i=0;i<31;i++)
 	{
 		if(x<(pow2=1<<i)) return pow2; //@@next_pow2
 	}
@ -1110,6 +1130,29 @@ void apply_window_c(complexf* input, complexf* output, int size, window_t window
 	}
 }

+float *precalculate_window(int size, window_t window)
+{
+	float (*window_function)(float)=firdes_get_window_kernel(window);
+	float *windowt;
+	windowt = malloc(sizeof(float) * size);
+	for(int i=0;i<size;i++) //@precalculate_window
+	{
+		float rate=(float)i/(size-1);
+		windowt[i] = window_function(2.0*rate+1.0);
+	}
+	return windowt;
+}
+
+void apply_precalculated_window_c(complexf* input, complexf* output, int size, float *windowt)
+{
+	for(int i=0;i<size;i++) //@apply_precalculated_window_c
+	{
+		iof(output,i)=iof(input,i)*windowt[i];
+		qof(output,i)=qof(input,i)*windowt[i];
+	}
+}
+
+
 void apply_window_f(float* input, float* output, int size, window_t window)
 {
 	float (*window_function)(float)=firdes_get_window_kernel(window);
@ -1123,21 +1166,34 @@ void apply_window_f(float* input, float* output, int size, window_t window)
 void logpower_cf(complexf* input, float* output, int size, float add_db)
 {
 	for(int i=0;i<size;i++) output[i]=iof(input,i)*iof(input,i) + qof(input,i)*qof(input,i); //@logpower_cf: pass 1
-	
+
 	for(int i=0;i<size;i++) output[i]=log10(output[i]); //@logpower_cf: pass 2

 	for(int i=0;i<size;i++) output[i]=10*output[i]+add_db; //@logpower_cf: pass 3
 }

+void accumulate_power_cf(complexf* input, float* output, int size)
+{
+	for(int i=0;i<size;i++) output[i] += iof(input,i)*iof(input,i) + qof(input,i)*qof(input,i); //@logpower_cf: pass 1
+	
+}
+
+void log_ff(float* input, float* output, int size, float add_db) {
+	for(int i=0;i<size;i++) output[i]=log10(input[i]); //@logpower_cf: pass 2
+
+	for(int i=0;i<size;i++) output[i]=10*output[i]+add_db; //@logpower_cf: pass 3
+}
+
+
 /*
-  _____        _                                            _             
- |  __ \      | |                                          (_)            
- | |  | | __ _| |_ __ _    ___ ___  _ ____   _____ _ __ ___ _  ___  _ __  
- | |  | |/ _` | __/ _` |  / __/ _ \| '_ \ \ / / _ \ '__/ __| |/ _ \| '_ \ 
+  _____        _                                            _
+ |  __ \      | |                                          (_)
+ | |  | | __ _| |_ __ _    ___ ___  _ ____   _____ _ __ ___ _  ___  _ __
+ | |  | |/ _` | __/ _` |  / __/ _ \| '_ \ \ / / _ \ '__/ __| |/ _ \| '_ \
 | |__| | (_| | || (_| | | (_| (_) | | | \ V /  __/ |  \__ \ | (_) | | | |
 |_____/ \__,_|\__\__,_|  \___\___/|_| |_|\_/ \___|_|  |___/_|\___/|_| |_|
-                                                                          
-*/                                                                          
+
+*/

 void convert_u8_f(unsigned char* input, float* output, int input_size)
 {
@ -1149,15 +1205,15 @@ void convert_s8_f(signed char* input, float* output, int input_size)
 	for(int i=0;i<input_size;i++) output[i]=((float)input[i])/SCHAR_MAX; //@convert_s8_f
 }

-void convert_i16_f(short* input, float* output, int input_size)
+void convert_s16_f(short* input, float* output, int input_size)
 {
-	for(int i=0;i<input_size;i++) output[i]=(float)input[i]/SHRT_MAX; //@convert_i16_f
+	for(int i=0;i<input_size;i++) output[i]=(float)input[i]/SHRT_MAX; //@convert_s16_f
 }

 void convert_f_u8(float* input, unsigned char* output, int input_size)
 {
 	for(int i=0;i<input_size;i++) output[i]=input[i]*UCHAR_MAX*0.5+128; //@convert_f_u8
-	//128 above is the correct value to add. In any other case a DC component 
+	//128 above is the correct value to add. In any other case a DC component
 	//of at least -60 dB is shown on the FFT plot after convert_f_u8 -> convert_u8_f
 }

@ -1166,16 +1222,56 @@ void convert_f_s8(float* input, signed char* output, int input_size)
 	for(int i=0;i<input_size;i++) output[i]=input[i]*SCHAR_MAX; //@convert_f_s8
 }

-void convert_f_i16(float* input, short* output, int input_size)
+void convert_f_s16(float* input, short* output, int input_size)
 {
-	/*for(int i=0;i<input_size;i++) 
+	/*for(int i=0;i<input_size;i++)
 	{
 		if(input[i]>1.0) input[i]=1.0;
 		if(input[i]<-1.0) input[i]=-1.0;
 	}*/
-	for(int i=0;i<input_size;i++) output[i]=input[i]*SHRT_MAX; //@convert_f_i16
+	for(int i=0;i<input_size;i++) output[i]=input[i]*SHRT_MAX; //@convert_f_s16
 }

+void convert_i16_f(short* input, float* output, int input_size) { convert_s16_f(input, output, input_size); }
+void convert_f_i16(float* input, short* output, int input_size) { convert_f_s16(input, output, input_size); }
+
+void convert_f_s24(float* input, unsigned char* output, int input_size, int bigendian)
+{
+	int k=0;
+	if(bigendian) for(int i=0;i<input_size;i++)
+	{
+		int temp=input[i]*(INT_MAX>>8);
+		unsigned char* ptemp=(unsigned char*)&temp;
+		output[k++]=*ptemp;
+		output[k++]=*(ptemp+1);
+		output[k++]=*(ptemp+2);
+	}
+	else for(int i=0;i<input_size;i++)
+	{
+		int temp=input[i]*(INT_MAX>>8);
+		unsigned char* ptemp=(unsigned char*)&temp;
+		output[k++]=*(ptemp+2);
+		output[k++]=*(ptemp+1);
+		output[k++]=*ptemp;
+	}
+}
+
+void convert_s24_f(unsigned char* input, float* output, int input_size, int bigendian)
+{
+	int k=0;
+	if(bigendian) for(int i=0;i<input_size*3;i+=3)
+	{
+		int temp=(input[i+2]<<24)|(input[i+1]<<16)|(input[i]<<8);
+		output[k++]=temp/(float)(INT_MAX-256);
+	}
+	else for(int i=0;i<input_size*3;i+=3)
+	{
+		int temp=(input[i+2]<<8)|(input[i+1]<<16)|(input[i]<<24);
+		output[k++]=temp/(float)(INT_MAX-256);
+	}
+}
+
+
 int trivial_vectorize()
 {
 	//this function is trivial to vectorize and should pass on both NEON and SSE
@ -1186,5 +1282,3 @@ int trivial_vectorize()
 	}
 	return c[0];
 }
-
-
--- a/libcsdr.h
+++ b/libcsdr.h
@ -1,5 +1,5 @@
 /*
-This software is part of libcsdr, a set of simple DSP routines for 
+This software is part of libcsdr, a set of simple DSP routines for
 Software Defined Radio.

 Copyright (c) 2014, Andras Retzler <randras@sdr.hu>
@ -32,14 +32,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define MIN_M(x,y) (((x)>(y))?(y):(x))

 /*
-   _____                      _                                                 
-  / ____|                    | |                                                
- | |     ___  _ __ ___  _ __ | | _____  __ 
- | |    / _ \| '_ ` _ \| '_ \| |/ _ \ \/ / 
- | |___| (_) | | | | | | |_) | |  __/>  <  
-  \_____\___/|_| |_| |_| .__/|_|\___/_/\_\ 
-                       | |                  
-                       |_|                  
+   _____                      _
+  / ____|                    | |
+ | |     ___  _ __ ___  _ __ | | _____  __
+ | |    / _ \| '_ ` _ \| '_ \| |/ _ \ \/ /
+ | |___| (_) | | | | | | |_) | |  __/>  <
+  \_____\___/|_| |_| |_| .__/|_|\___/_/\_\
+                       | |
+                       |_|
 */

 typedef struct complexf_s { float i; float q; } complexf;
@ -66,7 +66,7 @@ typedef struct complexf_s { float i; float q; } complexf;
 #define TIME_TAKEN(start,end) ((end.tv_sec-start.tv_sec)+(end.tv_nsec-start.tv_nsec)/1e9)

 //window
-typedef enum window_s 
+typedef enum window_s
 {
 	WINDOW_BOXCAR, WINDOW_BLACKMAN, WINDOW_HAMMING
 } window_t;
@ -115,7 +115,7 @@ float fastdcblock_ff(float* input, float* output, int input_size, float last_dc_

 typedef struct fastagc_ff_s
 {
-	float* buffer_1; 
+	float* buffer_1;
 	float* buffer_2;
 	float* buffer_input; //it is the actual input buffer to fill
 	float peak_1;
@ -137,9 +137,13 @@ typedef struct rational_resampler_ff_s
 rational_resampler_ff_t rational_resampler_ff(float *input, float *output, int input_size, int interpolation, int decimation, float *taps, int taps_length, int last_taps_delay);
 void rational_resampler_get_lowpass_f(float* output, int output_size, int interpolation, int decimation, window_t window);

+float *precalculate_window(int size, window_t window);
 void apply_window_c(complexf* input, complexf* output, int size, window_t window);
+void apply_precalculated_window_c(complexf* input, complexf* output, int size, float *windowt);
 void apply_window_f(float* input, float* output, int size, window_t window);
 void logpower_cf(complexf* input, float* output, int size, float add_db);
+void accumulate_power_cf(complexf* input, float* output, int size);
+void log_ff(float* input, float* output, int size, float add_db);

 typedef struct fractional_decimator_ff_s
 {
@ -152,7 +156,7 @@ fractional_decimator_ff_t fractional_decimator_ff(float* input, float* output, i
 typedef struct shift_table_data_s
 {
 	float* table;
-	int table_size;	
+	int table_size;
 } shift_table_data_t;
 void shift_table_deinit(shift_table_data_t table_data);
 shift_table_data_t shift_table_init(int table_size);
@ -182,6 +186,8 @@ int log2n(int x);
 int next_pow2(int x);
 void apply_fir_fft_cc(FFT_PLAN_T* plan, FFT_PLAN_T* plan_inverse, complexf* taps_fft, complexf* last_overlap, int overlap_size);
 void gain_ff(float* input, float* output, int input_size, float gain);
+float get_power_f(float* input, int input_size, int decimation);
+float get_power_c(complexf* input, int input_size, int decimation);

 void add_dcoffset_cc(complexf* input, complexf* output, int input_size);
 float fmmod_fc(float* input, complexf* output, int input_size, float last_phase);
@ -191,7 +197,12 @@ void convert_u8_f(unsigned char* input, float* output, int input_size);
 void convert_f_u8(float* input, unsigned char* output, int input_size);
 void convert_s8_f(signed char* input, float* output, int input_size);
 void convert_f_s8(float* input, signed char* output, int input_size);
+void convert_f_s16(float* input, short* output, int input_size);
+void convert_s16_f(short* input, float* output, int input_size);
 void convert_f_i16(float* input, short* output, int input_size);
 void convert_i16_f(short* input, float* output, int input_size);
+void convert_f_s24(float* input, unsigned char* output, int input_size, int bigendian);
+void convert_s24_f(unsigned char* input, float* output, int input_size, int bigendian);
+

 int is_nan(float f);
--- a/libcsdr_gpl.c
+++ b/libcsdr_gpl.c
@ -47,7 +47,7 @@ float shift_addition_cc(complexf *input, complexf* output, int input_size, shift
 	}
 	starting_phase+=d.rate*PI*input_size;
 	while(starting_phase>PI) starting_phase-=2*PI; //@shift_addition_cc: normalize starting_phase
-	while(starting_phase<-PI) starting_phase+=2*PI; 
+	while(starting_phase<-PI) starting_phase+=2*PI;
 	return starting_phase;
 }

@ -82,12 +82,12 @@ void shift_addition_cc_test(shift_addition_data_t d)
 		sinphi=sinphi_last*d.cosdelta+cosphi_last*d.sindelta;
 		phi+=d.rate*PI;
 		while(phi>2*PI) phi-=2*PI; //@shift_addition_cc: normalize phase
-		if(i%SACCTEST_STEP==0) 
+		if(i%SACCTEST_STEP==0)
 		{
 			avg_counter=avg_size;
 			avg=0;
 		}
-		if(avg_counter) 
+		if(avg_counter)
 		{
 			avg+=fabs(cosphi-cos(phi));
 			if(!--avg_counter) printf("%g ", avg/avg_size);
@ -128,7 +128,7 @@ decimating_shift_addition_status_t decimating_shift_addition_cc(complexf *input,
 	s.starting_phase+=d.rate*PI*k;
 	s.output_size=k;
 	while(s.starting_phase>PI) s.starting_phase-=2*PI; //@shift_addition_cc: normalize starting_phase
-	while(s.starting_phase<-PI) s.starting_phase+=2*PI; 
+	while(s.starting_phase<-PI) s.starting_phase+=2*PI;
 	return s;
 }

@ -142,10 +142,10 @@ float agc_ff(float* input, float* output, int input_size, float reference, float
 			hang_time = (hang_time_ms / 1000) * sample_rate
 				hang_time is given in samples, and should be about 4ms.
 				hang_time can be switched off by setting it to zero (not recommended).
-			
+
 			max_gain = pow(2, adc_bits)
-				max_gain should be no more than the dynamic range of your A/D converter. 
-			gain_filter_alpha = 1 / ((fs/(2*PI*fc))+1) 
+				max_gain should be no more than the dynamic range of your A/D converter.
+			gain_filter_alpha = 1 / ((fs/(2*PI*fc))+1)

 			>>> 1 / ((48000./(2*3.141592654*100))+1)
 			0.012920836043344543
@ -153,7 +153,7 @@ float agc_ff(float* input, float* output, int input_size, float reference, float
 			0.0013072857061786625


-		Literature: 
+		Literature:
 			ww.qsl.net/va3iul/Files/Automatic_Gain_Control.pdf
 			page 7 of http://www.arrl.org/files/file/Technology/tis/info/pdf/021112qex027.pdf

@ -170,10 +170,10 @@ float agc_ff(float* input, float* output, int input_size, float reference, float
 	output[0]=last_gain*input[0]; //we skip this one sample, because it is easier this way
 	for(int i=1;i<input_size;i++) //@agc_ff
 	{
-		//The error is the difference between the required gain at the actual sample, and the previous gain value. 
+		//The error is the difference between the required gain at the actual sample, and the previous gain value.
 		//We actually use an envelope detector.
 		input_abs=fabs(input[i]);
-		error=reference/input_abs-gain; 
+		error=reference/input_abs-gain;

 		if(input[i]!=0) //We skip samples containing 0, as the gain would be infinity for those to keep up with the reference.
 		{
@ -184,12 +184,12 @@ float agc_ff(float* input, float* output, int input_size, float reference, float
 			//However, attack_rate should be higher than the decay_rate as we want to avoid clipping signals.
 			//that had a sudden increase in their amplitude.
 			//It's also important to note that this algorithm has an exponential gain ramp.
-			
+
 			if(error<0) //INCREASE IN SIGNAL LEVEL
 			{
-				if(last_peak<input_abs) 
+				if(last_peak<input_abs)
 				{
-					
+
 					attack_wait_counter=attack_wait_time;
 					last_peak=input_abs;
 				}
@ -201,11 +201,11 @@ float agc_ff(float* input, float* output, int input_size, float reference, float
 				}
 				else
 				{
-					//If the signal level increases, we decrease the gain quite fast.			
-					dgain=error*attack_rate; 
+					//If the signal level increases, we decrease the gain quite fast.
+					dgain=error*attack_rate;
 					//Before starting to increase the gain next time, we will be waiting until hang_time for sure.
 					hang_counter=hang_time;
-					
+
 				}
 			}
 			else //DECREASE IN SIGNAL LEVEL
@ -216,13 +216,12 @@ float agc_ff(float* input, float* output, int input_size, float reference, float
 					dgain=0; //..until then, AGC is inactive and gain doesn't change.
 				}
 				else dgain=error*decay_rate; //If the signal level decreases, we increase the gain quite slowly.
-			} 
+			}
 			gain=gain+dgain;
 			//fprintf(stderr,"g=%f dg=%f\n",gain,dgain);
-
-			if(gain>max_gain) gain=max_gain; //We also have to limit our gain, it can't be infinity.
-			if(gain<0) gain=0;
 		}
+		if(gain>max_gain) gain=max_gain; //We also have to limit our gain, it can't be infinity.
+		if(gain<0) gain=0;
 		//output[i]=gain*input[i]; //Here we do the actual scaling of the samples.
 		//Here we do the actual scaling of the samples, but we run an IIR filter on the gain values:
 		output[i]=(gain=gain+last_gain-gain_filter_alpha*last_gain)*input[i]; //dc-pass-filter: freqz([1 -1],[1 -0.99]) y[i]=x[i]+y[i-1]-alpha*x[i-1]
@ -230,8 +229,7 @@ float agc_ff(float* input, float* output, int input_size, float reference, float

 		last_gain=gain;
 	}
-	return gain; //this will be the last_gain next time 
+	return gain; //this will be the last_gain next time
 }

 #endif
-