diff --git a/Makefile b/Makefile
index cd9ab62..c9292a6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,13 +1,18 @@
 CXXFLAGS = -std=c++14 -I.
 LDFLAGS = -lm
 
-gen_ft8: gen_ft8.o ft8/encode.o ft8/pack.o ft8/text.o ft8/pack_v2.o ft8/encode_v2.o common/wave.o
-	$(CXX) $(LDFLAGS) -o $@ $^
+.PHONY: run_tests all
 
-.PHONY: run_tests
+all: gen_ft8 wav_decode test
 
 run_tests: test
 	@./test
 
+gen_ft8: gen_ft8.o ft8/encode.o ft8/pack.o ft8/text.o ft8/pack_v2.o ft8/encode_v2.o common/wave.o
+	$(CXX) $(LDFLAGS) -o $@ $^
+
 test:  test.o ft8/encode.o ft8/pack.o ft8/text.o ft8/pack_v2.o ft8/encode_v2.o ft8/unpack.o
 	$(CXX) $(LDFLAGS) -o $@ $^
+
+decode_ft8: decode_ft8.o fft/kiss_fftr.o fft/kiss_fft.o ft8/ldpc.o common/wave.o
+	$(CXX) $(LDFLAGS) -o $@ $^
\ No newline at end of file
diff --git a/README.md b/README.md
index b9fada9..65ceb31 100644
--- a/README.md
+++ b/README.md
@@ -25,10 +25,10 @@ I am working on the revised FT8 protocol with 77-bit payload (introduced since W
 
 # What doesn't
 
-* Encoding contest mode message
+* Encoding contest mode messages
 * Encoding extended range signal reports (<-30dB or >=0dB S/N)
 * Encoding compound callsigns with country prefixes and mode suffixes
-* Decoding
+* Decoding (working on it)
 
 # What to do with it
 
@@ -40,5 +40,7 @@ Thanks to Robert Morris, AB1HL, whose Python code (https://github.com/rtmrtmrtmr
 
 This would not of course be possible without the original WSJT-X code, which is mostly written in Fortran (http://physics.princeton.edu/pulsar/K1JT/wsjtx.html). I believe that is the only 'documentation' of the FT8 protocol available, and the source code was used as such in this project.
 
+Thanks to Mark Borgerding for his FFT implementation (https://github.com/mborgerding/kissfft). I have included a portion of his code.
+
 Karlis Goba,
 YL3JG
diff --git a/common/wave.cpp b/common/wave.cpp
index 0f98408..516a1fa 100644
--- a/common/wave.cpp
+++ b/common/wave.cpp
@@ -9,7 +9,6 @@
 
 // Save signal in floating point format (-1 .. +1) as a WAVE file using 16-bit signed integers.
 void save_wav(const float *signal, int num_samples, int sample_rate, const char *path) {
-    FILE *f = fopen(path, "wb");
     char subChunk1ID[4] = {'f', 'm', 't', ' '};
     uint32_t subChunk1Size = 16;    // 16 for PCM
     uint16_t audioFormat = 1;       // PCM = 1
@@ -34,6 +33,8 @@ void save_wav(const float *signal, int num_samples, int sample_rate, const char
         raw_data[i] = int(0.5 + (x * 32767.0));
     }
 
+    FILE *f = fopen(path, "wb");
+
     // NOTE: works only on little-endian architecture
     fwrite(chunkID, sizeof(chunkID), 1, f);
     fwrite(&chunkSize, sizeof(chunkSize), 1, f);
@@ -57,3 +58,64 @@ void save_wav(const float *signal, int num_samples, int sample_rate, const char
 
     free(raw_data);
 }
+
+
+// Load signal in floating point format (-1 .. +1) as a WAVE file using 16-bit signed integers.
+int load_wav(float *signal, int &num_samples, int &sample_rate, const char *path) {
+    char subChunk1ID[4];    // = {'f', 'm', 't', ' '};
+    uint32_t subChunk1Size; // = 16;    // 16 for PCM
+    uint16_t audioFormat;   // = 1;     // PCM = 1
+    uint16_t numChannels;   // = 1;
+    uint16_t bitsPerSample; // = 16;
+    uint32_t sampleRate;
+    uint16_t blockAlign;    // = numChannels * bitsPerSample / 8;
+    uint32_t byteRate;      // = sampleRate * blockAlign;
+
+    char subChunk2ID[4];    // = {'d', 'a', 't', 'a'};
+    uint32_t subChunk2Size; // = num_samples * blockAlign;
+
+    char chunkID[4];        // = {'R', 'I', 'F', 'F'};
+    uint32_t chunkSize;     // = 4 + (8 + subChunk1Size) + (8 + subChunk2Size);
+    char format[4];         // = {'W', 'A', 'V', 'E'};
+
+    FILE *f = fopen(path, "rb");
+
+    // NOTE: works only on little-endian architecture
+    fread((void *)chunkID, sizeof(chunkID), 1, f);
+    fread((void *)&chunkSize, sizeof(chunkSize), 1, f);
+    fread((void *)format, sizeof(format), 1, f);
+
+    fread((void *)subChunk1ID, sizeof(subChunk1ID), 1, f);
+    fread((void *)&subChunk1Size, sizeof(subChunk1Size), 1, f);
+    if (subChunk1Size != 16) return -1;
+
+    fread((void *)&audioFormat, sizeof(audioFormat), 1, f);
+    fread((void *)&numChannels, sizeof(numChannels), 1, f);
+    fread((void *)&sampleRate, sizeof(sampleRate), 1, f);
+    fread((void *)&byteRate, sizeof(byteRate), 1, f);
+    fread((void *)&blockAlign, sizeof(blockAlign), 1, f);
+    fread((void *)&bitsPerSample, sizeof(bitsPerSample), 1, f);
+
+    if (audioFormat != 1 || numChannels != 1 || bitsPerSample != 16) return -1;
+
+    fread((void *)subChunk2ID, sizeof(subChunk2ID), 1, f);
+    fread((void *)&subChunk2Size, sizeof(subChunk2Size), 1, f);
+
+    if (subChunk2Size / blockAlign > num_samples) return -2;
+    
+    num_samples = subChunk2Size / blockAlign;
+    sample_rate = sampleRate;
+
+    int16_t *raw_data = (int16_t *)malloc(num_samples * blockAlign);
+
+    fread((void *)raw_data, blockAlign, num_samples, f);
+    for (int i = 0; i < num_samples; i++) {
+        signal[i] = raw_data[i] / 32768.0f;
+    }
+    
+    free(raw_data);
+
+    fclose(f);
+
+    return 0;
+}
diff --git a/common/wave.h b/common/wave.h
index 88906e1..72bb30d 100644
--- a/common/wave.h
+++ b/common/wave.h
@@ -6,4 +6,4 @@ void save_wav(const float *signal, int num_samples, int sample_rate, const char
 
 
 // Load signal in floating point format (-1 .. +1) as a WAVE file using 16-bit signed integers.
-void load_wav(float *signal, int &num_samples, int &sample_rate, const char *path);
+int load_wav(float *signal, int &num_samples, int &sample_rate, const char *path);
diff --git a/decode_ft8.cpp b/decode_ft8.cpp
new file mode 100644
index 0000000..89955f0
--- /dev/null
+++ b/decode_ft8.cpp
@@ -0,0 +1,63 @@
+#include <cstdlib>
+#include <cstring>
+#include <cstdio>
+#include <cmath>
+
+#include "common/wave.h"
+#include "ft8/pack.h"
+#include "ft8/encode.h"
+#include "ft8/pack_v2.h"
+#include "ft8/encode_v2.h"
+
+#include "ft8/ldpc.h"
+#include "fft/kiss_fftr.h"
+
+
+void usage() {
+    printf("Decode a 15-second WAV file.\n");
+}
+
+
+float hann_i(int i, int N) {
+    float x = sinf((float)M_PI * i / (N - 1));
+    return x*x;
+}
+
+
+int main(int argc, char **argv) {
+    // Expect one command-line argument
+    if (argc < 2) {
+        usage();
+        return -1;
+    }
+
+    const char *wav_path = argv[1];
+
+    int sample_rate = 12000;
+    int num_samples = 15 * sample_rate;
+    float signal[num_samples];
+
+    int rc = load_wav(signal, num_samples, sample_rate, wav_path);
+    if (rc < 0) {
+        return -1;
+    }
+
+    //return 0;
+
+    const int nfft = 2 * (int)(sample_rate / 6.25);     // 2 bins per FSK tone
+
+    size_t fft_work_size;
+    kiss_fftr_alloc(nfft, 0, 0, &fft_work_size);
+
+    printf("N_FFT = %d\n", nfft);
+    printf("FFT work area = %lu\n", fft_work_size);
+
+    void *fft_work = malloc(fft_work_size);
+    kiss_fftr_cfg fft_cfg = kiss_fftr_alloc(nfft, 0, fft_work, &fft_work_size);
+
+    kiss_fft_scalar timedata[nfft];
+    kiss_fft_cpx    freqdata[nfft/2 + 1];
+    kiss_fftr(fft_cfg, timedata, freqdata);
+
+    return 0;
+}
\ No newline at end of file
diff --git a/fft/_kiss_fft_guts.h b/fft/_kiss_fft_guts.h
new file mode 100644
index 0000000..bf5d53c
--- /dev/null
+++ b/fft/_kiss_fft_guts.h
@@ -0,0 +1,158 @@
+/*
+ *  Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+/* kiss_fft.h
+   defines kiss_fft_scalar as either short or a float type
+   and defines
+   typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
+#include "kiss_fft.h"
+#include <limits.h>
+
+#define MAXFACTORS 32
+/* e.g. an fft of length 128 has 4 factors 
+ as far as kissfft is concerned
+ 4*4*4*2
+ */
+
+struct kiss_fft_state{
+    int nfft;
+    int inverse;
+    int factors[2*MAXFACTORS];
+    kiss_fft_cpx twiddles[1];
+};
+
+/*
+  Explanation of macros dealing with complex math:
+
+   C_MUL(m,a,b)         : m = a*b
+   C_FIXDIV( c , div )  : if a fixed point impl., c /= div. noop otherwise
+   C_SUB( res, a,b)     : res = a - b
+   C_SUBFROM( res , a)  : res -= a
+   C_ADDTO( res , a)    : res += a
+ * */
+#ifdef FIXED_POINT
+#if (FIXED_POINT==32)
+# define FRACBITS 31
+# define SAMPPROD int64_t
+#define SAMP_MAX 2147483647
+#else
+# define FRACBITS 15
+# define SAMPPROD int32_t 
+#define SAMP_MAX 32767
+#endif
+
+#define SAMP_MIN -SAMP_MAX
+
+#if defined(CHECK_OVERFLOW)
+#  define CHECK_OVERFLOW_OP(a,op,b)  \
+	if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \
+		fprintf(stderr,"WARNING:overflow @ " __FILE__ "(%d): (%d " #op" %d) = %ld\n",__LINE__,(a),(b),(SAMPPROD)(a) op (SAMPPROD)(b) );  }
+#endif
+
+
+#   define smul(a,b) ( (SAMPPROD)(a)*(b) )
+#   define sround( x )  (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS )
+
+#   define S_MUL(a,b) sround( smul(a,b) )
+
+#   define C_MUL(m,a,b) \
+      do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
+          (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)
+
+#   define DIVSCALAR(x,k) \
+	(x) = sround( smul(  x, SAMP_MAX/k ) )
+
+#   define C_FIXDIV(c,div) \
+	do {    DIVSCALAR( (c).r , div);  \
+		DIVSCALAR( (c).i  , div); }while (0)
+
+#   define C_MULBYSCALAR( c, s ) \
+    do{ (c).r =  sround( smul( (c).r , s ) ) ;\
+        (c).i =  sround( smul( (c).i , s ) ) ; }while(0)
+
+#else  /* not FIXED_POINT*/
+
+#   define S_MUL(a,b) ( (a)*(b) )
+#define C_MUL(m,a,b) \
+    do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
+        (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
+#   define C_FIXDIV(c,div) /* NOOP */
+#   define C_MULBYSCALAR( c, s ) \
+    do{ (c).r *= (s);\
+        (c).i *= (s); }while(0)
+#endif
+
+#ifndef CHECK_OVERFLOW_OP
+#  define CHECK_OVERFLOW_OP(a,op,b) /* noop */
+#endif
+
+#define  C_ADD( res, a,b)\
+    do { \
+	    CHECK_OVERFLOW_OP((a).r,+,(b).r)\
+	    CHECK_OVERFLOW_OP((a).i,+,(b).i)\
+	    (res).r=(a).r+(b).r;  (res).i=(a).i+(b).i; \
+    }while(0)
+#define  C_SUB( res, a,b)\
+    do { \
+	    CHECK_OVERFLOW_OP((a).r,-,(b).r)\
+	    CHECK_OVERFLOW_OP((a).i,-,(b).i)\
+	    (res).r=(a).r-(b).r;  (res).i=(a).i-(b).i; \
+    }while(0)
+#define C_ADDTO( res , a)\
+    do { \
+	    CHECK_OVERFLOW_OP((res).r,+,(a).r)\
+	    CHECK_OVERFLOW_OP((res).i,+,(a).i)\
+	    (res).r += (a).r;  (res).i += (a).i;\
+    }while(0)
+
+#define C_SUBFROM( res , a)\
+    do {\
+	    CHECK_OVERFLOW_OP((res).r,-,(a).r)\
+	    CHECK_OVERFLOW_OP((res).i,-,(a).i)\
+	    (res).r -= (a).r;  (res).i -= (a).i; \
+    }while(0)
+
+
+#ifdef FIXED_POINT
+#  define KISS_FFT_COS(phase)  floor(.5+SAMP_MAX * cos (phase))
+#  define KISS_FFT_SIN(phase)  floor(.5+SAMP_MAX * sin (phase))
+#  define HALF_OF(x) ((x)>>1)
+#elif defined(USE_SIMD)
+#  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
+#  define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
+#  define HALF_OF(x) ((x)*_mm_set1_ps(.5))
+#else
+#  define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
+#  define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
+#  define HALF_OF(x) ((x)*.5)
+#endif
+
+#define  kf_cexp(x,phase) \
+	do{ \
+		(x)->r = KISS_FFT_COS(phase);\
+		(x)->i = KISS_FFT_SIN(phase);\
+	}while(0)
+
+
+/* a debugging function */
+#define pcpx(c)\
+    fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) )
+
+
+#ifdef KISS_FFT_USE_ALLOCA
+// define this to allow use of alloca instead of malloc for temporary buffers
+// Temporary buffers are used in two case: 
+// 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5
+// 2. "in-place" FFTs.  Notice the quotes, since kissfft does not really do an in-place transform.
+#include <alloca.h>
+#define  KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)
+#define  KISS_FFT_TMP_FREE(ptr) 
+#else
+#define  KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes)
+#define  KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr)
+#endif
diff --git a/fft/kiss_fft.c b/fft/kiss_fft.c
new file mode 100644
index 0000000..af2f695
--- /dev/null
+++ b/fft/kiss_fft.c
@@ -0,0 +1,402 @@
+/*
+ *  Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+
+#include "_kiss_fft_guts.h"
+/* The guts header contains all the multiplication and addition macros that are defined for
+ fixed or floating point complex numbers.  It also delares the kf_ internal functions.
+ */
+
+static void kf_bfly2(
+        kiss_fft_cpx * Fout,
+        const size_t fstride,
+        const kiss_fft_cfg st,
+        int m
+        )
+{
+    kiss_fft_cpx * Fout2;
+    kiss_fft_cpx * tw1 = st->twiddles;
+    kiss_fft_cpx t;
+    Fout2 = Fout + m;
+    do{
+        C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
+
+        C_MUL (t,  *Fout2 , *tw1);
+        tw1 += fstride;
+        C_SUB( *Fout2 ,  *Fout , t );
+        C_ADDTO( *Fout ,  t );
+        ++Fout2;
+        ++Fout;
+    }while (--m);
+}
+
+static void kf_bfly4(
+        kiss_fft_cpx * Fout,
+        const size_t fstride,
+        const kiss_fft_cfg st,
+        const size_t m
+        )
+{
+    kiss_fft_cpx *tw1,*tw2,*tw3;
+    kiss_fft_cpx scratch[6];
+    size_t k=m;
+    const size_t m2=2*m;
+    const size_t m3=3*m;
+
+
+    tw3 = tw2 = tw1 = st->twiddles;
+
+    do {
+        C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
+
+        C_MUL(scratch[0],Fout[m] , *tw1 );
+        C_MUL(scratch[1],Fout[m2] , *tw2 );
+        C_MUL(scratch[2],Fout[m3] , *tw3 );
+
+        C_SUB( scratch[5] , *Fout, scratch[1] );
+        C_ADDTO(*Fout, scratch[1]);
+        C_ADD( scratch[3] , scratch[0] , scratch[2] );
+        C_SUB( scratch[4] , scratch[0] , scratch[2] );
+        C_SUB( Fout[m2], *Fout, scratch[3] );
+        tw1 += fstride;
+        tw2 += fstride*2;
+        tw3 += fstride*3;
+        C_ADDTO( *Fout , scratch[3] );
+
+        if(st->inverse) {
+            Fout[m].r = scratch[5].r - scratch[4].i;
+            Fout[m].i = scratch[5].i + scratch[4].r;
+            Fout[m3].r = scratch[5].r + scratch[4].i;
+            Fout[m3].i = scratch[5].i - scratch[4].r;
+        }else{
+            Fout[m].r = scratch[5].r + scratch[4].i;
+            Fout[m].i = scratch[5].i - scratch[4].r;
+            Fout[m3].r = scratch[5].r - scratch[4].i;
+            Fout[m3].i = scratch[5].i + scratch[4].r;
+        }
+        ++Fout;
+    }while(--k);
+}
+
+static void kf_bfly3(
+         kiss_fft_cpx * Fout,
+         const size_t fstride,
+         const kiss_fft_cfg st,
+         size_t m
+         )
+{
+     size_t k=m;
+     const size_t m2 = 2*m;
+     kiss_fft_cpx *tw1,*tw2;
+     kiss_fft_cpx scratch[5];
+     kiss_fft_cpx epi3;
+     epi3 = st->twiddles[fstride*m];
+
+     tw1=tw2=st->twiddles;
+
+     do{
+         C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
+
+         C_MUL(scratch[1],Fout[m] , *tw1);
+         C_MUL(scratch[2],Fout[m2] , *tw2);
+
+         C_ADD(scratch[3],scratch[1],scratch[2]);
+         C_SUB(scratch[0],scratch[1],scratch[2]);
+         tw1 += fstride;
+         tw2 += fstride*2;
+
+         Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
+         Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+
+         C_MULBYSCALAR( scratch[0] , epi3.i );
+
+         C_ADDTO(*Fout,scratch[3]);
+
+         Fout[m2].r = Fout[m].r + scratch[0].i;
+         Fout[m2].i = Fout[m].i - scratch[0].r;
+
+         Fout[m].r -= scratch[0].i;
+         Fout[m].i += scratch[0].r;
+
+         ++Fout;
+     }while(--k);
+}
+
+static void kf_bfly5(
+        kiss_fft_cpx * Fout,
+        const size_t fstride,
+        const kiss_fft_cfg st,
+        int m
+        )
+{
+    kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+    int u;
+    kiss_fft_cpx scratch[13];
+    kiss_fft_cpx * twiddles = st->twiddles;
+    kiss_fft_cpx *tw;
+    kiss_fft_cpx ya,yb;
+    ya = twiddles[fstride*m];
+    yb = twiddles[fstride*2*m];
+
+    Fout0=Fout;
+    Fout1=Fout0+m;
+    Fout2=Fout0+2*m;
+    Fout3=Fout0+3*m;
+    Fout4=Fout0+4*m;
+
+    tw=st->twiddles;
+    for ( u=0; u<m; ++u ) {
+        C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
+        scratch[0] = *Fout0;
+
+        C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+        C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+        C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+        C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+        C_ADD( scratch[7],scratch[1],scratch[4]);
+        C_SUB( scratch[10],scratch[1],scratch[4]);
+        C_ADD( scratch[8],scratch[2],scratch[3]);
+        C_SUB( scratch[9],scratch[2],scratch[3]);
+
+        Fout0->r += scratch[7].r + scratch[8].r;
+        Fout0->i += scratch[7].i + scratch[8].i;
+
+        scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
+        scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
+
+        scratch[6].r =  S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
+        scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
+
+        C_SUB(*Fout1,scratch[5],scratch[6]);
+        C_ADD(*Fout4,scratch[5],scratch[6]);
+
+        scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
+        scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
+        scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
+        scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
+
+        C_ADD(*Fout2,scratch[11],scratch[12]);
+        C_SUB(*Fout3,scratch[11],scratch[12]);
+
+        ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+    }
+}
+
+/* perform the butterfly for one stage of a mixed radix FFT */
+static void kf_bfly_generic(
+        kiss_fft_cpx * Fout,
+        const size_t fstride,
+        const kiss_fft_cfg st,
+        int m,
+        int p
+        )
+{
+    int u,k,q1,q;
+    kiss_fft_cpx * twiddles = st->twiddles;
+    kiss_fft_cpx t;
+    int Norig = st->nfft;
+
+    kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
+
+    for ( u=0; u<m; ++u ) {
+        k=u;
+        for ( q1=0 ; q1<p ; ++q1 ) {
+            scratch[q1] = Fout[ k  ];
+            C_FIXDIV(scratch[q1],p);
+            k += m;
+        }
+
+        k=u;
+        for ( q1=0 ; q1<p ; ++q1 ) {
+            int twidx=0;
+            Fout[ k ] = scratch[0];
+            for (q=1;q<p;++q ) {
+                twidx += fstride * k;
+                if (twidx>=Norig) twidx-=Norig;
+                C_MUL(t,scratch[q] , twiddles[twidx] );
+                C_ADDTO( Fout[ k ] ,t);
+            }
+            k += m;
+        }
+    }
+    KISS_FFT_TMP_FREE(scratch);
+}
+
+static
+void kf_work(
+        kiss_fft_cpx * Fout,
+        const kiss_fft_cpx * f,
+        const size_t fstride,
+        int in_stride,
+        int * factors,
+        const kiss_fft_cfg st
+        )
+{
+    kiss_fft_cpx * Fout_beg=Fout;
+    const int p=*factors++; /* the radix  */
+    const int m=*factors++; /* stage's fft length/p */
+    const kiss_fft_cpx * Fout_end = Fout + p*m;
+
+#ifdef _OPENMP
+    // use openmp extensions at the 
+    // top-level (not recursive)
+    if (fstride==1 && p<=5)
+    {
+        int k;
+
+        // execute the p different work units in different threads
+#       pragma omp parallel for
+        for (k=0;k<p;++k) 
+            kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
+        // all threads have joined by this point
+
+        switch (p) {
+            case 2: kf_bfly2(Fout,fstride,st,m); break;
+            case 3: kf_bfly3(Fout,fstride,st,m); break; 
+            case 4: kf_bfly4(Fout,fstride,st,m); break;
+            case 5: kf_bfly5(Fout,fstride,st,m); break; 
+            default: kf_bfly_generic(Fout,fstride,st,m,p); break;
+        }
+        return;
+    }
+#endif
+
+    if (m==1) {
+        do{
+            *Fout = *f;
+            f += fstride*in_stride;
+        }while(++Fout != Fout_end );
+    }else{
+        do{
+            // recursive call:
+            // DFT of size m*p performed by doing
+            // p instances of smaller DFTs of size m, 
+            // each one takes a decimated version of the input
+            kf_work( Fout , f, fstride*p, in_stride, factors,st);
+            f += fstride*in_stride;
+        }while( (Fout += m) != Fout_end );
+    }
+
+    Fout=Fout_beg;
+
+    // recombine the p smaller DFTs 
+    switch (p) {
+        case 2: kf_bfly2(Fout,fstride,st,m); break;
+        case 3: kf_bfly3(Fout,fstride,st,m); break; 
+        case 4: kf_bfly4(Fout,fstride,st,m); break;
+        case 5: kf_bfly5(Fout,fstride,st,m); break; 
+        default: kf_bfly_generic(Fout,fstride,st,m,p); break;
+    }
+}
+
+/*  facbuf is populated by p1,m1,p2,m2, ...
+    where 
+    p[i] * m[i] = m[i-1]
+    m0 = n                  */
+static 
+void kf_factor(int n,int * facbuf)
+{
+    int p=4;
+    double floor_sqrt;
+    floor_sqrt = floor( sqrt((double)n) );
+
+    /*factor out powers of 4, powers of 2, then any remaining primes */
+    do {
+        while (n % p) {
+            switch (p) {
+                case 4: p = 2; break;
+                case 2: p = 3; break;
+                default: p += 2; break;
+            }
+            if (p > floor_sqrt)
+                p = n;          /* no more factors, skip to end */
+        }
+        n /= p;
+        *facbuf++ = p;
+        *facbuf++ = n;
+    } while (n > 1);
+}
+
+/*
+ *
+ * User-callable function to allocate all necessary storage space for the fft.
+ *
+ * The return value is a contiguous block of memory, allocated with malloc.  As such,
+ * It can be freed with free(), rather than a kiss_fft-specific function.
+ * */
+kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
+{
+    kiss_fft_cfg st=NULL;
+    size_t memneeded = sizeof(struct kiss_fft_state)
+        + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
+
+    if ( lenmem==NULL ) {
+        st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
+    }else{
+        if (mem != NULL && *lenmem >= memneeded)
+            st = (kiss_fft_cfg)mem;
+        *lenmem = memneeded;
+    }
+    if (st) {
+        int i;
+        st->nfft=nfft;
+        st->inverse = inverse_fft;
+
+        for (i=0;i<nfft;++i) {
+            const double pi=3.141592653589793238462643383279502884197169399375105820974944;
+            double phase = -2*pi*i / nfft;
+            if (st->inverse)
+                phase *= -1;
+            kf_cexp(st->twiddles+i, phase );
+        }
+
+        kf_factor(nfft,st->factors);
+    }
+    return st;
+}
+
+
+void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
+{
+    if (fin == fout) {
+        //NOTE: this is not really an in-place FFT algorithm.
+        //It just performs an out-of-place FFT into a temp buffer
+        kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
+        kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
+        memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
+        KISS_FFT_TMP_FREE(tmpbuf);
+    }else{
+        kf_work( fout, fin, 1,in_stride, st->factors,st );
+    }
+}
+
+void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+    kiss_fft_stride(cfg,fin,fout,1);
+}
+
+
+void kiss_fft_cleanup(void)
+{
+    // nothing needed any more
+}
+
+int kiss_fft_next_fast_size(int n)
+{
+    while(1) {
+        int m=n;
+        while ( (m%2) == 0 ) m/=2;
+        while ( (m%3) == 0 ) m/=3;
+        while ( (m%5) == 0 ) m/=5;
+        if (m<=1)
+            break; /* n is completely factorable by twos, threes, and fives */
+        n++;
+    }
+    return n;
+}
diff --git a/fft/kiss_fft.h b/fft/kiss_fft.h
new file mode 100644
index 0000000..45c3a6a
--- /dev/null
+++ b/fft/kiss_fft.h
@@ -0,0 +1,132 @@
+/*
+ *  Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#ifndef KISS_FFT_H
+#define KISS_FFT_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ ATTENTION!
+ If you would like a :
+ -- a utility that will handle the caching of fft objects
+ -- real-only (no imaginary time component ) FFT
+ -- a multi-dimensional FFT
+ -- a command-line utility to perform ffts
+ -- a command-line utility to perform fast-convolution filtering
+
+ Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c
+  in the tools/ directory.
+*/
+
+#ifdef USE_SIMD
+# include <xmmintrin.h>
+# define kiss_fft_scalar __m128
+#define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16)
+#define KISS_FFT_FREE _mm_free
+#else	
+#define KISS_FFT_MALLOC malloc
+#define KISS_FFT_FREE free
+#endif	
+
+
+#ifdef FIXED_POINT
+#include <sys/types.h>	
+# if (FIXED_POINT == 32)
+#  define kiss_fft_scalar int32_t
+# else	
+#  define kiss_fft_scalar int16_t
+# endif
+#else
+# ifndef kiss_fft_scalar
+/*  default is float */
+#   define kiss_fft_scalar float
+# endif
+#endif
+
+typedef struct {
+    kiss_fft_scalar r;
+    kiss_fft_scalar i;
+}kiss_fft_cpx;
+
+typedef struct kiss_fft_state* kiss_fft_cfg;
+
+/* 
+ *  kiss_fft_alloc
+ *  
+ *  Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
+ *
+ *  typical usage:      kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL);
+ *
+ *  The return value from fft_alloc is a cfg buffer used internally
+ *  by the fft routine or NULL.
+ *
+ *  If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc.
+ *  The returned value should be free()d when done to avoid memory leaks.
+ *  
+ *  The state can be placed in a user supplied buffer 'mem':
+ *  If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
+ *      then the function places the cfg in mem and the size used in *lenmem
+ *      and returns mem.
+ *  
+ *  If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
+ *      then the function returns NULL and places the minimum cfg 
+ *      buffer size in *lenmem.
+ * */
+
+kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem); 
+
+/*
+ * kiss_fft(cfg,in_out_buf)
+ *
+ * Perform an FFT on a complex input buffer.
+ * for a forward FFT,
+ * fin should be  f[0] , f[1] , ... ,f[nfft-1]
+ * fout will be   F[0] , F[1] , ... ,F[nfft-1]
+ * Note that each element is complex and can be accessed like
+    f[k].r and f[k].i
+ * */
+void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+
+/*
+ A more generic version of the above function. It reads its input from every Nth sample.
+ * */
+void kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride);
+
+/* If kiss_fft_alloc allocated a buffer, it is one contiguous 
+   buffer and can be simply free()d when no longer needed*/
+#define kiss_fft_free KISS_FFT_FREE
+
+/*
+ Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up 
+ your compiler output to call this before you exit.
+*/
+void kiss_fft_cleanup(void);
+	
+
+/*
+ * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5)
+ */
+int kiss_fft_next_fast_size(int n);
+
+/* for real ffts, we need an even size */
+#define kiss_fftr_next_fast_size_real(n) \
+        (kiss_fft_next_fast_size( ((n)+1)>>1)<<1)
+
+#ifdef __cplusplus
+} 
+#endif
+
+#endif
diff --git a/fft/kiss_fftr.c b/fft/kiss_fftr.c
new file mode 100644
index 0000000..8102132
--- /dev/null
+++ b/fft/kiss_fftr.c
@@ -0,0 +1,153 @@
+/*
+ *  Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#include "kiss_fftr.h"
+#include "_kiss_fft_guts.h"
+
+struct kiss_fftr_state{
+    kiss_fft_cfg substate;
+    kiss_fft_cpx * tmpbuf;
+    kiss_fft_cpx * super_twiddles;
+#ifdef USE_SIMD
+    void * pad;
+#endif
+};
+
+kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)
+{
+    int i;
+    kiss_fftr_cfg st = NULL;
+    size_t subsize = 0, memneeded;
+
+    if (nfft & 1) {
+        fprintf(stderr,"Real FFT optimization must be even.\n");
+        return NULL;
+    }
+    nfft >>= 1;
+
+    kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize);
+    memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 3 / 2);
+
+    if (lenmem == NULL) {
+        st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded);
+    } else {
+        if (*lenmem >= memneeded)
+            st = (kiss_fftr_cfg) mem;
+        *lenmem = memneeded;
+    }
+    if (!st)
+        return NULL;
+
+    st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */
+    st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize);
+    st->super_twiddles = st->tmpbuf + nfft;
+    kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
+
+    for (i = 0; i < nfft/2; ++i) {
+        double phase =
+            -3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5);
+        if (inverse_fft)
+            phase *= -1;
+        kf_cexp (st->super_twiddles+i,phase);
+    }
+    return st;
+}
+
+void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
+{
+    /* input buffer timedata is stored row-wise */
+    int k,ncfft;
+    kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc;
+
+    if ( st->substate->inverse) {
+        fprintf(stderr,"kiss fft usage error: improper alloc\n");
+        exit(1);
+    }
+
+    ncfft = st->substate->nfft;
+
+    /*perform the parallel fft of two real signals packed in real,imag*/
+    kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
+    /* The real part of the DC element of the frequency spectrum in st->tmpbuf
+     * contains the sum of the even-numbered elements of the input time sequence
+     * The imag part is the sum of the odd-numbered elements
+     *
+     * The sum of tdc.r and tdc.i is the sum of the input time sequence. 
+     *      yielding DC of input time sequence
+     * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1... 
+     *      yielding Nyquist bin of input time sequence
+     */
+ 
+    tdc.r = st->tmpbuf[0].r;
+    tdc.i = st->tmpbuf[0].i;
+    C_FIXDIV(tdc,2);
+    CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
+    CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
+    freqdata[0].r = tdc.r + tdc.i;
+    freqdata[ncfft].r = tdc.r - tdc.i;
+#ifdef USE_SIMD    
+    freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
+#else
+    freqdata[ncfft].i = freqdata[0].i = 0;
+#endif
+
+    for ( k=1;k <= ncfft/2 ; ++k ) {
+        fpk    = st->tmpbuf[k]; 
+        fpnk.r =   st->tmpbuf[ncfft-k].r;
+        fpnk.i = - st->tmpbuf[ncfft-k].i;
+        C_FIXDIV(fpk,2);
+        C_FIXDIV(fpnk,2);
+
+        C_ADD( f1k, fpk , fpnk );
+        C_SUB( f2k, fpk , fpnk );
+        C_MUL( tw , f2k , st->super_twiddles[k-1]);
+
+        freqdata[k].r = HALF_OF(f1k.r + tw.r);
+        freqdata[k].i = HALF_OF(f1k.i + tw.i);
+        freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
+        freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
+    }
+}
+
+void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata)
+{
+    /* input buffer timedata is stored row-wise */
+    int k, ncfft;
+
+    if (st->substate->inverse == 0) {
+        fprintf (stderr, "kiss fft usage error: improper alloc\n");
+        exit (1);
+    }
+
+    ncfft = st->substate->nfft;
+
+    st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
+    st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
+    C_FIXDIV(st->tmpbuf[0],2);
+
+    for (k = 1; k <= ncfft / 2; ++k) {
+        kiss_fft_cpx fk, fnkc, fek, fok, tmp;
+        fk = freqdata[k];
+        fnkc.r = freqdata[ncfft - k].r;
+        fnkc.i = -freqdata[ncfft - k].i;
+        C_FIXDIV( fk , 2 );
+        C_FIXDIV( fnkc , 2 );
+
+        C_ADD (fek, fk, fnkc);
+        C_SUB (tmp, fk, fnkc);
+        C_MUL (fok, tmp, st->super_twiddles[k-1]);
+        C_ADD (st->tmpbuf[k],     fek, fok);
+        C_SUB (st->tmpbuf[ncfft - k], fek, fok);
+#ifdef USE_SIMD        
+        st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
+#else
+        st->tmpbuf[ncfft - k].i *= -1;
+#endif
+    }
+    kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
+}
diff --git a/fft/kiss_fftr.h b/fft/kiss_fftr.h
new file mode 100644
index 0000000..588948d
--- /dev/null
+++ b/fft/kiss_fftr.h
@@ -0,0 +1,54 @@
+/*
+ *  Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
+ *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
+ *
+ *  SPDX-License-Identifier: BSD-3-Clause
+ *  See COPYING file for more information.
+ */
+
+#ifndef KISS_FTR_H
+#define KISS_FTR_H
+
+#include "kiss_fft.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    
+/* 
+ 
+ Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
+
+ 
+ 
+ */
+
+typedef struct kiss_fftr_state *kiss_fftr_cfg;
+
+
+kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem);
+/*
+ nfft must be even
+
+ If you don't care to allocate space, use mem = lenmem = NULL 
+*/
+
+
+void kiss_fftr(kiss_fftr_cfg cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata);
+/*
+ input timedata has nfft scalar points
+ output freqdata has nfft/2+1 complex points
+*/
+
+void kiss_fftri(kiss_fftr_cfg cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata);
+/*
+ input freqdata has  nfft/2+1 complex points
+ output timedata has nfft scalar points
+*/
+
+#define kiss_fftr_free KISS_FFT_FREE
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/ft8/arrays.h b/ft8/arrays.h
new file mode 100644
index 0000000..9e0d38f
--- /dev/null
+++ b/ft8/arrays.h
@@ -0,0 +1,294 @@
+// LDPC(174,87) parameters from WSJT-X.
+// this is an indirection table that moves a
+// codeword's 87 systematic (message) bits to the end.
+int colorder[] = {
+      0, 1, 2, 3, 30, 4, 5, 6, 7, 8, 9, 10, 11, 32, 12, 40, 13, 14, 15, 16,
+     17,  18, 37, 45, 29, 19, 20, 21, 41, 22, 42, 31, 33, 34, 44, 35, 47,
+     51,  50, 43, 36, 52, 63, 46, 25, 55, 27, 24, 23, 53, 39, 49, 59, 38,
+     48,  61, 60, 57, 28, 62, 56, 58, 65, 66, 26, 70, 64, 69, 68, 67, 74,
+     71,  54, 76, 72, 75, 78, 77, 80, 79, 73, 83, 84, 81, 82, 85, 86, 87,
+     88,  89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
+    104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
+    118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
+    132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
+    146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
+    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173
+};
+
+// this is the LDPC(174,87) parity check matrix.
+// 87 rows.
+// each row describes one parity check.
+// each number is an index into the codeword (1-origin).
+// the codeword bits mentioned in each row must xor to zero.
+// From WSJT-X's bpdecode174.f90.
+int Nm[][7] = {
+    {1, 30, 60, 89, 118, 147, 0},
+    {2, 31, 61, 90, 119, 147, 0},
+    {3, 32, 62, 91, 120, 148, 0},
+    {4, 33, 63, 92, 121, 149, 0},
+    {2, 34, 64, 93, 122, 150, 0},
+    {5, 33, 65, 94, 123, 148, 0},
+    {6, 34, 66, 95, 124, 151, 0},
+    {7, 35, 67, 96, 120, 152, 0},
+    {8, 36, 68, 97, 125, 153, 0},
+    {9, 37, 69, 98, 126, 152, 0},
+    {10, 38, 70, 99, 127, 154, 0},
+    {11, 39, 71, 100, 126, 155, 0},
+    {12, 40, 61, 101, 128, 145, 0},
+    {10, 33, 60, 95, 128, 156, 0},
+    {13, 41, 72, 97, 126, 157, 0},
+    {13, 42, 73, 90, 129, 156, 0},
+    {14, 39, 74, 99, 130, 158, 0},
+    {15, 43, 75, 102, 131, 159, 0},
+    {16, 43, 71, 103, 118, 160, 0},
+    {17, 44, 76, 98, 130, 156, 0},
+    {18, 45, 60, 96, 132, 161, 0},
+    {19, 46, 73, 83, 133, 162, 0},
+    {12, 38, 77, 102, 134, 163, 0},
+    {19, 47, 78, 104, 135, 147, 0},
+    {1, 32, 77, 105, 136, 164, 0},
+    {20, 48, 73, 106, 123, 163, 0},
+    {21, 41, 79, 107, 137, 165, 0},
+    {22, 42, 66, 108, 138, 152, 0},
+    {18, 42, 80, 109, 139, 154, 0},
+    {23, 49, 81, 110, 135, 166, 0},
+    {16, 50, 82, 91, 129, 158, 0},
+    {3, 48, 63, 107, 124, 167, 0},
+    {6, 51, 67, 111, 134, 155, 0},
+    {24, 35, 77, 100, 122, 162, 0},
+    {20, 45, 76, 112, 140, 157, 0},
+    {21, 36, 64, 92, 130, 159, 0},
+    {8, 52, 83, 111, 118, 166, 0},
+    {21, 53, 84, 113, 138, 168, 0},
+    {25, 51, 79, 89, 122, 158, 0},
+    {22, 44, 75, 107, 133, 155, 172},
+    {9, 54, 84, 90, 141, 169, 0},
+    {22, 54, 85, 110, 136, 161, 0},
+    {8, 37, 65, 102, 129, 170, 0},
+    {19, 39, 85, 114, 139, 150, 0},
+    {26, 55, 71, 93, 142, 167, 0},
+    {27, 56, 65, 96, 133, 160, 174},
+    {28, 31, 86, 100, 117, 171, 0},
+    {28, 52, 70, 104, 132, 144, 0},
+    {24, 57, 68, 95, 137, 142, 0},
+    {7, 30, 72, 110, 143, 151, 0},
+    {4, 51, 76, 115, 127, 168, 0},
+    {16, 45, 87, 114, 125, 172, 0},
+    {15, 30, 86, 115, 123, 150, 0},
+    {23, 46, 64, 91, 144, 173, 0},
+    {23, 35, 75, 113, 145, 153, 0},
+    {14, 41, 87, 108, 117, 149, 170},
+    {25, 40, 85, 94, 124, 159, 0},
+    {25, 58, 69, 116, 143, 174, 0},
+    {29, 43, 61, 116, 132, 162, 0},
+    {15, 58, 88, 112, 121, 164, 0},
+    {4, 59, 72, 114, 119, 163, 173},
+    {27, 47, 86, 98, 134, 153, 0},
+    {5, 44, 78, 109, 141, 0, 0},
+    {10, 46, 69, 103, 136, 165, 0},
+    {9, 50, 59, 93, 128, 164, 0},
+    {14, 57, 58, 109, 120, 166, 0},
+    {17, 55, 62, 116, 125, 154, 0},
+    {3, 54, 70, 101, 140, 170, 0},
+    {1, 36, 82, 108, 127, 174, 0},
+    {5, 53, 81, 105, 140, 0, 0},
+    {29, 53, 67, 99, 142, 173, 0},
+    {18, 49, 74, 97, 115, 167, 0},
+    {2, 57, 63, 103, 138, 157, 0},
+    {26, 38, 79, 112, 135, 171, 0},
+    {11, 52, 66, 88, 119, 148, 0},
+    {20, 40, 68, 117, 141, 160, 0},
+    {11, 48, 81, 89, 146, 169, 0},
+    {29, 47, 80, 92, 146, 172, 0},
+    {6, 32, 87, 104, 145, 169, 0},
+    {27, 34, 74, 106, 131, 165, 0},
+    {12, 56, 84, 88, 139, 0, 0},
+    {13, 56, 62, 111, 146, 171, 0},
+    {26, 37, 80, 105, 144, 151, 0},
+    {17, 31, 82, 113, 121, 161, 0},
+    {28, 49, 59, 94, 137, 0, 0},
+    {7, 55, 83, 101, 131, 168, 0},
+    {24, 50, 78, 106, 143, 149, 0},
+};
+
+// Mn from WSJT-X's bpdecode174.f90.
+// each row corresponds to a codeword bit.
+// the numbers indicate which three parity
+// checks (rows in Nm) refer to the codeword bit.
+// 1-origin.
+int Mn[][3] = {
+    {1, 25, 69},
+    {2, 5, 73},
+    {3, 32, 68},
+    {4, 51, 61},
+    {6, 63, 70},
+    {7, 33, 79},
+    {8, 50, 86},
+    {9, 37, 43},
+    {10, 41, 65},
+    {11, 14, 64},
+    {12, 75, 77},
+    {13, 23, 81},
+    {15, 16, 82},
+    {17, 56, 66},
+    {18, 53, 60},
+    {19, 31, 52},
+    {20, 67, 84},
+    {21, 29, 72},
+    {22, 24, 44},
+    {26, 35, 76},
+    {27, 36, 38},
+    {28, 40, 42},
+    {30, 54, 55},
+    {34, 49, 87},
+    {39, 57, 58},
+    {45, 74, 83},
+    {46, 62, 80},
+    {47, 48, 85},
+    {59, 71, 78},
+    {1, 50, 53},
+    {2, 47, 84},
+    {3, 25, 79},
+    {4, 6, 14},
+    {5, 7, 80},
+    {8, 34, 55},
+    {9, 36, 69},
+    {10, 43, 83},
+    {11, 23, 74},
+    {12, 17, 44},
+    {13, 57, 76},
+    {15, 27, 56},
+    {16, 28, 29},
+    {18, 19, 59},
+    {20, 40, 63},
+    {21, 35, 52},
+    {22, 54, 64},
+    {24, 62, 78},
+    {26, 32, 77},
+    {30, 72, 85},
+    {31, 65, 87},
+    {33, 39, 51},
+    {37, 48, 75},
+    {38, 70, 71},
+    {41, 42, 68},
+    {45, 67, 86},
+    {46, 81, 82},
+    {49, 66, 73},
+    {58, 60, 66},
+    {61, 65, 85},
+    {1, 14, 21},
+    {2, 13, 59},
+    {3, 67, 82},
+    {4, 32, 73},
+    {5, 36, 54},
+    {6, 43, 46},
+    {7, 28, 75},
+    {8, 33, 71},
+    {9, 49, 76},
+    {10, 58, 64},
+    {11, 48, 68},
+    {12, 19, 45},
+    {15, 50, 61},
+    {16, 22, 26},
+    {17, 72, 80},
+    {18, 40, 55},
+    {20, 35, 51},
+    {23, 25, 34},
+    {24, 63, 87},
+    {27, 39, 74},
+    {29, 78, 83},
+    {30, 70, 77},
+    {31, 69, 84},
+    {22, 37, 86},
+    {38, 41, 81},
+    {42, 44, 57},
+    {47, 53, 62},
+    {52, 56, 79},
+    {60, 75, 81},
+    {1, 39, 77},
+    {2, 16, 41},
+    {3, 31, 54},
+    {4, 36, 78},
+    {5, 45, 65},
+    {6, 57, 85},
+    {7, 14, 49},
+    {8, 21, 46},
+    {9, 15, 72},
+    {10, 20, 62},
+    {11, 17, 71},
+    {12, 34, 47},
+    {13, 68, 86},
+    {18, 23, 43},
+    {19, 64, 73},
+    {24, 48, 79},
+    {25, 70, 83},
+    {26, 80, 87},
+    {27, 32, 40},
+    {28, 56, 69},
+    {29, 63, 66},
+    {30, 42, 50},
+    {33, 37, 82},
+    {35, 60, 74},
+    {38, 55, 84},
+    {44, 52, 61},
+    {51, 53, 72},
+    {58, 59, 67},
+    {47, 56, 76},
+    {1, 19, 37},
+    {2, 61, 75},
+    {3, 8, 66},
+    {4, 60, 84},
+    {5, 34, 39},
+    {6, 26, 53},
+    {7, 32, 57},
+    {9, 52, 67},
+    {10, 12, 15},
+    {11, 51, 69},
+    {13, 14, 65},
+    {16, 31, 43},
+    {17, 20, 36},
+    {18, 80, 86},
+    {21, 48, 59},
+    {22, 40, 46},
+    {23, 33, 62},
+    {24, 30, 74},
+    {25, 42, 64},
+    {27, 49, 85},
+    {28, 38, 73},
+    {29, 44, 81},
+    {35, 68, 70},
+    {41, 63, 76},
+    {45, 49, 71},
+    {50, 58, 87},
+    {48, 54, 83},
+    {13, 55, 79},
+    {77, 78, 82},
+    {1, 2, 24},
+    {3, 6, 75},
+    {4, 56, 87},
+    {5, 44, 53},
+    {7, 50, 83},
+    {8, 10, 28},
+    {9, 55, 62},
+    {11, 29, 67},
+    {12, 33, 40},
+    {14, 16, 20},
+    {15, 35, 73},
+    {17, 31, 39},
+    {18, 36, 57},
+    {19, 46, 76},
+    {21, 42, 84},
+    {22, 34, 59},
+    {23, 26, 61},
+    {25, 60, 65},
+    {27, 64, 80},
+    {30, 37, 66},
+    {32, 45, 72},
+    {38, 51, 86},
+    {41, 77, 79},
+    {43, 56, 68},
+    {47, 74, 82},
+    {40, 52, 78},
+    {54, 61, 71},
+    {46, 58, 69},
+};
diff --git a/ft8/ldpc.cpp b/ft8/ldpc.cpp
new file mode 100644
index 0000000..966eeb7
--- /dev/null
+++ b/ft8/ldpc.cpp
@@ -0,0 +1,266 @@
+//
+// LDPC decoder for FT8.
+//
+// given a 174-bit codeword as an array of log-likelihood of zero,
+// return a 174-bit corrected codeword, or zero-length array.
+// last 87 bits are the (systematic) plain-text.
+// this is an implementation of the sum-product algorithm
+// from Sarah Johnson's Iterative Error Correction book.
+// codeword[i] = log ( P(x=0) / P(x=1) )
+//
+// cc -O3 libldpc.c -shared -fPIC -o libldpc.so
+//
+
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include "arrays.h"
+
+int ldpc_check(int codeword[]);
+
+
+// thank you Douglas Bagnall
+// https://math.stackexchange.com/a/446411
+float fast_tanh(float x)
+{
+    if (x < -4.97f)
+    {
+        return -1.0f;
+    }
+    if (x > 4.97f)
+    {
+        return 1.0f;
+    }
+    float x2 = x * x;
+    float a = x * (135135.0f + x2 * (17325.0f + x2 * (378.0f + x2)));
+    float b = 135135.0f + x2 * (62370.0f + x2 * (3150.0f + x2 * 28.0f));
+    return a / b;
+}
+
+
+// codeword is 174 log-likelihoods.
+// plain is a return value, 174 ints, to be 0 or 1.
+// iters is how hard to try.
+// ok == 87 means success.
+void ldpc_decode(float codeword[], int iters, int plain[], int *ok)
+{
+    float m[87][174];       // ~60 kB
+    float e[87][174];       // ~60 kB
+    int best_score = -1;
+    int best_cw[174];
+
+    for (int i = 0; i < 174; i++)
+        for (int j = 0; j < 87; j++)
+            m[j][i] = codeword[i];
+
+    for (int i = 0; i < 174; i++)
+        for (int j = 0; j < 87; j++)
+            e[j][i] = 0.0f;
+
+    for (int iter = 0; iter < iters; iter++)
+    {
+        for (int j = 0; j < 87; j++)
+        {
+            for (int ii1 = 0; ii1 < 7; ii1++)
+            {
+                int i1 = Nm[j][ii1] - 1;
+                if (i1 < 0)
+                    continue;
+                float a = 1.0f;
+                for (int ii2 = 0; ii2 < 7; ii2++)
+                {
+                    int i2 = Nm[j][ii2] - 1;
+                    if (i2 >= 0 && i2 != i1)
+                    {
+                        a *= fast_tanh(m[j][i2] / 2.0f);
+                    }
+                }
+                e[j][i1] = log((1 + a) / (1 - a));
+            }
+        }
+
+        int cw[174];
+        for (int i = 0; i < 174; i++)
+        {
+            float l = codeword[i];
+            for (int j = 0; j < 3; j++)
+                l += e[Mn[i][j] - 1][i];
+            cw[i] = (l <= 0.0f);
+        }
+        int score = ldpc_check(cw);
+        if (score == 87)
+        {
+            // Found a perfect answer
+#if 0
+      int cw1[174];
+      for(int i = 0; i < 174; i++)
+        cw1[i] = cw[colorder[i]];
+      for(int i = 0; i < 87; i++)
+        plain[i] = cw1[174-87+i];
+#else
+            for (int i = 0; i < 174; i++)
+                plain[i] = cw[colorder[i]];
+#endif
+            *ok = 87;
+            return;
+        }
+
+        if (score > best_score)
+        {
+            for (int i = 0; i < 174; i++)
+                best_cw[i] = cw[i];
+            best_score = score;
+        }
+
+        for (int i = 0; i < 174; i++)
+        {
+            for (int ji1 = 0; ji1 < 3; ji1++)
+            {
+                int j1 = Mn[i][ji1] - 1;
+                float l = codeword[i];
+                for (int ji2 = 0; ji2 < 3; ji2++)
+                {
+                    if (ji1 != ji2)
+                    {
+                        int j2 = Mn[i][ji2] - 1;
+                        l += e[j2][i];
+                    }
+                }
+                m[j1][i] = l;
+            }
+        }
+    }
+
+    // decode didn't work, return something anyway.
+#if 0
+  int cw1[174];
+  for(int i = 0; i < 174; i++)
+    cw1[i] = best_cw[colorder[i]];
+  for(int i = 0; i < 87; i++)
+    plain[i] = cw1[174-87+i];
+#else
+    for (int i = 0; i < 174; i++)
+        plain[i] = best_cw[colorder[i]];
+#endif
+
+    *ok = best_score;
+}
+
+
+//
+// does a 174-bit codeword pass the FT8's LDPC parity checks?
+// returns the number of parity checks that passed.
+// 87 means total success.
+//
+int ldpc_check(int codeword[])
+{
+    int score = 0;
+
+    // Nm[87][7]
+    for (int j = 0; j < 87; j++)
+    {
+        int x = 0;
+        for (int ii1 = 0; ii1 < 7; ii1++)
+        {
+            int i1 = Nm[j][ii1] - 1;
+            if (i1 >= 0)
+            {
+                x ^= codeword[i1];
+            }
+        }
+        if (x == 0)
+            score++;
+    }
+    return score;
+}
+
+
+/*
+def bp_decode(codeword, max_iterations = 10):
+    ## 174 codeword bits
+    ## 87 parity checks
+
+    mnx = numpy.array(Mn, dtype=numpy.int32)
+    nmx = numpy.array(Nm, dtype=numpy.int32)
+
+    ncw = 3
+    tov = numpy.zeros( (3, N) )
+    toc = numpy.zeros( (7, M) )
+    tanhtoc = numpy.zeros( (7, M) )
+    zn = numpy.zeros(N)
+    nclast = 0
+    ncnt = 0
+
+    # initialize messages to checks
+    for j in range(M):
+        for i in range(nrw[j]):
+            toc[i, j] = codeword[nmx[j, i] - 1]
+    
+    for iteration in range(max_iterations):
+        # Update bit log likelihood ratios (tov=0 in iteration 0).
+        #for i in range(N):
+        #    zn[i] = codeword[i] + numpy.sum(tov[:,i])
+        zn = codeword + numpy.sum(tov, axis = 0)
+        #print(numpy.sum(tov, axis=0))
+
+        # Check to see if we have a codeword (check before we do any iteration).
+        cw = numpy.zeros(N, dtype=numpy.int32)
+        cw[zn > 0] = 1
+        ncheck = 0
+        for i in range(M):
+            synd = numpy.sum(cw[ nmx[i, :nrw[i]]-1 ])
+            if synd % 2 > 0:
+                ncheck += 1
+
+        if ncheck == 0: 
+            # we have a codeword - reorder the columns and return it
+            codeword = cw[colorder]
+            #nerr = 0
+            #for i in range(N):
+            #    if (2*cw[i]-1)*codeword[i] < 0:
+            #        nerr += 1
+            
+            #print("DECODED!", nerr)
+            return codeword[M:N]
+
+        if iter > 0:
+            # this code block implements an early stopping criterion
+            nd = ncheck - nclast
+            if nd < 0: # of unsatisfied parity checks decreased
+                ncnt = 0  # reset counter
+            else:
+                ncnt += 1
+            
+            if ncnt >= 5 and iter >= 10 and ncheck >= 15:
+                nharderror = -1
+                #return numpy.array([])
+
+        nclast = ncheck
+
+        # Send messages from bits to check nodes 
+        for j in range(M):
+            for i in range(nrw[j]):
+                ibj = nmx[j, i] - 1
+                toc[i, j] = zn[ibj]
+                for kk in range(ncw): # subtract off what the bit had received from the check
+                    if mnx[ibj, kk] - 1 == j:
+                        toc[i, j] -= tov[kk, ibj]
+
+        # send messages from check nodes to variable nodes
+        #for i in range(M):
+        #    tanhtoc[:,i] = numpy.tanh(-toc[:,i] / 2)
+        tanhtoc = numpy.tanh(-toc / 2)
+
+        for j in range(N):
+            for i in range(ncw):
+                ichk = mnx[j, i] - 1  # Mn(:,j) are the checks that include bit j
+                Tmn = 1.0
+                for k in range(nrw[ichk]):
+                    if nmx[ichk, k] - 1 == j: continue
+                    Tmn *= tanhtoc[k, ichk]
+                y = numpy.arctanh(-Tmn)
+                #y = platanh(-Tmn)
+                tov[i, j] = 2*y
+
+    return numpy.array([])
+*/
\ No newline at end of file
diff --git a/ft8/ldpc.h b/ft8/ldpc.h
new file mode 100644
index 0000000..ebb831f
--- /dev/null
+++ b/ft8/ldpc.h
@@ -0,0 +1,7 @@
+#pragma once
+
+// codeword is 174 log-likelihoods.
+// plain is a return value, 174 ints, to be 0 or 1.
+// iters is how hard to try.
+// ok == 87 means success.
+void ldpc_decode(float codeword[], int iters, int plain[], int *ok);