diff --git a/README.md b/README.md
new file mode 100644
index 0000000..aa550e6
--- /dev/null
+++ b/README.md
@@ -0,0 +1,18 @@
+This is the firmware for the TNC3 version 2.1.1 hardware.
+
+# Building
+
+Use Eclipse with CDT and the GNU MCU Eclipse plugins.
+
+# Debugging
+
+Logging is enabled in debug builds and is output via ITM (SWO).  The
+firmware is distributed with an openocd stlink config file that enables
+ITM output to a named pipe -- `swv`.
+
+To read from this pipe, open a terminal and run:
+
+`while true; do tr -d '\01' < swv; done`
+
+If you change the MCU's core clock, you need to adjust the timing in the
+`stlink-tnc3.cfg` config file. 
\ No newline at end of file
diff --git a/Src/arm_fir_f32.c b/Src/arm_fir_f32.c
new file mode 100644
index 0000000..3ecb7b5
--- /dev/null
+++ b/Src/arm_fir_f32.c
@@ -0,0 +1,997 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010-2014 ARM Limited. All rights reserved.  
+*  
+* $Date:        19. March 2015
+* $Revision: 	V.1.4.5
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_f32.c  
+*  
+* Description:	Floating-point FIR filter processing function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE. 
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**  
+* @ingroup groupFilters  
+*/
+
+/**  
+* @defgroup FIR Finite Impulse Response (FIR) Filters  
+*  
+* This set of functions implements Finite Impulse Response (FIR) filters  
+* for Q7, Q15, Q31, and floating-point data types.  Fast versions of Q15 and Q31 are also provided.  
+* The functions operate on blocks of input and output data and each call to the function processes  
+* <code>blockSize</code> samples through the filter.  <code>pSrc</code> and  
+* <code>pDst</code> points to input and output arrays containing <code>blockSize</code> values.  
+*  
+* \par Algorithm:  
+* The FIR filter algorithm is based upon a sequence of multiply-accumulate (MAC) operations.  
+* Each filter coefficient <code>b[n]</code> is multiplied by a state variable which equals a previous input sample <code>x[n]</code>.  
+* <pre>  
+*    y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2] + ...+ b[numTaps-1] * x[n-numTaps+1]  
+* </pre>  
+* \par  
+* \image html FIR.gif "Finite Impulse Response filter"  
+* \par  
+* <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.  
+* Coefficients are stored in time reversed order.  
+* \par  
+* <pre>  
+*    {b[numTaps-1], b[numTaps-2], b[N-2], ..., b[1], b[0]}  
+* </pre>  
+* \par  
+* <code>pState</code> points to a state array of size <code>numTaps + blockSize - 1</code>.  
+* Samples in the state buffer are stored in the following order.  
+* \par  
+* <pre>  
+*    {x[n-numTaps+1], x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2]....x[0], x[1], ..., x[blockSize-1]}  
+* </pre>  
+* \par  
+* Note that the length of the state buffer exceeds the length of the coefficient array by <code>blockSize-1</code>.  
+* The increased state buffer length allows circular addressing, which is traditionally used in the FIR filters,  
+* to be avoided and yields a significant speed improvement.  
+* The state variables are updated after each block of data is processed; the coefficients are untouched.  
+* \par Instance Structure  
+* The coefficients and state variables for a filter are stored together in an instance data structure.  
+* A separate instance structure must be defined for each filter.  
+* Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.  
+* There are separate instance structure declarations for each of the 4 supported data types.  
+*  
+* \par Initialization Functions  
+* There is also an associated initialization function for each data type.  
+* The initialization function performs the following operations:  
+* - Sets the values of the internal structure fields.  
+* - Zeros out the values in the state buffer.  
+* To do this manually without calling the init function, assign the follow subfields of the instance structure:
+* numTaps, pCoeffs, pState. Also set all of the values in pState to zero. 
+*  
+* \par  
+* Use of the initialization function is optional.  
+* However, if the initialization function is used, then the instance structure cannot be placed into a const data section.  
+* To place an instance structure into a const data section, the instance structure must be manually initialized.  
+* Set the values in the state buffer to zeros before static initialization.  
+* The code below statically initializes each of the 4 different data type filter instance structures  
+* <pre>  
+*arm_fir_instance_f32 S = {numTaps, pState, pCoeffs};  
+*arm_fir_instance_q31 S = {numTaps, pState, pCoeffs};  
+*arm_fir_instance_q15 S = {numTaps, pState, pCoeffs};  
+*arm_fir_instance_q7 S =  {numTaps, pState, pCoeffs};  
+* </pre>  
+*  
+* where <code>numTaps</code> is the number of filter coefficients in the filter; <code>pState</code> is the address of the state buffer;  
+* <code>pCoeffs</code> is the address of the coefficient buffer.  
+*  
+* \par Fixed-Point Behavior  
+* Care must be taken when using the fixed-point versions of the FIR filter functions.  
+* In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.  
+* Refer to the function specific documentation below for usage guidelines.  
+*/
+
+/**  
+* @addtogroup FIR  
+* @{  
+*/
+
+/**  
+*  
+* @param[in]  *S points to an instance of the floating-point FIR filter structure.  
+* @param[in]  *pSrc points to the block of input data.  
+* @param[out] *pDst points to the block of output data.  
+* @param[in]  blockSize number of samples to process per call.  
+* @return     none.  
+*  
+*/
+
+#if defined(ARM_MATH_CM7)
+
+void arm_fir_f32(
+const arm_fir_instance_f32 * S,
+float32_t * pSrc,
+float32_t * pDst,
+uint32_t blockSize)
+{
+   float32_t *pState = S->pState;                 /* State pointer */
+   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
+   float32_t *pStateCurnt;                        /* Points to the current sample of the state */
+   float32_t *px, *pb;                            /* Temporary pointers for state and coefficient buffers */
+   float32_t acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;     /* Accumulators */
+   float32_t x0, x1, x2, x3, x4, x5, x6, x7, c0;  /* Temporary variables to hold state and coefficient values */
+   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
+   uint32_t i, tapCnt, blkCnt;                    /* Loop counters */
+
+   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
+   /* pStateCurnt points to the location where the new input data should be written */
+   pStateCurnt = &(S->pState[(numTaps - 1u)]);
+
+   /* Apply loop unrolling and compute 8 output values simultaneously.  
+    * The variables acc0 ... acc7 hold output values that are being computed:  
+    *  
+    *    acc0 =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]  
+    *    acc1 =  b[numTaps-1] * x[n-numTaps] +   b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]  
+    *    acc2 =  b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] +   b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]  
+    *    acc3 =  b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps]   +...+ b[0] * x[3]  
+    */
+   blkCnt = blockSize >> 3;
+
+   /* First part of the processing with loop unrolling.  Compute 8 outputs at a time.  
+   ** a second loop below computes the remaining 1 to 7 samples. */
+   while(blkCnt > 0u)
+   {
+      /* Copy four new input samples into the state buffer */
+      *pStateCurnt++ = *pSrc++;
+      *pStateCurnt++ = *pSrc++;
+      *pStateCurnt++ = *pSrc++;
+      *pStateCurnt++ = *pSrc++;
+
+      /* Set all accumulators to zero */
+      acc0 = 0.0f;
+      acc1 = 0.0f;
+      acc2 = 0.0f;
+      acc3 = 0.0f;
+      acc4 = 0.0f;
+      acc5 = 0.0f;
+      acc6 = 0.0f;
+      acc7 = 0.0f;		
+
+      /* Initialize state pointer */
+      px = pState;
+
+      /* Initialize coeff pointer */
+      pb = (pCoeffs);		
+   
+      /* This is separated from the others to avoid 
+       * a call to __aeabi_memmove which would be slower
+       */
+      *pStateCurnt++ = *pSrc++;
+      *pStateCurnt++ = *pSrc++;
+      *pStateCurnt++ = *pSrc++;
+      *pStateCurnt++ = *pSrc++;
+
+      /* Read the first seven samples from the state buffer:  x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2] */
+      x0 = *px++;
+      x1 = *px++;
+      x2 = *px++;
+      x3 = *px++;
+      x4 = *px++;
+      x5 = *px++;
+      x6 = *px++;
+
+      /* Loop unrolling.  Process 8 taps at a time. */
+      tapCnt = numTaps >> 3u;
+      
+      /* Loop over the number of taps.  Unroll by a factor of 8.  
+       ** Repeat until we've computed numTaps-8 coefficients. */
+      while(tapCnt > 0u)
+      {
+         /* Read the b[numTaps-1] coefficient */
+         c0 = *(pb++);
+
+         /* Read x[n-numTaps-3] sample */
+         x7 = *(px++);
+
+         /* acc0 +=  b[numTaps-1] * x[n-numTaps] */
+         acc0 += x0 * c0;
+
+         /* acc1 +=  b[numTaps-1] * x[n-numTaps-1] */
+         acc1 += x1 * c0;
+
+         /* acc2 +=  b[numTaps-1] * x[n-numTaps-2] */
+         acc2 += x2 * c0;
+
+         /* acc3 +=  b[numTaps-1] * x[n-numTaps-3] */
+         acc3 += x3 * c0;
+
+         /* acc4 +=  b[numTaps-1] * x[n-numTaps-4] */
+         acc4 += x4 * c0;
+
+         /* acc1 +=  b[numTaps-1] * x[n-numTaps-5] */
+         acc5 += x5 * c0;
+
+         /* acc2 +=  b[numTaps-1] * x[n-numTaps-6] */
+         acc6 += x6 * c0;
+
+         /* acc3 +=  b[numTaps-1] * x[n-numTaps-7] */
+         acc7 += x7 * c0;
+         
+         /* Read the b[numTaps-2] coefficient */
+         c0 = *(pb++);
+
+         /* Read x[n-numTaps-4] sample */
+         x0 = *(px++);
+
+         /* Perform the multiply-accumulate */
+         acc0 += x1 * c0;
+         acc1 += x2 * c0;   
+         acc2 += x3 * c0;   
+         acc3 += x4 * c0;   
+         acc4 += x5 * c0;   
+         acc5 += x6 * c0;   
+         acc6 += x7 * c0;   
+         acc7 += x0 * c0;   
+         
+         /* Read the b[numTaps-3] coefficient */
+         c0 = *(pb++);
+
+         /* Read x[n-numTaps-5] sample */
+         x1 = *(px++);
+
+         /* Perform the multiply-accumulates */      
+         acc0 += x2 * c0;
+         acc1 += x3 * c0;   
+         acc2 += x4 * c0;   
+         acc3 += x5 * c0;   
+         acc4 += x6 * c0;   
+         acc5 += x7 * c0;   
+         acc6 += x0 * c0;   
+         acc7 += x1 * c0;   
+
+         /* Read the b[numTaps-4] coefficient */
+         c0 = *(pb++);
+
+         /* Read x[n-numTaps-6] sample */
+         x2 = *(px++);
+
+         /* Perform the multiply-accumulates */      
+         acc0 += x3 * c0;
+         acc1 += x4 * c0;   
+         acc2 += x5 * c0;   
+         acc3 += x6 * c0;   
+         acc4 += x7 * c0;   
+         acc5 += x0 * c0;   
+         acc6 += x1 * c0;   
+         acc7 += x2 * c0;   
+
+         /* Read the b[numTaps-4] coefficient */
+         c0 = *(pb++);
+
+         /* Read x[n-numTaps-6] sample */
+         x3 = *(px++);
+         /* Perform the multiply-accumulates */      
+         acc0 += x4 * c0;
+         acc1 += x5 * c0;   
+         acc2 += x6 * c0;   
+         acc3 += x7 * c0;   
+         acc4 += x0 * c0;   
+         acc5 += x1 * c0;   
+         acc6 += x2 * c0;   
+         acc7 += x3 * c0;   
+
+         /* Read the b[numTaps-4] coefficient */
+         c0 = *(pb++);
+
+         /* Read x[n-numTaps-6] sample */
+         x4 = *(px++);
+
+         /* Perform the multiply-accumulates */      
+         acc0 += x5 * c0;
+         acc1 += x6 * c0;   
+         acc2 += x7 * c0;   
+         acc3 += x0 * c0;   
+         acc4 += x1 * c0;   
+         acc5 += x2 * c0;   
+         acc6 += x3 * c0;   
+         acc7 += x4 * c0;   
+
+         /* Read the b[numTaps-4] coefficient */
+         c0 = *(pb++);
+
+         /* Read x[n-numTaps-6] sample */
+         x5 = *(px++);
+
+         /* Perform the multiply-accumulates */      
+         acc0 += x6 * c0;
+         acc1 += x7 * c0;   
+         acc2 += x0 * c0;   
+         acc3 += x1 * c0;   
+         acc4 += x2 * c0;   
+         acc5 += x3 * c0;   
+         acc6 += x4 * c0;   
+         acc7 += x5 * c0;   
+
+         /* Read the b[numTaps-4] coefficient */
+         c0 = *(pb++);
+
+         /* Read x[n-numTaps-6] sample */
+         x6 = *(px++);
+
+         /* Perform the multiply-accumulates */      
+         acc0 += x7 * c0;
+         acc1 += x0 * c0;   
+         acc2 += x1 * c0;   
+         acc3 += x2 * c0;   
+         acc4 += x3 * c0;   
+         acc5 += x4 * c0;   
+         acc6 += x5 * c0;   
+         acc7 += x6 * c0;   
+
+         tapCnt--;
+      }
+
+      /* If the filter length is not a multiple of 8, compute the remaining filter taps */
+      tapCnt = numTaps % 0x8u;
+
+      while(tapCnt > 0u)
+      {
+         /* Read coefficients */
+         c0 = *(pb++);
+
+         /* Fetch 1 state variable */
+         x7 = *(px++);
+
+         /* Perform the multiply-accumulates */      
+         acc0 += x0 * c0;
+         acc1 += x1 * c0;   
+         acc2 += x2 * c0;   
+         acc3 += x3 * c0;   
+         acc4 += x4 * c0;   
+         acc5 += x5 * c0;   
+         acc6 += x6 * c0;   
+         acc7 += x7 * c0;   
+
+         /* Reuse the present sample states for next sample */
+         x0 = x1;
+         x1 = x2;
+         x2 = x3;
+         x3 = x4;
+         x4 = x5;
+         x5 = x6;
+         x6 = x7;
+
+         /* Decrement the loop counter */
+         tapCnt--;
+      }
+
+      /* Advance the state pointer by 8 to process the next group of 8 samples */
+      pState = pState + 8;
+
+      /* The results in the 8 accumulators, store in the destination buffer. */
+      *pDst++ = acc0;
+      *pDst++ = acc1;
+      *pDst++ = acc2;
+      *pDst++ = acc3;
+      *pDst++ = acc4;
+      *pDst++ = acc5;
+      *pDst++ = acc6;
+      *pDst++ = acc7;
+
+      blkCnt--;
+   }
+
+   /* If the blockSize is not a multiple of 8, compute any remaining output samples here.  
+   ** No loop unrolling is used. */
+   blkCnt = blockSize % 0x8u;
+
+   while(blkCnt > 0u)
+   {
+      /* Copy one sample at a time into state buffer */
+      *pStateCurnt++ = *pSrc++;
+
+      /* Set the accumulator to zero */
+      acc0 = 0.0f;
+
+      /* Initialize state pointer */
+      px = pState;
+
+      /* Initialize Coefficient pointer */
+      pb = (pCoeffs);
+
+      i = numTaps;
+
+      /* Perform the multiply-accumulates */
+      do
+      {
+         acc0 += *px++ * *pb++;
+         i--;
+
+      } while(i > 0u);
+
+      /* The result is store in the destination buffer. */
+      *pDst++ = acc0;
+
+      /* Advance state pointer by 1 for the next sample */
+      pState = pState + 1;
+
+      blkCnt--;
+   }
+
+   /* Processing is complete.  
+   ** Now copy the last numTaps - 1 samples to the start of the state buffer.  
+   ** This prepares the state buffer for the next function call. */
+
+   /* Points to the start of the state buffer */
+   pStateCurnt = S->pState;
+
+   tapCnt = (numTaps - 1u) >> 2u;
+
+   /* copy data */
+   while(tapCnt > 0u)
+   {
+      *pStateCurnt++ = *pState++;
+      *pStateCurnt++ = *pState++;
+      *pStateCurnt++ = *pState++;
+      *pStateCurnt++ = *pState++;
+
+      /* Decrement the loop counter */
+      tapCnt--;
+   }
+
+   /* Calculate remaining number of copies */
+   tapCnt = (numTaps - 1u) % 0x4u;
+
+   /* Copy the remaining q31_t data */
+   while(tapCnt > 0u)
+   {
+      *pStateCurnt++ = *pState++;
+
+      /* Decrement the loop counter */
+      tapCnt--;
+   }
+}
+
+#elif defined(ARM_MATH_CM0_FAMILY)
+
+void arm_fir_f32(
+const arm_fir_instance_f32 * S,
+float32_t * pSrc,
+float32_t * pDst,
+uint32_t blockSize)
+{
+   float32_t *pState = S->pState;                 /* State pointer */
+   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
+   float32_t *pStateCurnt;                        /* Points to the current sample of the state */
+   float32_t *px, *pb;                            /* Temporary pointers for state and coefficient buffers */
+   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
+   uint32_t i, tapCnt, blkCnt;                    /* Loop counters */
+
+   /* Run the below code for Cortex-M0 */
+
+   float32_t acc;
+
+   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
+   /* pStateCurnt points to the location where the new input data should be written */
+   pStateCurnt = &(S->pState[(numTaps - 1u)]);
+
+   /* Initialize blkCnt with blockSize */
+   blkCnt = blockSize;
+
+   while(blkCnt > 0u)
+   {
+      /* Copy one sample at a time into state buffer */
+      *pStateCurnt++ = *pSrc++;
+
+      /* Set the accumulator to zero */
+      acc = 0.0f;
+
+      /* Initialize state pointer */
+      px = pState;
+
+      /* Initialize Coefficient pointer */
+      pb = pCoeffs;
+
+      i = numTaps;
+
+      /* Perform the multiply-accumulates */
+      do
+      {
+         /* acc =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */
+         acc += *px++ * *pb++;
+         i--;
+
+      } while(i > 0u);
+
+      /* The result is store in the destination buffer. */
+      *pDst++ = acc;
+
+      /* Advance state pointer by 1 for the next sample */
+      pState = pState + 1;
+
+      blkCnt--;
+   }
+
+   /* Processing is complete.         
+   ** Now copy the last numTaps - 1 samples to the starting of the state buffer.       
+   ** This prepares the state buffer for the next function call. */
+
+   /* Points to the start of the state buffer */
+   pStateCurnt = S->pState;
+
+   /* Copy numTaps number of values */
+   tapCnt = numTaps - 1u;
+
+   /* Copy data */
+   while(tapCnt > 0u)
+   {
+      *pStateCurnt++ = *pState++;
+
+      /* Decrement the loop counter */
+      tapCnt--;
+   }
+
+}
+
+#else
+
+/* Run the below code for Cortex-M4 and Cortex-M3 */
+
+void arm_fir_f32(
+const arm_fir_instance_f32 * S,
+float32_t * pSrc,
+float32_t * pDst,
+uint32_t blockSize)
+{
+   float32_t *pState = S->pState;                 /* State pointer */
+   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
+   float32_t *pStateCurnt;                        /* Points to the current sample of the state */
+   float32_t *px, *pb;                            /* Temporary pointers for state and coefficient buffers */
+   float32_t acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;     /* Accumulators */
+   float32_t x0, x1, x2, x3, x4, x5, x6, x7, c0;  /* Temporary variables to hold state and coefficient values */
+   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
+   uint32_t i, tapCnt, blkCnt;                    /* Loop counters */
+   float32_t p0,p1,p2,p3,p4,p5,p6,p7;             /* Temporary product values */
+
+   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
+   /* pStateCurnt points to the location where the new input data should be written */
+   pStateCurnt = &(S->pState[(numTaps - 1u)]);
+
+   /* Apply loop unrolling and compute 8 output values simultaneously.  
+    * The variables acc0 ... acc7 hold output values that are being computed:  
+    *  
+    *    acc0 =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]  
+    *    acc1 =  b[numTaps-1] * x[n-numTaps] +   b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]  
+    *    acc2 =  b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] +   b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]  
+    *    acc3 =  b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps]   +...+ b[0] * x[3]  
+    */
+   blkCnt = blockSize >> 3;
+
+   /* First part of the processing with loop unrolling.  Compute 8 outputs at a time.  
+   ** a second loop below computes the remaining 1 to 7 samples. */
+   while(blkCnt > 0u)
+   {
+      /* Copy four new input samples into the state buffer */
+      *pStateCurnt++ = *pSrc++;
+      *pStateCurnt++ = *pSrc++;
+      *pStateCurnt++ = *pSrc++;
+      *pStateCurnt++ = *pSrc++;
+
+      /* Set all accumulators to zero */
+      acc0 = 0.0f;
+      acc1 = 0.0f;
+      acc2 = 0.0f;
+      acc3 = 0.0f;
+      acc4 = 0.0f;
+      acc5 = 0.0f;
+      acc6 = 0.0f;
+      acc7 = 0.0f;		
+
+      /* Initialize state pointer */
+      px = pState;
+
+      /* Initialize coeff pointer */
+      pb = (pCoeffs);		
+   
+      /* This is separated from the others to avoid 
+       * a call to __aeabi_memmove which would be slower
+       */
+      *pStateCurnt++ = *pSrc++;
+      *pStateCurnt++ = *pSrc++;
+      *pStateCurnt++ = *pSrc++;
+      *pStateCurnt++ = *pSrc++;
+
+      /* Read the first seven samples from the state buffer:  x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2] */
+      x0 = *px++;
+      x1 = *px++;
+      x2 = *px++;
+      x3 = *px++;
+      x4 = *px++;
+      x5 = *px++;
+      x6 = *px++;
+
+      /* Loop unrolling.  Process 8 taps at a time. */
+      tapCnt = numTaps >> 3u;
+      
+      /* Loop over the number of taps.  Unroll by a factor of 8.  
+       ** Repeat until we've computed numTaps-8 coefficients. */
+      while(tapCnt > 0u)
+      {
+         /* Read the b[numTaps-1] coefficient */
+         c0 = *(pb++);
+
+         /* Read x[n-numTaps-3] sample */
+         x7 = *(px++);
+
+         /* acc0 +=  b[numTaps-1] * x[n-numTaps] */
+         p0 = x0 * c0;
+
+         /* acc1 +=  b[numTaps-1] * x[n-numTaps-1] */
+         p1 = x1 * c0;
+
+         /* acc2 +=  b[numTaps-1] * x[n-numTaps-2] */
+         p2 = x2 * c0;
+
+         /* acc3 +=  b[numTaps-1] * x[n-numTaps-3] */
+         p3 = x3 * c0;
+
+         /* acc4 +=  b[numTaps-1] * x[n-numTaps-4] */
+         p4 = x4 * c0;
+
+         /* acc1 +=  b[numTaps-1] * x[n-numTaps-5] */
+         p5 = x5 * c0;
+
+         /* acc2 +=  b[numTaps-1] * x[n-numTaps-6] */
+         p6 = x6 * c0;
+
+         /* acc3 +=  b[numTaps-1] * x[n-numTaps-7] */
+         p7 = x7 * c0;
+         
+         /* Read the b[numTaps-2] coefficient */
+         c0 = *(pb++);
+
+         /* Read x[n-numTaps-4] sample */
+         x0 = *(px++);
+         
+         acc0 += p0;
+         acc1 += p1;
+         acc2 += p2;
+         acc3 += p3;
+         acc4 += p4;
+         acc5 += p5;
+         acc6 += p6;
+         acc7 += p7;
+
+
+         /* Perform the multiply-accumulate */
+         p0 = x1 * c0;
+         p1 = x2 * c0;   
+         p2 = x3 * c0;   
+         p3 = x4 * c0;   
+         p4 = x5 * c0;   
+         p5 = x6 * c0;   
+         p6 = x7 * c0;   
+         p7 = x0 * c0;   
+         
+         /* Read the b[numTaps-3] coefficient */
+         c0 = *(pb++);
+
+         /* Read x[n-numTaps-5] sample */
+         x1 = *(px++);
+         
+         acc0 += p0;
+         acc1 += p1;
+         acc2 += p2;
+         acc3 += p3;
+         acc4 += p4;
+         acc5 += p5;
+         acc6 += p6;
+         acc7 += p7;
+
+         /* Perform the multiply-accumulates */      
+         p0 = x2 * c0;
+         p1 = x3 * c0;   
+         p2 = x4 * c0;   
+         p3 = x5 * c0;   
+         p4 = x6 * c0;   
+         p5 = x7 * c0;   
+         p6 = x0 * c0;   
+         p7 = x1 * c0;   
+
+         /* Read the b[numTaps-4] coefficient */
+         c0 = *(pb++);
+
+         /* Read x[n-numTaps-6] sample */
+         x2 = *(px++);
+         
+         acc0 += p0;
+         acc1 += p1;
+         acc2 += p2;
+         acc3 += p3;
+         acc4 += p4;
+         acc5 += p5;
+         acc6 += p6;
+         acc7 += p7;
+
+         /* Perform the multiply-accumulates */      
+         p0 = x3 * c0;
+         p1 = x4 * c0;   
+         p2 = x5 * c0;   
+         p3 = x6 * c0;   
+         p4 = x7 * c0;   
+         p5 = x0 * c0;   
+         p6 = x1 * c0;   
+         p7 = x2 * c0;   
+
+         /* Read the b[numTaps-4] coefficient */
+         c0 = *(pb++);
+
+         /* Read x[n-numTaps-6] sample */
+         x3 = *(px++);
+         
+         acc0 += p0;
+         acc1 += p1;
+         acc2 += p2;
+         acc3 += p3;
+         acc4 += p4;
+         acc5 += p5;
+         acc6 += p6;
+         acc7 += p7;
+
+         /* Perform the multiply-accumulates */      
+         p0 = x4 * c0;
+         p1 = x5 * c0;   
+         p2 = x6 * c0;   
+         p3 = x7 * c0;   
+         p4 = x0 * c0;   
+         p5 = x1 * c0;   
+         p6 = x2 * c0;   
+         p7 = x3 * c0;   
+
+         /* Read the b[numTaps-4] coefficient */
+         c0 = *(pb++);
+
+         /* Read x[n-numTaps-6] sample */
+         x4 = *(px++);
+         
+         acc0 += p0;
+         acc1 += p1;
+         acc2 += p2;
+         acc3 += p3;
+         acc4 += p4;
+         acc5 += p5;
+         acc6 += p6;
+         acc7 += p7;
+
+         /* Perform the multiply-accumulates */      
+         p0 = x5 * c0;
+         p1 = x6 * c0;   
+         p2 = x7 * c0;   
+         p3 = x0 * c0;   
+         p4 = x1 * c0;   
+         p5 = x2 * c0;   
+         p6 = x3 * c0;   
+         p7 = x4 * c0;   
+
+         /* Read the b[numTaps-4] coefficient */
+         c0 = *(pb++);
+
+         /* Read x[n-numTaps-6] sample */
+         x5 = *(px++);
+         
+         acc0 += p0;
+         acc1 += p1;
+         acc2 += p2;
+         acc3 += p3;
+         acc4 += p4;
+         acc5 += p5;
+         acc6 += p6;
+         acc7 += p7;
+
+         /* Perform the multiply-accumulates */      
+         p0 = x6 * c0;
+         p1 = x7 * c0;   
+         p2 = x0 * c0;   
+         p3 = x1 * c0;   
+         p4 = x2 * c0;   
+         p5 = x3 * c0;   
+         p6 = x4 * c0;   
+         p7 = x5 * c0;   
+
+         /* Read the b[numTaps-4] coefficient */
+         c0 = *(pb++);
+
+         /* Read x[n-numTaps-6] sample */
+         x6 = *(px++);
+         
+         acc0 += p0;
+         acc1 += p1;
+         acc2 += p2;
+         acc3 += p3;
+         acc4 += p4;
+         acc5 += p5;
+         acc6 += p6;
+         acc7 += p7;
+
+         /* Perform the multiply-accumulates */      
+         p0 = x7 * c0;
+         p1 = x0 * c0;   
+         p2 = x1 * c0;   
+         p3 = x2 * c0;   
+         p4 = x3 * c0;   
+         p5 = x4 * c0;   
+         p6 = x5 * c0;   
+         p7 = x6 * c0;   
+
+         tapCnt--;
+         
+         acc0 += p0;
+         acc1 += p1;
+         acc2 += p2;
+         acc3 += p3;
+         acc4 += p4;
+         acc5 += p5;
+         acc6 += p6;
+         acc7 += p7;
+      }
+
+      /* If the filter length is not a multiple of 8, compute the remaining filter taps */
+      tapCnt = numTaps % 0x8u;
+
+      while(tapCnt > 0u)
+      {
+         /* Read coefficients */
+         c0 = *(pb++);
+
+         /* Fetch 1 state variable */
+         x7 = *(px++);
+
+         /* Perform the multiply-accumulates */      
+         p0 = x0 * c0;
+         p1 = x1 * c0;   
+         p2 = x2 * c0;   
+         p3 = x3 * c0;   
+         p4 = x4 * c0;   
+         p5 = x5 * c0;   
+         p6 = x6 * c0;   
+         p7 = x7 * c0;   
+
+         /* Reuse the present sample states for next sample */
+         x0 = x1;
+         x1 = x2;
+         x2 = x3;
+         x3 = x4;
+         x4 = x5;
+         x5 = x6;
+         x6 = x7;
+         
+         acc0 += p0;
+         acc1 += p1;
+         acc2 += p2;
+         acc3 += p3;
+         acc4 += p4;
+         acc5 += p5;
+         acc6 += p6;
+         acc7 += p7;
+
+         /* Decrement the loop counter */
+         tapCnt--;
+      }
+
+      /* Advance the state pointer by 8 to process the next group of 8 samples */
+      pState = pState + 8;
+
+      /* The results in the 8 accumulators, store in the destination buffer. */
+      *pDst++ = acc0;
+      *pDst++ = acc1;
+      *pDst++ = acc2;
+      *pDst++ = acc3;
+      *pDst++ = acc4;
+      *pDst++ = acc5;
+      *pDst++ = acc6;
+      *pDst++ = acc7;
+
+      blkCnt--;
+   }
+
+   /* If the blockSize is not a multiple of 8, compute any remaining output samples here.  
+   ** No loop unrolling is used. */
+   blkCnt = blockSize % 0x8u;
+
+   while(blkCnt > 0u)
+   {
+      /* Copy one sample at a time into state buffer */
+      *pStateCurnt++ = *pSrc++;
+
+      /* Set the accumulator to zero */
+      acc0 = 0.0f;
+
+      /* Initialize state pointer */
+      px = pState;
+
+      /* Initialize Coefficient pointer */
+      pb = (pCoeffs);
+
+      i = numTaps;
+
+      /* Perform the multiply-accumulates */
+      do
+      {
+         acc0 += *px++ * *pb++;
+         i--;
+
+      } while(i > 0u);
+
+      /* The result is store in the destination buffer. */
+      *pDst++ = acc0;
+
+      /* Advance state pointer by 1 for the next sample */
+      pState = pState + 1;
+
+      blkCnt--;
+   }
+
+   /* Processing is complete.  
+   ** Now copy the last numTaps - 1 samples to the start of the state buffer.  
+   ** This prepares the state buffer for the next function call. */
+
+   /* Points to the start of the state buffer */
+   pStateCurnt = S->pState;
+
+   tapCnt = (numTaps - 1u) >> 2u;
+
+   /* copy data */
+   while(tapCnt > 0u)
+   {
+      *pStateCurnt++ = *pState++;
+      *pStateCurnt++ = *pState++;
+      *pStateCurnt++ = *pState++;
+      *pStateCurnt++ = *pState++;
+
+      /* Decrement the loop counter */
+      tapCnt--;
+   }
+
+   /* Calculate remaining number of copies */
+   tapCnt = (numTaps - 1u) % 0x4u;
+
+   /* Copy the remaining q31_t data */
+   while(tapCnt > 0u)
+   {
+      *pStateCurnt++ = *pState++;
+
+      /* Decrement the loop counter */
+      tapCnt--;
+   }
+}
+
+#endif 
+
+/**  
+* @} end of FIR group  
+*/
diff --git a/Src/arm_fir_init_f32.c b/Src/arm_fir_init_f32.c
new file mode 100644
index 0000000..92bdc9e
--- /dev/null
+++ b/Src/arm_fir_init_f32.c
@@ -0,0 +1,96 @@
+/*-----------------------------------------------------------------------------    
+* Copyright (C) 2010-2014 ARM Limited. All rights reserved.    
+*    
+* $Date:        19. March 2015
+* $Revision: 	V.1.4.5
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:        arm_fir_init_f32.c    
+*    
+* Description:  Floating-point FIR filter initialization function.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.   
+* ---------------------------------------------------------------------------*/
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupFilters    
+ */
+
+/**    
+ * @addtogroup FIR    
+ * @{    
+ */
+
+/**    
+ * @details    
+ *    
+ * @param[in,out] *S points to an instance of the floating-point FIR filter structure.    
+ * @param[in] 	  numTaps  Number of filter coefficients in the filter.    
+ * @param[in]     *pCoeffs points to the filter coefficients buffer.    
+ * @param[in]     *pState points to the state buffer.    
+ * @param[in] 	  blockSize number of samples that are processed per call.    
+ * @return 		  none.    
+ *    
+ * <b>Description:</b>    
+ * \par    
+ * <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:    
+ * <pre>    
+ *    {b[numTaps-1], b[numTaps-2], b[N-2], ..., b[1], b[0]}    
+ * </pre>    
+ * \par    
+ * <code>pState</code> points to the array of state variables.    
+ * <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples, where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_f32()</code>.    
+ */
+
+void arm_fir_init_f32(
+  arm_fir_instance_f32 * S,
+  uint16_t numTaps,
+  float32_t * pCoeffs,
+  float32_t * pState,
+  uint32_t blockSize)
+{
+  /* Assign filter taps */
+  S->numTaps = numTaps;
+
+  /* Assign coefficient pointer */
+  S->pCoeffs = pCoeffs;
+
+  /* Clear state buffer and the size of state buffer is (blockSize + numTaps - 1) */
+  memset(pState, 0, (numTaps + (blockSize - 1u)) * sizeof(float32_t));
+
+  /* Assign state pointer */
+  S->pState = pState;
+
+}
+
+/**    
+ * @} end of FIR group    
+ */
diff --git a/TNC/bm78.cpp b/TNC/bm78.cpp
new file mode 100644
index 0000000..3d28159
--- /dev/null
+++ b/TNC/bm78.cpp
@@ -0,0 +1,481 @@
+// Copyright 2016 Rob Riggs <rob@mobilinkd.com>
+// All rights reserved.
+
+#include "bm78.h"
+#include "GPIO.hpp"
+#include "Log.h"
+#include "main.h"
+
+#include "stm32l4xx_hal.h"
+
+#include <cstdint>
+#include <cstring>
+#include <algorithm>
+#include <array>
+
+extern RTC_HandleTypeDef hrtc;
+extern UART_HandleTypeDef huart3;
+
+namespace mobilinkd { namespace tnc { namespace bm78 {
+
+/**
+ * The BM78 module is a dual-mode BT3.0 & BLE5.0 UART modules.  It supports
+ * transparent data transfer in one or both modes.  The module documentation
+ * only clearly describes how to configure the modules using a (horrible)
+ * Windows application.  Microchip publishes a library for use with their
+ * PIC chips that we use as a guide for implementing our own configuration
+ * code.
+ *
+ * The module must be booted into EEPROM programming mode in order to make
+ * changes.  This mode uses 115200 baud with no hardware flow control
+ *
+ * We program the module for the following features:
+ *
+ *  - The module is set for 115200 baud with hardware flow control.
+ *  - The name is changed to TNC3.
+ *  - The BT3.0 pairing PIN is set to 1234
+ *  - The BLE5.0 pairing PIN is set to "625653" (MBLNKD on a phone pad).
+ *  - The module power setting is set as low as possible for BLE.
+ */
+
+const uint32_t BT_INIT_MAGIC = 0xc0a2;
+
+void bm78_reset()
+{
+  // Must use HAL_Delay() here as osDelay() may not be available.
+  mobilinkd::tnc::gpio::BT_RESET::off();
+  HAL_Delay(1200);
+  mobilinkd::tnc::gpio::BT_RESET::on();
+  HAL_Delay(800);
+
+}
+
+void exit_command_mode()
+{
+    gpio::BT_CMD::on();
+    bm78_reset();
+    INFO("BM78 in PASSTHROUGH mode");
+}
+
+HAL_StatusTypeDef read_response(uint8_t* buffer, uint16_t size, uint32_t timeout)
+{
+    memset(buffer, 0, size);
+
+    auto result = HAL_UART_Receive(&huart3, buffer, size - 1, timeout);
+
+    if (result == HAL_TIMEOUT) result = HAL_OK;
+
+    return result;
+}
+
+bool enter_program_mode()
+{
+    // Ensure we start out disconnected.
+    gpio::BT_CMD::off();
+    HAL_Delay(10);
+    gpio::BT_RESET::off();
+    HAL_Delay(10);      // Spec says minimum 63ns
+    gpio::BT_RESET::on();
+    HAL_Delay(200);     // I could not find timing specifications for this.
+
+    huart3.Init.HwFlowCtl = UART_HWCONTROL_NONE;
+    huart3.Init.BaudRate = 115200;
+    if (HAL_UART_Init(&huart3) != HAL_OK)
+    {
+        CxxErrorHandler();
+    }
+
+    // Read (cmd = 0x29) all 1K bytes, starting at address 0, 128 bytes at a time.
+    uint8_t cmd[] = {0x01, 0x29, 0xfc, 0x03, 0x00, 0x00, 0x80};
+
+    constexpr const uint16_t BLOCK_SIZE = 128;
+
+    for (uint16_t addr = 0; addr != 0x500; addr += BLOCK_SIZE)
+    {
+        cmd[5] = addr & 0xFF;
+        cmd[4] = (addr >> 8) & 0xFF;
+        if (HAL_UART_Transmit(&huart3, cmd, sizeof(cmd), 10) != HAL_OK)
+        {
+            ERROR("Read EEPROM transmit failed");
+            return false;
+        }
+
+        uint8_t buffer[BLOCK_SIZE + 10];
+
+        if (HAL_UART_Receive(&huart3, buffer, BLOCK_SIZE + 10, 1000) != HAL_OK)
+        {
+            ERROR("Read EEPROM receive failed");
+            return false;
+        }
+
+        for (size_t i = 0; i != BLOCK_SIZE; i += 16) {
+            printf("%04X: ", addr + i);
+            for (size_t j = 0; j != 16; j++) {
+                printf("%02X ", buffer[i + j + 10]);
+            }
+            printf("\r\n");
+            HAL_Delay(10);
+        }
+    }
+    return true;
+}
+
+bool exit_program_mode()
+{
+    auto result = true;
+
+    // Read (cmd = 0x29) all 1K bytes, starting at address 0, 128 bytes at a time.
+    uint8_t cmd[] = {0x01, 0x29, 0xfc, 0x03, 0x00, 0x00, 0x80};
+
+    constexpr const uint16_t BLOCK_SIZE = 128;
+
+    for (uint16_t addr = 0; addr != 0x500; addr += BLOCK_SIZE)
+    {
+        cmd[5] = addr & 0xFF;
+        cmd[4] = (addr >> 8) & 0xFF;
+        if (HAL_UART_Transmit(&huart3, cmd, sizeof(cmd), 10) != HAL_OK)
+        {
+            ERROR("Read EEPROM transmit failed");
+            result = false;
+            break;
+        }
+
+        uint8_t buffer[BLOCK_SIZE + 10];
+
+        if (HAL_UART_Receive(&huart3, buffer, BLOCK_SIZE + 10, 1000) != HAL_OK)
+        {
+            ERROR("Read EEPROM receive failed");
+            result = false;
+            break;
+        }
+
+        for (size_t i = 0; i != BLOCK_SIZE; i += 16) {
+            printf("%04X: ", addr + i);
+            for (size_t j = 0; j != 16; j++) {
+                int c = buffer[i + j + 10];
+                printf(" %c ", isprint(c) ? (char) c : '.');
+            }
+            printf("\r\n");
+            printf("%04X: ", addr + i);
+            for (size_t j = 0; j != 16; j++) {
+                printf("%02X ", buffer[i + j + 10]);
+            }
+            printf("\r\n");
+            HAL_Delay(10);
+        }
+    }
+
+
+    // Ensure we start out disconnected.
+    gpio::BT_CMD::on();
+    HAL_Delay(10);
+    gpio::BT_RESET::off();
+    HAL_Delay(1);       // Spec says minimum 63ns.
+    gpio::BT_RESET::on();
+    HAL_Delay(400);     // Spec says 354ms.
+
+    huart3.Init.HwFlowCtl = UART_HWCONTROL_RTS_CTS;
+    huart3.Init.BaudRate = 115200;
+    if (HAL_UART_Init(&huart3) != HAL_OK)
+    {
+        CxxErrorHandler();
+    }
+
+    return result;
+}
+
+bool set_reset()
+{
+    return true;
+}
+
+bool parse_write_result(const char* function)
+{
+    uint8_t result[7];
+
+    if (HAL_UART_Receive(&huart3, result, sizeof(result), 1000) != HAL_OK)
+    {
+        ERROR("%s receive failed", function);
+        return false;
+    }
+
+    if (result[6] != 0)
+    {
+        ERROR("%s operation failed", function);
+    } else {
+        INFO("%s succeeded", function);
+    }
+
+    return result[6] == 0;
+
+}
+
+bool set_name()
+{
+    uint8_t cmd[] = {0x01, 0x27, 0xfc, 0x13, 0x00, 0x0b, 0x10
+        , 'T', 'N', 'C', '3', ' ', 'M', 'o', 'b', 'i', 'l', 'i', 'n', 'k', 'd'
+        , 0x00, 0x00};
+
+    if (HAL_UART_Transmit(&huart3, cmd, sizeof(cmd), 10) != HAL_OK)
+    {
+        ERROR("%s transmit failed", __PRETTY_FUNCTION__);
+        return false;
+    }
+
+    return parse_write_result(__PRETTY_FUNCTION__);
+}
+
+bool set_pin()
+{
+    uint8_t cmd[] = {0x01, 0x27, 0xfc, 0x13, 0x00, 0x5b, 0x10
+        , '1', '2', '3', '4', '5', '6', 0x00, 0x00
+        , 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+
+    if (HAL_UART_Transmit(&huart3, cmd, sizeof(cmd), 10) != HAL_OK)
+    {
+        ERROR("%s transmit failed", __PRETTY_FUNCTION__);
+        return false;
+    }
+
+    return parse_write_result(__PRETTY_FUNCTION__);
+}
+
+bool set_misc()
+{
+    // System options. Security parameters.
+    // 0x01AD: 01 04 00 19 41 00
+    uint8_t cmd[] = {0x01, 0x27, 0xfc, 0x09, 0x01, 0xad, 0x06
+        , 0x02, 0x04, 0x00, 0x59, 0x09, 0x00};
+
+    if (HAL_UART_Transmit(&huart3, cmd, sizeof(cmd), 10) != HAL_OK)
+    {
+        ERROR("%s transmit failed", __PRETTY_FUNCTION__);
+        return false;
+    }
+
+    return parse_write_result(__PRETTY_FUNCTION__);
+}
+
+bool set_le_service_name()
+{
+    // LE Service Name.
+    // 0x0217: 08 KISS TNC
+    uint8_t cmd[] = {0x01, 0x27, 0xfc, 0x0c, 0x02, 0x17, 0x09
+        , 0x08, 'K', 'I', 'S', 'S', ' ', 'T', 'N', 'C'};
+
+    if (HAL_UART_Transmit(&huart3, cmd, sizeof(cmd), 10) != HAL_OK)
+    {
+        ERROR("%s transmit failed", __PRETTY_FUNCTION__);
+        return false;
+    }
+
+    return parse_write_result(__PRETTY_FUNCTION__);
+}
+
+bool set_le_service_uuid()
+{
+    // LE Service UUID.
+    // 0x0354: 6F E6 FD 82 C1 36 65 A4 16 4E 88 55  5E 6A EB 27
+    uint8_t cmd[] = {0x01, 0x27, 0xfc, 0x13, 0x03, 0x54, 0x10
+        , 0x6F, 0xE6, 0xFD, 0x82, 0xC1, 0x36, 0x65, 0xA4
+        , 0x16, 0x4E, 0x88, 0x55, 0x5E, 0x6A, 0xEB, 0x27};
+
+    if (HAL_UART_Transmit(&huart3, cmd, sizeof(cmd), 10) != HAL_OK)
+    {
+        ERROR("%s transmit failed", __PRETTY_FUNCTION__);
+        return false;
+    }
+
+    return parse_write_result(__PRETTY_FUNCTION__);
+}
+
+bool set_le_tx_attribute_uuid()
+{
+    // TX attribute UUID.
+    // 0x0364: BB 68 1F 96 B0 01 49 AE C9 46 2A BA 02 00 00 00
+    uint8_t cmd[] = {0x01, 0x27, 0xfc, 0x13, 0x03, 0x64, 0x10
+        , 0xBB, 0x68, 0x1F, 0x96, 0xB0, 0x01, 0x49, 0xAE
+        , 0xC9, 0x46, 0x2A, 0xBA, 0x02, 0x00, 0x00, 0x00};
+
+    if (HAL_UART_Transmit(&huart3, cmd, sizeof(cmd), 10) != HAL_OK)
+    {
+        ERROR("%s transmit failed", __PRETTY_FUNCTION__);
+        return false;
+    }
+
+    return parse_write_result(__PRETTY_FUNCTION__);
+}
+
+bool set_le_rx_attribute_uuid()
+{
+    // RX attribute UUID.
+    // 0x0374: BB 68 1F 96 B0 01 49 AE C9 46 2A BA 01 00 00 00
+    uint8_t cmd[] = {0x01, 0x27, 0xfc, 0x13, 0x03, 0x74, 0x10
+        , 0xBB, 0x68, 0x1F, 0x96, 0xB0, 0x01, 0x49, 0xAE
+        , 0xC9, 0x46, 0x2A, 0xBA, 0x01, 0x00, 0x00, 0x00};
+
+    if (HAL_UART_Transmit(&huart3, cmd, sizeof(cmd), 10) != HAL_OK)
+    {
+        ERROR("%s transmit failed", __PRETTY_FUNCTION__);
+        return false;
+    }
+
+    return parse_write_result(__PRETTY_FUNCTION__);
+}
+
+bool set_le_attribute_properties()
+{
+    // RX notify; TX write/write without response.
+    // 0x0384: 10 0C
+    uint8_t cmd[] = {0x01, 0x27, 0xfc, 0x05, 0x03, 0x84, 0x02, 0x10, 0x0c};
+
+    if (HAL_UART_Transmit(&huart3, cmd, sizeof(cmd), 10) != HAL_OK)
+    {
+        ERROR("set_le_attribute_properties transmit failed");
+        return false;
+    }
+
+    return parse_write_result(__PRETTY_FUNCTION__);
+}
+
+bool set_le_manufacturer()
+{
+    // LE manufacturer string.
+    // 0x0300: Mobilinkd LLC
+    uint8_t cmd[] = {0x01, 0x27, 0xfc, 0x13, 0x03, 0x00, 0x10
+        , 'M', 'o', 'b', 'i', 'l', 'i', 'n', 'k', 'd', ' ', 'L', 'L', 'C'
+        , 0x00, 0x00, 0x00};
+
+    if (HAL_UART_Transmit(&huart3, cmd, sizeof(cmd), 10) != HAL_OK)
+    {
+        ERROR("%s transmit failed", __PRETTY_FUNCTION__);
+        return false;
+    }
+
+    return parse_write_result(__PRETTY_FUNCTION__);
+}
+
+bool set_le_advertising()
+{
+    // undocumented but required for proper advertising.
+    // 0x03A0: 31 bytes
+    uint8_t cmd[] = {0x01, 0x27, 0xfc, 0x22, 0x03, 0xa0, 0x1f
+        , 0x1B, 0x05, 0x09, 0x54, 0x4E, 0x43, 0x33, 0x11
+        , 0x06, 0x6F, 0xE6, 0xFD, 0x82, 0xC1, 0x36, 0x65
+        , 0xA4, 0x16, 0x4E, 0x88, 0x55, 0x5E, 0x6A, 0xEB
+        , 0x27, 0x02, 0x01, 0x02, 0x00, 0x00, 0x00};
+
+    if (HAL_UART_Transmit(&huart3, cmd, sizeof(cmd), 10) != HAL_OK)
+    {
+        ERROR("%s transmit failed", __PRETTY_FUNCTION__);
+        return false;
+    }
+
+    return parse_write_result(__PRETTY_FUNCTION__);
+}
+
+bool configure_le_service()
+{
+    bool result = true;
+
+    result &= set_le_service_name();
+    result &= set_le_service_uuid();
+    result &= set_le_tx_attribute_uuid();
+    result &= set_le_rx_attribute_uuid();
+    result &= set_le_attribute_properties();
+    result &= set_le_manufacturer();
+    result &= set_le_advertising();
+
+    return result;
+}
+
+/**
+ * Set the baud rate to 115200, turn on RTS/CTS flow control and then
+ * reset the BLE module.
+ *
+ * @return true on success, otherwise false.
+ */
+bool set_uart()
+{
+  huart3.Init.HwFlowCtl = UART_HWCONTROL_RTS_CTS;
+  huart3.Init.BaudRate = 115200;
+  if (HAL_UART_Init(&huart3) != HAL_OK)
+  {
+      CxxErrorHandler();
+  }
+
+  return true;
+}
+
+bool set_work()
+{
+    return true;
+}
+
+bool set_power()
+{
+  return true;
+}
+
+bool set_secure()
+{
+    return true;
+}
+
+bool set_reliable()
+{
+    return true;
+}
+
+}}} // mobilinkd::tnc::bm78
+
+int bm78_initialized()
+{
+    using namespace mobilinkd::tnc;
+    using namespace mobilinkd::tnc::bm78;
+
+    return HAL_RTCEx_BKUPRead(&hrtc, RTC_BKP_DR1) == BT_INIT_MAGIC;
+}
+
+int bm78_initialize()
+{
+    using namespace mobilinkd::tnc;
+    using namespace mobilinkd::tnc::bm78;
+
+    int result = 0;
+
+    if (!enter_program_mode()) result = 1;
+    else if (!set_name()) result = 2;
+    else if (!set_pin()) result = 3;
+    else if (!set_misc()) result = 4;
+    else if (!configure_le_service()) result = 5;
+    exit_program_mode();
+
+#if 0
+    bt_status_init();
+
+    if (not enter_command_mode()) result = 1;
+    else if (not set_uart()) result = 4;
+    else if (not set_work()) result = 7;
+    else if (not set_power()) result = 8;
+    else if (not set_secure()) result = 9;
+    else if (not set_gpio()) result = 3;
+    else if (not set_name()) result = 2;
+    else if (not set_reset()) result = 10;
+    exit_command_mode();
+#if 1
+    if (result == 0) {
+        /* Write BT_INIT_MAGIC to RTC back-up register RTC_BKP_DR1 to indicate
+           that the HC-05 module has been initialized.  */
+        HAL_PWR_EnableBkUpAccess();
+        HAL_RTCEx_BKUPWrite(&hrtc, RTC_BKP_DR1, BT_INIT_MAGIC);
+        HAL_PWR_DisableBkUpAccess();
+    }
+#endif
+#endif
+    return result;
+}
+
+void bm78_state_change(void) {}
+int bm78_disable(void) {return 0;}
+int bm78_enable(void) {return 0;}
diff --git a/TNC/bm78.h b/TNC/bm78.h
new file mode 100644
index 0000000..a7367b3
--- /dev/null
+++ b/TNC/bm78.h
@@ -0,0 +1,28 @@
+// Copyright 2016 Rob Riggs <rob@mobilinkd.com>
+// All rights reserved.
+
+#ifndef MOBILINKD__TNC__BM78_H_
+#define MOBILINKD__TNC__BM78_H_
+
+#include <stm32l4xx_hal.h>
+
+#ifdef __cplusplus
+#include <cstdint>
+
+extern "C" {
+#else
+#include <stdint.h>
+#endif
+
+void bm78_state_change(void);
+int bm78_disable(void);
+int bm78_enable(void);
+int bm78_initialized(void);
+int bm78_initialize(void);
+HAL_StatusTypeDef bm78_send(const char* data, uint16_t size, uint32_t timeout);
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+#endif // MOBILINKD__TNC__BM78_H_
diff --git a/stlink-tnc3.cfg b/stlink-tnc3.cfg
new file mode 100644
index 0000000..2c95e4f
--- /dev/null
+++ b/stlink-tnc3.cfg
@@ -0,0 +1,13 @@
+# This is an TNC3 with a STM32L433CCU6 chip.
+
+source [find interface/stlink-v2-1.cfg]
+
+transport select hla_swd
+
+set WORKAREASIZE 0x4000
+source [find target/stm32l4x.cfg]
+
+reset_config srst_only
+itm port 0 on
+tpiu config internal swv uart off 48000000
+
diff --git a/stm32l4x.cfg b/stm32l4x.cfg
new file mode 100644
index 0000000..296507d
--- /dev/null
+++ b/stm32l4x.cfg
@@ -0,0 +1,111 @@
+# script for stm32l4x family
+
+#
+# stm32l4 devices support both JTAG and SWD transports.
+#
+source [find target/swj-dp.tcl]
+source [find mem_helper.tcl]
+
+if { [info exists CHIPNAME] } {
+   set _CHIPNAME $CHIPNAME
+} else {
+   set _CHIPNAME stm32l4x
+}
+
+set _ENDIAN little
+
+# Work-area is a space in RAM used for flash programming
+# Smallest current target has 64kB ram, use 32kB by default to avoid surprises
+if { [info exists WORKAREASIZE] } {
+   set _WORKAREASIZE $WORKAREASIZE
+} else {
+   set _WORKAREASIZE 0x8000
+}
+
+#jtag scan chain
+if { [info exists CPUTAPID] } {
+   set _CPUTAPID $CPUTAPID
+} else {
+   if { [using_jtag] } {
+      # See STM Document RM0351
+      # Section 44.6.3 - corresponds to Cortex-M4 r0p1
+      set _CPUTAPID 0x4ba00477
+   } {
+      set _CPUTAPID 0x2ba01477
+   }
+}
+
+swj_newdap $_CHIPNAME cpu -irlen 4 -ircapture 0x1 -irmask 0xf -expected-id $_CPUTAPID
+
+if {[using_jtag]} {
+   jtag newtap $_CHIPNAME bs -irlen 5
+}
+
+set _TARGETNAME $_CHIPNAME.cpu
+target create $_TARGETNAME cortex_m -endian $_ENDIAN -chain-position $_TARGETNAME
+
+$_TARGETNAME configure -work-area-phys 0x20000000 -work-area-size $_WORKAREASIZE -work-area-backup 0
+
+set _FLASHNAME $_CHIPNAME.flash
+flash bank $_FLASHNAME stm32l4x 0 0 0 0 $_TARGETNAME
+
+# Common knowledges tells JTAG speed should be <= F_CPU/6.
+# F_CPU after reset is MSI 4MHz, so use F_JTAG = 500 kHz to stay on
+# the safe side.
+#
+# Note that there is a pretty wide band where things are
+# more or less stable, see http://openocd.zylin.com/#/c/3366/
+adapter_khz 500
+
+adapter_nsrst_delay 100
+if {[using_jtag]} {
+ jtag_ntrst_delay 100
+}
+
+reset_config srst_nogate
+
+if {![using_hla]} {
+   # if srst is not fitted use SYSRESETREQ to
+   # perform a soft reset
+   cortex_m reset_config sysresetreq
+}
+
+$_TARGETNAME configure -event reset-init {
+	# CPU comes out of reset with MSI_ON | MSI_RDY | MSI Range 6 (4 MHz).
+	# Use MSI 24 MHz clock, compliant even with VOS == 2.
+	# 3 WS compliant with VOS == 2 and 24 MHz.
+	mww 0x40022000 0x00000103   ;# FLASH_ACR = PRFTBE | 3(Latency)
+	mww 0x40021000 0x00000099   ;# RCC_CR = MSI_ON | MSIRGSEL| MSI Range 10
+	# Boost JTAG frequency
+	adapter_khz 4000
+}
+
+$_TARGETNAME configure -event reset-start {
+	# Reset clock is MSI (4 MHz)
+	adapter_khz 500
+}
+
+$_TARGETNAME configure -event examine-end {
+	# DBGMCU_CR |= DBG_STANDBY | DBG_STOP | DBG_SLEEP
+	mmw 0xE0042004 0x00000007 0
+
+	# Stop watchdog counters during halt
+	# DBGMCU_APB1_FZ |= DBG_IWDG_STOP | DBG_WWDG_STOP
+	mmw 0xE0042008 0x00001800 0
+}
+
+$_TARGETNAME configure -event trace-config {
+	# Set TRACE_IOEN; TRACE_MODE is set to async; when using sync
+	# change this value accordingly to configure trace pins
+	# assignment
+	mmw 0xE0042004 0x00000020 0
+}
+
+$_TARGETNAME configure -event gdb-attach {
+   halt
+}
+ 
+$_TARGETNAME configure -event gdb-attach {
+   reset init
+}
+