From 478274aff13249065523bffb59094c9c9e893a50 Mon Sep 17 00:00:00 2001 From: Jm Casler Date: Mon, 11 Apr 2022 20:09:48 -0700 Subject: [PATCH 1/6] Beginning of compression --- src/mesh/Router.cpp | 49 +- src/mesh/compression/unishox2.c | 1345 +++++++++++++++++++++++++++++++ src/mesh/compression/unishox2.h | 278 +++++++ 3 files changed, 1660 insertions(+), 12 deletions(-) create mode 100644 src/mesh/compression/unishox2.c create mode 100644 src/mesh/compression/unishox2.h diff --git a/src/mesh/Router.cpp b/src/mesh/Router.cpp index 08de01ccd..34ab85a26 100644 --- a/src/mesh/Router.cpp +++ b/src/mesh/Router.cpp @@ -1,12 +1,15 @@ -#include "configuration.h" #include "Router.h" #include "Channels.h" #include "CryptoEngine.h" #include "NodeDB.h" #include "RTC.h" +#include "configuration.h" #include "main.h" #include "mesh-pb-constants.h" #include "modules/RoutingModule.h" +extern "C" { +#include "mesh/compression/unishox2.h" +} #if defined(HAS_WIFI) || defined(PORTDUINO) #include "mqtt/MQTT.h" @@ -210,29 +213,27 @@ ErrorCode Router::send(MeshPacket *p) if (p->which_payloadVariant == MeshPacket_decoded_tag) { ChannelIndex chIndex = p->channel; // keep as a local because we are about to change it - - #if defined(HAS_WIFI) || defined(PORTDUINO) - //check if we should send decrypted packets to mqtt + // check if we should send decrypted packets to mqtt - //truth table: + // truth table: /* mqtt_server mqtt_encryption_enabled should_encrypt * not set 0 1 * not set 1 1 * set 0 0 * set 1 1 - * + * * => so we only decrypt mqtt if they have a custom mqtt server AND mqtt_encryption_enabled is FALSE - */ + */ bool shouldActuallyEncrypt = true; if (*radioConfig.preferences.mqtt_server && !radioConfig.preferences.mqtt_encryption_enabled) { shouldActuallyEncrypt = false; } - + DEBUG_MSG("Should encrypt MQTT?: %d\n", shouldActuallyEncrypt); - //the packet is currently in a decrypted state. send it now if they want decrypted packets + // the packet is currently in a decrypted state. send it now if they want decrypted packets if (mqtt && !shouldActuallyEncrypt) mqtt->onSend(*p, chIndex); #endif @@ -244,8 +245,8 @@ ErrorCode Router::send(MeshPacket *p) } #if defined(HAS_WIFI) || defined(PORTDUINO) - //the packet is now encrypted. - //check if we should send encrypted packets to mqtt + // the packet is now encrypted. + // check if we should send encrypted packets to mqtt if (mqtt && shouldActuallyEncrypt) mqtt->onSend(*p, chIndex); #endif @@ -276,7 +277,7 @@ bool perhapsDecode(MeshPacket *p) if (p->which_payloadVariant == MeshPacket_decoded_tag) return true; // If packet was already decoded just return - //assert(p->which_payloadVariant == MeshPacket_encrypted_tag); + // assert(p->which_payloadVariant == MeshPacket_encrypted_tag); // Try to find a channel that works with this hash for (ChannelIndex chIndex = 0; chIndex < channels.getNumChannels(); chIndex++) { @@ -324,6 +325,30 @@ Routing_Error perhapsEncode(MeshPacket *p) size_t numbytes = pb_encode_to_bytes(bytes, sizeof(bytes), Data_fields, &p->decoded); + if (1) { + // int orig_len, compressed_len; + int compressed_len; + char compressed_out[100] = {0}; + // char *orig = (char *)"Hiiiiiiii! :) How are you doing? I am doing well."; + // char *orig = (char *)"Meshtastic"; + + // orig_len = strlen(orig); + // compressed_len = unishox2_compress_simple(orig, orig_len, compressed_out); + compressed_len = unishox2_compress_simple((char *)bytes, numbytes, compressed_out); + + Serial.println(compressed_len); + Serial.println(compressed_out); + //&p->decoded.portnum; + + char decompressed_out[100] = {}; + int decompressed_len; + + decompressed_len = unishox2_decompress_simple(compressed_out, compressed_len, decompressed_out); + + Serial.println(decompressed_len); + Serial.println(decompressed_out); + } + if (numbytes > MAX_RHPACKETLEN) return Routing_Error_TOO_LARGE; diff --git a/src/mesh/compression/unishox2.c b/src/mesh/compression/unishox2.c new file mode 100644 index 000000000..e9d02e8a5 --- /dev/null +++ b/src/mesh/compression/unishox2.c @@ -0,0 +1,1345 @@ +/* + * Copyright (C) 2020 Siara Logics (cc) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * @author Arundale Ramanathan + * + */ +/** + * @file unishox2.c + * @author Arundale Ramanathan, James Z. M. Gao + * @brief Main code of Unishox2 Compression and Decompression library + * + * This file implements the code for the Unishox API function \n + * defined in unishox2.h + */ + +#include +#include +#include +#include +#include + +#include "unishox2.h" + +/// byte is unsigned char +typedef unsigned char byte; + +/// possible horizontal sets and states +enum {USX_ALPHA = 0, USX_SYM, USX_NUM, USX_DICT, USX_DELTA, USX_NUM_TEMP}; + +/// This 2D array has the characters for the sets USX_ALPHA, USX_SYM and USX_NUM. Where a character cannot fit into a byte, 0 is used and handled in code. +byte usx_sets[][28] = {{ 0, ' ', 'e', 't', 'a', 'o', 'i', 'n', + 's', 'r', 'l', 'c', 'd', 'h', 'u', 'p', 'm', 'b', + 'g', 'w', 'f', 'y', 'v', 'k', 'q', 'j', 'x', 'z'}, + {'"', '{', '}', '_', '<', '>', ':', '\n', + 0, '[', ']', '\\', ';', '\'', '\t', '@', '*', '&', + '?', '!', '^', '|', '\r', '~', '`', 0, 0, 0}, + { 0, ',', '.', '0', '1', '9', '2', '5', '-', + '/', '3', '4', '6', '7', '8', '(', ')', ' ', + '=', '+', '$', '%', '#', 0, 0, 0, 0, 0}}; + +/// Stores position of letter in usx_sets. +/// First 3 bits - position in usx_hcodes +/// Next 5 bits - position in usx_vcodes +byte usx_code_94[94]; + +/// Vertical codes starting from the MSB +byte usx_vcodes[] = { 0x00, 0x40, 0x60, 0x80, 0x90, 0xA0, 0xB0, + 0xC0, 0xD0, 0xD8, 0xE0, 0xE4, 0xE8, 0xEC, + 0xEE, 0xF0, 0xF2, 0xF4, 0xF6, 0xF7, 0xF8, + 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF }; + +/// Length of each veritical code +byte usx_vcode_lens[] = { 2, 3, 3, 4, 4, 4, 4, + 4, 5, 5, 6, 6, 6, 7, + 7, 7, 7, 7, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8 }; + +/// Vertical Codes and Set number for frequent sequences in sets USX_SYM and USX_NUM. First 3 bits indicate set (USX_SYM/USX_NUM) and rest are vcode positions +byte usx_freq_codes[] = {(1 << 5) + 25, (1 << 5) + 26, (1 << 5) + 27, (2 << 5) + 23, (2 << 5) + 24, (2 << 5) + 25}; + +/// Not used +const int UTF8_MASK[] = {0xE0, 0xF0, 0xF8}; +/// Not used +const int UTF8_PREFIX[] = {0xC0, 0xE0, 0xF0}; + +/// Minimum length to consider as repeating sequence +#define NICE_LEN 5 + +/// Set (USX_NUM - 2) and vertical code (26) for encoding repeating letters +#define RPT_CODE ((2 << 5) + 26) +/// Set (USX_NUM - 2) and vertical code (27) for encoding terminator +#define TERM_CODE ((2 << 5) + 27) +/// Set (USX_SYM - 1) and vertical code (7) for encoding Line feed \\n +#define LF_CODE ((1 << 5) + 7) +/// Set (USX_NUM - 1) and vertical code (8) for encoding \\r\\n +#define CRLF_CODE ((1 << 5) + 8) +/// Set (USX_NUM - 1) and vertical code (22) for encoding \\r +#define CR_CODE ((1 << 5) + 22) +/// Set (USX_NUM - 1) and vertical code (14) for encoding \\t +#define TAB_CODE ((1 << 5) + 14) +/// Set (USX_NUM - 2) and vertical code (17) for space character when it appears in USX_NUM state \\r +#define NUM_SPC_CODE ((2 << 5) + 17) + +/// Code for special code (11111) when state=USX_DELTA +#define UNI_STATE_SPL_CODE 0xF8 +/// Length of Code for special code when state=USX_DELTA +#define UNI_STATE_SPL_CODE_LEN 5 +/// Code for switch code when state=USX_DELTA +#define UNI_STATE_SW_CODE 0x80 +/// Length of Code for Switch code when state=USX_DELTA +#define UNI_STATE_SW_CODE_LEN 2 + +/// Switch code in USX_ALPHA and USX_NUM 00 +#define SW_CODE 0 +/// Length of Switch code +#define SW_CODE_LEN 2 +/// Terminator bit sequence for Preset 1. Length varies depending on state as per following macros +#define TERM_BYTE_PRESET_1 0 +/// Length of Terminator bit sequence when state is lower +#define TERM_BYTE_PRESET_1_LEN_LOWER 6 +/// Length of Terminator bit sequence when state is upper +#define TERM_BYTE_PRESET_1_LEN_UPPER 4 + +/// Offset at which usx_code_94 starts +#define USX_OFFSET_94 33 + +/// global to indicate whether initialization is complete or not +byte is_inited = 0; + +/// Fills the usx_code_94 94 letter array based on sets of characters at usx_sets \n +/// For each element in usx_code_94, first 3 msb bits is set (USX_ALPHA / USX_SYM / USX_NUM) \n +/// and the rest 5 bits indicate the vertical position in the corresponding set +void init_coder() { + if (is_inited) + return; + memset(usx_code_94, '\0', sizeof(usx_code_94)); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 28; j++) { + byte c = usx_sets[i][j]; + if (c != 0 && c > 32) { + usx_code_94[c - USX_OFFSET_94] = (i << 5) + j; + if (c >= 'a' && c <= 'z') + usx_code_94[c - USX_OFFSET_94 - ('a' - 'A')] = (i << 5) + j; + } + } + } + is_inited = 1; +} + +/// Mask for retrieving each code to be encoded according to its length +unsigned int usx_mask[] = {0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF}; + +/// Appends specified number of bits to the output (out) \n +/// If maximum limit (olen) is reached, -1 is returned \n +/// Otherwise clen bits in code are appended to out starting with MSB +int append_bits(char *out, int olen, int ol, byte code, int clen) { + + byte cur_bit; + byte blen; + unsigned char a_byte; + int oidx; + + //printf("%d,%x,%d,%d\n", ol, code, clen, state); + + while (clen > 0) { + cur_bit = ol % 8; + blen = clen; + a_byte = code & usx_mask[blen - 1]; + a_byte >>= cur_bit; + if (blen + cur_bit > 8) + blen = (8 - cur_bit); + oidx = ol / 8; + if (oidx < 0 || olen <= oidx) + return -1; + if (cur_bit == 0) + out[oidx] = a_byte; + else + out[oidx] |= a_byte; + code <<= blen; + ol += blen; + clen -= blen; + } + return ol; +} + +/// This is a safe call to append_bits() making sure it does not write past olen +#define SAFE_APPEND_BITS(exp) do { \ + const int newidx = (exp); \ + if (newidx < 0) return newidx; \ +} while (0) + +/// Appends switch code to out depending on the state (USX_DELTA or other) +int append_switch_code(char *out, int olen, int ol, byte state) { + if (state == USX_DELTA) { + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, UNI_STATE_SPL_CODE, UNI_STATE_SPL_CODE_LEN)); + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, UNI_STATE_SW_CODE, UNI_STATE_SW_CODE_LEN)); + } else + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, SW_CODE, SW_CODE_LEN)); + return ol; +} + +/// Appends given horizontal and veritical code bits to out +int append_code(char *out, int olen, int ol, byte code, byte *state, const byte usx_hcodes[], const byte usx_hcode_lens[]) { + byte hcode = code >> 5; + byte vcode = code & 0x1F; + if (!usx_hcode_lens[hcode] && hcode != USX_ALPHA) + return ol; + switch (hcode) { + case USX_ALPHA: + if (*state != USX_ALPHA) { + SAFE_APPEND_BITS(ol = append_switch_code(out, olen, ol, *state)); + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA])); + *state = USX_ALPHA; + } + break; + case USX_SYM: + SAFE_APPEND_BITS(ol = append_switch_code(out, olen, ol, *state)); + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_hcodes[USX_SYM], usx_hcode_lens[USX_SYM])); + break; + case USX_NUM: + if (*state != USX_NUM) { + SAFE_APPEND_BITS(ol = append_switch_code(out, olen, ol, *state)); + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_hcodes[USX_NUM], usx_hcode_lens[USX_NUM])); + if (usx_sets[hcode][vcode] >= '0' && usx_sets[hcode][vcode] <= '9') + *state = USX_NUM; + } + } + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_vcodes[vcode], usx_vcode_lens[vcode])); + return ol; +} + +/// Length of bits used to represent count for each level +const byte count_bit_lens[5] = {2, 4, 7, 11, 16}; +/// Cumulative counts represented at each level +const int32_t count_adder[5] = {4, 20, 148, 2196, 67732}; +/// Codes used to specify the level that the count belongs to +const byte count_codes[] = {0x01, 0x82, 0xC3, 0xE4, 0xF4}; +/// Encodes given count to out +int encodeCount(char *out, int olen, int ol, int count) { + // First five bits are code and Last three bits of codes represent length + for (int i = 0; i < 5; i++) { + if (count < count_adder[i]) { + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, (count_codes[i] & 0xF8), count_codes[i] & 0x07)); + uint16_t count16 = (count - (i ? count_adder[i - 1] : 0)) << (16 - count_bit_lens[i]); + if (count_bit_lens[i] > 8) { + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, count16 >> 8, 8)); + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, count16 & 0xFF, count_bit_lens[i] - 8)); + } else + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, count16 >> 8, count_bit_lens[i])); + return ol; + } + } + return ol; +} + +/// Length of bits used to represent delta code for each level +const byte uni_bit_len[5] = {6, 12, 14, 16, 21}; +/// Cumulative delta codes represented at each level +const int32_t uni_adder[5] = {0, 64, 4160, 20544, 86080}; + +/// Encodes the unicode code point given by code to out. prev_code is used to calculate the delta +int encodeUnicode(char *out, int olen, int ol, int32_t code, int32_t prev_code) { + // First five bits are code and Last three bits of codes represent length + //const byte codes[8] = {0x00, 0x42, 0x83, 0xA3, 0xC3, 0xE4, 0xF5, 0xFD}; + const byte codes[6] = {0x01, 0x82, 0xC3, 0xE4, 0xF5, 0xFD}; + int32_t till = 0; + int32_t diff = code - prev_code; + if (diff < 0) + diff = -diff; + //printf("%ld, ", code); + //printf("Diff: %d\n", diff); + for (int i = 0; i < 5; i++) { + till += (1 << uni_bit_len[i]); + if (diff < till) { + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, (codes[i] & 0xF8), codes[i] & 0x07)); + //if (diff) { + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, prev_code > code ? 0x80 : 0, 1)); + int32_t val = diff - uni_adder[i]; + //printf("Val: %d\n", val); + if (uni_bit_len[i] > 16) { + val <<= (24 - uni_bit_len[i]); + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, val >> 16, 8)); + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, (val >> 8) & 0xFF, 8)); + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, val & 0xFF, uni_bit_len[i] - 16)); + } else + if (uni_bit_len[i] > 8) { + val <<= (16 - uni_bit_len[i]); + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, val >> 8, 8)); + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, val & 0xFF, uni_bit_len[i] - 8)); + } else { + val <<= (8 - uni_bit_len[i]); + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, val & 0xFF, uni_bit_len[i])); + } + return ol; + } + } + return ol; +} + +/// Reads UTF-8 character from in. Also returns the number of bytes occupied by the UTF-8 character in utf8len +int32_t readUTF8(const char *in, int len, int l, int *utf8len) { + int32_t ret = 0; + if (l < (len - 1) && (in[l] & 0xE0) == 0xC0 && (in[l + 1] & 0xC0) == 0x80) { + *utf8len = 2; + ret = (in[l] & 0x1F); + ret <<= 6; + ret += (in[l + 1] & 0x3F); + if (ret < 0x80) + ret = 0; + } else + if (l < (len - 2) && (in[l] & 0xF0) == 0xE0 && (in[l + 1] & 0xC0) == 0x80 + && (in[l + 2] & 0xC0) == 0x80) { + *utf8len = 3; + ret = (in[l] & 0x0F); + ret <<= 6; + ret += (in[l + 1] & 0x3F); + ret <<= 6; + ret += (in[l + 2] & 0x3F); + if (ret < 0x0800) + ret = 0; + } else + if (l < (len - 3) && (in[l] & 0xF8) == 0xF0 && (in[l + 1] & 0xC0) == 0x80 + && (in[l + 2] & 0xC0) == 0x80 && (in[l + 3] & 0xC0) == 0x80) { + *utf8len = 4; + ret = (in[l] & 0x07); + ret <<= 6; + ret += (in[l + 1] & 0x3F); + ret <<= 6; + ret += (in[l + 2] & 0x3F); + ret <<= 6; + ret += (in[l + 3] & 0x3F); + if (ret < 0x10000) + ret = 0; + } + return ret; +} + +/// Finds the longest matching sequence from the beginning of the string. \n +/// If a match is found and it is longer than NICE_LEN, it is encoded as a repeating sequence to out \n +/// This is also used for Unicode strings \n +/// This is a crude implementation that is not optimized. Assuming only short strings \n +/// are encoded, this is not much of an issue. +int matchOccurance(const char *in, int len, int l, char *out, int olen, int *ol, byte *state, const byte usx_hcodes[], const byte usx_hcode_lens[]) { + int j, k; + int longest_dist = 0; + int longest_len = 0; + for (j = l - NICE_LEN; j >= 0; j--) { + for (k = l; k < len && j + k - l < l; k++) { + if (in[k] != in[j + k - l]) + break; + } + while ((((unsigned char) in[k]) >> 6) == 2) + k--; // Skip partial UTF-8 matches + //if ((in[k - 1] >> 3) == 0x1E || (in[k - 1] >> 4) == 0x0E || (in[k - 1] >> 5) == 0x06) + // k--; + if ((k - l) > (NICE_LEN - 1)) { + int match_len = k - l - NICE_LEN; + int match_dist = l - j - NICE_LEN + 1; + if (match_len > longest_len) { + longest_len = match_len; + longest_dist = match_dist; + } + } + } + if (longest_len) { + SAFE_APPEND_BITS(*ol = append_switch_code(out, olen, *ol, *state)); + SAFE_APPEND_BITS(*ol = append_bits(out, olen, *ol, usx_hcodes[USX_DICT], usx_hcode_lens[USX_DICT])); + //printf("Len:%d / Dist:%d/%.*s\n", longest_len, longest_dist, longest_len + NICE_LEN, in + l - longest_dist - NICE_LEN + 1); + SAFE_APPEND_BITS(*ol = encodeCount(out, olen, *ol, longest_len)); + SAFE_APPEND_BITS(*ol = encodeCount(out, olen, *ol, longest_dist)); + l += (longest_len + NICE_LEN); + l--; + return l; + } + return -l; +} + +/// This is used only when encoding a string array +/// Finds the longest matching sequence from the previous array element to the beginning of the string array. \n +/// If a match is found and it is longer than NICE_LEN, it is encoded as a repeating sequence to out \n +/// This is also used for Unicode strings \n +/// This is a crude implementation that is not optimized. Assuming only short strings \n +/// are encoded, this is not much of an issue. +int matchLine(const char *in, int len, int l, char *out, int olen, int *ol, struct us_lnk_lst *prev_lines, byte *state, const byte usx_hcodes[], const byte usx_hcode_lens[]) { + int last_ol = *ol; + int last_len = 0; + int last_dist = 0; + int last_ctx = 0; + int line_ctr = 0; + int j = 0; + do { + int i, k; + int line_len = (int)strlen(prev_lines->data); + int limit = (line_ctr == 0 ? l : line_len); + for (; j < limit; j++) { + for (i = l, k = j; k < line_len && i < len; k++, i++) { + if (prev_lines->data[k] != in[i]) + break; + } + while ((((unsigned char) prev_lines->data[k]) >> 6) == 2) + k--; // Skip partial UTF-8 matches + if ((k - j) >= NICE_LEN) { + if (last_len) { + if (j > last_dist) + continue; + //int saving = ((k - j) - last_len) + (last_dist - j) + (last_ctx - line_ctr); + //if (saving < 0) { + // //printf("No savng: %d\n", saving); + // continue; + //} + *ol = last_ol; + } + last_len = (k - j); + last_dist = j; + last_ctx = line_ctr; + SAFE_APPEND_BITS(*ol = append_switch_code(out, olen, *ol, *state)); + SAFE_APPEND_BITS(*ol = append_bits(out, olen, *ol, usx_hcodes[USX_DICT], usx_hcode_lens[USX_DICT])); + SAFE_APPEND_BITS(*ol = encodeCount(out, olen, *ol, last_len - NICE_LEN)); + SAFE_APPEND_BITS(*ol = encodeCount(out, olen, *ol, last_dist)); + SAFE_APPEND_BITS(*ol = encodeCount(out, olen, *ol, last_ctx)); + /* + if ((*ol - last_ol) > (last_len * 4)) { + last_len = 0; + *ol = last_ol; + }*/ + //printf("Len: %d, Dist: %d, Line: %d\n", last_len, last_dist, last_ctx); + j += last_len; + } + } + line_ctr++; + prev_lines = prev_lines->previous; + } while (prev_lines && prev_lines->data != NULL); + if (last_len) { + l += last_len; + l--; + return l; + } + return -l; +} + +/// Returns 4 bit code assuming ch falls between '0' to '9', \n +/// 'A' to 'F' or 'a' to 'f' +byte getBaseCode(char ch) { + if (ch >= '0' && ch <= '9') + return (ch - '0') << 4; + else if (ch >= 'A' && ch <= 'F') + return (ch - 'A' + 10) << 4; + else if (ch >= 'a' && ch <= 'f') + return (ch - 'a' + 10) << 4; + return 0; +} + +/// Enum indicating nibble type - USX_NIB_NUM means ch is a number '0' to '9', \n +/// USX_NIB_HEX_LOWER means ch is between 'a' to 'f', \n +/// USX_NIB_HEX_UPPER means ch is between 'A' to 'F' +enum {USX_NIB_NUM = 0, USX_NIB_HEX_LOWER, USX_NIB_HEX_UPPER, USX_NIB_NOT}; +/// Gets 4 bit code assuming ch falls between '0' to '9', \n +/// 'A' to 'F' or 'a' to 'f' +char getNibbleType(char ch) { + if (ch >= '0' && ch <= '9') + return USX_NIB_NUM; + else if (ch >= 'a' && ch <= 'f') + return USX_NIB_HEX_LOWER; + else if (ch >= 'A' && ch <= 'F') + return USX_NIB_HEX_UPPER; + return USX_NIB_NOT; +} + +/// Starts coding of nibble sets +int append_nibble_escape(char *out, int olen, int ol, byte state, const byte usx_hcodes[], const byte usx_hcode_lens[]) { + SAFE_APPEND_BITS(ol = append_switch_code(out, olen, ol, state)); + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_hcodes[USX_NUM], usx_hcode_lens[USX_NUM])); + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, 0, 2)); + return ol; +} + +/// Returns minimum value of two longs +long min_of(long c, long i) { + return c > i ? i : c; +} + +/// Appends the terminator code depending on the state, preset and whether full terminator needs to be encoded to out or not \n +int append_final_bits(char *const out, const int olen, int ol, const byte state, const byte is_all_upper, const byte usx_hcodes[], const byte usx_hcode_lens[]) { + if (usx_hcode_lens[USX_ALPHA]) { + if (USX_NUM != state) { + // for num state, append TERM_CODE directly + // for other state, switch to Num Set first + SAFE_APPEND_BITS(ol = append_switch_code(out, olen, ol, state)); + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_hcodes[USX_NUM], usx_hcode_lens[USX_NUM])); + } + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, usx_vcodes[TERM_CODE & 0x1F], usx_vcode_lens[TERM_CODE & 0x1F])); + } else { + // preset 1, terminate at 2 or 3 SW_CODE, i.e., 4 or 6 continuous 0 bits + // see discussion: https://github.com/siara-cc/Unishox/issues/19#issuecomment-922435580 + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, TERM_BYTE_PRESET_1, is_all_upper ? TERM_BYTE_PRESET_1_LEN_UPPER : TERM_BYTE_PRESET_1_LEN_LOWER)); + } + + // fill byte with the last bit + SAFE_APPEND_BITS(ol = append_bits(out, olen, ol, (ol == 0 || out[(ol-1)/8] << ((ol-1)&7) >= 0) ? 0 : 0xFF, (8 - ol % 8) & 7)); + + return ol; +} + +/// Macro used in the main compress function so that if the output len exceeds given maximum length (olen) it can exit +#define SAFE_APPEND_BITS2(olen, exp) do { \ + const int newidx = (exp); \ + const int __olen = (olen); \ + if (newidx < 0) return __olen >= 0 ? __olen + 1 : (1 - __olen) * 4; \ +} while (0) + +// Main API function. See unishox2.h for documentation +int unishox2_compress_lines(const char *in, int len, UNISHOX_API_OUT_AND_LEN(char *out, int olen), const byte usx_hcodes[], const byte usx_hcode_lens[], const char *usx_freq_seq[], const char *usx_templates[], struct us_lnk_lst *prev_lines) { + + byte state; + + int l, ll, ol; + char c_in, c_next; + int prev_uni; + byte is_upper, is_all_upper; +#if (UNISHOX_API_OUT_AND_LEN(0,1)) == 0 + const int olen = INT_MAX - 1; + const int rawolen = olen; + const byte need_full_term_codes = 0; +#else + const int rawolen = olen; + byte need_full_term_codes = 0; + if (olen < 0) { + need_full_term_codes = 1; + olen *= -1; + } +#endif + + init_coder(); + ol = 0; + prev_uni = 0; + state = USX_ALPHA; + is_all_upper = 0; + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, UNISHOX_MAGIC_BITS, UNISHOX_MAGIC_BIT_LEN)); // magic bit(s) + for (l=0; l 0) { + continue; + } else if (l < 0 && ol < 0) { + return olen + 1; + } + l = -l; + } else { + l = matchOccurance(in, len, l, out, olen, &ol, &state, usx_hcodes, usx_hcode_lens); + if (l > 0) { + continue; + } else if (l < 0 && ol < 0) { + return olen + 1; + } + l = -l; + } + } + + c_in = in[l]; + if (l && len > 4 && l < (len - 4) && usx_hcode_lens[USX_NUM]) { + if (c_in == in[l - 1] && c_in == in[l + 1] && c_in == in[l + 2] && c_in == in[l + 3]) { + int rpt_count = l + 4; + while (rpt_count < len && in[rpt_count] == c_in) + rpt_count++; + rpt_count -= l; + SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, RPT_CODE, &state, usx_hcodes, usx_hcode_lens)); + SAFE_APPEND_BITS2(rawolen, ol = encodeCount(out, olen, ol, rpt_count - 4)); + l += rpt_count; + l--; + continue; + } + } + + if (l <= (len - 36) && usx_hcode_lens[USX_NUM]) { + if (in[l + 8] == '-' && in[l + 13] == '-' && in[l + 18] == '-' && in[l + 23] == '-') { + char hex_type = USX_NIB_NUM; + int uid_pos = l; + for (; uid_pos < l + 36; uid_pos++) { + char c_uid = in[uid_pos]; + if (c_uid == '-' && (uid_pos == 8 || uid_pos == 13 || uid_pos == 18 || uid_pos == 23)) + continue; + char nib_type = getNibbleType(c_uid); + if (nib_type == USX_NIB_NOT) + break; + if (nib_type != USX_NIB_NUM) { + if (hex_type != USX_NIB_NUM && hex_type != nib_type) + break; + hex_type = nib_type; + } + } + if (uid_pos == l + 36) { + SAFE_APPEND_BITS2(rawolen, ol = append_nibble_escape(out, olen, ol, state, usx_hcodes, usx_hcode_lens)); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, (hex_type == USX_NIB_HEX_LOWER ? 0xC0 : 0xF0), + (hex_type == USX_NIB_HEX_LOWER ? 3 : 5))); + for (uid_pos = l; uid_pos < l + 36; uid_pos++) { + char c_uid = in[uid_pos]; + if (c_uid != '-') + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, getBaseCode(c_uid), 4)); + } + //printf("GUID:\n"); + l += 35; + continue; + } + } + } + + if (l < (len - 5) && usx_hcode_lens[USX_NUM]) { + char hex_type = USX_NIB_NUM; + int hex_len = 0; + do { + char nib_type = getNibbleType(in[l + hex_len]); + if (nib_type == USX_NIB_NOT) + break; + if (nib_type != USX_NIB_NUM) { + if (hex_type != USX_NIB_NUM && hex_type != nib_type) + break; + hex_type = nib_type; + } + hex_len++; + } while (l + hex_len < len); + if (hex_len > 10 && hex_type == USX_NIB_NUM) + hex_type = USX_NIB_HEX_LOWER; + if ((hex_type == USX_NIB_HEX_LOWER || hex_type == USX_NIB_HEX_UPPER) && hex_len > 3) { + SAFE_APPEND_BITS2(rawolen, ol = append_nibble_escape(out, olen, ol, state, usx_hcodes, usx_hcode_lens)); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, (hex_type == USX_NIB_HEX_LOWER ? 0x80 : 0xE0), (hex_type == USX_NIB_HEX_LOWER ? 2 : 4))); + SAFE_APPEND_BITS2(rawolen, ol = encodeCount(out, olen, ol, hex_len)); + do { + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, getBaseCode(in[l++]), 4)); + } while (--hex_len); + l--; + continue; + } + } + + if (usx_templates != NULL) { + int i; + for (i = 0; i < 5; i++) { + if (usx_templates[i]) { + int rem = (int)strlen(usx_templates[i]); + int j = 0; + for (; j < rem && l + j < len; j++) { + char c_t = usx_templates[i][j]; + c_in = in[l + j]; + if (c_t == 'f' || c_t == 'F') { + if (getNibbleType(c_in) != (c_t == 'f' ? USX_NIB_HEX_LOWER : USX_NIB_HEX_UPPER) + && getNibbleType(c_in) != USX_NIB_NUM) { + break; + } + } else + if (c_t == 'r' || c_t == 't' || c_t == 'o') { + if (c_in < '0' || c_in > (c_t == 'r' ? '7' : (c_t == 't' ? '3' : '1'))) + break; + } else + if (c_t != c_in) + break; + } + if (((float)j / rem) > 0.66) { + //printf("%s\n", usx_templates[i]); + rem = rem - j; + SAFE_APPEND_BITS2(rawolen, ol = append_nibble_escape(out, olen, ol, state, usx_hcodes, usx_hcode_lens)); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, 0, 1)); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, (count_codes[i] & 0xF8), count_codes[i] & 0x07)); + SAFE_APPEND_BITS2(rawolen, ol = encodeCount(out, olen, ol, rem)); + for (int k = 0; k < j; k++) { + char c_t = usx_templates[i][k]; + if (c_t == 'f' || c_t == 'F') + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, getBaseCode(in[l + k]), 4)); + else if (c_t == 'r' || c_t == 't' || c_t == 'o') { + c_t = (c_t == 'r' ? 3 : (c_t == 't' ? 2 : 1)); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, (in[l + k] - '0') << (8 - c_t), c_t)); + } + } + l += j; + l--; + break; + } + } + } + if (i < 5) + continue; + } + + if (usx_freq_seq != NULL) { + int i; + for (i = 0; i < 6; i++) { + int seq_len = (int)strlen(usx_freq_seq[i]); + if (len - seq_len >= 0 && l <= len - seq_len) { + if (memcmp(usx_freq_seq[i], in + l, seq_len) == 0 && usx_hcode_lens[usx_freq_codes[i] >> 5]) { + SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, usx_freq_codes[i], &state, usx_hcodes, usx_hcode_lens)); + l += seq_len; + l--; + break; + } + } + } + if (i < 6) + continue; + } + + c_in = in[l]; + + is_upper = 0; + if (c_in >= 'A' && c_in <= 'Z') + is_upper = 1; + else { + if (is_all_upper) { + is_all_upper = 0; + SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state)); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA])); + state = USX_ALPHA; + } + } + if (is_upper && !is_all_upper) { + if (state == USX_NUM) { + SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state)); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA])); + state = USX_ALPHA; + } + SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state)); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA])); + if (state == USX_DELTA) { + state = USX_ALPHA; + SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state)); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA])); + } + } + c_next = 0; + if (l+1 < len) + c_next = in[l+1]; + + if (c_in >= 32 && c_in <= 126) { + if (is_upper && !is_all_upper) { + for (ll=l+4; ll>=l && ll 'Z') + break; + } + if (ll == l-1) { + SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state)); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA])); + state = USX_ALPHA; + is_all_upper = 1; + } + } + if (state == USX_DELTA && (c_in == ' ' || c_in == '.' || c_in == ',')) { + byte spl_code = (c_in == ',' ? 0xC0 : (c_in == '.' ? 0xE0 : (c_in == ' ' ? 0 : 0xFF))); + if (spl_code != 0xFF) { + byte spl_code_len = (c_in == ',' ? 3 : (c_in == '.' ? 4 : (c_in == ' ' ? 1 : 4))); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, UNI_STATE_SPL_CODE, UNI_STATE_SPL_CODE_LEN)); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, spl_code, spl_code_len)); + continue; + } + } + c_in -= 32; + if (is_all_upper && is_upper) + c_in += 32; + if (c_in == 0) { + if (state == USX_NUM) + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_vcodes[NUM_SPC_CODE & 0x1F], usx_vcode_lens[NUM_SPC_CODE & 0x1F])); + else + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_vcodes[1], usx_vcode_lens[1])); + } else { + c_in--; + SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, usx_code_94[(int)c_in], &state, usx_hcodes, usx_hcode_lens)); + } + } else + if (c_in == 13 && c_next == 10) { + SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, CRLF_CODE, &state, usx_hcodes, usx_hcode_lens)); + l++; + } else + if (c_in == 10) { + if (state == USX_DELTA) { + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, UNI_STATE_SPL_CODE, UNI_STATE_SPL_CODE_LEN)); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, 0xF0, 4)); + } else + SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, LF_CODE, &state, usx_hcodes, usx_hcode_lens)); + } else + if (c_in == 13) { + SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, CR_CODE, &state, usx_hcodes, usx_hcode_lens)); + } else + if (c_in == '\t') { + SAFE_APPEND_BITS2(rawolen, ol = append_code(out, olen, ol, TAB_CODE, &state, usx_hcodes, usx_hcode_lens)); + } else { + int utf8len; + int32_t uni = readUTF8(in, len, l, &utf8len); + if (uni) { + l += utf8len; + if (state != USX_DELTA) { + int32_t uni2 = readUTF8(in, len, l, &utf8len); + if (uni2) { + if (state != USX_ALPHA) { + SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state)); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA])); + } + SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state)); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_ALPHA], usx_hcode_lens[USX_ALPHA])); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_vcodes[1], usx_vcode_lens[1])); // code for space (' ') + state = USX_DELTA; + } else { + SAFE_APPEND_BITS2(rawolen, ol = append_switch_code(out, olen, ol, state)); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, usx_hcodes[USX_DELTA], usx_hcode_lens[USX_DELTA])); + } + } + SAFE_APPEND_BITS2(rawolen, ol = encodeUnicode(out, olen, ol, uni, prev_uni)); + //printf("%d:%d:%d\n", l, utf8len, uni); + prev_uni = uni; + l--; + } else { + int bin_count = 1; + for (int bi = l + 1; bi < len; bi++) { + char c_bi = in[bi]; + //if (c_bi > 0x1F && c_bi != 0x7F) + // break; + if (readUTF8(in, len, bi, &utf8len)) + break; + if (bi < (len - 4) && c_bi == in[bi - 1] && c_bi == in[bi + 1] && c_bi == in[bi + 2] && c_bi == in[bi + 3]) + break; + bin_count++; + } + //printf("Bin:%d:%d:%x:%d\n", l, (unsigned char) c_in, (unsigned char) c_in, bin_count); + SAFE_APPEND_BITS2(rawolen, ol = append_nibble_escape(out, olen, ol, state, usx_hcodes, usx_hcode_lens)); + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, 0xF8, 5)); + SAFE_APPEND_BITS2(rawolen, ol = encodeCount(out, olen, ol, bin_count)); + do { + SAFE_APPEND_BITS2(rawolen, ol = append_bits(out, olen, ol, in[l++], 8)); + } while (--bin_count); + l--; + } + } + } + + if (need_full_term_codes) { + const int orig_ol = ol; + SAFE_APPEND_BITS2(rawolen, ol = append_final_bits(out, olen, ol, state, is_all_upper, usx_hcodes, usx_hcode_lens)); + return (ol / 8) * 4 + (((ol-orig_ol)/8) & 3); + } else { + const int rst = (ol + 7) / 8; + append_final_bits(out, rst, ol, state, is_all_upper, usx_hcodes, usx_hcode_lens); + return rst; + } +} + +// Main API function. See unishox2.h for documentation +int unishox2_compress(const char *in, int len, UNISHOX_API_OUT_AND_LEN(char *out, int olen), const byte usx_hcodes[], const byte usx_hcode_lens[], const char *usx_freq_seq[], const char *usx_templates[]) { + return unishox2_compress_lines(in, len, UNISHOX_API_OUT_AND_LEN(out, olen), usx_hcodes, usx_hcode_lens, usx_freq_seq, usx_templates, NULL); +} + +// Main API function. See unishox2.h for documentation +int unishox2_compress_simple(const char *in, int len, char *out) { + return unishox2_compress_lines(in, len, UNISHOX_API_OUT_AND_LEN(out, INT_MAX - 1), USX_HCODES_DFLT, USX_HCODE_LENS_DFLT, USX_FREQ_SEQ_DFLT, USX_TEMPLATES, NULL); +} + +// Reads one bit from in +int readBit(const char *in, int bit_no) { + return in[bit_no >> 3] & (0x80 >> (bit_no % 8)); +} + +// Reads next 8 bits, if available +int read8bitCode(const char *in, int len, int bit_no) { + int bit_pos = bit_no & 0x07; + int char_pos = bit_no >> 3; + byte code = (((byte)in[char_pos]) << bit_pos); + if (bit_no + bit_pos < len) { + code |= ((byte)in[++char_pos]) >> (8 - bit_pos); + } else + code |= (0xFF >> (8 - bit_pos)); + return code; +} + +/// The list of veritical codes is split into 5 sections. Used by readVCodeIdx() +#define SECTION_COUNT 5 +/// Used by readVCodeIdx() for finding the section under which the code read using read8bitCode() falls +byte usx_vsections[] = {0x7F, 0xBF, 0xDF, 0xEF, 0xFF}; +/// Used by readVCodeIdx() for finding the section vertical position offset +byte usx_vsection_pos[] = {0, 4, 8, 12, 20}; +/// Used by readVCodeIdx() for masking the code read by read8bitCode() +byte usx_vsection_mask[] = {0x7F, 0x3F, 0x1F, 0x0F, 0x0F}; +/// Used by readVCodeIdx() for shifting the code read by read8bitCode() to obtain the vpos +byte usx_vsection_shift[] = {5, 4, 3, 1, 0}; + +/// Vertical decoder lookup table - 3 bits code len, 5 bytes vertical pos +/// code len is one less as 8 cannot be accommodated in 3 bits +byte usx_vcode_lookup[36] = { + (1 << 5) + 0, (1 << 5) + 0, (2 << 5) + 1, (2 << 5) + 2, // Section 1 + (3 << 5) + 3, (3 << 5) + 4, (3 << 5) + 5, (3 << 5) + 6, // Section 2 + (3 << 5) + 7, (3 << 5) + 7, (4 << 5) + 8, (4 << 5) + 9, // Section 3 + (5 << 5) + 10, (5 << 5) + 10, (5 << 5) + 11, (5 << 5) + 11, // Section 4 + (5 << 5) + 12, (5 << 5) + 12, (6 << 5) + 13, (6 << 5) + 14, + (6 << 5) + 15, (6 << 5) + 15, (6 << 5) + 16, (6 << 5) + 16, // Section 5 + (6 << 5) + 17, (6 << 5) + 17, (7 << 5) + 18, (7 << 5) + 19, + (7 << 5) + 20, (7 << 5) + 21, (7 << 5) + 22, (7 << 5) + 23, + (7 << 5) + 24, (7 << 5) + 25, (7 << 5) + 26, (7 << 5) + 27 +}; + +/// Decodes the vertical code from the given bitstream at in \n +/// This is designed to use less memory using a 36 byte buffer \n +/// compared to using a 256 byte buffer to decode the next 8 bits read by read8bitCode() \n +/// by splitting the list of vertical codes. \n +/// Decoder is designed for using less memory, not speed. \n +/// Returns the veritical code index or 99 if match could not be found. \n +/// Also updates bit_no_p with how many ever bits used by the vertical code. +int readVCodeIdx(const char *in, int len, int *bit_no_p) { + if (*bit_no_p < len) { + byte code = read8bitCode(in, len, *bit_no_p); + int i = 0; + do { + if (code <= usx_vsections[i]) { + byte vcode = usx_vcode_lookup[usx_vsection_pos[i] + ((code & usx_vsection_mask[i]) >> usx_vsection_shift[i])]; + (*bit_no_p) += ((vcode >> 5) + 1); + if (*bit_no_p > len) + return 99; + return vcode & 0x1F; + } + } while (++i < SECTION_COUNT); + } + return 99; +} + +/// Mask for retrieving each code to be decoded according to its length \n +/// Same as usx_mask so redundant +byte len_masks[] = {0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF}; +/// Decodes the horizontal code from the given bitstream at in \n +/// depending on the hcodes defined using usx_hcodes and usx_hcode_lens \n +/// Returns the horizontal code index or 99 if match could not be found. \n +/// Also updates bit_no_p with how many ever bits used by the horizontal code. +int readHCodeIdx(const char *in, int len, int *bit_no_p, const byte usx_hcodes[], const byte usx_hcode_lens[]) { + if (!usx_hcode_lens[USX_ALPHA]) + return USX_ALPHA; + if (*bit_no_p < len) { + byte code = read8bitCode(in, len, *bit_no_p); + for (int code_pos = 0; code_pos < 5; code_pos++) { + if (usx_hcode_lens[code_pos] && (code & len_masks[usx_hcode_lens[code_pos] - 1]) == usx_hcodes[code_pos]) { + *bit_no_p += usx_hcode_lens[code_pos]; + return code_pos; + } + } + } + return 99; +} + +// TODO: Last value check.. Also len check in readBit +/// Returns the position of step code (0, 10, 110, etc.) encountered in the stream +int getStepCodeIdx(const char *in, int len, int *bit_no_p, int limit) { + int idx = 0; + while (*bit_no_p < len && readBit(in, *bit_no_p)) { + idx++; + (*bit_no_p)++; + if (idx == limit) + return idx; + } + if (*bit_no_p >= len) + return 99; + (*bit_no_p)++; + return idx; +} + +/// Reads specified number of bits and builds the corresponding integer +int32_t getNumFromBits(const char *in, int len, int bit_no, int count) { + int32_t ret = 0; + while (count-- && bit_no < len) { + ret += (readBit(in, bit_no) ? 1 << count : 0); + bit_no++; + } + return count < 0 ? ret : -1; +} + +/// Decodes the count from the given bit stream at in. Also updates bit_no_p +int32_t readCount(const char *in, int *bit_no_p, int len) { + int idx = getStepCodeIdx(in, len, bit_no_p, 4); + if (idx == 99) + return -1; + if (*bit_no_p + count_bit_lens[idx] - 1 >= len) + return -1; + int32_t count = getNumFromBits(in, len, *bit_no_p, count_bit_lens[idx]) + (idx ? count_adder[idx - 1] : 0); + (*bit_no_p) += count_bit_lens[idx]; + return count; +} + +/// Decodes the Unicode codepoint from the given bit stream at in. Also updates bit_no_p \n +/// When the step code is 5, reads the next step code to find out the special code. +int32_t readUnicode(const char *in, int *bit_no_p, int len) { + int idx = getStepCodeIdx(in, len, bit_no_p, 5); + if (idx == 99) + return 0x7FFFFF00 + 99; + if (idx == 5) { + int idx = getStepCodeIdx(in, len, bit_no_p, 4); + return 0x7FFFFF00 + idx; + } + if (idx >= 0) { + int sign = (*bit_no_p < len ? readBit(in, *bit_no_p) : 0); + (*bit_no_p)++; + if (*bit_no_p + uni_bit_len[idx] - 1 >= len) + return 0x7FFFFF00 + 99; + int32_t count = getNumFromBits(in, len, *bit_no_p, uni_bit_len[idx]); + count += uni_adder[idx]; + (*bit_no_p) += uni_bit_len[idx]; + //printf("Sign: %d, Val:%d", sign, count); + return sign ? -count : count; + } + return 0; +} + +/// Macro to ensure that the decoder does not append more than olen bytes to out +#define DEC_OUTPUT_CHAR(out, olen, ol, c) do { \ + char *const obuf = (out); \ + const int oidx = (ol); \ + const int limit = (olen); \ + if (limit <= oidx) return limit + 1; \ + else if (oidx < 0) return 0; \ + else obuf[oidx] = (c); \ +} while (0) + +/// Macro to ensure that the decoder does not append more than olen bytes to out +#define DEC_OUTPUT_CHARS(olen, exp) do { \ + const int newidx = (exp); \ + const int limit = (olen); \ + if (newidx > limit) return limit + 1; \ +} while (0) + +/// Write given unicode code point to out as a UTF-8 sequence +int writeUTF8(char *out, int olen, int ol, int uni) { + if (uni < (1 << 11)) { + DEC_OUTPUT_CHAR(out, olen, ol++, 0xC0 + (uni >> 6)); + DEC_OUTPUT_CHAR(out, olen, ol++, 0x80 + (uni & 0x3F)); + } else + if (uni < (1 << 16)) { + DEC_OUTPUT_CHAR(out, olen, ol++, 0xE0 + (uni >> 12)); + DEC_OUTPUT_CHAR(out, olen, ol++, 0x80 + ((uni >> 6) & 0x3F)); + DEC_OUTPUT_CHAR(out, olen, ol++, 0x80 + (uni & 0x3F)); + } else { + DEC_OUTPUT_CHAR(out, olen, ol++, 0xF0 + (uni >> 18)); + DEC_OUTPUT_CHAR(out, olen, ol++, 0x80 + ((uni >> 12) & 0x3F)); + DEC_OUTPUT_CHAR(out, olen, ol++, 0x80 + ((uni >> 6) & 0x3F)); + DEC_OUTPUT_CHAR(out, olen, ol++, 0x80 + (uni & 0x3F)); + } + return ol; +} + +/// Decode repeating sequence and appends to out +int decodeRepeat(const char *in, int len, char *out, int olen, int ol, int *bit_no, struct us_lnk_lst *prev_lines) { + if (prev_lines) { + int32_t dict_len = readCount(in, bit_no, len) + NICE_LEN; + if (dict_len < NICE_LEN) + return ol; + int32_t dist = readCount(in, bit_no, len); + if (dist < 0) + return ol; + int32_t ctx = readCount(in, bit_no, len); + if (ctx < 0) + return ol; + struct us_lnk_lst *cur_line = prev_lines; + const int left = olen - ol; + while (ctx--) + cur_line = cur_line->previous; + if (left <= 0) return olen + 1; + memmove(out + ol, cur_line->data + dist, min_of(left, dict_len)); + if (left < dict_len) return olen + 1; + ol += dict_len; + } else { + int32_t dict_len = readCount(in, bit_no, len) + NICE_LEN; + if (dict_len < NICE_LEN) + return ol; + int32_t dist = readCount(in, bit_no, len) + NICE_LEN - 1; + if (dist < NICE_LEN - 1) + return ol; + const int32_t left = olen - ol; + //printf("Decode len: %d, dist: %d\n", dict_len - NICE_LEN, dist - NICE_LEN + 1); + if (left <= 0) return olen + 1; + memmove(out + ol, out + ol - dist, min_of(left, dict_len)); + if (left < dict_len) return olen + 1; + ol += dict_len; + } + return ol; +} + +/// Returns hex character corresponding to the 4 bit nibble +char getHexChar(int32_t nibble, int hex_type) { + if (nibble >= 0 && nibble <= 9) + return '0' + nibble; + else if (hex_type < USX_NIB_HEX_UPPER) + return 'a' + nibble - 10; + return 'A' + nibble - 10; +} + +// Main API function. See unishox2.h for documentation +int unishox2_decompress_lines(const char *in, int len, UNISHOX_API_OUT_AND_LEN(char *out, int olen), const byte usx_hcodes[], const byte usx_hcode_lens[], const char *usx_freq_seq[], const char *usx_templates[], struct us_lnk_lst *prev_lines) { + + int dstate; + int bit_no; + int h, v; + byte is_all_upper; +#if (UNISHOX_API_OUT_AND_LEN(0,1)) == 0 + const int olen = INT_MAX - 1; +#endif + + init_coder(); + int ol = 0; + bit_no = UNISHOX_MAGIC_BIT_LEN; // ignore the magic bit + dstate = h = USX_ALPHA; + is_all_upper = 0; + + int prev_uni = 0; + + len <<= 3; + while (bit_no < len) { + int orig_bit_no = bit_no; + if (dstate == USX_DELTA || h == USX_DELTA) { + if (dstate != USX_DELTA) + h = dstate; + int32_t delta = readUnicode(in, &bit_no, len); + if ((delta >> 8) == 0x7FFFFF) { + int spl_code_idx = delta & 0x000000FF; + if (spl_code_idx == 99) + break; + switch (spl_code_idx) { + case 0: + DEC_OUTPUT_CHAR(out, olen, ol++, ' '); + continue; + case 1: + h = readHCodeIdx(in, len, &bit_no, usx_hcodes, usx_hcode_lens); + if (h == 99) { + bit_no = len; + continue; + } + if (h == USX_DELTA || h == USX_ALPHA) { + dstate = h; + continue; + } + if (h == USX_DICT) { + DEC_OUTPUT_CHARS(olen, ol = decodeRepeat(in, len, out, olen, ol, &bit_no, prev_lines)); + h = dstate; + continue; + } + break; + case 2: + DEC_OUTPUT_CHAR(out, olen, ol++, ','); + continue; + case 3: + DEC_OUTPUT_CHAR(out, olen, ol++, '.'); + continue; + case 4: + DEC_OUTPUT_CHAR(out, olen, ol++, 10); + continue; + } + } else { + prev_uni += delta; + DEC_OUTPUT_CHARS(olen, ol = writeUTF8(out, olen, ol, prev_uni)); + //printf("%ld, ", prev_uni); + } + if (dstate == USX_DELTA && h == USX_DELTA) + continue; + } else + h = dstate; + char c = 0; + byte is_upper = is_all_upper; + v = readVCodeIdx(in, len, &bit_no); + if (v == 99 || h == 99) { + bit_no = orig_bit_no; + break; + } + if (v == 0 && h != USX_SYM) { + if (bit_no >= len) + break; + if (h != USX_NUM || dstate != USX_DELTA) { + h = readHCodeIdx(in, len, &bit_no, usx_hcodes, usx_hcode_lens); + if (h == 99 || bit_no >= len) { + bit_no = orig_bit_no; + break; + } + } + if (h == USX_ALPHA) { + if (dstate == USX_ALPHA) { + if (!usx_hcode_lens[USX_ALPHA] && TERM_BYTE_PRESET_1 == (read8bitCode(in, len, bit_no - SW_CODE_LEN) & (0xFF << (8 - (is_all_upper ? TERM_BYTE_PRESET_1_LEN_UPPER : TERM_BYTE_PRESET_1_LEN_LOWER))))) + break; // Terminator for preset 1 + if (is_all_upper) { + is_upper = is_all_upper = 0; + continue; + } + v = readVCodeIdx(in, len, &bit_no); + if (v == 99) { + bit_no = orig_bit_no; + break; + } + if (v == 0) { + h = readHCodeIdx(in, len, &bit_no, usx_hcodes, usx_hcode_lens); + if (h == 99) { + bit_no = orig_bit_no; + break; + } + if (h == USX_ALPHA) { + is_all_upper = 1; + continue; + } + } + is_upper = 1; + } else { + dstate = USX_ALPHA; + continue; + } + } else + if (h == USX_DICT) { + DEC_OUTPUT_CHARS(olen, ol = decodeRepeat(in, len, out, olen, ol, &bit_no, prev_lines)); + continue; + } else + if (h == USX_DELTA) { + //printf("Sign: %d, bitno: %d\n", sign, bit_no); + //printf("Code: %d\n", prev_uni); + //printf("BitNo: %d\n", bit_no); + continue; + } else { + if (h != USX_NUM || dstate != USX_DELTA) + v = readVCodeIdx(in, len, &bit_no); + if (v == 99) { + bit_no = orig_bit_no; + break; + } + if (h == USX_NUM && v == 0) { + int idx = getStepCodeIdx(in, len, &bit_no, 5); + if (idx == 0) { + idx = getStepCodeIdx(in, len, &bit_no, 4); + int32_t rem = readCount(in, &bit_no, len); + if (rem < 0) + break; + rem = (int)strlen(usx_templates[idx]) - rem; + int eof = 0; + for (int j = 0; j < rem; j++) { + char c_t = usx_templates[idx][j]; + if (c_t == 'f' || c_t == 'r' || c_t == 't' || c_t == 'o' || c_t == 'F') { + char nibble_len = (c_t == 'f' || c_t == 'F' ? 4 : (c_t == 'r' ? 3 : (c_t == 't' ? 2 : 1))); + const int32_t raw_char = getNumFromBits(in, len, bit_no, nibble_len); + if (raw_char < 0) { + eof = 1; + break; + } + DEC_OUTPUT_CHAR(out, olen, ol++, getHexChar((char)raw_char, + c_t == 'f' ? USX_NIB_HEX_LOWER : USX_NIB_HEX_UPPER)); + bit_no += nibble_len; + } else + DEC_OUTPUT_CHAR(out, olen, ol++, c_t); + } + if (eof) break; // reach input eof + } else + if (idx == 5) { + int32_t bin_count = readCount(in, &bit_no, len); + if (bin_count < 0) + break; + if (bin_count == 0) // invalid encoding + break; + do { + const int32_t raw_char = getNumFromBits(in, len, bit_no, 8); + if (raw_char < 0) + break; + DEC_OUTPUT_CHAR(out, olen, ol++, (char)raw_char); + bit_no += 8; + } while (--bin_count); + if (bin_count > 0) break; // reach input eof + } else { + int32_t nibble_count = 0; + if (idx == 2 || idx == 4) + nibble_count = 32; + else { + nibble_count = readCount(in, &bit_no, len); + if (nibble_count < 0) + break; + if (nibble_count == 0) // invalid encoding + break; + } + do { + int32_t nibble = getNumFromBits(in, len, bit_no, 4); + if (nibble < 0) + break; + DEC_OUTPUT_CHAR(out, olen, ol++, getHexChar(nibble, idx < 3 ? USX_NIB_HEX_LOWER : USX_NIB_HEX_UPPER)); + if ((idx == 2 || idx == 4) && (nibble_count == 25 || nibble_count == 21 || nibble_count == 17 || nibble_count == 13)) + DEC_OUTPUT_CHAR(out, olen, ol++, '-'); + bit_no += 4; + } while (--nibble_count); + if (nibble_count > 0) break; // reach input eof + } + if (dstate == USX_DELTA) + h = USX_DELTA; + continue; + } + } + } + if (is_upper && v == 1) { + h = dstate = USX_DELTA; // continuous delta coding + continue; + } + if (h < 3 && v < 28) + c = usx_sets[h][v]; + if (c >= 'a' && c <= 'z') { + dstate = USX_ALPHA; + if (is_upper) + c -= 32; + } else { + if (c >= '0' && c <= '9') { + dstate = USX_NUM; + } else if (c == 0) { + if (v == 8) { + DEC_OUTPUT_CHAR(out, olen, ol++, '\r'); + DEC_OUTPUT_CHAR(out, olen, ol++, '\n'); + } else if (h == USX_NUM && v == 26) { + int32_t count = readCount(in, &bit_no, len); + if (count < 0) + break; + count += 4; + if (ol <= 0) + return 0; // invalid encoding + char rpt_c = out[ol - 1]; + while (count--) + DEC_OUTPUT_CHAR(out, olen, ol++, rpt_c); + } else if (h == USX_SYM && v > 24) { + v -= 25; + const int freqlen = (int)strlen(usx_freq_seq[v]); + const int left = olen - ol; + if (left <= 0) return olen + 1; + memcpy(out + ol, usx_freq_seq[v], min_of(left, freqlen)); + if (left < freqlen) return olen + 1; + ol += freqlen; + } else if (h == USX_NUM && v > 22 && v < 26) { + v -= (23 - 3); + const int freqlen = (int)strlen(usx_freq_seq[v]); + const int left = olen - ol; + if (left <= 0) return olen + 1; + memcpy(out + ol, usx_freq_seq[v], min_of(left, freqlen)); + if (left < freqlen) return olen + 1; + ol += freqlen; + } else + break; // Terminator + if (dstate == USX_DELTA) + h = USX_DELTA; + continue; + } + } + if (dstate == USX_DELTA) + h = USX_DELTA; + DEC_OUTPUT_CHAR(out, olen, ol++, c); + } + + return ol; + +} + +// Main API function. See unishox2.h for documentation +int unishox2_decompress(const char *in, int len, UNISHOX_API_OUT_AND_LEN(char *out, int olen), const byte usx_hcodes[], const byte usx_hcode_lens[], const char *usx_freq_seq[], const char *usx_templates[]) { + return unishox2_decompress_lines(in, len, UNISHOX_API_OUT_AND_LEN(out, olen), usx_hcodes, usx_hcode_lens, usx_freq_seq, usx_templates, NULL); +} + +// Main API function. See unishox2.h for documentation +int unishox2_decompress_simple(const char *in, int len, char *out) { + return unishox2_decompress(in, len, UNISHOX_API_OUT_AND_LEN(out, INT_MAX - 1), USX_PSET_DFLT); +} diff --git a/src/mesh/compression/unishox2.h b/src/mesh/compression/unishox2.h new file mode 100644 index 000000000..bbbd7a759 --- /dev/null +++ b/src/mesh/compression/unishox2.h @@ -0,0 +1,278 @@ +/* + * Copyright (C) 2020 Siara Logics (cc) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * @author Arundale Ramanathan + * + */ + +/** + * @file unishox2.h + * @author Arundale Ramanathan, James Z. M. Gao + * @brief API for Unishox2 Compression and Decompression + * + * This file describes each function of the Unishox2 API \n + * For finding out how this API can be used in your program, \n + * please see test_unishox2.c. + */ + +#ifndef unishox2 +#define unishox2 + +#define UNISHOX_VERSION "2.0" ///< Unicode spec version + +/** + * Macro switch to enable/disable output buffer length parameter in low level api \n + * Disabled by default \n + * When this macro is defined, the all the API functions \n + * except the simple API functions accept an additional parameter olen \n + * that enables the developer to pass the size of the output buffer provided \n + * so that the api function may not write beyond that length. \n + * This can be disabled if the developer knows that the buffer provided is sufficient enough \n + * so no additional parameter is passed and the program is faster since additional check \n + * for output length is not performed at each step \n + * The simple api, i.e. unishox2_(de)compress_simple will always omit the buffer length + */ +#ifndef UNISHOX_API_WITH_OUTPUT_LEN +# define UNISHOX_API_WITH_OUTPUT_LEN 0 +#endif + +/// Upto 8 bits of initial magic bit sequence can be included. Bit count can be specified with UNISHOX_MAGIC_BIT_LEN +#ifndef UNISHOX_MAGIC_BITS +# define UNISHOX_MAGIC_BITS 0xFF +#endif + +/// Desired length of Magic bits defined by UNISHOX_MAGIC_BITS +#ifdef UNISHOX_MAGIC_BIT_LEN +# if UNISHOX_MAGIC_BIT_LEN < 0 || 9 <= UNISHOX_MAGIC_BIT_LEN +# error "UNISHOX_MAGIC_BIT_LEN need between [0, 8)" +# endif +#else +# define UNISHOX_MAGIC_BIT_LEN 1 +#endif + +//enum {USX_ALPHA = 0, USX_SYM, USX_NUM, USX_DICT, USX_DELTA}; + +/// Default Horizontal codes. When composition of text is know beforehand, the other hcodes in this section can be used to achieve more compression. +#define USX_HCODES_DFLT (const unsigned char[]) {0x00, 0x40, 0x80, 0xC0, 0xE0} +/// Length of each default hcode +#define USX_HCODE_LENS_DFLT (const unsigned char[]) {2, 2, 2, 3, 3} + +/// Horizontal codes preset for English Alphabet content only +#define USX_HCODES_ALPHA_ONLY (const unsigned char[]) {0x00, 0x00, 0x00, 0x00, 0x00} +/// Length of each Alpha only hcode +#define USX_HCODE_LENS_ALPHA_ONLY (const unsigned char[]) {0, 0, 0, 0, 0} + +/// Horizontal codes preset for Alpha Numeric content only +#define USX_HCODES_ALPHA_NUM_ONLY (const unsigned char[]) {0x00, 0x00, 0x80, 0x00, 0x00} +/// Length of each Alpha numeric hcode +#define USX_HCODE_LENS_ALPHA_NUM_ONLY (const unsigned char[]) {1, 0, 1, 0, 0} + +/// Horizontal codes preset for Alpha Numeric and Symbol content only +#define USX_HCODES_ALPHA_NUM_SYM_ONLY (const unsigned char[]) {0x00, 0x80, 0xC0, 0x00, 0x00} +/// Length of each Alpha numeric and symbol hcodes +#define USX_HCODE_LENS_ALPHA_NUM_SYM_ONLY (const unsigned char[]) {1, 2, 2, 0, 0} + +/// Horizontal codes preset favouring Alphabet content +#define USX_HCODES_FAVOR_ALPHA (const unsigned char[]) {0x00, 0x80, 0xA0, 0xC0, 0xE0} +/// Length of each hcode favouring Alpha content +#define USX_HCODE_LENS_FAVOR_ALPHA (const unsigned char[]) {1, 3, 3, 3, 3} + +/// Horizontal codes preset favouring repeating sequences +#define USX_HCODES_FAVOR_DICT (const unsigned char[]) {0x00, 0x40, 0xC0, 0x80, 0xE0} +/// Length of each hcode favouring repeating sequences +#define USX_HCODE_LENS_FAVOR_DICT (const unsigned char[]) {2, 2, 3, 2, 3} + +/// Horizontal codes preset favouring symbols +#define USX_HCODES_FAVOR_SYM (const unsigned char[]) {0x80, 0x00, 0xA0, 0xC0, 0xE0} +/// Length of each hcode favouring symbols +#define USX_HCODE_LENS_FAVOR_SYM (const unsigned char[]) {3, 1, 3, 3, 3} + +//#define USX_HCODES_FAVOR_UMLAUT {0x00, 0x40, 0xE0, 0xC0, 0x80} +//#define USX_HCODE_LENS_FAVOR_UMLAUT {2, 2, 3, 3, 2} + +/// Horizontal codes preset favouring umlaut letters +#define USX_HCODES_FAVOR_UMLAUT (const unsigned char[]) {0x80, 0xA0, 0xC0, 0xE0, 0x00} +/// Length of each hcode favouring umlaut letters +#define USX_HCODE_LENS_FAVOR_UMLAUT (const unsigned char[]) {3, 3, 3, 3, 1} + +/// Horizontal codes preset for no repeating sequences +#define USX_HCODES_NO_DICT (const unsigned char[]) {0x00, 0x40, 0x80, 0x00, 0xC0} +/// Length of each hcode for no repeating sequences +#define USX_HCODE_LENS_NO_DICT (const unsigned char[]) {2, 2, 2, 0, 2} + +/// Horizontal codes preset for no Unicode characters +#define USX_HCODES_NO_UNI (const unsigned char[]) {0x00, 0x40, 0x80, 0xC0, 0x00} +/// Length of each hcode for no Unicode characters +#define USX_HCODE_LENS_NO_UNI (const unsigned char[]) {2, 2, 2, 2, 0} + +/// Default frequently occuring sequences. When composition of text is know beforehand, the other sequences in this section can be used to achieve more compression. +#define USX_FREQ_SEQ_DFLT (const char *[]) {"\": \"", "\": ", ""} +/// Frequently occuring sequences in XML content +#define USX_FREQ_SEQ_XML (const char *[]) {"", " Date: Mon, 11 Apr 2022 22:12:04 -0700 Subject: [PATCH 2/6] compression WIP compression works. next is to store it in the proto as a oneof and then decompress it on use. --- platformio.ini | 5 ++++- src/mesh/Router.cpp | 21 ++++++++++++--------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/platformio.ini b/platformio.ini index e692888c2..ba5c37489 100644 --- a/platformio.ini +++ b/platformio.ini @@ -110,11 +110,14 @@ lib_ignore = ESP32 BLE Arduino platform_packages = framework-arduinoespressif32@https://github.com/meshtastic/arduino-esp32.git#4cde0f5d412d2695184f32e8a47e9bea57b45276 -; leave this commented out to avoid breaking Windows +; leave this commented out to avoid breaking Windows ;upload_port = /dev/ttyUSB0 ;monitor_port = /dev/ttyUSB0 +upload_port = /dev/cu.SLAB_USBtoUART +monitor_port = /dev/cu.SLAB_USBtoUART + ; customize the partition table ; http://docs.platformio.org/en/latest/platforms/espressif32.html#partition-tables board_build.partitions = partition-table.csv diff --git a/src/mesh/Router.cpp b/src/mesh/Router.cpp index 34ab85a26..c81359604 100644 --- a/src/mesh/Router.cpp +++ b/src/mesh/Router.cpp @@ -325,26 +325,29 @@ Routing_Error perhapsEncode(MeshPacket *p) size_t numbytes = pb_encode_to_bytes(bytes, sizeof(bytes), Data_fields, &p->decoded); - if (1) { - // int orig_len, compressed_len; + if (p->decoded.portnum == PortNum_TEXT_MESSAGE_APP) { + + char original_payload[Constants_DATA_PAYLOAD_LEN]; + memcpy(original_payload, p->decoded.payload.bytes, p->decoded.payload.size); + int compressed_len; char compressed_out[100] = {0}; - // char *orig = (char *)"Hiiiiiiii! :) How are you doing? I am doing well."; - // char *orig = (char *)"Meshtastic"; - // orig_len = strlen(orig); - // compressed_len = unishox2_compress_simple(orig, orig_len, compressed_out); - compressed_len = unishox2_compress_simple((char *)bytes, numbytes, compressed_out); + compressed_len = unishox2_compress_simple(original_payload, p->decoded.payload.size, compressed_out); + Serial.print("Original length - "); + Serial.println(p->decoded.payload.size); + + Serial.print("Compressed length - "); Serial.println(compressed_len); - Serial.println(compressed_out); - //&p->decoded.portnum; + //Serial.println(compressed_out); char decompressed_out[100] = {}; int decompressed_len; decompressed_len = unishox2_decompress_simple(compressed_out, compressed_len, decompressed_out); + Serial.print("Decompressed length - "); Serial.println(decompressed_len); Serial.println(decompressed_out); } From ecc114f1cdbe0c04bbd342157f2adcd91afbc550 Mon Sep 17 00:00:00 2001 From: Jm Casler Date: Wed, 13 Apr 2022 19:23:35 -0700 Subject: [PATCH 3/6] temp work on compression --- src/configuration.h | 6 ++++ src/mesh/Router.cpp | 50 +++++++++++++++++++++++-------- src/mesh/generated/mesh.pb.h | 31 +++++++++++-------- src/mesh/generated/telemetry.pb.h | 8 ++--- src/mesh/http/WiFiAPClient.cpp | 17 ++++++++--- 5 files changed, 80 insertions(+), 32 deletions(-) diff --git a/src/configuration.h b/src/configuration.h index 742d9e75e..59d0cf712 100644 --- a/src/configuration.h +++ b/src/configuration.h @@ -125,6 +125,12 @@ along with this program. If not, see . #define RTC_DATA_ATTR #endif +// ----------------------------------------------------------------------------- +// Feature toggles +// ----------------------------------------------------------------------------- + +//#define DISABLE_NTP + // ----------------------------------------------------------------------------- // OLED & Input // ----------------------------------------------------------------------------- diff --git a/src/mesh/Router.cpp b/src/mesh/Router.cpp index c81359604..9487231fd 100644 --- a/src/mesh/Router.cpp +++ b/src/mesh/Router.cpp @@ -303,6 +303,14 @@ bool perhapsDecode(MeshPacket *p) // parsing was successful p->which_payloadVariant = MeshPacket_decoded_tag; // change type to decoded p->channel = chIndex; // change to store the index instead of the hash + + + // Decompress if needed. jm + if (p->decoded.which_payloadVariant == Data_payload_compressed_tag) { + // Decompress the file + } + + printPacket("decoded message", p); return true; } @@ -321,35 +329,53 @@ Routing_Error perhapsEncode(MeshPacket *p) if (p->which_payloadVariant == MeshPacket_decoded_tag) { static uint8_t bytes[MAX_RHPACKETLEN]; // we have to use a scratch buffer because a union - // printPacket("pre encrypt", p); // portnum valid here - size_t numbytes = pb_encode_to_bytes(bytes, sizeof(bytes), Data_fields, &p->decoded); + // Only allow encryption on the text message app. + // TODO: Allow modules to opt into compression. if (p->decoded.portnum == PortNum_TEXT_MESSAGE_APP) { char original_payload[Constants_DATA_PAYLOAD_LEN]; memcpy(original_payload, p->decoded.payload.bytes, p->decoded.payload.size); - int compressed_len; - char compressed_out[100] = {0}; + char compressed_out[Constants_DATA_PAYLOAD_LEN] = {0}; - compressed_len = unishox2_compress_simple(original_payload, p->decoded.payload.size, compressed_out); + int compressed_len = unishox2_compress_simple(original_payload, p->decoded.payload.size, compressed_out); Serial.print("Original length - "); Serial.println(p->decoded.payload.size); Serial.print("Compressed length - "); Serial.println(compressed_len); - //Serial.println(compressed_out); + // Serial.println(compressed_out); - char decompressed_out[100] = {}; - int decompressed_len; + // If the compressed length is greater than or equal to the original size, don't use the compressed form + if (compressed_len >= p->decoded.payload.size) { - decompressed_len = unishox2_decompress_simple(compressed_out, compressed_len, decompressed_out); + DEBUG_MSG("Not compressing message. Not enough benefit from doing so.\n"); + // Set the uncompressed payload varient anyway. Shouldn't hurt? + p->decoded.which_payloadVariant = Data_payload_tag; - Serial.print("Decompressed length - "); - Serial.println(decompressed_len); - Serial.println(decompressed_out); + // Otherwise we use the compressor + } else { + DEBUG_MSG("Compressing message.\n"); + // Copy the compressed data into the meshpacket + p->decoded.payload_compressed.size = compressed_len; + memcpy(p->decoded.payload_compressed.bytes, compressed_out, compressed_len); + + p->decoded.which_payloadVariant = Data_payload_compressed_tag; + } + + if (0) { + char decompressed_out[Constants_DATA_PAYLOAD_LEN] = {}; + int decompressed_len; + + decompressed_len = unishox2_decompress_simple(compressed_out, compressed_len, decompressed_out); + + Serial.print("Decompressed length - "); + Serial.println(decompressed_len); + Serial.println(decompressed_out); + } } if (numbytes > MAX_RHPACKETLEN) diff --git a/src/mesh/generated/mesh.pb.h b/src/mesh/generated/mesh.pb.h index b4a9dbf90..8ce6fc803 100644 --- a/src/mesh/generated/mesh.pb.h +++ b/src/mesh/generated/mesh.pb.h @@ -500,6 +500,7 @@ typedef struct _User { } User; typedef PB_BYTES_ARRAY_T(237) Data_payload_t; +typedef PB_BYTES_ARRAY_T(237) Data_payload_compressed_t; /* (Formerly called SubPacket) The payload portion fo a packet, this is the actual bytes that are sent inside a radio packet (because from/to are broken out by the comms library) */ @@ -507,30 +508,34 @@ typedef struct _Data { /* Formerly named typ and of type Type */ PortNum portnum; /* TODO: REPLACE */ - Data_payload_t payload; + pb_size_t which_payloadVariant; + union { + Data_payload_t payload; + Data_payload_compressed_t payload_compressed; + }; + /* TODO: REPLACE */ + bool want_response; /* Not normally used, but for testing a sender can request that recipient responds in kind (i.e. if it received a position, it should unicast back it's position). Note: that if you set this on a broadcast you will receive many replies. */ - bool want_response; + uint32_t dest; /* The address of the destination node. This field is is filled in by the mesh radio device software, application layer software should never need it. RouteDiscovery messages _must_ populate this. Other message types might need to if they are doing multihop routing. */ - uint32_t dest; + uint32_t source; /* The address of the original sender for this message. This field should _only_ be populated for reliable multihop packets (to keep packets small). */ - uint32_t source; + uint32_t request_id; /* Only used in routing or response messages. Indicates the original message ID that this message is reporting failure on. (formerly called original_id) */ - uint32_t request_id; - /* If set, this message is intened to be a reply to a previously sent message with the defined id. */ uint32_t reply_id; + /* If set, this message is intened to be a reply to a previously sent message with the defined id. */ + uint32_t emoji; /* Defaults to false. If true, then what is in the payload should be treated as an emoji like giving a message a heart or poop emoji. */ - uint32_t emoji; - /* Location structure */ bool has_location; Location location; } Data; @@ -738,7 +743,7 @@ extern "C" { #define User_init_default {"", "", "", {0}, _HardwareModel_MIN, 0, _Team_MIN, 0, 0, 0} #define RouteDiscovery_init_default {0, {0, 0, 0, 0, 0, 0, 0, 0}} #define Routing_init_default {0, {RouteDiscovery_init_default}} -#define Data_init_default {_PortNum_MIN, {0, {0}}, 0, 0, 0, 0, 0, 0, false, Location_init_default} +#define Data_init_default {_PortNum_MIN, 0, {{0, {0}}}, 0, 0, 0, 0, 0, 0, false, Location_init_default} #define Location_init_default {0, 0, 0, 0, 0} #define MeshPacket_init_default {0, 0, 0, 0, {Data_init_default}, 0, 0, 0, 0, 0, _MeshPacket_Priority_MIN, 0, _MeshPacket_Delayed_MIN} #define NodeInfo_init_default {0, false, User_init_default, false, Position_init_default, 0, 0, false, DeviceMetrics_init_default} @@ -751,7 +756,7 @@ extern "C" { #define User_init_zero {"", "", "", {0}, _HardwareModel_MIN, 0, _Team_MIN, 0, 0, 0} #define RouteDiscovery_init_zero {0, {0, 0, 0, 0, 0, 0, 0, 0}} #define Routing_init_zero {0, {RouteDiscovery_init_zero}} -#define Data_init_zero {_PortNum_MIN, {0, {0}}, 0, 0, 0, 0, 0, 0, false, Location_init_zero} +#define Data_init_zero {_PortNum_MIN, 0, {{0, {0}}}, 0, 0, 0, 0, 0, 0, false, Location_init_zero} #define Location_init_zero {0, 0, 0, 0, 0} #define MeshPacket_init_zero {0, 0, 0, 0, {Data_init_zero}, 0, 0, 0, 0, 0, _MeshPacket_Priority_MIN, 0, _MeshPacket_Delayed_MIN} #define NodeInfo_init_zero {0, false, User_init_zero, false, Position_init_zero, 0, 0, false, DeviceMetrics_init_zero} @@ -825,6 +830,7 @@ extern "C" { #define User_ant_azimuth_tag 12 #define Data_portnum_tag 1 #define Data_payload_tag 2 +#define Data_payload_compressed_tag 10 #define Data_want_response_tag 3 #define Data_dest_tag 4 #define Data_source_tag 5 @@ -923,14 +929,15 @@ X(a, STATIC, ONEOF, UENUM, (variant,error_reason,error_reason), 3) #define Data_FIELDLIST(X, a) \ X(a, STATIC, SINGULAR, UENUM, portnum, 1) \ -X(a, STATIC, SINGULAR, BYTES, payload, 2) \ +X(a, STATIC, ONEOF, BYTES, (payloadVariant,payload,payload), 2) \ X(a, STATIC, SINGULAR, BOOL, want_response, 3) \ X(a, STATIC, SINGULAR, FIXED32, dest, 4) \ X(a, STATIC, SINGULAR, FIXED32, source, 5) \ X(a, STATIC, SINGULAR, FIXED32, request_id, 6) \ X(a, STATIC, SINGULAR, FIXED32, reply_id, 7) \ X(a, STATIC, SINGULAR, FIXED32, emoji, 8) \ -X(a, STATIC, OPTIONAL, MESSAGE, location, 9) +X(a, STATIC, OPTIONAL, MESSAGE, location, 9) \ +X(a, STATIC, ONEOF, BYTES, (payloadVariant,payload_compressed,payload_compressed), 10) #define Data_CALLBACK NULL #define Data_DEFAULT NULL #define Data_location_MSGTYPE Location diff --git a/src/mesh/generated/telemetry.pb.h b/src/mesh/generated/telemetry.pb.h index 1fb5ea0d7..b3bf3df1d 100644 --- a/src/mesh/generated/telemetry.pb.h +++ b/src/mesh/generated/telemetry.pb.h @@ -40,10 +40,10 @@ typedef struct _EnvironmentMetrics { /* Types of Measurements the telemetry module is equipped to handle */ typedef struct _Telemetry { - /* This is usually not sent over the mesh (to save space), but it is sent - from the phone so that the local device can set its RTC If it is sent over - the mesh (because there are devices on the mesh without GPS), it will only - be sent by devices which has a hardware GPS clock (IE Mobile Phone). + /* This is usually not sent over the mesh (to save space), but it is sent + from the phone so that the local device can set its RTC If it is sent over + the mesh (because there are devices on the mesh without GPS), it will only + be sent by devices which has a hardware GPS clock (IE Mobile Phone). seconds since 1970 */ uint32_t time; /* Key native device metrics such as battery level */ diff --git a/src/mesh/http/WiFiAPClient.cpp b/src/mesh/http/WiFiAPClient.cpp index afda19702..be07acc53 100644 --- a/src/mesh/http/WiFiAPClient.cpp +++ b/src/mesh/http/WiFiAPClient.cpp @@ -10,10 +10,13 @@ #include "target_specific.h" #include #include -#include #include #include +#ifndef DISABLE_NTP +#include +#endif + using namespace concurrency; static void WiFiEvent(WiFiEvent_t event); @@ -23,7 +26,10 @@ DNSServer dnsServer; // NTP WiFiUDP ntpUDP; + +#ifndef DISABLE_NTP NTPClient timeClient(ntpUDP, "0.pool.ntp.org"); +#endif uint8_t wifiDisconnectReason = 0; @@ -67,13 +73,13 @@ static int32_t reconnectWiFi() DEBUG_MSG("... Reconnecting to WiFi access point\n"); WiFi.mode(WIFI_MODE_STA); WiFi.begin(wifiName, wifiPsw); - // Starting timeClient; } } - //if (*wifiName) { +#ifndef DISABLE_NTP + // if (*wifiName) { if (WiFi.isConnected()) { DEBUG_MSG("Updating NTP time\n"); if (timeClient.update()) { @@ -89,6 +95,7 @@ static int32_t reconnectWiFi() DEBUG_MSG("NTP Update failed\n"); } } +#endif return 30 * 1000; // every 30 seconds } @@ -155,9 +162,11 @@ static void onNetworkConnected() MDNS.addService("https", "tcp", 443); } +#ifndef DISABLE_NTP DEBUG_MSG("Starting NTP time client\n"); timeClient.begin(); - timeClient.setUpdateInterval(60*60); // Update once an hour + timeClient.setUpdateInterval(60 * 60); // Update once an hour +#endif initWebServer(); initApiServer(); From 7e977aea005aebc8e128822a0548b07bb005343d Mon Sep 17 00:00:00 2001 From: Jm Casler Date: Wed, 13 Apr 2022 21:59:25 -0700 Subject: [PATCH 4/6] Add welcome screen feature toggle --- platformio.ini | 5 +++-- src/configuration.h | 2 ++ src/graphics/Screen.cpp | 2 ++ src/mesh/Router.cpp | 8 ++++---- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/platformio.ini b/platformio.ini index ba5c37489..7bb2f339e 100644 --- a/platformio.ini +++ b/platformio.ini @@ -115,8 +115,9 @@ platform_packages = ;upload_port = /dev/ttyUSB0 ;monitor_port = /dev/ttyUSB0 -upload_port = /dev/cu.SLAB_USBtoUART -monitor_port = /dev/cu.SLAB_USBtoUART +; Please don't delete these lines. JM uses them. +;upload_port = /dev/cu.SLAB_USBtoUART +;monitor_port = /dev/cu.SLAB_USBtoUART ; customize the partition table ; http://docs.platformio.org/en/latest/platforms/espressif32.html#partition-tables diff --git a/src/configuration.h b/src/configuration.h index 59d0cf712..5de429c6b 100644 --- a/src/configuration.h +++ b/src/configuration.h @@ -131,6 +131,8 @@ along with this program. If not, see . //#define DISABLE_NTP +#define DISABLE_WELCOME_UNSET + // ----------------------------------------------------------------------------- // OLED & Input // ----------------------------------------------------------------------------- diff --git a/src/graphics/Screen.cpp b/src/graphics/Screen.cpp index 0ccc46b22..2b2cb7789 100644 --- a/src/graphics/Screen.cpp +++ b/src/graphics/Screen.cpp @@ -894,9 +894,11 @@ int32_t Screen::runOnce() showingBootScreen = false; } +#ifndef DISABLE_WELCOME_UNSET if (radioConfig.preferences.region == RegionCode_Unset) { setWelcomeFrames(); } +#endif // Process incoming commands. for (;;) { diff --git a/src/mesh/Router.cpp b/src/mesh/Router.cpp index 9487231fd..cb5bea03e 100644 --- a/src/mesh/Router.cpp +++ b/src/mesh/Router.cpp @@ -360,13 +360,13 @@ Routing_Error perhapsEncode(MeshPacket *p) } else { DEBUG_MSG("Compressing message.\n"); // Copy the compressed data into the meshpacket - p->decoded.payload_compressed.size = compressed_len; - memcpy(p->decoded.payload_compressed.bytes, compressed_out, compressed_len); + //p->decoded.payload_compressed.size = compressed_len; + //memcpy(p->decoded.payload_compressed.bytes, compressed_out, compressed_len); - p->decoded.which_payloadVariant = Data_payload_compressed_tag; + //p->decoded.which_payloadVariant = Data_payload_compressed_tag; } - if (0) { + if (1) { char decompressed_out[Constants_DATA_PAYLOAD_LEN] = {}; int decompressed_len; From a10ea604afce41bbaabf318deeb44acadb46164f Mon Sep 17 00:00:00 2001 From: Jm Casler Date: Thu, 14 Apr 2022 15:51:48 -0700 Subject: [PATCH 5/6] Fixes for cppcheck errors --- src/configuration.h | 2 ++ src/mesh/compression/unishox2.c | 51 ++++++++++++++++----------------- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/src/configuration.h b/src/configuration.h index 5de429c6b..31c7f0422 100644 --- a/src/configuration.h +++ b/src/configuration.h @@ -129,8 +129,10 @@ along with this program. If not, see . // Feature toggles // ----------------------------------------------------------------------------- +// Disable use of the NTP library and related features //#define DISABLE_NTP +// Disable the welcome screen and allow #define DISABLE_WELCOME_UNSET // ----------------------------------------------------------------------------- diff --git a/src/mesh/compression/unishox2.c b/src/mesh/compression/unishox2.c index e9d02e8a5..0ca650bb1 100644 --- a/src/mesh/compression/unishox2.c +++ b/src/mesh/compression/unishox2.c @@ -129,11 +129,9 @@ void init_coder() { for (int i = 0; i < 3; i++) { for (int j = 0; j < 28; j++) { byte c = usx_sets[i][j]; - if (c != 0 && c > 32) { - usx_code_94[c - USX_OFFSET_94] = (i << 5) + j; - if (c >= 'a' && c <= 'z') - usx_code_94[c - USX_OFFSET_94 - ('a' - 'A')] = (i << 5) + j; - } + usx_code_94[c - USX_OFFSET_94] = (i << 5) + j; + if (c >= 'a' && c <= 'z') + usx_code_94[c - USX_OFFSET_94 - ('a' - 'A')] = (i << 5) + j; } } is_inited = 1; @@ -147,32 +145,31 @@ unsigned int usx_mask[] = {0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF}; /// Otherwise clen bits in code are appended to out starting with MSB int append_bits(char *out, int olen, int ol, byte code, int clen) { - byte cur_bit; - byte blen; - unsigned char a_byte; - int oidx; //printf("%d,%x,%d,%d\n", ol, code, clen, state); while (clen > 0) { - cur_bit = ol % 8; - blen = clen; - a_byte = code & usx_mask[blen - 1]; - a_byte >>= cur_bit; - if (blen + cur_bit > 8) - blen = (8 - cur_bit); - oidx = ol / 8; - if (oidx < 0 || olen <= oidx) - return -1; - if (cur_bit == 0) - out[oidx] = a_byte; - else - out[oidx] |= a_byte; - code <<= blen; - ol += blen; - clen -= blen; - } - return ol; + int oidx; + unsigned char a_byte; + + byte cur_bit = ol % 8; + byte blen = clen; + a_byte = code & usx_mask[blen - 1]; + a_byte >>= cur_bit; + if (blen + cur_bit > 8) + blen = (8 - cur_bit); + oidx = ol / 8; + if (oidx < 0 || olen <= oidx) + return -1; + if (cur_bit == 0) + out[oidx] = a_byte; + else + out[oidx] |= a_byte; + code <<= blen; + ol += blen; + clen -= blen; + } + return ol; } /// This is a safe call to append_bits() making sure it does not write past olen From dc20cbb6725e3e1a1e38381c245e163f5aec6714 Mon Sep 17 00:00:00 2001 From: Jm Casler Date: Thu, 14 Apr 2022 15:57:31 -0700 Subject: [PATCH 6/6] one more fix for cppcheck --- src/mesh/compression/unishox2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesh/compression/unishox2.c b/src/mesh/compression/unishox2.c index 0ca650bb1..8e91185a9 100644 --- a/src/mesh/compression/unishox2.c +++ b/src/mesh/compression/unishox2.c @@ -973,7 +973,7 @@ int32_t readUnicode(const char *in, int *bit_no_p, int len) { if (idx == 99) return 0x7FFFFF00 + 99; if (idx == 5) { - int idx = getStepCodeIdx(in, len, bit_no_p, 4); + idx = getStepCodeIdx(in, len, bit_no_p, 4); return 0x7FFFFF00 + idx; } if (idx >= 0) {