Fixes for #4395: nrf52 flash filesystem reliability (#4406)

* bug #4184: fix config file loss due to filesystem write errors
* Use SafeFile for atomic file writing (with xor checksum readback)
* Write db.proto last because it could be the largest file on the FS (and less critical)
* Don't keep a tmp file around while writing db.proto (because too big to fit two files in the filesystem)
* generate a new critial fault if we encounter errors writing to flash
either CriticalErrorCode_FLASH_CORRUPTION_RECOVERABLE or CriticalErrorCode_FLASH_CORRUPTION_UNRECOVERABLE
(depending on if the second write attempt worked)
* reformat the filesystem if we detect it is corrupted (then rewrite our config files) (only on nrf52 - not sure
yet if we should bother on ESP32)
* If we have to format the FS, make sure to preserve the oem.proto if it exists

* add logLegacy() so old C code in libs can log via our logging

* move filesList() to a better location (used only in developer builds)

* Reformat with "trunk fmt" to match our coding conventions

* for #4395: don't use .exists() to before attempting file open
If a LFS filesystem is corrupted, .exists() can fail when a mere .open()
attempt would have succeeded.  Therefore better to do the .open() in hopes that
we can read the file (in case we need to reformat to fix the FS).
(Seen and confirmed in stress testing)

* for #4395 more fixes, see below for details:
* check for LFS assertion failures during file operations (needs customized lfs_util.h to provide suitable hooks)
* Remove fsCheck() because checking filesystem by writing to it is very high risk, it makes likelyhood that we will
be able to read the config protobufs quite low.
* Update the LFS inside of adafruitnrf52 to 1.7.2 (from their old 1.6.1) to get the following fix:
97d8d5e96a

* use disable_adafruit_usb.py now that we are (temporarily?) using a forked adafruit lib
We need to reach inside the adafruit project and turn off USE_TINYUSB, just doing that
from platformio.ini is no longer sufficient.

Tested on a wio-sdk-wm1110 board (which is the only board that had this problem)

---------

Co-authored-by: Ben Meadors <benmmeadors@gmail.com>
pull/4442/head^2
geeksville 2024-08-13 04:45:39 -07:00 zatwierdzone przez GitHub
rodzic 6e8300287b
commit 62a0321c7d
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: B5690EEEBB952194
11 zmienionych plików z 285 dodań i 155 usunięć

Wyświetl plik

@ -0,0 +1,208 @@
/*
* lfs utility functions
*
* Copyright (c) 2017, Arm Limited. All rights reserved.
* SPDX-License-Identifier: BSD-3-Clause
*/
// MESHTASTIC/@geeksville note: This file is copied from the Adafruit nrf52 arduino lib. And we use a special -include in
// nrf52.ini to load it before EVERY file we do this hack because the default definitions for LFS_ASSERT are quite poor and we
// don't want to fork the adafruit lib (again) and send in a PR that they probably won't merge anyways. This file might break if
// they ever update lfs.util on their side, in which case we'll need to update this file to match their new version. The version
// this is a copy from is almost exactly
// https://github.com/adafruit/Adafruit_nRF52_Arduino/blob/c25d93268a3b9c23e9a1ccfcaf9b208beca624ca/libraries/Adafruit_LittleFS/src/littlefs/lfs_util.h
#ifndef LFS_UTIL_H
#define LFS_UTIL_H
// Users can override lfs_util.h with their own configuration by defining
// LFS_CONFIG as a header file to include (-DLFS_CONFIG=lfs_config.h).
//
// If LFS_CONFIG is used, none of the default utils will be emitted and must be
// provided by the config file. To start I would suggest copying lfs_util.h and
// modifying as needed.
#ifdef LFS_CONFIG
#define LFS_STRINGIZE(x) LFS_STRINGIZE2(x)
#define LFS_STRINGIZE2(x) #x
#include LFS_STRINGIZE(LFS_CONFIG)
#else
// System includes
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#ifndef LFS_NO_MALLOC
#include <stdlib.h>
#endif
#ifndef LFS_NO_ASSERT
#include <assert.h>
#endif
#if !defined(LFS_NO_DEBUG) || !defined(LFS_NO_WARN) || !defined(LFS_NO_ERROR)
#include <stdio.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
// Macros, may be replaced by system specific wrappers. Arguments to these
// macros must not have side-effects as the macros can be removed for a smaller
// code footprint
// Logging functions
#ifndef LFS_NO_DEBUG
void logLegacy(const char *level, const char *fmt, ...);
#define LFS_DEBUG(fmt, ...) logLegacy("DEBUG", "lfs debug:%d: " fmt "\n", __LINE__, __VA_ARGS__)
#else
#define LFS_DEBUG(fmt, ...)
#endif
#ifndef LFS_NO_WARN
#define LFS_WARN(fmt, ...) logLegacy("WARN", "lfs warn:%d: " fmt "\n", __LINE__, __VA_ARGS__)
#else
#define LFS_WARN(fmt, ...)
#endif
#ifndef LFS_NO_ERROR
#define LFS_ERROR(fmt, ...) logLegacy("ERROR", "lfs error:%d: " fmt "\n", __LINE__, __VA_ARGS__)
#else
#define LFS_ERROR(fmt, ...)
#endif
// Runtime assertions
#ifndef LFS_NO_ASSERT
#define LFS_ASSERT(test) assert(test)
#else
extern void lfs_assert(const char *reason);
#define LFS_ASSERT(test) \
if (!(test)) \
lfs_assert(#test)
#endif
// Builtin functions, these may be replaced by more efficient
// toolchain-specific implementations. LFS_NO_INTRINSICS falls back to a more
// expensive basic C implementation for debugging purposes
// Min/max functions for unsigned 32-bit numbers
static inline uint32_t lfs_max(uint32_t a, uint32_t b)
{
return (a > b) ? a : b;
}
static inline uint32_t lfs_min(uint32_t a, uint32_t b)
{
return (a < b) ? a : b;
}
// Find the next smallest power of 2 less than or equal to a
static inline uint32_t lfs_npw2(uint32_t a)
{
#if !defined(LFS_NO_INTRINSICS) && (defined(__GNUC__) || defined(__CC_ARM))
return 32 - __builtin_clz(a - 1);
#else
uint32_t r = 0;
uint32_t s;
a -= 1;
s = (a > 0xffff) << 4;
a >>= s;
r |= s;
s = (a > 0xff) << 3;
a >>= s;
r |= s;
s = (a > 0xf) << 2;
a >>= s;
r |= s;
s = (a > 0x3) << 1;
a >>= s;
r |= s;
return (r | (a >> 1)) + 1;
#endif
}
// Count the number of trailing binary zeros in a
// lfs_ctz(0) may be undefined
static inline uint32_t lfs_ctz(uint32_t a)
{
#if !defined(LFS_NO_INTRINSICS) && defined(__GNUC__)
return __builtin_ctz(a);
#else
return lfs_npw2((a & -a) + 1) - 1;
#endif
}
// Count the number of binary ones in a
static inline uint32_t lfs_popc(uint32_t a)
{
#if !defined(LFS_NO_INTRINSICS) && (defined(__GNUC__) || defined(__CC_ARM))
return __builtin_popcount(a);
#else
a = a - ((a >> 1) & 0x55555555);
a = (a & 0x33333333) + ((a >> 2) & 0x33333333);
return (((a + (a >> 4)) & 0xf0f0f0f) * 0x1010101) >> 24;
#endif
}
// Find the sequence comparison of a and b, this is the distance
// between a and b ignoring overflow
static inline int lfs_scmp(uint32_t a, uint32_t b)
{
return (int)(unsigned)(a - b);
}
// Convert from 32-bit little-endian to native order
static inline uint32_t lfs_fromle32(uint32_t a)
{
#if !defined(LFS_NO_INTRINSICS) && ((defined(BYTE_ORDER) && BYTE_ORDER == ORDER_LITTLE_ENDIAN) || \
(defined(__BYTE_ORDER) && __BYTE_ORDER == __ORDER_LITTLE_ENDIAN) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
return a;
#elif !defined(LFS_NO_INTRINSICS) && \
((defined(BYTE_ORDER) && BYTE_ORDER == ORDER_BIG_ENDIAN) || (defined(__BYTE_ORDER) && __BYTE_ORDER == __ORDER_BIG_ENDIAN) || \
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
return __builtin_bswap32(a);
#else
return (((uint8_t *)&a)[0] << 0) | (((uint8_t *)&a)[1] << 8) | (((uint8_t *)&a)[2] << 16) | (((uint8_t *)&a)[3] << 24);
#endif
}
// Convert to 32-bit little-endian from native order
static inline uint32_t lfs_tole32(uint32_t a)
{
return lfs_fromle32(a);
}
// Calculate CRC-32 with polynomial = 0x04c11db7
void lfs_crc(uint32_t *crc, const void *buffer, size_t size);
// Allocate memory, only used if buffers are not provided to littlefs
static inline void *lfs_malloc(size_t size)
{
#ifndef LFS_NO_MALLOC
extern void *pvPortMalloc(size_t xWantedSize);
return pvPortMalloc(size);
#else
(void)size;
return NULL;
#endif
}
// Deallocate memory, only used if buffers are not provided to littlefs
static inline void lfs_free(void *p)
{
#ifndef LFS_NO_MALLOC
extern void vPortFree(void *pv);
vPortFree(p);
#else
(void)p;
#endif
}
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif
#endif

Wyświetl plik

@ -2,9 +2,13 @@
; Instead of the standard nordicnrf52 platform, we use our fork which has our added variant files
platform = platformio/nordicnrf52@^10.5.0
extends = arduino_base
platform_packages =
; our custom Git version until they merge our PR
framework-arduinoadafruitnrf52 @ https://github.com/geeksville/Adafruit_nRF52_Arduino.git
build_type = debug
build_flags =
build_flags =
-include arch/nrf52/cpp_overrides/lfs_util.h
${arduino_base.build_flags}
-DSERIAL_BUFFER_SIZE=1024
-Wno-unused-variable

Wyświetl plik

@ -5,16 +5,24 @@ Import("env")
# NOTE: This is not currently used, but can serve as an example on how to write extra_scripts
print("Current CLI targets", COMMAND_LINE_TARGETS)
print("Current Build targets", BUILD_TARGETS)
print("CPP defs", env.get("CPPDEFINES"))
# print("Current CLI targets", COMMAND_LINE_TARGETS)
# print("Current Build targets", BUILD_TARGETS)
# print("CPP defs", env.get("CPPDEFINES"))
# print(env.Dump())
# Adafruit.py in the platformio build tree is a bit naive and always enables their USB stack for building. We don't want this.
# So come in after that python script has run and disable it. This hack avoids us having to fork that big project and send in a PR
# which might not be accepted. -@geeksville
env["CPPDEFINES"].remove("USBCON")
env["CPPDEFINES"].remove("USE_TINYUSB")
lib_builders = env.get("__PIO_LIB_BUILDERS", None)
if lib_builders is not None:
print("Disabling Adafruit USB stack")
for k in lib_builders:
if k.name == "Adafruit TinyUSB Library":
libenv = k.env
# print(f"{k.name }: { libenv.Dump() } ")
# libenv["CPPDEFINES"].remove("USBCON")
libenv["CPPDEFINES"].remove("USE_TINYUSB")
# Custom actions when building program/firmware
# env.AddPreAction("buildprog", callback...)

Wyświetl plik

@ -18,10 +18,7 @@ import subprocess
import sys
from platformio.project.exception import PlatformioException
from platformio.public import (
DeviceMonitorFilterBase,
load_build_metadata,
)
from platformio.public import DeviceMonitorFilterBase, load_build_metadata
# By design, __init__ is called inside miniterm and we can't pass context to it.
# pylint: disable=attribute-defined-outside-init
@ -32,7 +29,7 @@ IS_WINDOWS = sys.platform.startswith("win")
class Esp32C3ExceptionDecoder(DeviceMonitorFilterBase):
NAME = "esp32_c3_exception_decoder"
PCADDR_PATTERN = re.compile(r'0x4[0-9a-f]{7}', re.IGNORECASE)
PCADDR_PATTERN = re.compile(r"0x4[0-9a-f]{7}", re.IGNORECASE)
def __call__(self):
self.buffer = ""
@ -75,14 +72,14 @@ See https://docs.platformio.org/page/projectconf/build_configurations.html
% self.__class__.__name__
)
return False
if not os.path.isfile(self.addr2line_path):
sys.stderr.write(
"%s: disabling, addr2line at %s does not exist\n"
% (self.__class__.__name__, self.addr2line_path)
)
return False
return True
except PlatformioException as e:
sys.stderr.write(
@ -117,7 +114,7 @@ See https://docs.platformio.org/page/projectconf/build_configurations.html
trace = self.get_backtrace(m)
if len(trace) != "":
text = text[: last] + trace + text[last :]
text = text[:last] + trace + text[last:]
last += len(trace)
return text
@ -125,14 +122,10 @@ See https://docs.platformio.org/page/projectconf/build_configurations.html
def get_backtrace(self, match):
trace = "\n"
enc = "mbcs" if IS_WINDOWS else "utf-8"
args = [self.addr2line_path, u"-fipC", u"-e", self.firmware_path]
args = [self.addr2line_path, "-fipC", "-e", self.firmware_path]
try:
addr = match.group()
output = (
subprocess.check_output(args + [addr])
.decode(enc)
.strip()
)
output = subprocess.check_output(args + [addr]).decode(enc).strip()
output = output.replace(
"\n", "\n "
) # newlines happen with inlined methods

Wyświetl plik

@ -26,6 +26,16 @@ SOFTWARE.*/
#include "DebugConfiguration.h"
/// A C wrapper for LOG_DEBUG that can be used from arduino C libs that don't know about C++ or meshtastic
extern "C" void logLegacy(const char *level, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
if (console)
console->vprintf(level, fmt, args);
va_end(args);
}
#if HAS_NETWORKING
Syslog::Syslog(UDP &client)
@ -169,4 +179,4 @@ inline bool Syslog::_sendLog(uint16_t pri, const char *appName, const char *mess
return true;
}
#endif
#endif

Wyświetl plik

@ -62,6 +62,9 @@
#endif
#endif
/// A C wrapper for LOG_DEBUG that can be used from arduino C libs that don't know about C++ or meshtastic
extern "C" void logLegacy(const char *level, const char *fmt, ...);
#define SYSLOG_NILVALUE "-"
#define SYSLOG_CRIT 2 /* critical conditions */

Wyświetl plik

@ -48,6 +48,15 @@ void OSFS::writeNBytes(uint16_t address, unsigned int num, const byte *input)
}
#endif
bool lfs_assert_failed =
false; // Note: we use this global on all platforms, though it can only be set true on nrf52 (in our modified lfs_util.h)
extern "C" void lfs_assert(const char *reason)
{
LOG_ERROR("LFS assert: %s\n", reason);
lfs_assert_failed = true;
}
/**
* @brief Copies a file from one location to another.
*
@ -199,7 +208,7 @@ std::vector<meshtastic_FileInfo> getFiles(const char *dirname, uint8_t levels)
* @param levels The number of levels of subdirectories to list.
* @param del Whether or not to delete the contents of the directory after listing.
*/
void listDir(const char *dirname, uint8_t levels, bool del = false)
void listDir(const char *dirname, uint8_t levels, bool del)
{
#ifdef FSCom
#if (defined(ARCH_ESP32) || defined(ARCH_RP2040) || defined(ARCH_PORTDUINO))
@ -214,7 +223,9 @@ void listDir(const char *dirname, uint8_t levels, bool del = false)
}
File file = root.openNextFile();
while (file) {
while (
file &&
file.name()[0]) { // This file.name() check is a workaround for a bug in the Adafruit LittleFS nrf52 glue (see issue 4395)
if (file.isDirectory() && !String(file.name()).endsWith(".")) {
if (levels) {
#ifdef ARCH_ESP32
@ -313,62 +324,6 @@ void rmDir(const char *dirname)
#endif
}
bool fsCheck()
{
#if defined(ARCH_NRF52)
size_t write_size = 0;
size_t read_size = 0;
char buf[32] = {0};
Adafruit_LittleFS_Namespace::File file(FSCom);
const char *text = "meshtastic fs test";
size_t text_length = strlen(text);
const char *filename = "/meshtastic.txt";
LOG_DEBUG("Try create file .\n");
if (file.open(filename, FILE_O_WRITE)) {
write_size = file.write(text);
} else {
LOG_DEBUG("Open file failed .\n");
goto FORMAT_FS;
}
if (write_size != text_length) {
LOG_DEBUG("Text bytes do not match .\n");
file.close();
goto FORMAT_FS;
}
file.close();
if (!file.open(filename, FILE_O_READ)) {
LOG_DEBUG("Open file failed .\n");
goto FORMAT_FS;
}
read_size = file.readBytes(buf, text_length);
if (read_size != text_length) {
LOG_DEBUG("Text bytes do not match .\n");
file.close();
goto FORMAT_FS;
}
if (memcmp(buf, text, text_length) != 0) {
LOG_DEBUG("The written bytes do not match the read bytes .\n");
file.close();
goto FORMAT_FS;
}
return true;
FORMAT_FS:
LOG_DEBUG("Format FS ....\n");
FSCom.format();
FSCom.begin();
return false;
#else
return true;
#endif
}
void fsInit()
{
#ifdef FSCom
@ -378,35 +333,6 @@ void fsInit()
}
#if defined(ARCH_ESP32)
LOG_DEBUG("Filesystem files (%d/%d Bytes):\n", FSCom.usedBytes(), FSCom.totalBytes());
#elif defined(ARCH_NRF52)
/*
* nRF52840 has a certain chance of automatic formatting failure.
* Try to create a file after initializing the file system. If the creation fails,
* it means that the file system is not working properly. Please format it manually again.
* To check the normality of the file system, you need to disable the LFS_NO_ASSERT assertion.
* Otherwise, the assertion will be entered at the moment of reading or opening, and the FS will not be formatted.
* */
bool ret = false;
uint8_t retry = 3;
while (retry--) {
ret = fsCheck();
if (ret) {
LOG_DEBUG("File system check is OK.\n");
break;
}
delay(10);
}
// It may not be possible to reach this step.
// Add a loop here to prevent unpredictable situations from happening.
// Can add a screen to display error status later.
if (!ret) {
while (1) {
LOG_ERROR("The file system is damaged and cannot proceed to the next step.\n");
delay(1000);
}
}
#else
LOG_DEBUG("Filesystem files:\n");
#endif

Wyświetl plik

@ -51,9 +51,13 @@ using namespace Adafruit_LittleFS_Namespace;
#endif
void fsInit();
void fsListFiles();
bool copyFile(const char *from, const char *to);
bool renameFile(const char *pathFrom, const char *pathTo);
std::vector<meshtastic_FileInfo> getFiles(const char *dirname, uint8_t levels);
void listDir(const char *dirname, uint8_t levels, bool del);
void listDir(const char *dirname, uint8_t levels, bool del = false);
void rmDir(const char *dirname);
void setupSDCard();
void setupSDCard();
extern bool lfs_assert_failed; // Note: we use this global on all platforms, though it can only be set true on nrf52 (in our
// modified lfs_util.h)

Wyświetl plik

@ -11,6 +11,9 @@ static File openFile(const char *filename, bool fullAtomic)
String filenameTmp = filename;
filenameTmp += ".tmp";
// clear any previous LFS errors
lfs_assert_failed = false;
return FSCom.open(filenameTmp.c_str(), FILE_O_WRITE);
}
@ -73,6 +76,9 @@ bool SafeFile::close()
/// Read our (closed) tempfile back in and compare the hash
bool SafeFile::testReadback()
{
bool lfs_failed = lfs_assert_failed;
lfs_assert_failed = false;
String filenameTmp = filename;
filenameTmp += ".tmp";
auto f2 = FSCom.open(filenameTmp.c_str(), FILE_O_READ);
@ -93,7 +99,7 @@ bool SafeFile::testReadback()
return false;
}
return true;
return !lfs_failed;
}
#endif

Wyświetl plik

@ -56,27 +56,6 @@ meshtastic_ChannelFile channelFile;
meshtastic_OEMStore oemStore;
static bool hasOemStore = false;
// These are not publically exposed - copied from InternalFileSystem.cpp
// #define FLASH_NRF52_PAGE_SIZE 4096
// #define LFS_FLASH_TOTAL_SIZE (7*FLASH_NRF52_PAGE_SIZE)
// #define LFS_BLOCK_SIZE 128
/// List all files in the FS and test write and readback.
/// Useful for filesystem stress testing - normally stripped from build by the linker.
void flashTest()
{
auto filesManifest = getFiles("/", 5);
uint32_t totalSize = 0;
for (size_t i = 0; i < filesManifest.size(); i++) {
LOG_INFO("File %s (size %d)\n", filesManifest[i].file_name, filesManifest[i].size_bytes);
totalSize += filesManifest[i].size_bytes;
}
LOG_INFO("%d files (total size %u)\n", filesManifest.size(), totalSize);
// LOG_INFO("Filesystem block size %u, total bytes %u", LFS_FLASH_TOTAL_SIZE, LFS_BLOCK_SIZE);
nodeDB->saveToDisk();
}
bool meshtastic_DeviceState_callback(pb_istream_t *istream, pb_ostream_t *ostream, const pb_field_iter_t *field)
{
if (ostream) {
@ -617,11 +596,6 @@ LoadFileResult NodeDB::loadProto(const char *filename, size_t protoSize, size_t
LoadFileResult state = LoadFileResult::OTHER_FAILURE;
#ifdef FSCom
if (!FSCom.exists(filename)) {
LOG_INFO("File %s not found\n", filename);
return LoadFileResult::NOT_FOUND;
}
auto f = FSCom.open(filename, FILE_O_READ);
if (f) {

Wyświetl plik

@ -3,29 +3,23 @@
extends = nrf52840_base
board = wio-sdk-wm1110
extra_scripts =
bin/platformio-custom.py
extra_scripts/disable_adafruit_usb.py
# Remove adafruit USB serial from the build (it is incompatible with using the ch340 serial chip on this board)
build_unflags = ${nrf52840_base:build_unflags} -DUSBCON -DUSE_TINYUSB
build_flags = ${nrf52840_base.build_flags} -Ivariants/wio-sdk-wm1110 -Isrc/platform/nrf52/softdevice -Isrc/platform/nrf52/softdevice/nrf52 -DWIO_WM1110
-L "${platformio.libdeps_dir}/${this.__env__}/bsec2/src/cortex-m4/fpv4-sp-d16-hard"
-DGPS_POWER_TOGGLE ; comment this line to disable triple press function on the user button to turn off gps entirely.
-DCFG_TUD_CDC=0
board_build.ldscript = src/platform/nrf52/nrf52840_s140_v7.ld
build_src_filter = ${nrf52_base.build_src_filter} +<../variants/wio-sdk-wm1110>
lib_deps =
${nrf52840_base.lib_deps}
debug_tool = jlink
;debug_tool = stlink
;debug_speed = 4000
; No need to reflash if the binary hasn't changed
debug_load_mode = modified
; If not set we will default to uploading over serial (first it forces bootloader entry by talking 1200bps to cdcacm)
upload_protocol = nrfutil
;upload_protocol = stlink
; we prefer to stop in setup() because we are an 'ardiuno' app
debug_init_break = tbreak setup
; we need to turn off BLE/soft device if we are debugging otherwise it will watchdog reset us.
debug_extra_cmds =
echo Running .gdbinit script
commands 1
set useSoftDevice = false
end
;debug_tool = jlink
debug_tool = stlink
; No need to reflash if the binary hasn't changed
; If not set we will default to uploading over serial (first it forces bootloader entry by talking 1200bps to cdcacm)
;upload_protocol = nrfutil
;upload_protocol = stlink