Merge branch 'feature/esp32c3_idf_monitor_backtrace' into 'master'

tools: decode esp32c3 stack dumps in idf_monitor.py See merge request espressif/esp-idf!11713
2020-12-23 07:51:26 +08:00 · 2020-12-23 07:51:26 +08:00 · bcf34cce09
commit bcf34cce09
--- a/tools/gdb_panic_server.py
+++ b/tools/gdb_panic_server.py
@ -0,0 +1,295 @@
+#!/usr/bin/env python
+# coding=utf-8
+#
+# A script which parses ESP-IDF panic handler output (registers & stack dump),
+# and then acts as a GDB server over stdin/stdout, presenting the information
+# from the panic handler to GDB.
+# This allows for generating backtraces out of raw stack dumps on architectures
+# where backtracing on the target side is not possible.
+#
+# Note that the "act as a GDB server" approach is somewhat a hack.
+# A much nicer solution would have been to convert the panic handler output
+# into a core file, and point GDB to the core file.
+# However, RISC-V baremetal GDB currently lacks core dump support.
+#
+# The approach is inspired by Cesanta's ESP8266 GDB server:
+# https://github.com/cesanta/mongoose-os/blob/27777c8977/platforms/esp8266/tools/serve_core.py
+#
+# Copyright 2020 Espressif Systems (Shanghai) Co. Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from builtins import bytes
+import argparse
+import struct
+import sys
+import logging
+import binascii
+from collections import namedtuple
+
+from pyparsing import Literal, Word, nums, OneOrMore, srange, Group, Combine
+
+# Used for type annotations only. Silence linter warnings.
+from pyparsing import ParseResults, ParserElement  # noqa: F401 # pylint: disable=unused-import
+
+try:
+    import typing  # noqa: F401 # pylint: disable=unused-import
+except ImportError:
+    pass
+
+# pyparsing helper
+hexnumber = srange("[0-9a-f]")
+
+
+# List of registers to be passed to GDB, in the order GDB expects.
+# The names should match those used in IDF panic handler.
+# Registers not present in IDF panic handler output (like X0) will be assumed to be 0.
+GDB_REGS_INFO_RISCV_ILP32 = [
+    "X0", "RA", "SP", "GP",
+    "TP", "T0", "T1", "T2",
+    "S0/FP", "S1", "A0", "A1",
+    "A2", "A3", "A4", "A5",
+    "A6", "A7", "S2", "S3",
+    "S4", "S5", "S6", "S7",
+    "S8", "S9", "S10", "S11",
+    "T3", "T4", "T5", "T6",
+    "MEPC"
+]
+
+
+GDB_REGS_INFO = {
+    "esp32c3": GDB_REGS_INFO_RISCV_ILP32
+}
+
+PanicInfo = namedtuple("PanicInfo", "core_id regs stack_base_addr stack_data")
+
+
+def build_riscv_panic_output_parser():  # type: () -> typing.Type[ParserElement]
+    """Builds a parser for the panic handler output using pyparsing"""
+
+    # We don't match the first line, since "Guru Meditation" will not be printed in case of an abort:
+    # Guru Meditation Error: Core  0 panic'ed (Store access fault). Exception was unhandled.
+
+    # Core  0 register dump:
+    reg_dump_header = Group(Literal("Core") +
+                            Word(nums)("core_id") +
+                            Literal("register dump:"))("reg_dump_header")
+
+    # MEPC    : 0x4200232c  RA      : 0x42009694  SP      : 0x3fc93a80  GP      : 0x3fc8b320
+    reg_name = Word(srange("[A-Z_0-9/-]"))("name")
+    hexnumber_with_0x = Combine(Literal("0x") + Word(hexnumber))
+    reg_value = hexnumber_with_0x("value")
+    reg_dump_one_reg = Group(reg_name + Literal(":") + reg_value)  # not named because there will be OneOrMore
+    reg_dump_all_regs = Group(OneOrMore(reg_dump_one_reg))("regs")
+    reg_dump = Group(reg_dump_header + reg_dump_all_regs)  # not named because there will be OneOrMore
+    reg_dumps = Group(OneOrMore(reg_dump))("reg_dumps")
+
+    # Stack memory:
+    # 3fc93a80: 0x00000030 0x00000021 0x3fc8aedc 0x4200232a 0xa5a5a5a5 0xa5a5a5a5 0x3fc8aedc 0x420099b0
+    stack_line = Group(Word(hexnumber)("base") + Literal(":") +
+                       Group(OneOrMore(hexnumber_with_0x))("data"))
+    stack_dump = Group(Literal("Stack memory:") +
+                       Group(OneOrMore(stack_line))("lines"))("stack_dump")
+
+    # Parser for the complete panic output:
+    panic_output = reg_dumps + stack_dump
+    return panic_output
+
+
+def get_stack_addr_and_data(res):  # type: (ParseResults) -> typing.Tuple[int, bytes]
+    """ Extract base address and bytes from the parsed stack dump """
+    stack_base_addr = 0  # First reported address in the dump
+    base_addr = 0  # keeps track of the address for the given line of the dump
+    bytes_in_line = 0  # bytes of stack parsed on the previous line; used to validate the next base address
+    stack_data = b""  # accumulates all the dumped stack data
+    for line in res.stack_dump.lines:
+        # update and validate the base address
+        prev_base_addr = base_addr
+        base_addr = int(line.base, 16)
+        if stack_base_addr == 0:
+            stack_base_addr = base_addr
+        else:
+            assert base_addr == prev_base_addr + bytes_in_line
+
+        # convert little-endian hex words to byte representation
+        words = [int(w, 16) for w in line.data]
+        line_data = b"".join([struct.pack("<I", w) for w in words])
+        bytes_in_line = len(line_data)
+
+        # accumulate in the whole stack data
+        stack_data += line_data
+
+    return stack_base_addr, stack_data
+
+
+def parse_idf_riscv_panic_output(panic_text):  # type: (str) -> PanicInfo
+    """ Decode panic handler output from a file """
+    panic_output = build_riscv_panic_output_parser()
+    results = panic_output.searchString(panic_text)
+    if len(results) != 1:
+        raise ValueError("Couldn't parse panic handler output")
+    res = results[0]
+
+    if len(res.reg_dumps) > 1:
+        raise NotImplementedError("Handling of multi-core register dumps not implemented")
+
+    # Build a dict of register names/values
+    rd = res.reg_dumps[0]
+    core_id = int(rd.reg_dump_header.core_id)
+    regs = dict()
+    for reg in rd.regs:
+        reg_value = int(reg.value, 16)
+        regs[reg.name] = reg_value
+
+    stack_base_addr, stack_data = get_stack_addr_and_data(res)
+
+    return PanicInfo(core_id=core_id,
+                     regs=regs,
+                     stack_base_addr=stack_base_addr,
+                     stack_data=stack_data)
+
+
+PANIC_OUTPUT_PARSERS = {
+    "esp32c3": parse_idf_riscv_panic_output
+}
+
+
+class GdbServer(object):
+    def __init__(self, panic_info, target, log_file=None):  # type: (PanicInfo, str, str) -> None
+        self.panic_info = panic_info
+        self.in_stream = sys.stdin
+        self.out_stream = sys.stdout
+        self.reg_list = GDB_REGS_INFO[target]
+
+        self.logger = logging.getLogger("GdbServer")
+        if log_file:
+            handler = logging.FileHandler(log_file, "w+")
+            self.logger.setLevel(logging.DEBUG)
+            formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+            handler.setFormatter(formatter)
+            self.logger.addHandler(handler)
+
+    def run(self):  # type: () -> None
+        """ Process GDB commands from stdin until GDB tells us to quit """
+        buffer = ""
+        while True:
+            buffer += self.in_stream.read(1)
+            if len(buffer) > 3 and buffer[-3] == '#':
+                self._handle_command(buffer)
+                buffer = ""
+
+    def _handle_command(self, buffer):  # type: (str) -> None
+        command = buffer[1:-3]  # ignore checksums
+        # Acknowledge the command
+        self.out_stream.write("+")
+        self.out_stream.flush()
+        self.logger.debug("Got command: %s", command)
+        if command == "?":
+            # report sigtrap as the stop reason; the exact reason doesn't matter for backtracing
+            self._respond("T05")
+        elif command.startswith("Hg") or command.startswith("Hc"):
+            # Select thread command
+            self._respond("OK")
+        elif command == "qfThreadInfo":
+            # Get list of threads.
+            # Only one thread for now, can be extended to show one thread for each core,
+            # if we dump both cores (e.g. on an interrupt watchdog)
+            self._respond("m1")
+        elif command == "qC":
+            # That single thread is selected.
+            self._respond("QC1")
+        elif command == "g":
+            # Registers read
+            self._respond_regs()
+        elif command.startswith("m"):
+            # Memory read
+            addr, size = [int(v, 16) for v in command[1:].split(",")]
+            self._respond_mem(addr, size)
+        elif command.startswith("vKill") or command == "k":
+            # Quit
+            self._respond("OK")
+            raise SystemExit(0)
+        else:
+            # Empty response required for any unknown command
+            self._respond("")
+
+    def _respond(self, data):  # type: (str) -> None
+        # calculate checksum
+        data_bytes = bytes(data.encode("ascii"))  # bytes() for Py2 compatibility
+        checksum = sum(data_bytes) & 0xff
+        # format and write the response
+        res = "${}#{:02x}".format(data, checksum)
+        self.logger.debug("Wrote: %s", res)
+        self.out_stream.write(res)
+        self.out_stream.flush()
+        # get the result ('+' or '-')
+        ret = self.in_stream.read(1)
+        self.logger.debug("Response: %s", ret)
+        if ret != '+':
+            sys.stderr.write("GDB responded with '-' to {}".format(res))
+            raise SystemExit(1)
+
+    def _respond_regs(self):  # type: () -> None
+        response = ""
+        for reg_name in self.reg_list:
+            # register values are reported as hexadecimal strings
+            # in target byte order (i.e. LSB first for RISC-V)
+            reg_val = self.panic_info.regs.get(reg_name, 0)
+            reg_bytes = struct.pack("<L", reg_val)
+            response += binascii.hexlify(reg_bytes).decode("ascii")
+        self._respond(response)
+
+    def _respond_mem(self, start_addr, size):  # type: (int, int) -> None
+        stack_addr_min = self.panic_info.stack_base_addr
+        stack_data = self.panic_info.stack_data
+        stack_len = len(self.panic_info.stack_data)
+        stack_addr_max = stack_addr_min + stack_len
+
+        # For any memory address that is not on the stack, pretend the value is 0x00.
+        # GDB should never ask us for program memory, it will be obtained from the ELF file.
+        def in_stack(addr):
+            return stack_addr_min <= addr < stack_addr_max
+
+        result = ""
+        for addr in range(start_addr, start_addr + size):
+            if not in_stack(addr):
+                result += "00"
+            else:
+                result += "{:02x}".format(stack_data[addr - stack_addr_min])
+
+        self._respond(result)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("input_file", type=argparse.FileType("r"),
+                        help="File containing the panic handler output")
+    parser.add_argument("--target", choices=GDB_REGS_INFO.keys(),
+                        help="Chip to use (determines the architecture)")
+    parser.add_argument("--gdb-log", default=None,
+                        help="If specified, the file for logging GDB server debug information")
+    args = parser.parse_args()
+
+    panic_info = PANIC_OUTPUT_PARSERS[args.target](args.input_file.read())
+
+    server = GdbServer(panic_info, target=args.target, log_file=args.gdb_log)
+    try:
+        server.run()
+    except KeyboardInterrupt:
+        sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
--- a/tools/idf_monitor.py
+++ b/tools/idf_monitor.py
@ -137,6 +137,19 @@ COREDUMP_DONE = 2
 COREDUMP_DECODE_DISABLE = "disable"
 COREDUMP_DECODE_INFO = "info"

+# panic handler related messages
+PANIC_START = r"Core \s*\d+ register dump:"
+PANIC_END = b"ELF file SHA256:"
+PANIC_STACK_DUMP = b"Stack memory:"
+
+# panic handler decoding states
+PANIC_IDLE = 0
+PANIC_READING = 1
+
+# panic handler decoding options
+PANIC_DECODE_DISABLE = "disable"
+PANIC_DECODE_BACKTRACE = "backtrace"
+

 class StoppableThread(object):
    """
@ -486,6 +499,8 @@ class Monitor(object):
    def __init__(self, serial_instance, elf_file, print_filter, make="make", encrypted=False,
                 toolchain_prefix=DEFAULT_TOOLCHAIN_PREFIX, eol="CRLF",
                 decode_coredumps=COREDUMP_DECODE_INFO,
+                 decode_panic=PANIC_DECODE_DISABLE,
+                 target=None,
                 websocket_client=None):
        super(Monitor, self).__init__()
        self.event_queue = queue.Queue()
@ -519,6 +534,7 @@ class Monitor(object):
        self.encrypted = encrypted
        self.toolchain_prefix = toolchain_prefix
        self.websocket_client = websocket_client
+        self.target = target

        # internal state
        self._last_line_part = b""
@ -533,6 +549,9 @@ class Monitor(object):
        self._decode_coredumps = decode_coredumps
        self._reading_coredump = COREDUMP_IDLE
        self._coredump_buffer = b""
+        self._decode_panic = decode_panic
+        self._reading_panic = PANIC_IDLE
+        self._panic_buffer = b""

    def invoke_processing_last_line(self):
        self.event_queue.put((TAG_SERIAL_FLUSH, b''), False)
@ -602,6 +621,7 @@ class Monitor(object):
            if line != b"":
                if self._serial_check_exit and line == self.console_parser.exit_key.encode('latin-1'):
                    raise SerialStopException()
+                self.check_panic_decode_trigger(line)
                self.check_coredump_trigger_before_print(line)
                if self._force_line_print or self._line_matcher.match(line.decode(errors="ignore")):
                    self._print(line + b'\n')
@ -806,6 +826,59 @@ class Monitor(object):
                except OSError as e:
                    yellow_print("Couldn't remote temporary core dump file ({})".format(e))

+    def check_panic_decode_trigger(self, line):
+        if self._decode_panic == PANIC_DECODE_DISABLE:
+            return
+
+        if self._reading_panic == PANIC_IDLE and re.search(PANIC_START, line.decode("ascii", errors='ignore')):
+            self._reading_panic = PANIC_READING
+            yellow_print("Stack dump detected")
+
+        if self._reading_panic == PANIC_READING and PANIC_STACK_DUMP in line:
+            self._output_enabled = False
+
+        if self._reading_panic == PANIC_READING:
+            self._panic_buffer += line.replace(b'\r', b'') + b'\n'
+
+        if self._reading_panic == PANIC_READING and PANIC_END in line:
+            self._reading_panic = PANIC_IDLE
+            self._output_enabled = True
+            self.process_panic_output(self._panic_buffer)
+            self._panic_buffer = b""
+
+    def process_panic_output(self, panic_output):
+        panic_output_decode_script = os.path.join(os.path.dirname(__file__), "..", "tools", "gdb_panic_server.py")
+        panic_output_file = None
+        try:
+            # On Windows, the temporary file can't be read unless it is closed.
+            # Set delete=False and delete the file manually later.
+            with tempfile.NamedTemporaryFile(mode="wb", delete=False) as panic_output_file:
+                panic_output_file.write(panic_output)
+                panic_output_file.flush()
+
+            cmd = [self.toolchain_prefix + "gdb",
+                   "--batch", "-n",
+                   self.elf_file,
+                   "-ex", "target remote | \"{python}\" \"{script}\" --target {target} \"{output_file}\""
+                   .format(python=sys.executable,
+                           script=panic_output_decode_script,
+                           target=self.target,
+                           output_file=panic_output_file.name),
+                   "-ex", "bt"]
+
+            output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
+            yellow_print("\nBacktrace:\n\n")
+            self._print(output)
+        except subprocess.CalledProcessError as e:
+            yellow_print("Failed to run gdb_panic_server.py script: {}\n{}\n\n".format(e, e.output))
+            self._print(panic_output)
+        finally:
+            if panic_output_file is not None:
+                try:
+                    os.unlink(panic_output_file.name)
+                except OSError as e:
+                    yellow_print("Couldn't remove temporary panic output file ({})".format(e))
+
    def run_gdb(self):
        with self:  # disable console control
            sys.stderr.write(ANSI_NORMAL)
@ -981,6 +1054,19 @@ def main():
        help="Handling of core dumps found in serial output"
    )

+    parser.add_argument(
+        '--decode-panic',
+        choices=[PANIC_DECODE_BACKTRACE, PANIC_DECODE_DISABLE],
+        default=PANIC_DECODE_DISABLE,
+        help="Handling of panic handler info found in serial output"
+    )
+
+    parser.add_argument(
+        '--target',
+        required=False,
+        help="Target name (used when stack dump decoding is enabled)"
+    )
+
    parser.add_argument(
        '--ws',
        default=os.environ.get('ESP_IDF_MONITOR_WS', None),
@ -1029,7 +1115,7 @@ def main():
    try:
        monitor = Monitor(serial_instance, args.elf_file.name, args.print_filter, args.make, args.encrypted,
                          args.toolchain_prefix, args.eol,
-                          args.decode_coredumps,
+                          args.decode_coredumps, args.decode_panic, args.target,
                          ws)

        yellow_print('--- idf_monitor on {p.name} {p.baudrate} ---'.format(
--- a/tools/idf_py_actions/serial_ext.py
+++ b/tools/idf_py_actions/serial_ext.py
@ -94,6 +94,10 @@ def action_extensions(base_actions, project_path):
        if coredump_decode is not None:
            monitor_args += ["--decode-coredumps", coredump_decode]

+        target_arch_riscv = get_sdkconfig_value(project_desc["config_file"], "CONFIG_IDF_TARGET_ARCH_RISCV")
+        if target_arch_riscv:
+            monitor_args += ["--decode-panic", "backtrace", "--target", project_desc["target"]]
+
        if print_filter is not None:
            monitor_args += ["--print_filter", print_filter]
        monitor_args += [elf_file]