From 16b89c3c8efff416ee0581aedee22d8f07ef2367 Mon Sep 17 00:00:00 2001 From: Jeff Epler Date: Thu, 8 Feb 2024 16:56:52 -0800 Subject: [PATCH 1/2] nanbox_smallint: Fix incorrect use of int() in test. The literal is in base 16 but int()'s default radix in CPython is 10, not 0. Signed-off-by: Jeff Epler --- tests/basics/nanbox_smallint.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/basics/nanbox_smallint.py b/tests/basics/nanbox_smallint.py index b3a502e447..9451ab3284 100644 --- a/tests/basics/nanbox_smallint.py +++ b/tests/basics/nanbox_smallint.py @@ -23,17 +23,17 @@ if float("1e100") == float("inf"): raise SystemExit micropython.heap_lock() -print(int("0x80000000")) +print(int("0x80000000", 16)) micropython.heap_unlock() # This is the most positive small integer. micropython.heap_lock() -print(int("0x3fffffffffff")) +print(int("0x3fffffffffff", 16)) micropython.heap_unlock() # This is the most negative small integer. micropython.heap_lock() -print(int("-0x3fffffffffff") - 1) +print(int("-0x3fffffffffff", 16) - 1) micropython.heap_unlock() x = 1 From 7568b3b10527cf47708d3528f234fb245620aa08 Mon Sep 17 00:00:00 2001 From: Jeff Epler Date: Wed, 3 Jan 2024 19:31:35 -0600 Subject: [PATCH 2/2] core: Throw an exception for invalid int literals like "01". This includes making int("01") parse in base 10 like standard Python. The new error message is different from CPython. 
It says e.g., `SyntaxError: invalid syntax for integer with base 0: '09'` Signed-off-by: Jeff Epler --- py/objint.c | 2 +- py/parsenum.c | 3 +++ py/parsenumbase.c | 14 ++++++++++++++ tests/basics/int1.py | 3 +++ tests/basics/lexer.py | 8 ++++++++ 5 files changed, 29 insertions(+), 1 deletion(-) diff --git a/py/objint.c b/py/objint.c index be5f4653a7..2fc102a5f5 100644 --- a/py/objint.c +++ b/py/objint.c @@ -55,7 +55,7 @@ STATIC mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args, return o; } else if (mp_get_buffer(args[0], &bufinfo, MP_BUFFER_READ)) { // a textual representation, parse it - return mp_parse_num_integer(bufinfo.buf, bufinfo.len, 0, NULL); + return mp_parse_num_integer(bufinfo.buf, bufinfo.len, 10, NULL); #if MICROPY_PY_BUILTINS_FLOAT } else if (mp_obj_is_float(args[0])) { return mp_obj_new_int_from_float(mp_obj_float_get(args[0])); diff --git a/py/parsenum.c b/py/parsenum.c index e3ad8070c6..da842afe2e 100644 --- a/py/parsenum.c +++ b/py/parsenum.c @@ -78,6 +78,9 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m // string should be an integer number mp_int_t int_val = 0; const byte *restrict str_val_start = str; + if (base == 0) { + goto value_error; + } for (; str < top; str++) { // get next digit as a value mp_uint_t dig = *str; diff --git a/py/parsenumbase.c b/py/parsenumbase.c index 94523a666d..a706b7cfb0 100644 --- a/py/parsenumbase.c +++ b/py/parsenumbase.c @@ -27,9 +27,20 @@ #include "py/mpconfig.h" #include "py/misc.h" #include "py/parsenumbase.h" +#include "py/runtime.h" + +static bool is_all_zeros(const char *str, size_t len) { + while (len-- && unichar_isdigit(*str)) { + if (*str++ != '0') { + return false; + } + } + return true; +} // find real radix base, and strip preceding '0x', '0o' and '0b' // puts base in *base, and returns number of bytes to skip the prefix +// puts 0 in *base to indicate an invalid C-style octal literal size_t mp_parse_num_base(const char *str, size_t len, int 
*base) { const byte *p = (const byte *)str; if (len <= 1) { @@ -49,6 +60,9 @@ size_t mp_parse_num_base(const char *str, size_t len, int *base) { *base = 10; } p -= 2; + if (!is_all_zeros(str, len)) { + *base = 0; + } } } else if (*base == 8 && c == '0') { c = *(p++); diff --git a/tests/basics/int1.py b/tests/basics/int1.py index 2d92105c73..20efe93f03 100644 --- a/tests/basics/int1.py +++ b/tests/basics/int1.py @@ -13,6 +13,7 @@ print(int('1')) print(int('+1')) print(int('-1')) print(int('01')) +print(int('00')) print(int('9')) print(int('10')) print(int('+10')) @@ -31,6 +32,7 @@ print(int(' -3 ')) print(int('0', 10)) print(int('1', 10)) print(int(' \t 1 \t ', 10)) +print(int(' \t 00 \t ', 10)) print(int('11', 10)) print(int('11', 16)) print(int('11', 8)) @@ -79,6 +81,7 @@ test('0o8', 8) test('0xg', 16) test('1 1', 16) test('123', 37) +test('01', 0) # check that we don't parse this as a floating point number print(0x1e+1) diff --git a/tests/basics/lexer.py b/tests/basics/lexer.py index 181d62db1a..addb8a13df 100644 --- a/tests/basics/lexer.py +++ b/tests/basics/lexer.py @@ -83,3 +83,11 @@ try: exec(r"'\U0000000'") except SyntaxError: print("SyntaxError") + +# Properly formed integer literals +print(eval("00")) +# badly formed integer literals +try: + eval("01") +except SyntaxError: + print("SyntaxError")