py/parsenum: Throw an exception for invalid int literals like "01".

This includes making int("01") parse in base 10 like standard Python.
When a base of 0 is specified it means auto-detect based on the prefix, and
literals beginning with 0 (except when the literal is all 0's) like "01" are
then invalid and now throw an exception.

The new error message is different from CPython. It says e.g.,
`SyntaxError: invalid syntax for integer with base 0: '09'`

Additional test cases were added to cover the changed & added code.

Co-authored-by: Damien George <damien@micropython.org>
Signed-off-by: Jeff Epler <jepler@gmail.com>
This commit is contained in:
Jeff Epler
2024-01-03 19:31:35 -06:00
committed by Damien George
parent 7b3f189b17
commit 13b13d1fdd
5 changed files with 37 additions and 21 deletions

View File

@@ -55,7 +55,7 @@ static mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args,
return o;
} else if (mp_get_buffer(args[0], &bufinfo, MP_BUFFER_READ)) {
// a textual representation, parse it
return mp_parse_num_integer(bufinfo.buf, bufinfo.len, 0, NULL);
return mp_parse_num_integer(bufinfo.buf, bufinfo.len, 10, NULL);
#if MICROPY_PY_BUILTINS_FLOAT
} else if (mp_obj_is_float(args[0])) {
return mp_obj_new_int_from_float(mp_obj_float_get(args[0]));

View File

@@ -151,13 +151,13 @@ value_error:
raise_exc(exc, lex);
#elif MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_NORMAL
mp_obj_t exc = mp_obj_new_exception_msg_varg(&mp_type_ValueError,
MP_ERROR_TEXT("invalid syntax for integer with base %d"), base);
MP_ERROR_TEXT("invalid syntax for integer with base %d"), base == 1 ? 0 : base);
raise_exc(exc, lex);
#else
vstr_t vstr;
mp_print_t print;
vstr_init_print(&vstr, 50, &print);
mp_printf(&print, "invalid syntax for integer with base %d: ", base);
mp_printf(&print, "invalid syntax for integer with base %d: ", base == 1 ? 0 : base);
mp_str_print_quoted(&print, str_val_start, top - str_val_start, true);
mp_obj_t exc = mp_obj_new_exception_arg1(&mp_type_ValueError,
mp_obj_new_str_from_utf8_vstr(&vstr));

View File

@@ -30,35 +30,28 @@
// find real radix base, and strip preceding '0x', '0o' and '0b'
// puts base in *base, and returns number of bytes to skip the prefix
// in base-0, puts 1 in *base to indicate a number that starts with 0, to provoke a
// ValueError if it's not all-digits-zero.
size_t mp_parse_num_base(const char *str, size_t len, int *base) {
const byte *p = (const byte *)str;
if (len <= 1) {
goto no_prefix;
}
unichar c = *(p++);
if ((*base == 0 || *base == 16) && c == '0') {
c = *(p++);
if ((c | 32) == 'x') {
if (c == '0') {
c = *(p++) | 32;
int b = *base;
if (c == 'x' && !(b & ~16)) {
*base = 16;
} else if (*base == 0 && (c | 32) == 'o') {
} else if (c == 'o' && !(b & ~8)) {
*base = 8;
} else if (*base == 0 && (c | 32) == 'b') {
} else if (c == 'b' && !(b & ~2)) {
*base = 2;
} else {
if (*base == 0) {
*base = 10;
p -= 2;
if (b == 0) {
*base = 1;
}
p -= 2;
}
} else if (*base == 8 && c == '0') {
c = *(p++);
if ((c | 32) != 'o') {
p -= 2;
}
} else if (*base == 2 && c == '0') {
c = *(p++);
if ((c | 32) != 'b') {
p -= 2;
}
} else {
p--;