From 88864587f5af292d7f86aceb6bf40e8331e9a8d6 Mon Sep 17 00:00:00 2001 From: Jim Mussared Date: Fri, 26 Aug 2022 12:54:53 +1000 Subject: [PATCH] py/objstr: Always ensure mp_obj_new_str_from_vstr is unicode-safe. Now that we have `mp_obj_new_str_type_from_vstr` (private helper used by objstr.c) split from the public API (`mp_obj_new_str_from_vstr`), we can enforce a unicode check at the public API without incurring a performance cost on the various objstr.c methods (which are already working on known unicode-safe strings). Signed-off-by: Jim Mussared --- py/objstr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/py/objstr.c b/py/objstr.c index 69745a2f58..ab1229ad66 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -2248,6 +2248,11 @@ STATIC mp_obj_t mp_obj_new_str_type_from_vstr(const mp_obj_type_t *type, vstr_t } mp_obj_t mp_obj_new_str_from_vstr(vstr_t *vstr) { + #if MICROPY_PY_BUILTINS_STR_UNICODE && MICROPY_PY_BUILTINS_STR_UNICODE_CHECK + if (!utf8_check((byte *)vstr->buf, vstr->len)) { + mp_raise_msg(&mp_type_UnicodeError, NULL); + } + #endif // MICROPY_PY_BUILTINS_STR_UNICODE && MICROPY_PY_BUILTINS_STR_UNICODE_CHECK return mp_obj_new_str_type_from_vstr(&mp_type_str, vstr); }