From bc990dad9afb8ec112f5e7f7f79d5ab415da0e72 Mon Sep 17 00:00:00 2001 From: Chris Angelico Date: Sun, 8 Jun 2014 02:10:59 +1000 Subject: [PATCH] Revert "Add PEP 393-flags to strings and stub usage." This reverts commit c239f509521d1a0f9563bf9c5de0c4fb9a6a33ba. --- py/compile.c | 20 +++++------- py/makeqstrdata.py | 2 +- py/objstr.c | 77 +++++++++++++++++++++------------------------- py/objstr.h | 3 +- py/qstr.c | 37 ++++++++++------------ py/qstr.h | 2 +- py/runtime.c | 15 ++++----- 7 files changed, 68 insertions(+), 88 deletions(-) diff --git a/py/compile.c b/py/compile.c index f6133cec47..1f0d90570e 100644 --- a/py/compile.c +++ b/py/compile.c @@ -499,9 +499,8 @@ STATIC void cpython_c_tuple_emit_const(compiler_t *comp, mp_parse_node_t pn, vst case MP_PARSE_NODE_DECIMAL: vstr_printf(vstr, "%s", qstr_str(arg)); break; case MP_PARSE_NODE_STRING: case MP_PARSE_NODE_BYTES: { - uint len; char flags; - const byte *str = qstr_data(arg, &len, &flags); - assert(flags == 1); //TODO: Support multibyte strings + uint len; + const byte *str = qstr_data(arg, &len); cpython_c_print_quoted_str(vstr, (const char*)str, len, MP_PARSE_NODE_LEAF_KIND(pn) == MP_PARSE_NODE_BYTES); break; } @@ -1440,9 +1439,8 @@ void do_import_name(compiler_t *comp, mp_parse_node_t pn, qstr *q_base) { if (i > 0) { *str_dest++ = '.'; } - uint str_src_len; char str_src_flags; - const byte *str_src = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &str_src_len, &str_src_flags); - assert(str_src_flags == 1); //TODO: Support multibyte strings + uint str_src_len; + const byte *str_src = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &str_src_len); memcpy(str_dest, str_src, str_src_len); str_dest += str_src_len; } @@ -1546,9 +1544,8 @@ void compile_import_from(compiler_t *comp, mp_parse_node_struct_t *pns) { vstr_printf(vstr, ", "); } vstr_printf(vstr, "'"); - uint len; char flags; - const byte *str = qstr_data(id2, &len, &flags); - assert(flags == 1); //TODO: Support multibyte strings + uint len; + const byte *str = qstr_data(id2, &len); vstr_add_strn(vstr, (const char*)str, len); vstr_printf(vstr, "'"); } @@ -2544,9 +2541,8 @@ void compile_atom_string(compiler_t *comp, mp_parse_node_struct_t *pns) { byte *s_dest = qstr_build_start(n_bytes, &q_ptr); for (int i = 0; i < n; i++) { if (MP_PARSE_NODE_IS_LEAF(pns->nodes[i])) { - uint s_len; char s_flags; - const byte *s = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &s_len, &s_flags); - assert(s_flags == 1); //TODO: Support multibyte strings + uint s_len; + const byte *s = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &s_len); memcpy(s_dest, s, s_len); s_dest += s_len; } else { diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py index 7952e248a5..3989f7aaaa 100644 --- a/py/makeqstrdata.py +++ b/py/makeqstrdata.py @@ -60,7 +60,7 @@ def do_work(infiles): qhash = compute_hash(qstr) qlen = len(qstr) qchlen = len(qstr.decode("utf-8")) - print('Q({}, (const byte*)"\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}\\1" "{}")'.format(ident, qhash & 0xff, (qhash >> 8) & 0xff, qlen & 0xff, (qlen >> 8) & 0xff, qchlen & 0xff, (qchlen >> 8) & 0xff, qstr)) + print('Q({}, (const byte*)"\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}" "{}")'.format(ident, qhash & 0xff, (qhash >> 8) & 0xff, qlen & 0xff, (qlen >> 8) & 0xff, qchlen & 0xff, (qchlen >> 8) & 0xff, qstr)) return True diff --git a/py/objstr.c b/py/objstr.c index 21b0b3a19b..c9fc452f77 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -50,13 +50,10 @@ const mp_obj_t mp_const_empty_bytes; #define GET_STR_LEN(str_obj_in, str_len) uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_len = qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; } // use this macro to extract the string data and length -#define GET_STR_DATA_LEN_FLAGS(str_obj_in, str_data, str_len, str_flags) const byte *str_data; uint str_len; char str_flags; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len, &str_flags); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; str_flags = ((mp_obj_str_t*)str_obj_in)->flags; } +#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; } -// use this macro to extract the string data, lengths, and flags -#define GET_STR_INFO(str_obj_in, str_data, str_len, str_charlen, str_flags) const byte *str_data; uint str_len, str_charlen; char str_flags; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len, &str_flags); str_charlen = qstr_charlen(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_charlen = ((mp_obj_str_t*)str_obj_in)->charlen; str_data = ((mp_obj_str_t*)str_obj_in)->data; str_flags = ((mp_obj_str_t*)str_obj_in)->flags; } - -// don't use this macro, it's only for conversions -#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) GET_STR_DATA_LEN_FLAGS(str_obj_in, str_data, str_len, str_data ## _flags); assert(str_data ## _flags == 1); +// use this macro to extract the string data and both lengths +#define GET_STR_INFO(str_obj_in, str_data, str_len, str_charlen) const byte *str_data; uint str_len, str_charlen; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); str_charlen = qstr_charlen(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_charlen = ((mp_obj_str_t*)str_obj_in)->charlen; str_data = ((mp_obj_str_t*)str_obj_in)->data; } STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str); STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str); @@ -101,32 +98,32 @@ void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *e print(env, "\\t"); } else if (*s == '\x7f') { print(env, "\\x7f"); - } else if (is_bytes) { + } else if (is_bytes) { print(env, "\\x%02x", *s); } else { // Non-ASCII character. Decode UTF-8. - machine_int_t ord = *s++ & 0x7F; + machine_int_t ord = *s++ & 0x7F; for (machine_int_t mask = 0x40; ord & mask; mask >>= 1) { - ord &= ~mask; - } - while (UTF8_IS_CONT(*s)) { - ord = (ord << 6) | (*s++ & 0x3F); - } - --s; // s will be incremented by the main loop - if (ord < 0x100) { + ord &= ~mask; + } + while (UTF8_IS_CONT(*s)) { + ord = (ord << 6) | (*s++ & 0x3F); + } + --s; // s will be incremented by the main loop + if (ord < 0x100) { print(env, "\\x%02x", ord); - } else if (ord < 0x10000) { + } else if (ord < 0x10000) { print(env, "\\u%04x", ord); - } else { + } else { print(env, "\\U%08x", ord); - } + } } } print(env, "%c", quote_char); } STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) { - GET_STR_DATA_LEN_FLAGS(self_in, str_data, str_len, str_flags); + GET_STR_DATA_LEN(self_in, str_data, str_len); bool is_bytes = MP_OBJ_IS_TYPE(self_in, &mp_type_bytes); if (kind == PRINT_STR && !is_bytes) { print(env, "%.*s", str_len, str_data); @@ -170,7 +167,6 @@ STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_ mp_obj_str_t *o = mp_obj_new_str_of_type(&mp_type_str, NULL, str_len); o->data = str_data; o->hash = str_hash; - o->flags = 1; return o; } @@ -199,7 +195,6 @@ STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const m mp_obj_str_t *o = mp_obj_new_str_of_type(&mp_type_bytes, NULL, str_len); o->data = str_data; o->hash = str_hash; - o->flags = 1; return o; } @@ -377,7 +372,7 @@ uncomparable: STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) { mp_obj_type_t *type = mp_obj_get_type(self_in); - GET_STR_INFO(self_in, self_data, self_len, self_charlen, self_flags); + GET_STR_INFO(self_in, self_data, self_len, self_charlen); if (value == MP_OBJ_SENTINEL) { // load #if MICROPY_PY_BUILTINS_SLICE @@ -400,20 +395,20 @@ STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) { const char *s; for (s=(const char *)self_data; index_val; ++s) { if (!UTF8_IS_CONT(*s)) { - --index_val; - } - } - // Skip continuation bytes after the last lead byte + --index_val; + } + } + // Skip continuation bytes after the last lead byte while (UTF8_IS_CONT(*s)) { - ++s; - } + ++s; + } int len = 1; if (UTF8_IS_NONASCII(*s)) { - // Count the number of 1 bits (after the first) + // Count the number of 1 bits (after the first) for (char mask = 0x40; *s & mask; mask >>= 1) { - ++len; - } - } + ++len; + } + } return mp_obj_new_str(s, len, true); // This will create a one-character string } } else { @@ -1746,7 +1741,7 @@ const mp_obj_type_t mp_type_bytes = { }; // the zero-length bytes -STATIC const mp_obj_str_t empty_bytes_obj = {{&mp_type_bytes}, 0, 0, 0, 1, NULL}; +STATIC const mp_obj_str_t empty_bytes_obj = {{&mp_type_bytes}, 0, 0, 0, NULL}; const mp_obj_t mp_const_empty_bytes = (mp_obj_t)&empty_bytes_obj; mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) { @@ -1765,7 +1760,6 @@ mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) { o->hash = qstr_compute_hash(o->data, o->len); byte *p = (byte*)o->data; p[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings - o->flags = 1; return o; } @@ -1773,7 +1767,6 @@ mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, uin mp_obj_str_t *o = m_new_obj(mp_obj_str_t); o->base.type = type; o->len = len; - o->flags = 1; if (data) { if (MP_OBJ_IS_STR(o)) { // Count non-continuation bytes so we know how long the string is in characters. @@ -1781,14 +1774,14 @@ mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, uin uint charlen = 0; for (endptr = data; endptr < top; ++endptr) { if (!UTF8_IS_CONT(*endptr)) { - ++charlen; - } - } + ++charlen; + } + } o->charlen = charlen; - } else { + } else { // For byte strings, the 'character' length (really the "exposed length" or "Python length") equals the byte length. o->charlen = len; - } + } o->hash = qstr_compute_hash(data, len); byte *p = m_new(byte, len + 1); o->data = p; @@ -1858,7 +1851,7 @@ uint mp_obj_str_get_hash(mp_obj_t self_in) { uint mp_obj_str_get_len(mp_obj_t self_in) { // TODO This has a double check for the type, one in obj.c and one here if (MP_OBJ_IS_STR(self_in) || MP_OBJ_IS_TYPE(self_in, &mp_type_bytes)) { - GET_STR_INFO(self_in, self_data, self_len, self_charlen, self_flags); (void)self_data; + GET_STR_INFO(self_in, self_data, self_len, self_charlen); (void)self_data; return self_charlen; } else { bad_implicit_conversion(self_in); @@ -1902,7 +1895,7 @@ const char *mp_obj_str_get_data(mp_obj_t self_in, uint *len) { const char *mp_obj_str_get_data_len(mp_obj_t self_in, uint *len, uint *charlen) { if (is_str_or_bytes(self_in)) { - GET_STR_INFO(self_in, s, l, cl, f); + GET_STR_INFO(self_in, s, l, cl); *len = l; *charlen = cl; return (const char*)s; } else { diff --git a/py/objstr.h b/py/objstr.h index 66199ea145..98b0a5a316 100644 --- a/py/objstr.h +++ b/py/objstr.h @@ -32,12 +32,11 @@ typedef struct _mp_obj_str_t { machine_uint_t len : 16; // charlen == number of characters in the string - charlen <= len - 1, and is the value returned by len() in Python machine_uint_t charlen : 16; - char flags; //Currently unused, always 1. Will later get markers eg ASCII-only. const void *data; //Character data is encoded UTF-8 and should not be blindly indexed. } mp_obj_str_t; // This is valid ONLY for pure-ASCII strings! -#define MP_DEFINE_STR_OBJ(obj_name, str) mp_obj_str_t obj_name = {{&mp_type_str}, 0, sizeof(str) - 1, sizeof(str) - 1, 1, (const byte*)str}; +#define MP_DEFINE_STR_OBJ(obj_name, str) mp_obj_str_t obj_name = {{&mp_type_str}, 0, sizeof(str) - 1, sizeof(str) - 1, (const byte*)str}; mp_obj_t mp_obj_str_format(uint n_args, const mp_obj_t *args); mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, uint len); diff --git a/py/qstr.c b/py/qstr.c index 5637aea77d..85f1bb4238 100644 --- a/py/qstr.c +++ b/py/qstr.c @@ -47,16 +47,14 @@ // - hash is 2 bytes (see function below) // - length is 2 bytes // - character length is 2 bytes -// - flags byte // - data follows // - \0 terminated (for now, so they can be printed using printf) #define Q_GET_HASH(q) ((q)[0] | ((q)[1] << 8)) -#define Q_GET_ALLOC(q) (7 + Q_GET_LENGTH(q) + 1) +#define Q_GET_ALLOC(q) (6 + Q_GET_LENGTH(q) + 1) #define Q_GET_LENGTH(q) ((q)[2] | ((q)[3] << 8)) #define Q_GET_CHARLEN(q) ((q)[4] | ((q)[5] << 8)) -#define Q_GET_FLAGS(q) ((q)[6]) -#define Q_GET_DATA(q) ((q) + 7) +#define Q_GET_DATA(q) ((q) + 6) // this must match the equivalent function in makeqstrdata.py // Note that this hashes the UTF-8 encoded data bytes. @@ -88,8 +86,8 @@ const static qstr_pool_t const_pool = { 10, // set so that the first dynamically allocated pool is twice this size; must be <= the len (just below) MP_QSTR_number_of, // corresponds to number of strings in array just below { - (const byte*) "\0\0\0\0\0\0\0", // invalid/no qstr has empty data - (const byte*) "\0\0\0\0\0\0\1", // empty qstr + (const byte*) "\0\0\0\0\0\0", // invalid/no qstr has empty data + (const byte*) "\0\0\0\0\0\0", // empty qstr #define Q(id, str) str, #include "genhdr/qstrdefs.generated.h" #undef Q @@ -115,7 +113,7 @@ STATIC const byte *find_qstr(qstr q) { } STATIC qstr qstr_add(const byte *q_ptr) { - DEBUG_printf("QSTR: add hash=%d len=%d flags=%d data=%.*s\n", Q_GET_HASH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_FLAGS(q_ptr), Q_GET_DATA(q_ptr)); + DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", Q_GET_HASH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_DATA(q_ptr)); // make sure we have room in the pool for a new qstr if (last_pool->len >= last_pool->alloc) { @@ -160,22 +158,21 @@ qstr qstr_from_strn(const char *str, uint len) { qstr q = qstr_find_strn(str, len); if (q == 0) { machine_uint_t hash = qstr_compute_hash((const byte*)str, len); - byte *q_ptr = m_new(byte, 7 + len + 1); + byte *q_ptr = m_new(byte, 6 + len + 1); uint charlen = 0; for (const char *s = str; s < str + len; ++s) { if (!UTF8_IS_CONT(*s)) { - ++charlen; - } - } + ++charlen; + } + } q_ptr[0] = hash; q_ptr[1] = hash >> 8; q_ptr[2] = len; q_ptr[3] = len >> 8; q_ptr[4] = charlen; q_ptr[5] = charlen >> 8; - q_ptr[6] = 1; - memcpy(q_ptr + 7, str, len); - q_ptr[7 + len] = '\0'; + memcpy(q_ptr + 6, str, len); + q_ptr[6 + len] = '\0'; q = qstr_add(q_ptr); } return q; @@ -200,13 +197,12 @@ qstr qstr_build_end(byte *q_ptr) { uint charlen = 0; for (const byte *s = str; s < str + len; ++s) { if (!UTF8_IS_CONT(*s)) { - ++charlen; - } - } + ++charlen; + } + } q_ptr[4] = charlen; q_ptr[5] = charlen >> 8; - q_ptr[6] = 1; - q_ptr[7 + len] = '\0'; + q_ptr[6 + len] = '\0'; q = qstr_add(q_ptr); } else { m_del(byte, q_ptr, Q_GET_ALLOC(q_ptr)); @@ -234,10 +230,9 @@ const char *qstr_str(qstr q) { return (const char*)Q_GET_DATA(qd); } -const byte *qstr_data(qstr q, uint *len, char *flags) { +const byte *qstr_data(qstr q, uint *len) { const byte *qd = find_qstr(q); *len = Q_GET_LENGTH(qd); - *flags = Q_GET_FLAGS(qd); return Q_GET_DATA(qd); } diff --git a/py/qstr.h b/py/qstr.h index 1f641832f3..b624e6f30d 100644 --- a/py/qstr.h +++ b/py/qstr.h @@ -60,6 +60,6 @@ machine_uint_t qstr_hash(qstr q); const char* qstr_str(qstr q); uint qstr_len(qstr q); uint qstr_charlen(qstr q); -const byte* qstr_data(qstr q, uint *len, char *flags); +const byte* qstr_data(qstr q, uint *len); void qstr_pool_info(uint *n_pool, uint *n_qstr, uint *n_str_data_bytes, uint *n_total_bytes); diff --git a/py/runtime.c b/py/runtime.c index 44001e0749..cdbf99d4a5 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -101,17 +101,15 @@ void mp_deinit(void) { mp_obj_t mp_load_const_int(qstr qstr) { DEBUG_OP_printf("load '%s'\n", qstr_str(qstr)); - uint len; char flags; - const byte* data = qstr_data(qstr, &len, &flags); - assert(flags == 1); //TODO: Support multibyte strings + uint len; + const byte* data = qstr_data(qstr, &len); return mp_parse_num_integer((const char*)data, len, 0); } mp_obj_t mp_load_const_dec(qstr qstr) { DEBUG_OP_printf("load '%s'\n", qstr_str(qstr)); - uint len; char flags; - const byte* data = qstr_data(qstr, &len, &flags); - assert(flags == 1); //TODO: Support multibyte strings + uint len; + const byte* data = qstr_data(qstr, &len); return mp_parse_num_decimal((const char*)data, len, true, false); } @@ -122,9 +120,8 @@ mp_obj_t mp_load_const_str(qstr qstr) { mp_obj_t mp_load_const_bytes(qstr qstr) { DEBUG_OP_printf("load b'%s'\n", qstr_str(qstr)); - uint len; char flags; - const byte* data = qstr_data(qstr, &len, &flags); - assert(flags == 1); //TODO: Support multibyte strings + uint len; + const byte *data = qstr_data(qstr, &len); return mp_obj_new_bytes(data, len); }