From c239f509521d1a0f9563bf9c5de0c4fb9a6a33ba Mon Sep 17 00:00:00 2001
From: Chris Angelico
Date: Wed, 4 Jun 2014 05:28:12 +1000
Subject: [PATCH] Add PEP 393-flags to strings and stub usage.

The test suite all passes, but nothing has actually been changed.
---
Review notes (not part of the commit message):
- Q_GET_ALLOC now counts the 5-byte header (hash, length, flags) so that it
  agrees with Q_GET_DATA, which this patch moves to offset 5.
- qstr_add's DEBUG_printf now passes its arguments in the order the format
  string consumes them (flags for "flags=%d", then the length as the
  precision of "%.*s", then the data pointer).
- NOTE(review): the m_new() calls that size q_ptr for qstr_from_strn and
  qstr_build_end are outside these hunks; confirm they reserve 5 + len + 1
  bytes, otherwise the q_ptr[5 + len] = '\0' store is out of bounds.
- NOTE(review): line breaks and indentation in the hunks below were
  reconstructed; verify them against the tree before applying.

 py/compile.c       | 20 ++++++++++++--------
 py/makeqstrdata.py |  2 +-
 py/objstr.c        | 13 ++++++++++---
 py/objstr.h        |  5 +++--
 py/qstr.c          | 23 ++++++++++++++---------
 py/qstr.h          |  2 +-
 py/runtime.c       | 15 +++++++++------
 7 files changed, 50 insertions(+), 30 deletions(-)

diff --git a/py/compile.c b/py/compile.c
index 1f0d90570e..f6133cec47 100644
--- a/py/compile.c
+++ b/py/compile.c
@@ -499,8 +499,9 @@ STATIC void cpython_c_tuple_emit_const(compiler_t *comp, mp_parse_node_t pn, vst
         case MP_PARSE_NODE_DECIMAL: vstr_printf(vstr, "%s", qstr_str(arg)); break;
         case MP_PARSE_NODE_STRING:
         case MP_PARSE_NODE_BYTES: {
-            uint len;
-            const byte *str = qstr_data(arg, &len);
+            uint len; char flags;
+            const byte *str = qstr_data(arg, &len, &flags);
+            assert(flags == 1); //TODO: Support multibyte strings
             cpython_c_print_quoted_str(vstr, (const char*)str, len, MP_PARSE_NODE_LEAF_KIND(pn) == MP_PARSE_NODE_BYTES);
             break;
         }
@@ -1439,8 +1440,9 @@ void do_import_name(compiler_t *comp, mp_parse_node_t pn, qstr *q_base) {
                 if (i > 0) {
                     *str_dest++ = '.';
                 }
-                uint str_src_len;
-                const byte *str_src = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &str_src_len);
+                uint str_src_len; char str_src_flags;
+                const byte *str_src = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &str_src_len, &str_src_flags);
+                assert(str_src_flags == 1); //TODO: Support multibyte strings
                 memcpy(str_dest, str_src, str_src_len);
                 str_dest += str_src_len;
             }
@@ -1544,8 +1546,9 @@ void compile_import_from(compiler_t *comp, mp_parse_node_struct_t *pns) {
                     vstr_printf(vstr, ", ");
                 }
                 vstr_printf(vstr, "'");
-                uint len;
-                const byte *str = qstr_data(id2, &len);
+                uint len; char flags;
+                const byte *str = qstr_data(id2, &len, &flags);
+                assert(flags == 1); //TODO: Support multibyte strings
                 vstr_add_strn(vstr, (const char*)str, len);
                 vstr_printf(vstr, "'");
             }
@@ -2541,8 +2544,9 @@ void compile_atom_string(compiler_t *comp, mp_parse_node_struct_t *pns) {
     byte *s_dest = qstr_build_start(n_bytes, &q_ptr);
     for (int i = 0; i < n; i++) {
         if (MP_PARSE_NODE_IS_LEAF(pns->nodes[i])) {
-            uint s_len;
-            const byte *s = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &s_len);
+            uint s_len; char s_flags;
+            const byte *s = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &s_len, &s_flags);
+            assert(s_flags == 1); //TODO: Support multibyte strings
             memcpy(s_dest, s, s_len);
             s_dest += s_len;
         } else {
diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py
index 599b936f9e..6a671e0867 100644
--- a/py/makeqstrdata.py
+++ b/py/makeqstrdata.py
@@ -58,7 +58,7 @@ def do_work(infiles):
     for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
         qhash = compute_hash(qstr)
         qlen = len(qstr)
-        print('Q({}, (const byte*)"\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}" "{}")'.format(ident, qhash & 0xff, (qhash >> 8) & 0xff, qlen & 0xff, (qlen >> 8) & 0xff, qstr))
+        print('Q({}, (const byte*)"\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}\\1" "{}")'.format(ident, qhash & 0xff, (qhash >> 8) & 0xff, qlen & 0xff, (qlen >> 8) & 0xff, qstr))
 
     return True
 
diff --git a/py/objstr.c b/py/objstr.c
index 4e70b00812..22789a05bc 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -50,7 +50,10 @@ const mp_obj_t mp_const_empty_bytes;
 #define GET_STR_LEN(str_obj_in, str_len) uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_len = qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; }
 
 // use this macro to extract the string data and length
-#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; }
+#define GET_STR_DATA_LEN_FLAGS(str_obj_in, str_data, str_len, str_flags) const byte *str_data; uint str_len; char str_flags; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len, &str_flags); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; str_flags = ((mp_obj_str_t*)str_obj_in)->flags; }
+
+// don't use this macro, it's only for conversions
+#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) GET_STR_DATA_LEN_FLAGS(str_obj_in, str_data, str_len, str_data ## _flags); assert(str_data ## _flags == 1);
 
 STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
 STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
@@ -101,7 +104,7 @@ void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *e
 }
 
 STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
-    GET_STR_DATA_LEN(self_in, str_data, str_len);
+    GET_STR_DATA_LEN_FLAGS(self_in, str_data, str_len, str_flags);
     bool is_bytes = MP_OBJ_IS_TYPE(self_in, &mp_type_bytes);
     if (kind == PRINT_STR && !is_bytes) {
         print(env, "%.*s", str_len, str_data);
@@ -145,6 +148,7 @@ STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_
             mp_obj_str_t *o = mp_obj_new_str_of_type(&mp_type_str, NULL, str_len);
             o->data = str_data;
             o->hash = str_hash;
+            o->flags = 1;
             return o;
         }
 
@@ -173,6 +177,7 @@ STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const m
         mp_obj_str_t *o = mp_obj_new_str_of_type(&mp_type_bytes, NULL, str_len);
         o->data = str_data;
         o->hash = str_hash;
+        o->flags = 1;
         return o;
     }
 
@@ -1699,7 +1704,7 @@ const mp_obj_type_t mp_type_bytes = {
 };
 
 // the zero-length bytes
-STATIC const mp_obj_str_t empty_bytes_obj = {{&mp_type_bytes}, 0, 0, NULL};
+STATIC const mp_obj_str_t empty_bytes_obj = {{&mp_type_bytes}, 0, 0, 1, NULL};
 const mp_obj_t mp_const_empty_bytes = (mp_obj_t)&empty_bytes_obj;
 
 mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) {
@@ -1718,6 +1723,7 @@ mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
     o->hash = qstr_compute_hash(o->data, o->len);
     byte *p = (byte*)o->data;
     p[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
+    o->flags = 1;
     return o;
 }
 
@@ -1725,6 +1731,7 @@ mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, uin
     mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
     o->base.type = type;
     o->len = len;
+    o->flags = 1;
     if (data) {
         o->hash = qstr_compute_hash(data, len);
         byte *p = m_new(byte, len + 1);
diff --git a/py/objstr.h b/py/objstr.h
index 5be137d36d..0db2b31e4a 100644
--- a/py/objstr.h
+++ b/py/objstr.h
@@ -30,10 +30,11 @@ typedef struct _mp_obj_str_t {
     machine_uint_t hash : 16;
     // len == number of bytes used in data, alloc = len + 1 because (at the moment) we also append a null byte
     machine_uint_t len : 16;
-    const byte *data;
+    char flags; //PEP 393-style flags
+    const void *data; //Character data may be 1-byte, 2-bytes, or 4-bytes per character depending on flags
 } mp_obj_str_t;
 
-#define MP_DEFINE_STR_OBJ(obj_name, str) mp_obj_str_t obj_name = {{&mp_type_str}, 0, sizeof(str) - 1, (const byte*)str};
+#define MP_DEFINE_STR_OBJ(obj_name, str) mp_obj_str_t obj_name = {{&mp_type_str}, 0, sizeof(str) - 1, 1, (const byte*)str};
 
 mp_obj_t mp_obj_str_format(uint n_args, const mp_obj_t *args);
 mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, uint len);
diff --git a/py/qstr.c b/py/qstr.c
index a1ee7cafd0..3566f28cb3 100644
--- a/py/qstr.c
+++ b/py/qstr.c
@@ -46,13 +46,15 @@
 // For now we use very simple encoding, just to get the framework correct:
 //  - hash is 2 bytes (see function below)
 //  - length is 2 bytes
+//  - flags byte
 //  - data follows
 //  - \0 terminated (for now, so they can be printed using printf)
 
 #define Q_GET_HASH(q)   ((q)[0] | ((q)[1] << 8))
-#define Q_GET_ALLOC(q)  (4 + Q_GET_LENGTH(q) + 1)
+#define Q_GET_ALLOC(q)  (5 + Q_GET_LENGTH(q) + 1)
 #define Q_GET_LENGTH(q) ((q)[2] | ((q)[3] << 8))
-#define Q_GET_DATA(q)   ((q) + 4)
+#define Q_GET_FLAGS(q)  ((q)[4])
+#define Q_GET_DATA(q)   ((q) + 5)
 
 // this must match the equivalent function in makeqstrdata.py
 machine_uint_t qstr_compute_hash(const byte *data, uint len) {
@@ -83,8 +85,8 @@ const static qstr_pool_t const_pool = {
     10,                 // set so that the first dynamically allocated pool is twice this size; must be <= the len (just below)
     MP_QSTR_number_of,  // corresponds to number of strings in array just below
     {
-        (const byte*) "\0\0\0\0", // invalid/no qstr has empty data
-        (const byte*) "\0\0\0\0", // empty qstr
+        (const byte*) "\0\0\0\0\0", // invalid/no qstr has empty data
+        (const byte*) "\0\0\0\0\1", // empty qstr
 #define Q(id, str) str,
 #include "genhdr/qstrdefs.generated.h"
 #undef Q
@@ -110,7 +112,7 @@ STATIC const byte *find_qstr(qstr q) {
 }
 
 STATIC qstr qstr_add(const byte *q_ptr) {
-    DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", Q_GET_HASH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_DATA(q_ptr));
+    DEBUG_printf("QSTR: add hash=%d len=%d flags=%d data=%.*s\n", Q_GET_HASH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_FLAGS(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_DATA(q_ptr));
 
     // make sure we have room in the pool for a new qstr
     if (last_pool->len >= last_pool->alloc) {
@@ -160,8 +162,9 @@ qstr qstr_from_strn(const char *str, uint len) {
         q_ptr[1] = hash >> 8;
         q_ptr[2] = len;
         q_ptr[3] = len >> 8;
-        memcpy(q_ptr + 4, str, len);
-        q_ptr[4 + len] = '\0';
+        q_ptr[4] = 1;
+        memcpy(q_ptr + 5, str, len);
+        q_ptr[5 + len] = '\0';
         q = qstr_add(q_ptr);
     }
     return q;
@@ -182,7 +185,8 @@ qstr qstr_build_end(byte *q_ptr) {
         machine_uint_t hash = qstr_compute_hash(Q_GET_DATA(q_ptr), len);
         q_ptr[0] = hash;
         q_ptr[1] = hash >> 8;
-        q_ptr[4 + len] = '\0';
+        q_ptr[4] = 1;
+        q_ptr[5 + len] = '\0';
         q = qstr_add(q_ptr);
     } else {
         m_del(byte, q_ptr, Q_GET_ALLOC(q_ptr));
@@ -205,9 +209,10 @@ const char *qstr_str(qstr q) {
     return (const char*)Q_GET_DATA(qd);
 }
 
-const byte *qstr_data(qstr q, uint *len) {
+const byte *qstr_data(qstr q, uint *len, char *flags) {
     const byte *qd = find_qstr(q);
     *len = Q_GET_LENGTH(qd);
+    *flags = Q_GET_FLAGS(qd);
     return Q_GET_DATA(qd);
 }
 
diff --git a/py/qstr.h b/py/qstr.h
index 9803e672ca..2d2687fede 100644
--- a/py/qstr.h
+++ b/py/qstr.h
@@ -59,6 +59,6 @@ qstr qstr_build_end(byte *q_ptr);
 machine_uint_t qstr_hash(qstr q);
 const char* qstr_str(qstr q);
 uint qstr_len(qstr q);
-const byte* qstr_data(qstr q, uint *len);
+const byte* qstr_data(qstr q, uint *len, char *flags);
 
 void qstr_pool_info(uint *n_pool, uint *n_qstr, uint *n_str_data_bytes, uint *n_total_bytes);
diff --git a/py/runtime.c b/py/runtime.c
index cdbf99d4a5..44001e0749 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -101,15 +101,17 @@ void mp_deinit(void) {
 
 mp_obj_t mp_load_const_int(qstr qstr) {
     DEBUG_OP_printf("load '%s'\n", qstr_str(qstr));
-    uint len;
-    const byte* data = qstr_data(qstr, &len);
+    uint len; char flags;
+    const byte* data = qstr_data(qstr, &len, &flags);
+    assert(flags == 1); //TODO: Support multibyte strings
     return mp_parse_num_integer((const char*)data, len, 0);
 }
 
 mp_obj_t mp_load_const_dec(qstr qstr) {
     DEBUG_OP_printf("load '%s'\n", qstr_str(qstr));
-    uint len;
-    const byte* data = qstr_data(qstr, &len);
+    uint len; char flags;
+    const byte* data = qstr_data(qstr, &len, &flags);
+    assert(flags == 1); //TODO: Support multibyte strings
     return mp_parse_num_decimal((const char*)data, len, true, false);
 }
 
@@ -120,8 +122,9 @@ mp_obj_t mp_load_const_str(qstr qstr) {
 
 mp_obj_t mp_load_const_bytes(qstr qstr) {
     DEBUG_OP_printf("load b'%s'\n", qstr_str(qstr));
-    uint len;
-    const byte *data = qstr_data(qstr, &len);
+    uint len; char flags;
+    const byte* data = qstr_data(qstr, &len, &flags);
+    assert(flags == 1); //TODO: Support multibyte strings
     return mp_obj_new_bytes(data, len);
 }
 