Add PEP 393-flags to strings and stub usage.

The entire test suite passes, but no behaviour has actually changed yet.
This commit is contained in:
Chris Angelico
2014-06-04 05:28:12 +10:00
parent c61be8e1e1
commit c239f50952
7 changed files with 49 additions and 29 deletions

View File

@@ -499,8 +499,9 @@ STATIC void cpython_c_tuple_emit_const(compiler_t *comp, mp_parse_node_t pn, vst
case MP_PARSE_NODE_DECIMAL: vstr_printf(vstr, "%s", qstr_str(arg)); break;
case MP_PARSE_NODE_STRING:
case MP_PARSE_NODE_BYTES: {
uint len;
const byte *str = qstr_data(arg, &len);
uint len; char flags;
const byte *str = qstr_data(arg, &len, &flags);
assert(flags == 1); //TODO: Support multibyte strings
cpython_c_print_quoted_str(vstr, (const char*)str, len, MP_PARSE_NODE_LEAF_KIND(pn) == MP_PARSE_NODE_BYTES);
break;
}
@@ -1439,8 +1440,9 @@ void do_import_name(compiler_t *comp, mp_parse_node_t pn, qstr *q_base) {
if (i > 0) {
*str_dest++ = '.';
}
uint str_src_len;
const byte *str_src = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &str_src_len);
uint str_src_len; char str_src_flags;
const byte *str_src = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &str_src_len, &str_src_flags);
assert(str_src_flags == 1); //TODO: Support multibyte strings
memcpy(str_dest, str_src, str_src_len);
str_dest += str_src_len;
}
@@ -1544,8 +1546,9 @@ void compile_import_from(compiler_t *comp, mp_parse_node_struct_t *pns) {
vstr_printf(vstr, ", ");
}
vstr_printf(vstr, "'");
uint len;
const byte *str = qstr_data(id2, &len);
uint len; char flags;
const byte *str = qstr_data(id2, &len, &flags);
assert(flags == 1); //TODO: Support multibyte strings
vstr_add_strn(vstr, (const char*)str, len);
vstr_printf(vstr, "'");
}
@@ -2541,8 +2544,9 @@ void compile_atom_string(compiler_t *comp, mp_parse_node_struct_t *pns) {
byte *s_dest = qstr_build_start(n_bytes, &q_ptr);
for (int i = 0; i < n; i++) {
if (MP_PARSE_NODE_IS_LEAF(pns->nodes[i])) {
uint s_len;
const byte *s = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &s_len);
uint s_len; char s_flags;
const byte *s = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &s_len, &s_flags);
assert(s_flags == 1); //TODO: Support multibyte strings
memcpy(s_dest, s, s_len);
s_dest += s_len;
} else {

View File

@@ -58,7 +58,7 @@ def do_work(infiles):
for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
qhash = compute_hash(qstr)
qlen = len(qstr)
print('Q({}, (const byte*)"\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}" "{}")'.format(ident, qhash & 0xff, (qhash >> 8) & 0xff, qlen & 0xff, (qlen >> 8) & 0xff, qstr))
print('Q({}, (const byte*)"\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}\\1" "{}")'.format(ident, qhash & 0xff, (qhash >> 8) & 0xff, qlen & 0xff, (qlen >> 8) & 0xff, qstr))
return True

View File

@@ -50,7 +50,10 @@ const mp_obj_t mp_const_empty_bytes;
#define GET_STR_LEN(str_obj_in, str_len) uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_len = qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; }
// use this macro to extract the string data and length
#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; }
#define GET_STR_DATA_LEN_FLAGS(str_obj_in, str_data, str_len, str_flags) const byte *str_data; uint str_len; char str_flags; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len, &str_flags); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; str_flags = ((mp_obj_str_t*)str_obj_in)->flags; }
// don't use this macro, it's only for conversions
#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) GET_STR_DATA_LEN_FLAGS(str_obj_in, str_data, str_len, str_data ## _flags); assert(str_data ## _flags == 1);
STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
@@ -101,7 +104,7 @@ void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *e
}
STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
GET_STR_DATA_LEN(self_in, str_data, str_len);
GET_STR_DATA_LEN_FLAGS(self_in, str_data, str_len, str_flags);
bool is_bytes = MP_OBJ_IS_TYPE(self_in, &mp_type_bytes);
if (kind == PRINT_STR && !is_bytes) {
print(env, "%.*s", str_len, str_data);
@@ -145,6 +148,7 @@ STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_
mp_obj_str_t *o = mp_obj_new_str_of_type(&mp_type_str, NULL, str_len);
o->data = str_data;
o->hash = str_hash;
o->flags = 1;
return o;
}
@@ -173,6 +177,7 @@ STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const m
mp_obj_str_t *o = mp_obj_new_str_of_type(&mp_type_bytes, NULL, str_len);
o->data = str_data;
o->hash = str_hash;
o->flags = 1;
return o;
}
@@ -1699,7 +1704,7 @@ const mp_obj_type_t mp_type_bytes = {
};
// the zero-length bytes
STATIC const mp_obj_str_t empty_bytes_obj = {{&mp_type_bytes}, 0, 0, NULL};
STATIC const mp_obj_str_t empty_bytes_obj = {{&mp_type_bytes}, 0, 0, 1, NULL};
const mp_obj_t mp_const_empty_bytes = (mp_obj_t)&empty_bytes_obj;
mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) {
@@ -1718,6 +1723,7 @@ mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
o->hash = qstr_compute_hash(o->data, o->len);
byte *p = (byte*)o->data;
p[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
o->flags = 1;
return o;
}
@@ -1725,6 +1731,7 @@ mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, uin
mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
o->base.type = type;
o->len = len;
o->flags = 1;
if (data) {
o->hash = qstr_compute_hash(data, len);
byte *p = m_new(byte, len + 1);

View File

@@ -30,10 +30,11 @@ typedef struct _mp_obj_str_t {
machine_uint_t hash : 16;
// len == number of bytes used in data, alloc = len + 1 because (at the moment) we also append a null byte
machine_uint_t len : 16;
const byte *data;
char flags; //PEP 393-style flags
const void *data; //Character data may be 1-byte, 2-bytes, or 4-bytes per character depending on flags
} mp_obj_str_t;
#define MP_DEFINE_STR_OBJ(obj_name, str) mp_obj_str_t obj_name = {{&mp_type_str}, 0, sizeof(str) - 1, (const byte*)str};
#define MP_DEFINE_STR_OBJ(obj_name, str) mp_obj_str_t obj_name = {{&mp_type_str}, 0, sizeof(str) - 1, 1, (const byte*)str};
mp_obj_t mp_obj_str_format(uint n_args, const mp_obj_t *args);
mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, uint len);

View File

@@ -46,13 +46,15 @@
// For now we use very simple encoding, just to get the framework correct:
// - hash is 2 bytes (see function below)
// - length is 2 bytes
// - flags byte
// - data follows
// - \0 terminated (for now, so they can be printed using printf)
#define Q_GET_HASH(q) ((q)[0] | ((q)[1] << 8))
// total bytes occupied by a qstr: 2 hash + 2 length + 1 flags, then the data and its trailing \0
// (must agree with Q_GET_DATA, which places the data after the 5 header bytes)
#define Q_GET_ALLOC(q) (5 + Q_GET_LENGTH(q) + 1)
// length of the data in bytes, stored low byte first in bytes 2 and 3
#define Q_GET_LENGTH(q) ((q)[2] | ((q)[3] << 8))
#define Q_GET_DATA(q) ((q) + 4)
#define Q_GET_FLAGS(q) ((q)[4])
#define Q_GET_DATA(q) ((q) + 5)
// this must match the equivalent function in makeqstrdata.py
machine_uint_t qstr_compute_hash(const byte *data, uint len) {
@@ -83,8 +85,8 @@ const static qstr_pool_t const_pool = {
10, // set so that the first dynamically allocated pool is twice this size; must be <= the len (just below)
MP_QSTR_number_of, // corresponds to number of strings in array just below
{
(const byte*) "\0\0\0\0", // invalid/no qstr has empty data
(const byte*) "\0\0\0\0", // empty qstr
(const byte*) "\0\0\0\0\0", // invalid/no qstr has empty data
(const byte*) "\0\0\0\0\1", // empty qstr
#define Q(id, str) str,
#include "genhdr/qstrdefs.generated.h"
#undef Q
@@ -110,7 +112,7 @@ STATIC const byte *find_qstr(qstr q) {
}
STATIC qstr qstr_add(const byte *q_ptr) {
DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", Q_GET_HASH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_DATA(q_ptr));
// argument order must follow the format: flags feeds %d, then %.*s takes the length (as precision) followed by the data pointer
DEBUG_printf("QSTR: add hash=%d len=%d flags=%d data=%.*s\n", Q_GET_HASH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_FLAGS(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_DATA(q_ptr));
// make sure we have room in the pool for a new qstr
if (last_pool->len >= last_pool->alloc) {
@@ -160,8 +162,9 @@ qstr qstr_from_strn(const char *str, uint len) {
q_ptr[1] = hash >> 8;
q_ptr[2] = len;
q_ptr[3] = len >> 8;
memcpy(q_ptr + 4, str, len);
q_ptr[4 + len] = '\0';
q_ptr[4] = 1;
memcpy(q_ptr + 5, str, len);
q_ptr[5 + len] = '\0';
q = qstr_add(q_ptr);
}
return q;
@@ -182,7 +185,8 @@ qstr qstr_build_end(byte *q_ptr) {
machine_uint_t hash = qstr_compute_hash(Q_GET_DATA(q_ptr), len);
q_ptr[0] = hash;
q_ptr[1] = hash >> 8;
q_ptr[4 + len] = '\0';
q_ptr[4] = 1;
q_ptr[5 + len] = '\0';
q = qstr_add(q_ptr);
} else {
m_del(byte, q_ptr, Q_GET_ALLOC(q_ptr));
@@ -205,9 +209,10 @@ const char *qstr_str(qstr q) {
return (const char*)Q_GET_DATA(qd);
}
// Look up qstr q and return a pointer to its character data.
// The data length is stored in *len and the flags byte in *flags.
const byte *qstr_data(qstr q, uint *len) {
const byte *qstr_data(qstr q, uint *len, char *flags) {
const byte *qd = find_qstr(q);
*len = Q_GET_LENGTH(qd);
*flags = Q_GET_FLAGS(qd);
return Q_GET_DATA(qd);
}

View File

@@ -59,6 +59,6 @@ qstr qstr_build_end(byte *q_ptr);
machine_uint_t qstr_hash(qstr q);
const char* qstr_str(qstr q);
uint qstr_len(qstr q);
const byte* qstr_data(qstr q, uint *len);
const byte* qstr_data(qstr q, uint *len, char *flags);
void qstr_pool_info(uint *n_pool, uint *n_qstr, uint *n_str_data_bytes, uint *n_total_bytes);

View File

@@ -101,15 +101,17 @@ void mp_deinit(void) {
// Build an integer object by parsing the text stored in the given qstr.
// Only qstrs whose flags value is 1 are accepted (checked by the assert);
// other flag values are not handled yet.
mp_obj_t mp_load_const_int(qstr qstr) {
DEBUG_OP_printf("load '%s'\n", qstr_str(qstr));
uint len;
const byte* data = qstr_data(qstr, &len);
uint len; char flags;
const byte* data = qstr_data(qstr, &len, &flags);
assert(flags == 1); //TODO: Support multibyte strings
return mp_parse_num_integer((const char*)data, len, 0);
}
// Build a decimal number object by parsing the text stored in the given qstr.
// Only qstrs whose flags value is 1 are accepted (checked by the assert);
// other flag values are not handled yet.
mp_obj_t mp_load_const_dec(qstr qstr) {
DEBUG_OP_printf("load '%s'\n", qstr_str(qstr));
uint len;
const byte* data = qstr_data(qstr, &len);
uint len; char flags;
const byte* data = qstr_data(qstr, &len, &flags);
assert(flags == 1); //TODO: Support multibyte strings
return mp_parse_num_decimal((const char*)data, len, true, false);
}
@@ -120,8 +122,9 @@ mp_obj_t mp_load_const_str(qstr qstr) {
// Create a bytes object from the data stored in the given qstr.
// Only qstrs whose flags value is 1 are accepted (checked by the assert);
// other flag values are not handled yet.
mp_obj_t mp_load_const_bytes(qstr qstr) {
DEBUG_OP_printf("load b'%s'\n", qstr_str(qstr));
uint len;
const byte *data = qstr_data(qstr, &len);
uint len; char flags;
const byte* data = qstr_data(qstr, &len, &flags);
assert(flags == 1); //TODO: Support multibyte strings
return mp_obj_new_bytes(data, len);
}