py: Move constant folding from compiler to parser.

It makes much more sense to do constant folding in the parser while the
parse tree is being built.  This eliminates the need to create parse
nodes that will just be folded away.  The code is slightly simpler and a
bit smaller as well.

Constant folding now has a configuration option,
MICROPY_COMP_CONST_FOLDING, which is enabled by default.
This commit is contained in:
Damien George
2015-10-08 14:58:15 +01:00
parent 91fc075a33
commit 64f2b213bb
3 changed files with 286 additions and 264 deletions

View File

@@ -35,6 +35,8 @@
#include "py/parse.h"
#include "py/parsenum.h"
#include "py/smallint.h"
#include "py/runtime.h"
#include "py/builtin.h"
#define RULE_ACT_ARG_MASK (0x0f)
#define RULE_ACT_KIND_MASK (0x30)
@@ -121,8 +123,14 @@ typedef struct _mp_parse_chunk_t {
byte data[];
} mp_parse_chunk_t;
typedef enum {
PARSE_ERROR_NONE = 0,
PARSE_ERROR_MEMORY,
PARSE_ERROR_CONST,
} parse_error_t;
typedef struct _parser_t {
bool had_memory_error;
parse_error_t parse_error;
mp_uint_t rule_stack_alloc;
mp_uint_t rule_stack_top;
@@ -136,11 +144,11 @@ typedef struct _parser_t {
mp_parse_tree_t tree;
mp_parse_chunk_t *cur_chunk;
} parser_t;
STATIC inline void memory_error(parser_t *parser) {
parser->had_memory_error = true;
}
#if MICROPY_COMP_CONST
mp_map_t consts;
#endif
} parser_t;
STATIC void *parser_alloc(parser_t *parser, size_t num_bytes) {
// use a custom memory allocator to store parse nodes sequentially in large chunks
@@ -184,13 +192,13 @@ STATIC void *parser_alloc(parser_t *parser, size_t num_bytes) {
}
STATIC void push_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t arg_i) {
if (parser->had_memory_error) {
if (parser->parse_error) {
return;
}
if (parser->rule_stack_top >= parser->rule_stack_alloc) {
rule_stack_t *rs = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC, true);
if (rs == NULL) {
memory_error(parser);
parser->parse_error = PARSE_ERROR_MEMORY;
return;
}
parser->rule_stack = rs;
@@ -210,7 +218,7 @@ STATIC void push_rule_from_arg(parser_t *parser, mp_uint_t arg) {
}
STATIC void pop_rule(parser_t *parser, const rule_t **rule, mp_uint_t *arg_i, mp_uint_t *src_line) {
assert(!parser->had_memory_error);
assert(!parser->parse_error);
parser->rule_stack_top -= 1;
*rule = rules[parser->rule_stack[parser->rule_stack_top].rule_id];
*arg_i = parser->rule_stack[parser->rule_stack_top].arg_i;
@@ -301,7 +309,7 @@ STATIC void result_stack_show(parser_t *parser) {
*/
STATIC mp_parse_node_t pop_result(parser_t *parser) {
if (parser->had_memory_error) {
if (parser->parse_error) {
return MP_PARSE_NODE_NULL;
}
assert(parser->result_stack_top > 0);
@@ -309,7 +317,7 @@ STATIC mp_parse_node_t pop_result(parser_t *parser) {
}
STATIC mp_parse_node_t peek_result(parser_t *parser, mp_uint_t pos) {
if (parser->had_memory_error) {
if (parser->parse_error) {
return MP_PARSE_NODE_NULL;
}
assert(parser->result_stack_top > pos);
@@ -317,13 +325,13 @@ STATIC mp_parse_node_t peek_result(parser_t *parser, mp_uint_t pos) {
}
STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
if (parser->had_memory_error) {
if (parser->parse_error) {
return;
}
if (parser->result_stack_top >= parser->result_stack_alloc) {
mp_parse_node_t *stack = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC, true);
if (stack == NULL) {
memory_error(parser);
parser->parse_error = PARSE_ERROR_MEMORY;
return;
}
parser->result_stack = stack;
@@ -335,7 +343,7 @@ STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, mp_uint_t src_line, mp_uint_t rule_kind, const char *str, mp_uint_t len) {
mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * 2);
if (pn == NULL) {
memory_error(parser);
parser->parse_error = PARSE_ERROR_MEMORY;
return MP_PARSE_NODE_NULL;
}
pn->source_line = src_line;
@@ -350,7 +358,7 @@ STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, mp_uint_t src_li
STATIC mp_parse_node_t make_node_const_object(parser_t *parser, mp_uint_t src_line, mp_obj_t obj) {
mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t));
if (pn == NULL) {
memory_error(parser);
parser->parse_error = PARSE_ERROR_MEMORY;
return MP_PARSE_NODE_NULL;
}
pn->source_line = src_line;
@@ -363,7 +371,17 @@ STATIC void push_result_token(parser_t *parser) {
mp_parse_node_t pn;
mp_lexer_t *lex = parser->lexer;
if (lex->tok_kind == MP_TOKEN_NAME) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(lex->vstr.buf, lex->vstr.len));
qstr id = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
#if MICROPY_COMP_CONST
// lookup identifier in table of dynamic constants
mp_map_elem_t *elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(id), MP_MAP_LOOKUP);
if (elem != NULL) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, MP_OBJ_SMALL_INT_VALUE(elem->value));
} else
#endif
{
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, id);
}
} else if (lex->tok_kind == MP_TOKEN_INTEGER) {
mp_obj_t o = mp_parse_num_integer(lex->vstr.buf, lex->vstr.len, 0, lex);
if (MP_OBJ_IS_SMALL_INT(o)) {
@@ -398,10 +416,234 @@ STATIC void push_result_token(parser_t *parser) {
push_result_node(parser, pn);
}
#if MICROPY_COMP_MODULE_CONST
STATIC const mp_map_elem_t mp_constants_table[] = {
#if MICROPY_PY_UCTYPES
{ MP_OBJ_NEW_QSTR(MP_QSTR_uctypes), (mp_obj_t)&mp_module_uctypes },
#endif
// Extra constants as defined by a port
MICROPY_PORT_CONSTANTS
};
STATIC MP_DEFINE_CONST_MAP(mp_constants_map, mp_constants_table);
#endif
#if MICROPY_COMP_CONST_FOLDING
STATIC bool fold_constants(parser_t *parser, const rule_t *rule, mp_uint_t num_args) {
// this code does folding of arbitrary integer expressions, eg 1 + 2 * 3 + 4
// it does not do partial folding, eg 1 + 2 + x -> 3 + x
mp_int_t arg0;
if (rule->rule_id == RULE_expr
|| rule->rule_id == RULE_xor_expr
|| rule->rule_id == RULE_and_expr) {
// folding for binary ops: | ^ &
mp_parse_node_t pn = peek_result(parser, num_args - 1);
if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
return false;
}
arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
for (mp_int_t i = num_args - 2; i >= 0; --i) {
pn = peek_result(parser, i);
if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
return false;
}
mp_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
if (rule->rule_id == RULE_expr) {
// int | int
arg0 |= arg1;
} else if (rule->rule_id == RULE_xor_expr) {
// int ^ int
arg0 ^= arg1;
} else if (rule->rule_id == RULE_and_expr) {
// int & int
arg0 &= arg1;
}
}
} else if (rule->rule_id == RULE_shift_expr
|| rule->rule_id == RULE_arith_expr
|| rule->rule_id == RULE_term) {
// folding for binary ops: << >> + - * / % //
mp_parse_node_t pn = peek_result(parser, num_args - 1);
if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
return false;
}
arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
for (mp_int_t i = num_args - 2; i >= 1; i -= 2) {
pn = peek_result(parser, i - 1);
if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
return false;
}
mp_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, i));
if (tok == MP_TOKEN_OP_DBL_LESS) {
// int << int
if (arg1 >= (mp_int_t)BITS_PER_WORD
|| arg0 > (MP_SMALL_INT_MAX >> arg1)
|| arg0 < (MP_SMALL_INT_MIN >> arg1)) {
return false;
}
arg0 <<= arg1;
} else if (tok == MP_TOKEN_OP_DBL_MORE) {
// int >> int
if (arg1 >= (mp_int_t)BITS_PER_WORD) {
// Shifting to big amounts is underfined behavior
// in C and is CPU-dependent; propagate sign bit.
arg1 = BITS_PER_WORD - 1;
}
arg0 >>= arg1;
} else if (tok == MP_TOKEN_OP_PLUS) {
// int + int
arg0 += arg1;
} else if (tok == MP_TOKEN_OP_MINUS) {
// int - int
arg0 -= arg1;
} else if (tok == MP_TOKEN_OP_STAR) {
// int * int
if (mp_small_int_mul_overflow(arg0, arg1)) {
return false;
}
arg0 *= arg1;
} else if (tok == MP_TOKEN_OP_SLASH) {
// int / int
return false;
} else if (tok == MP_TOKEN_OP_PERCENT) {
// int % int
if (arg1 == 0) {
return false;
}
arg0 = mp_small_int_modulo(arg0, arg1);
} else {
assert(tok == MP_TOKEN_OP_DBL_SLASH); // should be
// int // int
if (arg1 == 0) {
return false;
}
arg0 = mp_small_int_floor_divide(arg0, arg1);
}
if (!MP_SMALL_INT_FITS(arg0)) {
return false;
}
}
} else if (rule->rule_id == RULE_factor_2) {
// folding for unary ops: + - ~
mp_parse_node_t pn = peek_result(parser, 0);
if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
return false;
}
arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, 1));
if (tok == MP_TOKEN_OP_PLUS) {
// +int
} else if (tok == MP_TOKEN_OP_MINUS) {
// -int
arg0 = -arg0;
if (!MP_SMALL_INT_FITS(arg0)) {
return false;
}
} else {
assert(tok == MP_TOKEN_OP_TILDE); // should be
// ~int
arg0 = ~arg0;
}
#if MICROPY_COMP_CONST
} else if (rule->rule_id == RULE_expr_stmt) {
mp_parse_node_t pn1 = peek_result(parser, 0);
if (!MP_PARSE_NODE_IS_NULL(pn1)
&& !(MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_expr_stmt_augassign)
|| MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_expr_stmt_assign_list))) {
// this node is of the form <x> = <y>
mp_parse_node_t pn0 = peek_result(parser, 1);
if (MP_PARSE_NODE_IS_ID(pn0)
&& MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_power)
&& MP_PARSE_NODE_IS_ID(((mp_parse_node_struct_t*)pn1)->nodes[0])
&& MP_PARSE_NODE_LEAF_ARG(((mp_parse_node_struct_t*)pn1)->nodes[0]) == MP_QSTR_const
&& MP_PARSE_NODE_IS_STRUCT_KIND(((mp_parse_node_struct_t*)pn1)->nodes[1], RULE_trailer_paren)
&& MP_PARSE_NODE_IS_NULL(((mp_parse_node_struct_t*)pn1)->nodes[2])
) {
// code to assign dynamic constants: id = const(value)
// get the id
qstr id = MP_PARSE_NODE_LEAF_ARG(pn0);
// get the value
mp_parse_node_t pn_value = ((mp_parse_node_struct_t*)((mp_parse_node_struct_t*)pn1)->nodes[1])->nodes[0];
if (!MP_PARSE_NODE_IS_SMALL_INT(pn_value)) {
parser->parse_error = PARSE_ERROR_CONST;
return false;
}
mp_int_t value = MP_PARSE_NODE_LEAF_SMALL_INT(pn_value);
// store the value in the table of dynamic constants
mp_map_elem_t *elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(id), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND);
assert(elem->value == MP_OBJ_NULL);
elem->value = MP_OBJ_NEW_SMALL_INT(value);
// replace const(value) with value
pop_result(parser);
push_result_node(parser, pn_value);
// finished folding this assignment, but we still want it to be part of the tree
return false;
}
}
return false;
#endif
#if MICROPY_COMP_MODULE_CONST
} else if (rule->rule_id == RULE_power) {
mp_parse_node_t pn0 = peek_result(parser, 2);
mp_parse_node_t pn1 = peek_result(parser, 1);
mp_parse_node_t pn2 = peek_result(parser, 0);
if (!(MP_PARSE_NODE_IS_ID(pn0)
&& MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_trailer_period)
&& MP_PARSE_NODE_IS_NULL(pn2))) {
return false;
}
// id1.id2
// look it up in constant table, see if it can be replaced with an integer
mp_parse_node_struct_t *pns1 = (mp_parse_node_struct_t*)pn1;
assert(MP_PARSE_NODE_IS_ID(pns1->nodes[0]));
qstr q_base = MP_PARSE_NODE_LEAF_ARG(pn0);
qstr q_attr = MP_PARSE_NODE_LEAF_ARG(pns1->nodes[0]);
mp_map_elem_t *elem = mp_map_lookup((mp_map_t*)&mp_constants_map, MP_OBJ_NEW_QSTR(q_base), MP_MAP_LOOKUP);
if (elem == NULL) {
return false;
}
mp_obj_t dest[2];
mp_load_method_maybe(elem->value, q_attr, dest);
if (!(MP_OBJ_IS_SMALL_INT(dest[0]) && dest[1] == NULL)) {
return false;
}
arg0 = MP_OBJ_SMALL_INT_VALUE(dest[0]);
#endif
} else {
return false;
}
// success folding this rule
for (mp_uint_t i = num_args; i > 0; i--) {
pop_result(parser);
}
push_result_node(parser, mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, arg0));
return true;
}
#endif
STATIC void push_result_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t num_args) {
#if MICROPY_COMP_CONST_FOLDING
if (fold_constants(parser, rule, num_args)) {
// we folded this rule so return straight away
return;
}
#endif
mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * num_args);
if (pn == NULL) {
memory_error(parser);
parser->parse_error = PARSE_ERROR_MEMORY;
return;
}
pn->source_line = src_line;
@@ -418,7 +660,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
parser_t parser;
parser.had_memory_error = false;
parser.parse_error = PARSE_ERROR_NONE;
parser.rule_stack_alloc = MICROPY_ALLOC_PARSE_RULE_INIT;
parser.rule_stack_top = 0;
@@ -433,6 +675,10 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
parser.tree.chunk = NULL;
parser.cur_chunk = NULL;
#if MICROPY_COMP_CONST
mp_map_init(&parser.consts, 0);
#endif
// check if we could allocate the stacks
if (parser.rule_stack == NULL || parser.result_stack == NULL) {
goto memory_error;
@@ -456,7 +702,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
for (;;) {
next_rule:
if (parser.rule_stack_top == 0 || parser.had_memory_error) {
if (parser.rule_stack_top == 0 || parser.parse_error) {
break;
}
@@ -737,6 +983,10 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
}
}
#if MICROPY_COMP_CONST
mp_map_deinit(&parser.consts);
#endif
// truncate final chunk and link into chain of chunks
if (parser.cur_chunk != NULL) {
(void)m_renew(byte, parser.cur_chunk,
@@ -749,11 +999,19 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
mp_obj_t exc;
// check if we had a memory error
if (parser.had_memory_error) {
memory_error:
exc = mp_obj_new_exception_msg(&mp_type_MemoryError,
"parser could not allocate enough memory");
if (parser.parse_error) {
#if MICROPY_COMP_CONST
if (parser.parse_error == PARSE_ERROR_CONST) {
exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
"constant must be an integer");
} else
#endif
{
assert(parser.parse_error == PARSE_ERROR_MEMORY);
memory_error:
exc = mp_obj_new_exception_msg(&mp_type_MemoryError,
"parser could not allocate enough memory");
}
parser.tree.root = MP_PARSE_NODE_NULL;
goto finished;
}