py: Compress load-int, load-fast, store-fast, unop, binop bytecodes.

There is a lot of potential in compressing bytecodes and making more use
of the coding space.  This patch introduces "multi" bytecodes which have
their argument included in the bytecode (by addition).

UNARY_OP and BINARY_OP no longer take a 1-byte argument for the
operation to perform.  Rather, the operation is encoded in the first
byte itself.

LOAD_FAST_[0,1,2] and STORE_FAST_[0,1,2] are removed in favour of their
multi versions, which can take an argument between 0 and 15 inclusive.
The majority of LOAD_FAST/STORE_FAST codes fit in this range and so this
saves a byte for each of these.

LOAD_CONST_SMALL_INT_MULTI is used to load small ints between -16 and 47
inclusive.  Such ints are quite common and now only need 1 byte to
store, and now have much faster decoding.

In all, this patch saves about 2% RAM for typical bytecode (1.8% on
64-bit test, 2.5% on pyboard test).  It also reduces the binary size
(because bytecodes are simplified) and doesn't harm performance.
This commit is contained in:
Damien George
2014-10-25 16:43:46 +01:00
parent 1084b0f9c2
commit 8456cc017b
5 changed files with 159 additions and 174 deletions

85
py/vm.c
View File

@@ -223,18 +223,6 @@ dispatch_loop:
PUSH(MP_OBJ_NULL);
DISPATCH();
ENTRY(MP_BC_LOAD_FAST_0):
obj_shared = fastn[0];
goto load_check;
ENTRY(MP_BC_LOAD_FAST_1):
obj_shared = fastn[-1];
goto load_check;
ENTRY(MP_BC_LOAD_FAST_2):
obj_shared = fastn[-2];
goto load_check;
ENTRY(MP_BC_LOAD_FAST_N):
DECODE_UINT;
obj_shared = fastn[-unum];
@@ -288,18 +276,6 @@ dispatch_loop:
DISPATCH();
}
ENTRY(MP_BC_STORE_FAST_0):
fastn[0] = POP();
DISPATCH();
ENTRY(MP_BC_STORE_FAST_1):
fastn[-1] = POP();
DISPATCH();
ENTRY(MP_BC_STORE_FAST_2):
fastn[-2] = POP();
DISPATCH();
ENTRY(MP_BC_STORE_FAST_N):
DECODE_UINT;
fastn[-unum] = POP();
@@ -606,19 +582,6 @@ unwind_jump:
}
DISPATCH();
ENTRY(MP_BC_UNARY_OP):
unum = *ip++;
SET_TOP(mp_unary_op(unum, TOP()));
DISPATCH();
ENTRY(MP_BC_BINARY_OP): {
unum = *ip++;
mp_obj_t rhs = POP();
mp_obj_t lhs = TOP();
SET_TOP(mp_binary_op(unum, lhs, rhs));
DISPATCH();
}
ENTRY(MP_BC_BUILD_TUPLE):
DECODE_UINT;
sp -= unum - 1;
@@ -890,7 +853,53 @@ yield:
mp_import_all(POP());
DISPATCH();
ENTRY_DEFAULT: {
#if MICROPY_OPT_COMPUTED_GOTO
ENTRY(MP_BC_LOAD_CONST_SMALL_INT_MULTI):
PUSH(MP_OBJ_NEW_SMALL_INT((mp_int_t)ip[-1] - MP_BC_LOAD_CONST_SMALL_INT_MULTI - 16));
DISPATCH();
ENTRY(MP_BC_LOAD_FAST_MULTI):
obj_shared = fastn[MP_BC_LOAD_FAST_MULTI - (mp_int_t)ip[-1]];
goto load_check;
ENTRY(MP_BC_STORE_FAST_MULTI):
fastn[MP_BC_STORE_FAST_MULTI - (mp_int_t)ip[-1]] = POP();
DISPATCH();
ENTRY(MP_BC_UNARY_OP_MULTI):
SET_TOP(mp_unary_op(ip[-1] - MP_BC_UNARY_OP_MULTI, TOP()));
DISPATCH();
ENTRY(MP_BC_BINARY_OP_MULTI): {
mp_obj_t rhs = POP();
mp_obj_t lhs = TOP();
SET_TOP(mp_binary_op(ip[-1] - MP_BC_BINARY_OP_MULTI, lhs, rhs));
DISPATCH();
}
ENTRY_DEFAULT:
#else
ENTRY_DEFAULT:
if (ip[-1] < MP_BC_LOAD_CONST_SMALL_INT_MULTI + 64) {
PUSH(MP_OBJ_NEW_SMALL_INT((mp_int_t)ip[-1] - MP_BC_LOAD_CONST_SMALL_INT_MULTI - 16));
DISPATCH();
} else if (ip[-1] < MP_BC_LOAD_FAST_MULTI + 16) {
obj_shared = fastn[MP_BC_LOAD_FAST_MULTI - (mp_int_t)ip[-1]];
goto load_check;
} else if (ip[-1] < MP_BC_STORE_FAST_MULTI + 16) {
fastn[MP_BC_STORE_FAST_MULTI - (mp_int_t)ip[-1]] = POP();
DISPATCH();
} else if (ip[-1] < MP_BC_UNARY_OP_MULTI + 5) {
SET_TOP(mp_unary_op(ip[-1] - MP_BC_UNARY_OP_MULTI, TOP()));
DISPATCH();
} else if (ip[-1] < MP_BC_BINARY_OP_MULTI + 35) {
mp_obj_t rhs = POP();
mp_obj_t lhs = TOP();
SET_TOP(mp_binary_op(ip[-1] - MP_BC_BINARY_OP_MULTI, lhs, rhs));
DISPATCH();
} else
#endif
{
mp_obj_t obj = mp_obj_new_exception_msg(&mp_type_NotImplementedError, "byte code not implemented");
nlr_pop();
fastn[0] = obj;