mirror of
https://github.com/micropython/micropython.git
synced 2026-03-11 11:20:17 +01:00
The null byte cannot exist in source code (per CPython), so use it to indicate the end of the input stream (instead of `(mp_uint_t)-1`). This allows the cache chars (chr0/1/2 and their saved versions) to be 8-bit bytes, making it clear that they are not `unichar` values. It also saves a bit of memory in the `mp_lexer_t` data structure. (And in a future commit allows the saved cache chars to be eliminated entirely by storing them in a vstr instead.) In order to keep code size down, the frequently used `chr0` is still of type `uint32_t`. Having it 32-bit means that machine instructions to load it are smaller (it adds about +80 bytes to Thumb code if `chr0` is changed to `uint8_t`). Also add tests for invalid bytes in the input stream to make sure there are no regressions in this regard. Signed-off-by: Damien George <damien@micropython.org>
102 lines
1.9 KiB
Python
102 lines
1.9 KiB
Python
# test the lexer

# Feature guard: merely referencing the names raises NameError when the
# eval/exec builtins are not available.  In that case print "SKIP"
# (presumably the marker the test runner looks for -- confirm) and stop.
try:
    eval
    exec
except NameError:
    print("SKIP")
    raise SystemExit

# __debug__ is a special symbol
# Only its type is printed, so the output does not depend on its value.
print(type(__debug__))

# short input
# Empty and whitespace-only sources, using each line-ending style
# (\n, \r, \r\n) and a lone tab.
exec("")
exec("\n")
exec("\n\n")
exec("\r")
exec("\r\r")
exec("\t")
exec("\r\n")
# A statement preceded by each line-ending style.
exec("\nprint(1)")
exec("\rprint(2)")
exec("\r\nprint(3)")
# A bare expression statement preceded by each line-ending style; the value
# is discarded by exec, so nothing is printed.
exec("\n5")
exec("\r6")
exec("\r\n7")
# One-, two- and three-character expressions: unterminated, then terminated
# by \n, then terminated by \r.
print(eval("1"))
print(eval("12"))
print(eval("123"))
print(eval("1\n"))
print(eval("12\n"))
print(eval("123\n"))
print(eval("1\r"))
print(eval("12\r"))
print(eval("123\r"))

# line continuation
# A backslash followed by each line-ending style (\r, \n, \r\n) joins the two
# adjacent string literals; first with surrounding spaces, then without.
print(eval("'123' \\\r '456'"))
print(eval("'123' \\\n '456'"))
print(eval("'123' \\\r\n '456'"))
print(eval("'123'\\\r'456'"))
print(eval("'123'\\\n'456'"))
print(eval("'123'\\\r\n'456'"))

# backslash used to escape a line-break in a string
# The literal must stay split across exactly these two physical lines, with
# the second one starting at column 0, so that the string is 'ab'.
print('a\
b')

# lots of indentation
# a(x) prints x only when x is truthy; the same condition is repeated over
# 15 nested levels purely to produce a deep run of indents and dedents.
def a(x):
    if x:
        if x:
            if x:
                if x:
                    if x:
                        if x:
                            if x:
                                if x:
                                    if x:
                                        if x:
                                            if x:
                                                if x:
                                                    if x:
                                                        if x:
                                                            if x:
                                                                print(x)
a(1)

# badly formed hex escape sequences
# Each literal has one hex digit fewer than its escape requires
# (\x needs 2, \u needs 4, \U needs 8), so compiling it must raise SyntaxError.
# Raw strings keep the backslash intact until exec lexes the inner source.
try:
    exec(r"'\x0'")
except SyntaxError:
    print("SyntaxError")
try:
    exec(r"b'\x0'")
except SyntaxError:
    print("SyntaxError")
try:
    exec(r"'\u000'")
except SyntaxError:
    print("SyntaxError")
try:
    exec(r"'\U0000000'")
except SyntaxError:
    print("SyntaxError")

# Properly formed integer literals
# A literal made only of zeros is a valid decimal integer.
print(eval("00"))
# badly formed integer literals
# A leading zero followed by a non-zero digit is rejected.
try:
    eval("01")
except SyntaxError:
    print("SyntaxError")

# Bytes 0-8 inclusive are not allowed in input stream.
# Earlier CPython (eg 3.10.12) raises ValueError, later CPython (eg 3.11.14) raises SyntaxError.
# Both exception types are reported with the same "SyntaxError" label so the
# printed output does not depend on which one was raised.
# Note that range(0, 10) also covers byte 9 (tab), presumably as the first
# accepted value: for it the evaluated result is printed instead.
for invalid_byte_value in range(0, 10):
    try:
        print(eval(b"123" + bytes([invalid_byte_value])))
    except (ValueError, SyntaxError):
        print("byte {}: SyntaxError".format(invalid_byte_value))