py: Add support for PEP 750's t-strings.

This commit adds support for t-strings by leveraging the existing f-string
parser in the lexer.  It includes:
- t-string parsing in `py/lexer.c`
- new built-in `__template__()` function to construct t-string objects
- new built-in `Template` and `Interpolation` classes which implement all
  the functionality from PEP 750
- new built-in `string` module with `templatelib` sub-module, which
  contains the classes `Template` and `Interpolation`

The way the t-string parser works is that an input t-string like:

    t"hello {name:5}"

is converted character-by-character by the lexer/tokenizer to:

    __template__(("hello ", "",), name, "name", None, "5")

For reference, if it were an f-string it would be converted to:

    "hello {:5}".format(name)

Some properties of this implementation:
- it's enabled by default at the full feature level,
  MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_FULL_FEATURES
- when enabled on a Cortex-M bare-metal port it costs about +3000 bytes
- there are no limits on the size or complexity of t-strings, and it allows
  arbitrary levels of nesting of f-strings and t-strings (up to the memory
  available to the compiler)
- the 'a' (ascii) conversion specifier is not supported (MicroPython does
  not have the built-in `ascii` function)
- space after conversion specifier, eg t"{x!r :10}", is not supported
- arguments to `__template__` and `Interpolation` are not fully validated
  (it's not necessary, it won't crash if the wrong arguments are passed in)

Otherwise the implementation here matches CPython.

Signed-off-by: Damien George <damien@micropython.org>
This commit is contained in:
Damien George
2026-01-06 12:30:42 +11:00
parent 58436b2882
commit d4751a164e
10 changed files with 691 additions and 14 deletions

View File

@@ -59,7 +59,11 @@ static bool is_char_or3(mp_lexer_t *lex, byte c1, byte c2, byte c3) {
return lex->chr0 == c1 || lex->chr0 == c2 || lex->chr0 == c3;
}
#if MICROPY_PY_FSTRINGS
#if MICROPY_PY_TSTRINGS
// Return true if lex->chr0 equals any one of the five given bytes.
static bool is_char_or5(mp_lexer_t *lex, byte c1, byte c2, byte c3, byte c4, byte c5) {
    if (lex->chr0 == c1 || lex->chr0 == c2 || lex->chr0 == c3) {
        return true;
    }
    return lex->chr0 == c4 || lex->chr0 == c5;
}
#elif MICROPY_PY_FSTRINGS
// Return true if lex->chr0 equals any one of the four given bytes.
static bool is_char_or4(mp_lexer_t *lex, byte c1, byte c2, byte c3, byte c4) {
    if (lex->chr0 == c1 || lex->chr0 == c2) {
        return true;
    }
    return lex->chr0 == c3 || lex->chr0 == c4;
}
@@ -108,7 +112,11 @@ static bool is_following_odigit(mp_lexer_t *lex) {
static bool is_string_or_bytes(mp_lexer_t *lex) {
return is_char_or(lex, '\'', '\"')
#if MICROPY_PY_FSTRINGS
#if MICROPY_PY_TSTRINGS
|| (is_char_or5(lex, 'r', 'u', 'b', 'f', 't') && is_char_following_or(lex, '\'', '\"'))
|| (((is_char_and(lex, 'r', 'f') || is_char_and(lex, 'f', 'r') || is_char_and(lex, 'r', 't') || is_char_and(lex, 't', 'r'))
&& is_char_following_following_or(lex, '\'', '\"')))
#elif MICROPY_PY_FSTRINGS
|| (is_char_or4(lex, 'r', 'u', 'b', 'f') && is_char_following_or(lex, '\'', '\"'))
|| (((is_char_and(lex, 'r', 'f') || is_char_and(lex, 'f', 'r'))
&& is_char_following_following_or(lex, '\'', '\"')))
@@ -312,7 +320,7 @@ static bool get_hex(mp_lexer_t *lex, size_t num_digits, mp_uint_t *result) {
return true;
}
static void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring) {
static void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring, bool is_tstring) {
// get first quoting character
char quote_char = '\'';
if (is_char(lex, '\"')) {
@@ -345,20 +353,52 @@ static void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring)
}
}
#endif
#if MICROPY_PY_TSTRINGS
if (is_tstring) {
if (vstr_len(&lex->fstring_args) == 0) {
vstr_add_byte(&lex->vstr, '(');
vstr_add_byte(&lex->vstr, '(');
for (size_t q = 0; q < num_quotes; ++q) {
vstr_add_byte(&lex->vstr, quote_char);
}
}
}
#endif
#if MICROPY_PY_TSTRINGS
size_t tstring_num_interpolations = 0;
size_t end_of_format_index = 0;
size_t nested_formatting_in_tstring = 0;
bool nested_formatting_needs_fstring = false;
#endif
while (!is_end(lex) && (num_quotes > 1 || !is_char(lex, '\n')) && n_closing < num_quotes) {
if (is_char(lex, quote_char)) {
n_closing += 1;
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
#if MICROPY_PY_TSTRINGS
} else if (is_tstring && is_char(lex, '\n')) {
// handle multi-line t-strings
vstr_add_byte(&lex->vstr, '\\');
vstr_add_byte(&lex->vstr, 'n');
#endif
} else {
n_closing = 0;
#if MICROPY_PY_FSTRINGS
while (is_fstring && is_char(lex, '{')) {
while ((is_fstring || is_tstring) && is_char(lex, '{')) {
#if MICROPY_PY_TSTRINGS
if (nested_formatting_in_tstring) {
++nested_formatting_in_tstring;
break;
}
#endif
next_char(lex);
if (is_char(lex, '{')) {
// "{{" is passed through unchanged to be handled by str.format
vstr_add_byte(&lex->vstr, '{');
if (!is_tstring) {
vstr_add_byte(&lex->vstr, '{');
}
next_char(lex);
} else {
// wrap each argument in (), e.g.
@@ -395,24 +435,102 @@ static void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring)
vstr_add_byte(&lex->fstring_args, c);
next_char(lex);
}
#if MICROPY_PY_TSTRINGS
bool was_debug = false;
#endif
if (lex->fstring_args.buf[lex->fstring_args.len - 1] == '=') {
// if the last character of the arg was '=', then inject "arg=" before the '{'.
// f'{a=}' --> 'a={}'.format(a)
vstr_add_strn(&lex->vstr, lex->fstring_args.buf + i, lex->fstring_args.len - i);
// remove the trailing '='
lex->fstring_args.len--;
#if MICROPY_PY_TSTRINGS
was_debug = true;
#endif
}
#if MICROPY_PY_TSTRINGS
if (is_tstring) {
// truncate trailing spaces
while (lex->fstring_args.len && unichar_isspace(lex->fstring_args.buf[lex->fstring_args.len - 1])) {
lex->fstring_args.len--;
}
}
#endif
if (lex->fstring_args.len == i) {
// empty format, eg f'{}'
// (should apply to both f-strings and t-strings, needs test)
lex->tok_kind = MP_TOKEN_MALFORMED_FSTRING;
}
// close the paren-wrapped arg to .format().
vstr_add_byte(&lex->fstring_args, ')');
// comma-separate args to .format().
vstr_add_byte(&lex->fstring_args, ',');
#if MICROPY_PY_TSTRINGS
if (is_tstring) {
// start the interpolation part
// duplicate expression to a string
vstr_add_byte(&lex->fstring_args, quote_char);
size_t nn = lex->fstring_args.len - i - 3;
for (size_t j = 0; j < nn; ++j) {
byte b = lex->fstring_args.buf[i + j];
if (b == quote_char) {
vstr_add_byte(&lex->fstring_args, '\\');
} else if (b == '\\') {
vstr_add_byte(&lex->fstring_args, '\\');
}
vstr_add_byte(&lex->fstring_args, b);
}
vstr_add_byte(&lex->fstring_args, quote_char);
vstr_add_byte(&lex->fstring_args, ',');
// start next part of string as next __template__ argument
for (size_t q = 0; q < num_quotes; ++q) {
vstr_add_byte(&lex->vstr, quote_char);
}
vstr_add_byte(&lex->vstr, ',');
for (size_t q = 0; q < num_quotes; ++q) {
vstr_add_byte(&lex->vstr, quote_char);
}
// process conv and format spec
if (is_char(lex, '!')) {
next_char(lex);
vstr_add_byte(&lex->fstring_args, quote_char);
vstr_add_byte(&lex->fstring_args, CUR_CHAR(lex));
next_char(lex);
vstr_add_byte(&lex->fstring_args, quote_char);
vstr_add_byte(&lex->fstring_args, ',');
} else if (was_debug && !is_char(lex, ':')) {
vstr_add_str(&lex->fstring_args, "'r',");
} else {
vstr_add_str(&lex->fstring_args, "None,");
}
// start format str
if (is_char(lex, ':')) {
next_char(lex);
}
nested_formatting_in_tstring = 1;
end_of_format_index = lex->vstr.len;
}
#endif
}
vstr_add_byte(&lex->vstr, '{');
goto continue_outer;
}
#endif
if (is_char(lex, '\\')) {
if (is_tstring && is_char(lex, '\\')) {
// it'll be reparsed as a string
vstr_add_byte(&lex->vstr, '\\');
if (is_raw) {
vstr_add_byte(&lex->vstr, '\\');
} else {
next_char(lex);
vstr_add_byte(&lex->vstr, CUR_CHAR(lex));
}
} else if (is_char(lex, '\\')) {
next_char(lex);
unichar c = CUR_CHAR(lex);
if (is_raw) {
@@ -511,10 +629,36 @@ static void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring)
// Character out of range; this raises a generic SyntaxError.
lex->tok_kind = MP_TOKEN_INVALID;
}
#if MICROPY_PY_TSTRINGS
} else if (is_tstring && nested_formatting_in_tstring && is_char(lex, '}')) {
if (--nested_formatting_in_tstring > 0) {
nested_formatting_needs_fstring = true;
vstr_add_byte(&lex->vstr, CUR_CHAR(lex));
} else {
// finished the current interpolation
++tstring_num_interpolations;
if (nested_formatting_needs_fstring) {
vstr_add_byte(&lex->fstring_args, 'f');
nested_formatting_needs_fstring = false;
}
vstr_add_byte(&lex->fstring_args, quote_char);
vstr_add_strn(&lex->fstring_args, lex->vstr.buf + end_of_format_index + 1, lex->vstr.len - end_of_format_index - 1);
lex->vstr.len = end_of_format_index;
vstr_add_byte(&lex->fstring_args, quote_char);
vstr_add_byte(&lex->fstring_args, ',');
}
#endif
} else {
// Add the "character" as a byte so that we remain 8-bit clean.
// This way, strings are parsed correctly whether or not they contain utf-8 chars.
vstr_add_byte(&lex->vstr, CUR_CHAR(lex));
#if MICROPY_PY_TSTRINGS
if (is_tstring && is_char_and(lex, '}', '}')) {
next_char(lex);
} else if (is_tstring && is_char(lex, '}')) {
lex->tok_kind = MP_TOKEN_MALFORMED_FSTRING;
}
#endif
}
}
continue_parsing_string_literal:
@@ -529,8 +673,23 @@ static void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring)
lex->tok_kind = MP_TOKEN_LONELY_STRING_OPEN;
}
// cut off the end quotes from the token text
vstr_cut_tail_bytes(&lex->vstr, n_closing);
#if MICROPY_PY_TSTRINGS
if (is_tstring) {
if (nested_formatting_in_tstring > 0) {
lex->tok_kind = MP_TOKEN_MALFORMED_FSTRING;
}
if (1 + tstring_num_interpolations * 4 > 255) {
// too many arguments for function call, so wrap interpolations in a tuple
vstr_ins_byte(&lex->fstring_args, 0, '(');
vstr_add_byte(&lex->fstring_args, ')');
}
} else
#endif
{
// cut off the end quotes from the token text
vstr_cut_tail_bytes(&lex->vstr, n_closing);
}
}
// This function returns whether it has crossed a newline or not.
@@ -621,11 +780,16 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
// MP_TOKEN_END is used to indicate that this is the first string token
lex->tok_kind = MP_TOKEN_END;
#if MICROPY_PY_TSTRINGS
bool had_tstring = false;
#endif
// Loop to accumulate string/bytes literals
do {
// parse type codes
bool is_raw = false;
bool is_fstring = false;
bool is_tstring = false;
mp_token_kind_t kind = MP_TOKEN_STRING;
int n_char = 0;
if (is_char(lex, 'u')) {
@@ -645,11 +809,17 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
n_char = 2;
}
#if MICROPY_PY_FSTRINGS
if (is_char_following(lex, 'f')) {
else if (is_char_following(lex, 'f')) {
is_fstring = true;
n_char = 2;
}
#endif
#if MICROPY_PY_TSTRINGS
else if (is_char_following(lex, 't')) {
is_tstring = true;
n_char = 2;
}
#endif
}
#if MICROPY_PY_FSTRINGS
else if (is_char(lex, 'f')) {
@@ -661,6 +831,22 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
}
}
#endif
#if MICROPY_PY_TSTRINGS
else if (is_char(lex, 't')) {
is_tstring = true;
n_char = 1;
if (is_char_following(lex, 'r')) {
is_raw = true;
n_char = 2;
}
}
#endif
#if MICROPY_PY_TSTRINGS
if (is_tstring) {
had_tstring = true;
}
#endif
// Set or check token kind
if (lex->tok_kind == MP_TOKEN_END) {
@@ -679,13 +865,28 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
}
// Parse the literal
parse_string_literal(lex, is_raw, is_fstring);
parse_string_literal(lex, is_raw, is_fstring, is_tstring);
// Skip whitespace so we can check if there's another string following
skip_whitespace(lex, true);
} while (is_string_or_bytes(lex));
#if MICROPY_PY_TSTRINGS
if (had_tstring) {
vstr_add_byte(&lex->vstr, ',');
vstr_add_byte(&lex->vstr, ')');
vstr_add_byte(&lex->vstr, ',');
vstr_ins_strn(&lex->fstring_args, 0, lex->vstr.buf, lex->vstr.len);
if (lex->tok_kind > MP_TOKEN_MALFORMED_FSTRING) {
// next token is __template__ for the function
lex->tok_kind = MP_TOKEN_NAME;
vstr_reset(&lex->vstr);
vstr_add_str(&lex->vstr, "__template__");
}
}
#endif
#if MICROPY_PY_FSTRINGS
if (lex->fstring_args.len) {
// If there was an f-string then it's now complete.

View File

@@ -599,6 +599,9 @@ MP_DEFINE_CONST_FUN_OBJ_0(mp_builtin_locals_obj, mp_builtin_locals);
// These are defined in terms of MicroPython API functions right away
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_id_obj, mp_obj_id);
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_len_obj, mp_obj_len);
#if MICROPY_PY_TSTRINGS
// Function object for the `__template__` builtin.  It accepts between 1 and
// MP_OBJ_FUN_ARGS_MAX positional arguments and forwards them to mp_obj_new_template().
static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin___template___obj, 1, MP_OBJ_FUN_ARGS_MAX, mp_obj_new_template);
#endif
static const mp_rom_map_elem_t mp_module_builtins_globals_table[] = {
{ MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_builtins) },
@@ -607,6 +610,9 @@ static const mp_rom_map_elem_t mp_module_builtins_globals_table[] = {
{ MP_ROM_QSTR(MP_QSTR___build_class__), MP_ROM_PTR(&mp_builtin___build_class___obj) },
{ MP_ROM_QSTR(MP_QSTR___import__), MP_ROM_PTR(&mp_builtin___import___obj) },
{ MP_ROM_QSTR(MP_QSTR___repl_print__), MP_ROM_PTR(&mp_builtin___repl_print___obj) },
#if MICROPY_PY_TSTRINGS
{ MP_ROM_QSTR(MP_QSTR___template__), MP_ROM_PTR(&mp_builtin___template___obj) },
#endif
// built-in types
{ MP_ROM_QSTR(MP_QSTR_bool), MP_ROM_PTR(&mp_type_bool) },

56
py/modstring.c Normal file
View File

@@ -0,0 +1,56 @@
/*
* This file is part of the MicroPython project, http://micropython.org/
*
* The MIT License (MIT)
*
* Copyright (c) 2026 Damien P. George
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "py/obj.h"
#if MICROPY_PY_TSTRINGS
// Globals of the `string.templatelib` sub-module: its name plus the two t-string
// classes, Template and Interpolation.
static const mp_rom_map_elem_t mp_module_string_templatelib_globals_table[] = {
{ MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_string_dot_templatelib) },
{ MP_ROM_QSTR(MP_QSTR_Template), MP_ROM_PTR(&mp_type_template) },
{ MP_ROM_QSTR(MP_QSTR_Interpolation), MP_ROM_PTR(&mp_type_interpolation) },
};
static MP_DEFINE_CONST_DICT(mp_module_string_templatelib_globals, mp_module_string_templatelib_globals_table);
// Module object for `string.templatelib`.  It is file-local and is exposed as the
// `templatelib` entry of the `string` module's globals.
static const mp_obj_module_t mp_module_string_templatelib = {
.base = { &mp_type_module },
.globals = (mp_obj_dict_t *)&mp_module_string_templatelib_globals,
};
// Globals of the top-level `string` module: its name and the `templatelib` sub-module.
static const mp_rom_map_elem_t mp_module_string_globals_table[] = {
{ MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_string) },
{ MP_ROM_QSTR(MP_QSTR_templatelib), MP_ROM_PTR(&mp_module_string_templatelib) },
};
static MP_DEFINE_CONST_DICT(mp_module_string_globals, mp_module_string_globals_table);
// Module object for `string` (non-static, so it is visible outside this file).
const mp_obj_module_t mp_module_string = {
.base = { &mp_type_module },
.globals = (mp_obj_dict_t *)&mp_module_string_globals,
};
// Register the module under the name `string` using the extensible-module macro.
MP_REGISTER_EXTENSIBLE_MODULE(MP_QSTR_string, mp_module_string);
#endif // MICROPY_PY_TSTRINGS

View File

@@ -1137,7 +1137,7 @@ typedef time_t mp_timestamp_t;
// have __init__ methods. Instead, the top-level package's __init__ should
// initialise all sub-packages.
#ifndef MICROPY_MODULE_BUILTIN_SUBPACKAGES
#define MICROPY_MODULE_BUILTIN_SUBPACKAGES (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EVERYTHING)
#define MICROPY_MODULE_BUILTIN_SUBPACKAGES (MICROPY_PY_TSTRINGS || MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EVERYTHING)
#endif
// Whether to support module-level __getattr__ (see PEP 562)
@@ -1322,6 +1322,12 @@ typedef time_t mp_timestamp_t;
#define MICROPY_PY_FSTRINGS (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_EXTRA_FEATURES)
#endif
// Support for template strings, t-strings (see PEP 750, Python 3.14+)
// Requires MICROPY_PY_FSTRINGS to be enabled.
#ifndef MICROPY_PY_TSTRINGS
#define MICROPY_PY_TSTRINGS (MICROPY_PY_FSTRINGS && MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_FULL_FEATURES)
#endif
// Support for assignment expressions with := (see PEP 572, Python 3.8+)
#ifndef MICROPY_PY_ASSIGN_EXPR
#define MICROPY_PY_ASSIGN_EXPR (MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_CORE_FEATURES)

View File

@@ -831,6 +831,8 @@ extern const mp_obj_type_t mp_type_NoneType;
extern const mp_obj_type_t mp_type_bool;
extern const mp_obj_type_t mp_type_int;
extern const mp_obj_type_t mp_type_str;
extern const mp_obj_type_t mp_type_template;
extern const mp_obj_type_t mp_type_interpolation;
extern const mp_obj_type_t mp_type_bytes;
extern const mp_obj_type_t mp_type_bytearray;
extern const mp_obj_type_t mp_type_memoryview;
@@ -1011,6 +1013,9 @@ mp_obj_t mp_obj_new_bytes_from_vstr(vstr_t *vstr);
mp_obj_t mp_obj_new_bytes(const byte *data, size_t len);
mp_obj_t mp_obj_new_bytearray(size_t n, const void *items);
mp_obj_t mp_obj_new_bytearray_by_ref(size_t n, void *items);
#if MICROPY_PY_TSTRINGS
mp_obj_t mp_obj_new_template(size_t n_args, const mp_obj_t *args);
#endif
#if MICROPY_PY_BUILTINS_FLOAT
mp_obj_t mp_obj_new_int_from_float(mp_float_t val);
mp_obj_t mp_obj_new_complex(mp_float_t real, mp_float_t imag);

395
py/objtemplate.c Normal file
View File

@@ -0,0 +1,395 @@
/*
* This file is part of the MicroPython project, http://micropython.org/
*
* The MIT License (MIT)
*
* Copyright (c) 2025 Koudai Aono
* Copyright (c) 2026 Damien P. George
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "py/runtime.h"
#if MICROPY_PY_TSTRINGS
// Instance data of a Template object.
typedef struct _mp_obj_template_t {
mp_obj_base_t base;
// The literal string parts.  Code in this file treats this as a tuple
// (it is cast to mp_obj_tuple_t * when concatenating and iterating).
mp_obj_t strings;
// The Interpolation objects, likewise treated as a tuple.
mp_obj_t interpolations;
} mp_obj_template_t;
// Instance data of an Interpolation object.  The four fields are stored as given
// and are returned unchanged by the attributes of the same names.
typedef struct _mp_obj_interpolation_t {
mp_obj_base_t base;
mp_obj_t value;
mp_obj_t expression;
mp_obj_t conversion;
mp_obj_t format_spec;
} mp_obj_interpolation_t;
// Forward declaration; the definition comes later in this file, after its first
// use in mp_obj_new_template().
static mp_obj_t mp_obj_new_interpolation(mp_obj_t value, mp_obj_t expr, mp_obj_t conv, mp_obj_t spec);
static mp_obj_t mp_obj_template_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *args) {
mp_arg_check_num(n_args, n_kw, 0, MP_OBJ_FUN_ARGS_MAX, false);
mp_obj_t strings_obj;
mp_obj_t interpolations_obj;
if (n_args == 0) {
mp_obj_t empty = MP_OBJ_NEW_QSTR(MP_QSTR_);
strings_obj = mp_obj_new_tuple(1, &empty);
interpolations_obj = mp_obj_new_tuple(0, NULL);
} else {
size_t n_interpolations = 0;
size_t n_str_args = 0;
for (size_t i = 0; i < n_args; i++) {
if (mp_obj_is_exact_type(args[i], &mp_type_interpolation)) {
n_interpolations++;
} else if (mp_obj_is_str(args[i])) {
n_str_args++;
} else {
mp_raise_TypeError(MP_ERROR_TEXT("expected str or Interpolation"));
}
}
if (n_interpolations == 0) {
if (n_str_args == 1) {
strings_obj = mp_obj_new_tuple(1, &args[0]);
} else {
size_t total_len = 0;
for (size_t i = 0; i < n_args; i++) {
size_t str_len;
(void)mp_obj_str_get_data(args[i], &str_len);
total_len += str_len;
}
vstr_t vstr;
vstr_init(&vstr, total_len);
for (size_t i = 0; i < n_args; i++) {
size_t str_len;
const char *str_data = mp_obj_str_get_data(args[i], &str_len);
vstr_add_strn(&vstr, str_data, str_len);
}
mp_obj_t str_items[1];
str_items[0] = mp_obj_new_str_from_vstr(&vstr);
strings_obj = mp_obj_new_tuple(1, str_items);
}
interpolations_obj = mp_obj_new_tuple(0, NULL);
} else {
size_t n_strings = n_interpolations + 1;
mp_obj_tuple_t *strings_tuple = mp_obj_malloc_var(mp_obj_tuple_t, items, mp_obj_t, n_strings, &mp_type_tuple);
mp_obj_tuple_t *interpolations_tuple = mp_obj_malloc_var(mp_obj_tuple_t, items, mp_obj_t, n_interpolations, &mp_type_tuple);
strings_tuple->len = n_strings;
interpolations_tuple->len = n_interpolations;
size_t string_idx = 0;
size_t interp_idx = 0;
mp_obj_t current_str = MP_OBJ_NULL;
bool current_vstr_active = false;
vstr_t current_vstr = {0};
for (size_t i = 0; i <= n_args; i++) {
if (i == n_args || mp_obj_is_exact_type(args[i], &mp_type_interpolation)) {
mp_obj_t out_str;
if (current_vstr_active) {
out_str = mp_obj_new_str_from_vstr(&current_vstr);
current_vstr_active = false;
} else if (current_str != MP_OBJ_NULL) {
out_str = current_str;
} else {
out_str = MP_OBJ_NEW_QSTR(MP_QSTR_);
}
strings_tuple->items[string_idx++] = out_str;
current_str = MP_OBJ_NULL;
if (i < n_args) {
interpolations_tuple->items[interp_idx++] = args[i];
}
} else {
size_t str_len;
const char *str_data = mp_obj_str_get_data(args[i], &str_len);
if (current_vstr_active) {
vstr_add_strn(&current_vstr, str_data, str_len);
} else if (current_str == MP_OBJ_NULL) {
current_str = args[i];
} else {
size_t prev_len;
const char *prev_data = mp_obj_str_get_data(current_str, &prev_len);
vstr_init(&current_vstr, prev_len + str_len);
vstr_add_strn(&current_vstr, prev_data, prev_len);
vstr_add_strn(&current_vstr, str_data, str_len);
current_vstr_active = true;
current_str = MP_OBJ_NULL;
}
}
}
strings_obj = MP_OBJ_FROM_PTR(strings_tuple);
interpolations_obj = MP_OBJ_FROM_PTR(interpolations_tuple);
}
}
mp_obj_template_t *self = mp_obj_malloc(mp_obj_template_t, type);
self->strings = strings_obj;
self->interpolations = interpolations_obj;
return MP_OBJ_FROM_PTR(self);
}
// Print a Template as `Template(strings=<repr>, interpolations=<repr>)`.
// The print kind is ignored; both tuples are always printed with PRINT_REPR.
static void mp_obj_template_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
    (void)kind;
    mp_obj_template_t *tmpl = MP_OBJ_TO_PTR(self_in);

    // "Template(strings="
    mp_printf(print, "%q(%q=", MP_QSTR_Template, MP_QSTR_strings);
    mp_obj_print_helper(print, tmpl->strings, PRINT_REPR);

    // ", interpolations="
    mp_printf(print, ", %q=", MP_QSTR_interpolations);
    mp_obj_print_helper(print, tmpl->interpolations, PRINT_REPR);

    mp_print_str(print, ")");
}
// Binary operator handler for Template.  Only `Template + Template` is implemented;
// every other operator, and any right-hand operand that is not exactly a Template,
// yields MP_OBJ_NULL (op not supported).
//
// The sum keeps all interpolations of both operands in order, and joins the last
// string of the left operand with the first string of the right operand.
static mp_obj_t mp_obj_template_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
    if (op != MP_BINARY_OP_ADD || !mp_obj_is_exact_type(rhs_in, &mp_type_template)) {
        return MP_OBJ_NULL; // op not supported
    }

    mp_obj_template_t *left = MP_OBJ_TO_PTR(lhs_in);
    mp_obj_template_t *right = MP_OBJ_TO_PTR(rhs_in);
    mp_obj_tuple_t *left_strs = MP_OBJ_TO_PTR(left->strings);
    mp_obj_tuple_t *left_interps = MP_OBJ_TO_PTR(left->interpolations);
    mp_obj_tuple_t *right_strs = MP_OBJ_TO_PTR(right->strings);
    mp_obj_tuple_t *right_interps = MP_OBJ_TO_PTR(right->interpolations);

    // Index (in both the left and the output tuple) of the string at the seam.
    size_t seam = left_strs->len - 1;
    size_t out_strs_len = left_strs->len + right_strs->len - 1;
    size_t out_interps_len = left_interps->len + right_interps->len;

    // Allocate the result tuples directly and fill them in place.
    mp_obj_tuple_t *out_strs = mp_obj_malloc_var(mp_obj_tuple_t, items, mp_obj_t, out_strs_len, &mp_type_tuple);
    mp_obj_tuple_t *out_interps = mp_obj_malloc_var(mp_obj_tuple_t, items, mp_obj_t, out_interps_len, &mp_type_tuple);
    out_strs->len = out_strs_len;
    out_interps->len = out_interps_len;

    // Left strings, excluding the one at the seam.
    for (size_t k = 0; k < seam; ++k) {
        out_strs->items[k] = left_strs->items[k];
    }

    // Seam: last left string followed by first right string.
    size_t tail_len;
    size_t head_len;
    const char *tail = mp_obj_str_get_data(left_strs->items[seam], &tail_len);
    const char *head = mp_obj_str_get_data(right_strs->items[0], &head_len);
    vstr_t merged;
    vstr_init(&merged, tail_len + head_len);
    vstr_add_strn(&merged, tail, tail_len);
    vstr_add_strn(&merged, head, head_len);
    out_strs->items[seam] = mp_obj_new_str_from_vstr(&merged);

    // Right strings, excluding the first (already merged into the seam).
    for (size_t k = 1; k < right_strs->len; ++k) {
        out_strs->items[seam + k] = right_strs->items[k];
    }

    // Interpolations: left ones first, then right ones.
    for (size_t k = 0; k < left_interps->len; ++k) {
        out_interps->items[k] = left_interps->items[k];
    }
    for (size_t k = 0; k < right_interps->len; ++k) {
        out_interps->items[left_interps->len + k] = right_interps->items[k];
    }

    mp_obj_template_t *sum = mp_obj_malloc(mp_obj_template_t, &mp_type_template);
    sum->strings = MP_OBJ_FROM_PTR(out_strs);
    sum->interpolations = MP_OBJ_FROM_PTR(out_interps);
    return MP_OBJ_FROM_PTR(sum);
}
// Attribute handler for Template.  Handles loads of `strings`, `interpolations`
// and `values`; `values` is a new tuple built on each access from the `value`
// field of every interpolation.  For any other attribute, and for store/delete
// requests, dest is left untouched.
static void mp_obj_template_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest) {
    if (dest[0] != MP_OBJ_NULL) {
        // Not a load: nothing is handled here.
        return;
    }

    mp_obj_template_t *tmpl = MP_OBJ_TO_PTR(self_in);

    if (attr == MP_QSTR_strings) {
        dest[0] = tmpl->strings;
        return;
    }
    if (attr == MP_QSTR_interpolations) {
        dest[0] = tmpl->interpolations;
        return;
    }
    if (attr == MP_QSTR_values) {
        mp_obj_tuple_t *interps = MP_OBJ_TO_PTR(tmpl->interpolations);
        mp_obj_tuple_t *values = MP_OBJ_TO_PTR(mp_obj_new_tuple(interps->len, NULL));
        for (size_t k = 0; k < interps->len; ++k) {
            mp_obj_interpolation_t *item = MP_OBJ_TO_PTR(interps->items[k]);
            values->items[k] = item->value;
        }
        dest[0] = MP_OBJ_FROM_PTR(values);
    }
}
// Iterator state for a Template.  It is placed in the caller-supplied
// mp_obj_iter_buf_t, so it must not be larger than that buffer.
typedef struct _mp_obj_template_iter_t {
mp_obj_base_t base;
mp_fun_1_t iternext;
// The Template being iterated.
mp_obj_t template;
// Position in the interleaved sequence: even positions refer to strings,
// odd positions to interpolations (each at index / 2 of its tuple).
size_t index;
} mp_obj_template_iter_t;
// Advance a Template iterator.  Strings and interpolations are visited alternately,
// starting with a string; empty strings are skipped.  Returns MP_OBJ_STOP_ITERATION
// once every string and interpolation has been visited.
static mp_obj_t template_iter_iternext(mp_obj_t self_in) {
    mp_obj_template_iter_t *it = MP_OBJ_TO_PTR(self_in);
    mp_obj_template_t *tmpl = MP_OBJ_TO_PTR(it->template);
    mp_obj_tuple_t *strs = MP_OBJ_TO_PTR(tmpl->strings);
    mp_obj_tuple_t *interps = MP_OBJ_TO_PTR(tmpl->interpolations);
    const size_t total = strs->len + interps->len;

    while (it->index < total) {
        // Consume this position before looking at what it holds.
        const size_t pos = it->index++;

        if (pos & 1) {
            // Odd position: an interpolation.
            return interps->items[pos / 2];
        }

        // Even position: a string, yielded only when non-empty.
        mp_obj_t candidate = strs->items[pos / 2];
        size_t candidate_len;
        mp_obj_str_get_data(candidate, &candidate_len);
        if (candidate_len > 0) {
            return candidate;
        }
    }

    return MP_OBJ_STOP_ITERATION;
}
// Create an iterator over a Template.  The iterator state is written into the
// caller-provided iter_buf (no allocation) and starts at position 0.
static mp_obj_t mp_obj_template_iter(mp_obj_t self_in, mp_obj_iter_buf_t *iter_buf) {
    assert(sizeof(mp_obj_template_iter_t) <= sizeof(mp_obj_iter_buf_t));
    mp_obj_template_iter_t *it = (mp_obj_template_iter_t *)iter_buf;
    it->index = 0;
    it->template = self_in;
    it->iternext = template_iter_iternext;
    it->base.type = &mp_type_polymorph_iter;
    return MP_OBJ_FROM_PTR(it);
}
// The `Template` type: constructor, repr printing, binary operator handler
// (which implements `+` between two Templates), attribute loads and iteration.
MP_DEFINE_CONST_OBJ_TYPE(
mp_type_template,
MP_QSTR_Template,
MP_TYPE_FLAG_NONE,
make_new, mp_obj_template_make_new,
print, mp_obj_template_print,
binary_op, mp_obj_template_binary_op,
attr, mp_obj_template_attr,
iter, mp_obj_template_iter
);
mp_obj_t mp_obj_new_template(size_t n_args, const mp_obj_t *args) {
mp_obj_template_t *o = mp_obj_malloc(mp_obj_template_t, &mp_type_template);
o->strings = args[0];
if (n_args == 2) {
// Unpack interpolations from second argument (which is a tuple).
mp_obj_t *iargs;
mp_obj_get_array(args[1], &n_args, &iargs);
args = iargs;
} else {
// Unpack interpolations directly from arguments.
--n_args;
++args;
}
size_t n_interpolations = n_args / 4;
mp_obj_tuple_t *interpolations = MP_OBJ_TO_PTR(mp_obj_new_tuple(n_interpolations, NULL));
for (size_t i = 0; i < n_interpolations; ++i) {
interpolations->items[i] = mp_obj_new_interpolation(args[i * 4], args[i * 4 + 1], args[i * 4 + 2], args[i * 4 + 3]);
}
o->interpolations = MP_OBJ_FROM_PTR(interpolations);
return MP_OBJ_FROM_PTR(o);
}
/////////////////////////////////////////////////////////////////
static mp_obj_t mp_obj_interpolation_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *all_args) {
enum { ARG_value, ARG_expression, ARG_conversion, ARG_format_spec };
static const mp_arg_t allowed_args[] = {
{ MP_QSTR_value, MP_ARG_REQUIRED | MP_ARG_OBJ, {.u_obj = MP_OBJ_NULL} },
{ MP_QSTR_expression, MP_ARG_OBJ, {.u_rom_obj = MP_ROM_QSTR(MP_QSTR_)} },
{ MP_QSTR_conversion, MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE} },
{ MP_QSTR_format_spec, MP_ARG_OBJ, {.u_rom_obj = MP_ROM_QSTR(MP_QSTR_)} },
};
mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
mp_arg_parse_all_kw_array(n_args, n_kw, all_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
mp_obj_interpolation_t *self = mp_obj_malloc(mp_obj_interpolation_t, &mp_type_interpolation);
self->value = args[ARG_value].u_obj;
self->expression = args[ARG_expression].u_obj;
self->conversion = args[ARG_conversion].u_obj;
self->format_spec = args[ARG_format_spec].u_obj;
return MP_OBJ_FROM_PTR(self);
}
// Print an Interpolation as `Interpolation(<value>, <expression>, <conversion>, <format_spec>)`.
// The print kind is ignored; every field is printed with PRINT_REPR.
static void mp_obj_interpolation_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
    (void)kind;
    mp_obj_interpolation_t *interp = MP_OBJ_TO_PTR(self_in);
    const mp_obj_t fields[] = {
        interp->value,
        interp->expression,
        interp->conversion,
        interp->format_spec,
    };

    mp_printf(print, "%q(", MP_QSTR_Interpolation);
    for (size_t k = 0; k < MP_ARRAY_SIZE(fields); ++k) {
        if (k != 0) {
            mp_print_str(print, ", ");
        }
        mp_obj_print_helper(print, fields[k], PRINT_REPR);
    }
    mp_print_str(print, ")");
}
// Attribute handler for Interpolation.  Handles loads of `value`, `expression`,
// `conversion` and `format_spec`, returning the stored object.  For any other
// attribute, and for store/delete requests, dest is left untouched.
static void mp_obj_interpolation_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest) {
    if (dest[0] != MP_OBJ_NULL) {
        // Not a load: nothing is handled here.
        return;
    }

    mp_obj_interpolation_t *interp = MP_OBJ_TO_PTR(self_in);

    if (attr == MP_QSTR_value) {
        dest[0] = interp->value;
        return;
    }
    if (attr == MP_QSTR_expression) {
        dest[0] = interp->expression;
        return;
    }
    if (attr == MP_QSTR_conversion) {
        dest[0] = interp->conversion;
        return;
    }
    if (attr == MP_QSTR_format_spec) {
        dest[0] = interp->format_spec;
    }
}
// Allocate an Interpolation object and store the four given objects in it as-is.
static mp_obj_t mp_obj_new_interpolation(mp_obj_t value, mp_obj_t expression, mp_obj_t conversion, mp_obj_t format_spec) {
    mp_obj_interpolation_t *interp = mp_obj_malloc(mp_obj_interpolation_t, &mp_type_interpolation);

    interp->format_spec = format_spec;
    interp->conversion = conversion;
    interp->expression = expression;
    interp->value = value;

    return MP_OBJ_FROM_PTR(interp);
}
// The `Interpolation` type: constructor, repr printing and attribute loads.
MP_DEFINE_CONST_OBJ_TYPE(
mp_type_interpolation,
MP_QSTR_Interpolation,
MP_TYPE_FLAG_NONE,
make_new, mp_obj_interpolation_make_new,
print, mp_obj_interpolation_print,
attr, mp_obj_interpolation_attr
);
#endif // MICROPY_PY_TSTRINGS

View File

@@ -49,6 +49,7 @@ set(MICROPY_SOURCE_PY
${MICROPY_PY_DIR}/modio.c
${MICROPY_PY_DIR}/modmath.c
${MICROPY_PY_DIR}/modmicropython.c
${MICROPY_PY_DIR}/modstring.c
${MICROPY_PY_DIR}/modstruct.c
${MICROPY_PY_DIR}/modsys.c
${MICROPY_PY_DIR}/modthread.c
@@ -106,6 +107,7 @@ set(MICROPY_SOURCE_PY
${MICROPY_PY_DIR}/objstr.c
${MICROPY_PY_DIR}/objstringio.c
${MICROPY_PY_DIR}/objstrunicode.c
${MICROPY_PY_DIR}/objtemplate.c
${MICROPY_PY_DIR}/objtuple.c
${MICROPY_PY_DIR}/objtype.c
${MICROPY_PY_DIR}/objzip.c

View File

@@ -180,6 +180,7 @@ PY_CORE_O_BASENAME = $(addprefix py/,\
objstr.o \
objstrunicode.o \
objstringio.o \
objtemplate.o \
objtuple.o \
objtype.o \
objzip.o \
@@ -198,6 +199,7 @@ PY_CORE_O_BASENAME = $(addprefix py/,\
modmath.o \
modcmath.o \
modmicropython.o \
modstring.o \
modstruct.o \
modsys.o \
moderrno.o \

View File

@@ -76,3 +76,7 @@ Q(/rom/lib)
#if MICROPY_ENABLE_PYSTACK
Q(pystack exhausted)
#endif
#if MICROPY_PY_TSTRINGS
Q(string.templatelib)
#endif

View File

@@ -73,9 +73,9 @@ example_package ffi framebuf
gc hashlib heapq io
json machine marshal math
os platform random re
select socket struct sys
termios time tls uctypes
vfs websocket
select socket string struct
sys termios time tls
uctypes vfs websocket
me
micropython machine marshal math