From b47ea4eadd53374b74bf8fcbfa6580c89e2650f9 Mon Sep 17 00:00:00 2001 From: Damien George Date: Sat, 20 Dec 2014 18:37:50 +0000 Subject: [PATCH] py: Add blank and ident flags to grammar rules to simplify parser. This saves around 100 bytes code space on stmhal, more on unix. --- py/grammar.h | 38 +++++++++++++++++++------------------- py/parse.c | 23 ++++++++++++++--------- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/py/grammar.h b/py/grammar.h index 962f654570..15c4e98109 100644 --- a/py/grammar.h +++ b/py/grammar.h @@ -59,16 +59,16 @@ DEF_RULE(decorator, nc, and(4), tok(DEL_AT), rule(dotted_name), opt_rule(trailer DEF_RULE(decorators, nc, one_or_more, rule(decorator)) DEF_RULE(decorated, c(decorated), and(2), rule(decorators), rule(decorated_body)) DEF_RULE(decorated_body, nc, or(2), rule(classdef), rule(funcdef)) -DEF_RULE(funcdef, c(funcdef), and(8), tok(KW_DEF), tok(NAME), tok(DEL_PAREN_OPEN), opt_rule(typedargslist), tok(DEL_PAREN_CLOSE), opt_rule(funcdefrettype), tok(DEL_COLON), rule(suite)) -DEF_RULE(funcdefrettype, nc, and(2), tok(DEL_MINUS_MORE), rule(test)) +DEF_RULE(funcdef, c(funcdef), blank | and(8), tok(KW_DEF), tok(NAME), tok(DEL_PAREN_OPEN), opt_rule(typedargslist), tok(DEL_PAREN_CLOSE), opt_rule(funcdefrettype), tok(DEL_COLON), rule(suite)) +DEF_RULE(funcdefrettype, nc, ident | and(2), tok(DEL_MINUS_MORE), rule(test)) // TODO typedargslist lets through more than is allowed DEF_RULE(typedargslist, nc, list_with_end, rule(typedargslist_item), tok(DEL_COMMA)) DEF_RULE(typedargslist_item, nc, or(3), rule(typedargslist_name), rule(typedargslist_star), rule(typedargslist_dbl_star)) -DEF_RULE(typedargslist_name, nc, and(3), tok(NAME), opt_rule(typedargslist_colon), opt_rule(typedargslist_equal)) +DEF_RULE(typedargslist_name, nc, ident | and(3), tok(NAME), opt_rule(typedargslist_colon), opt_rule(typedargslist_equal)) DEF_RULE(typedargslist_star, nc, and(2), tok(OP_STAR), opt_rule(tfpdef)) DEF_RULE(typedargslist_dbl_star, nc, and(3), 
tok(OP_DBL_STAR), tok(NAME), opt_rule(typedargslist_colon)) -DEF_RULE(typedargslist_colon, nc, and(2), tok(DEL_COLON), rule(test)) -DEF_RULE(typedargslist_equal, nc, and(2), tok(DEL_EQUAL), rule(test)) +DEF_RULE(typedargslist_colon, nc, ident | and(2), tok(DEL_COLON), rule(test)) +DEF_RULE(typedargslist_equal, nc, ident | and(2), tok(DEL_EQUAL), rule(test)) DEF_RULE(tfpdef, nc, and(2), tok(NAME), opt_rule(typedargslist_colon)) // TODO varargslist lets through more than is allowed DEF_RULE(varargslist, nc, list_with_end, rule(varargslist_item), tok(DEL_COMMA)) @@ -77,7 +77,7 @@ DEF_RULE(varargslist_name, nc, and(2), tok(NAME), opt_rule(varargslist_equal)) DEF_RULE(varargslist_star, nc, and(2), tok(OP_STAR), opt_rule(vfpdef)) DEF_RULE(varargslist_dbl_star, nc, and(2), tok(OP_DBL_STAR), tok(NAME)) DEF_RULE(varargslist_equal, nc, and(2), tok(DEL_EQUAL), rule(test)) -DEF_RULE(vfpdef, nc, and(1), tok(NAME)) +DEF_RULE(vfpdef, nc, ident | and(1), tok(NAME)) // stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | simple_stmt @@ -123,7 +123,7 @@ DEF_RULE(return_stmt, c(return_stmt), and(2), tok(KW_RETURN), opt_rule(testlist) DEF_RULE(yield_stmt, c(yield_stmt), and(1), rule(yield_expr)) DEF_RULE(raise_stmt, c(raise_stmt), and(2), tok(KW_RAISE), opt_rule(raise_stmt_arg)) DEF_RULE(raise_stmt_arg, nc, and(2), rule(test), opt_rule(raise_stmt_from)) -DEF_RULE(raise_stmt_from, nc, and(2), tok(KW_FROM), rule(test)) +DEF_RULE(raise_stmt_from, nc, ident | and(2), tok(KW_FROM), rule(test)) // import_stmt: import_name | import_from // import_name: 'import' dotted_as_names @@ -143,12 +143,12 @@ DEF_RULE(import_from, c(import_from), and(4), tok(KW_FROM), rule(import_from_2), DEF_RULE(import_from_2, nc, or(2), rule(dotted_name), rule(import_from_2b)) DEF_RULE(import_from_2b, nc, and(2), rule(one_or_more_period_or_ellipsis), opt_rule(dotted_name)) DEF_RULE(import_from_3, nc, or(3), tok(OP_STAR), rule(import_as_names_paren), rule(import_as_names)) 
-DEF_RULE(import_as_names_paren, nc, and(3), tok(DEL_PAREN_OPEN), rule(import_as_names), tok(DEL_PAREN_CLOSE)) +DEF_RULE(import_as_names_paren, nc, ident | and(3), tok(DEL_PAREN_OPEN), rule(import_as_names), tok(DEL_PAREN_CLOSE)) DEF_RULE(one_or_more_period_or_ellipsis, nc, one_or_more, rule(period_or_ellipsis)) DEF_RULE(period_or_ellipsis, nc, or(2), tok(DEL_PERIOD), tok(ELLIPSIS)) DEF_RULE(import_as_name, nc, and(2), tok(NAME), opt_rule(as_name)) DEF_RULE(dotted_as_name, nc, and(2), rule(dotted_name), opt_rule(as_name)) -DEF_RULE(as_name, nc, and(2), tok(KW_AS), tok(NAME)) +DEF_RULE(as_name, nc, ident | and(2), tok(KW_AS), tok(NAME)) DEF_RULE(import_as_names, nc, list_with_end, rule(import_as_name), tok(DEL_COMMA)) DEF_RULE(dotted_as_names, nc, list, rule(dotted_as_name), tok(DEL_COMMA)) DEF_RULE(dotted_name, nc, list, tok(NAME), tok(DEL_PERIOD)) @@ -156,7 +156,7 @@ DEF_RULE(global_stmt, c(global_stmt), and(2), tok(KW_GLOBAL), rule(name_list)) DEF_RULE(nonlocal_stmt, c(nonlocal_stmt), and(2), tok(KW_NONLOCAL), rule(name_list)) DEF_RULE(name_list, nc, list, tok(NAME), tok(DEL_COMMA)) DEF_RULE(assert_stmt, c(assert_stmt), and(3), tok(KW_ASSERT), rule(test), opt_rule(assert_stmt_extra)) -DEF_RULE(assert_stmt_extra, nc, and(2), tok(DEL_COMMA), rule(test)) +DEF_RULE(assert_stmt_extra, nc, ident | and(2), tok(DEL_COMMA), rule(test)) // compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated // if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] @@ -182,11 +182,11 @@ DEF_RULE(try_stmt_except, nc, and(4), tok(KW_EXCEPT), opt_rule(try_stmt_as_name) DEF_RULE(try_stmt_as_name, nc, and(2), rule(test), opt_rule(as_name)) DEF_RULE(try_stmt_except_list, nc, one_or_more, rule(try_stmt_except)) DEF_RULE(try_stmt_finally, nc, and(3), tok(KW_FINALLY), tok(DEL_COLON), rule(suite)) -DEF_RULE(else_stmt, nc, and(3), tok(KW_ELSE), tok(DEL_COLON), rule(suite)) +DEF_RULE(else_stmt, nc, ident | and(3), tok(KW_ELSE), 
tok(DEL_COLON), rule(suite)) DEF_RULE(with_stmt, c(with_stmt), and(4), tok(KW_WITH), rule(with_stmt_list), tok(DEL_COLON), rule(suite)) DEF_RULE(with_stmt_list, nc, list, rule(with_item), tok(DEL_COMMA)) DEF_RULE(with_item, nc, and(2), rule(test), opt_rule(with_item_as)) -DEF_RULE(with_item_as, nc, and(2), tok(KW_AS), rule(expr)) +DEF_RULE(with_item_as, nc, ident | and(2), tok(KW_AS), rule(expr)) DEF_RULE(suite, nc, or(2), rule(suite_block), rule(simple_stmt)) DEF_RULE(suite_block, nc, and(4), tok(NEWLINE), tok(INDENT), rule(suite_block_stmts), tok(DEDENT)) DEF_RULE(suite_block_stmts, c(generic_all_nodes), one_or_more, rule(stmt)) @@ -200,8 +200,8 @@ DEF_RULE(test, nc, or(2), rule(lambdef), rule(test_if_expr)) DEF_RULE(test_if_expr, c(test_if_expr), and(2), rule(or_test), opt_rule(test_if_else)) DEF_RULE(test_if_else, nc, and(4), tok(KW_IF), rule(or_test), tok(KW_ELSE), rule(test)) DEF_RULE(test_nocond, nc, or(2), rule(lambdef_nocond), rule(or_test)) -DEF_RULE(lambdef, c(lambdef), and(4), tok(KW_LAMBDA), opt_rule(varargslist), tok(DEL_COLON), rule(test)) -DEF_RULE(lambdef_nocond, c(lambdef), and(4), tok(KW_LAMBDA), opt_rule(varargslist), tok(DEL_COLON), rule(test_nocond)) +DEF_RULE(lambdef, c(lambdef), blank | and(4), tok(KW_LAMBDA), opt_rule(varargslist), tok(DEL_COLON), rule(test)) +DEF_RULE(lambdef_nocond, c(lambdef), blank | and(4), tok(KW_LAMBDA), opt_rule(varargslist), tok(DEL_COLON), rule(test_nocond)) // or_test: and_test ('or' and_test)* // and_test: not_test ('and' not_test)* @@ -258,7 +258,7 @@ DEF_RULE(atom_brace, c(atom_brace), and(3), tok(DEL_BRACE_OPEN), opt_rule(dictor DEF_RULE(testlist_comp, nc, and(2), rule(testlist_comp_2), opt_rule(testlist_comp_3)) DEF_RULE(testlist_comp_2, nc, or(2), rule(star_expr), rule(test)) DEF_RULE(testlist_comp_3, nc, or(2), rule(comp_for), rule(testlist_comp_3b)) -DEF_RULE(testlist_comp_3b, nc, and(2), tok(DEL_COMMA), opt_rule(testlist_comp_3c)) +DEF_RULE(testlist_comp_3b, nc, ident | and(2), tok(DEL_COMMA), 
opt_rule(testlist_comp_3c)) DEF_RULE(testlist_comp_3c, nc, list_with_end, rule(testlist_comp_2), tok(DEL_COMMA)) DEF_RULE(trailer, nc, or(3), rule(trailer_paren), rule(trailer_bracket), rule(trailer_period)) DEF_RULE(trailer_paren, c(trailer_paren), and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE)) @@ -288,15 +288,15 @@ DEF_RULE(testlist, c(generic_tuple), list_with_end, rule(test), tok(DEL_COMMA)) // TODO dictorsetmaker lets through more than is allowed DEF_RULE(dictorsetmaker, nc, and(2), rule(dictorsetmaker_item), opt_rule(dictorsetmaker_tail)) DEF_RULE(dictorsetmaker_item, c(dictorsetmaker_item), and(2), rule(test), opt_rule(dictorsetmaker_colon)) -DEF_RULE(dictorsetmaker_colon, nc, and(2), tok(DEL_COLON), rule(test)) +DEF_RULE(dictorsetmaker_colon, nc, ident | and(2), tok(DEL_COLON), rule(test)) DEF_RULE(dictorsetmaker_tail, nc, or(2), rule(comp_for), rule(dictorsetmaker_list)) DEF_RULE(dictorsetmaker_list, nc, and(2), tok(DEL_COMMA), opt_rule(dictorsetmaker_list2)) DEF_RULE(dictorsetmaker_list2, nc, list_with_end, rule(dictorsetmaker_item), tok(DEL_COMMA)) // classdef: 'class' NAME ['(' [arglist] ')'] ':' suite -DEF_RULE(classdef, c(classdef), and(5), tok(KW_CLASS), tok(NAME), opt_rule(classdef_2), tok(DEL_COLON), rule(suite)) -DEF_RULE(classdef_2, nc, and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE)) +DEF_RULE(classdef, c(classdef), blank | and(5), tok(KW_CLASS), tok(NAME), opt_rule(classdef_2), tok(DEL_COLON), rule(suite)) +DEF_RULE(classdef_2, nc, ident | and(3), tok(DEL_PAREN_OPEN), opt_rule(arglist), tok(DEL_PAREN_CLOSE)) // arglist: (argument ',')* (argument [','] | '*' test (',' argument)* [',' '**' test] | '**' test) @@ -317,7 +317,7 @@ DEF_RULE(argument, nc, and(2), rule(test), opt_rule(argument_2)) DEF_RULE(argument_2, nc, or(2), rule(comp_for), rule(argument_3)) DEF_RULE(argument_3, nc, and(2), tok(DEL_EQUAL), rule(test)) DEF_RULE(comp_iter, nc, or(2), rule(comp_for), rule(comp_if)) -DEF_RULE(comp_for, nc, 
and(5), tok(KW_FOR), rule(exprlist), tok(KW_IN), rule(or_test), opt_rule(comp_iter)) +DEF_RULE(comp_for, nc, blank | and(5), tok(KW_FOR), rule(exprlist), tok(KW_IN), rule(or_test), opt_rule(comp_iter)) DEF_RULE(comp_if, nc, and(3), tok(KW_IF), rule(test_nocond), opt_rule(comp_iter)) // # not used in grammar, but may appear in "node" passed from Parser to Compiler diff --git a/py/parse.c b/py/parse.c index 2179f2878c..07ceceeeb7 100644 --- a/py/parse.c +++ b/py/parse.c @@ -38,13 +38,14 @@ #include "parse.h" #include "smallint.h" -#define RULE_ACT_KIND_MASK (0xf0) #define RULE_ACT_ARG_MASK (0x0f) +#define RULE_ACT_KIND_MASK (0x30) +#define RULE_ACT_ALLOW_IDENT (0x40) +#define RULE_ACT_ADD_BLANK (0x80) #define RULE_ACT_OR (0x10) #define RULE_ACT_AND (0x20) #define RULE_ACT_LIST (0x30) -#define RULE_ARG_BLANK (0x0000) #define RULE_ARG_KIND_MASK (0xf000) #define RULE_ARG_ARG_MASK (0x0fff) #define RULE_ARG_TOK (0x1000) @@ -52,7 +53,7 @@ #define RULE_ARG_OPT_TOK (0x3000) #define RULE_ARG_OPT_RULE (0x4000) -#define ADD_BLANK_NODE(rule_id) ((rule_id) == RULE_funcdef || (rule_id) == RULE_classdef || (rule_id) == RULE_comp_for || (rule_id) == RULE_lambdef || (rule_id) == RULE_lambdef_nocond) +#define ADD_BLANK_NODE(rule) (((rule)->act & RULE_ACT_ADD_BLANK) != 0) // (un)comment to use rule names; for debugging //#define USE_RULE_NAME (1) @@ -75,6 +76,8 @@ enum { RULE_string, // special node for non-interned string }; +#define ident (RULE_ACT_ALLOW_IDENT) +#define blank (RULE_ACT_ADD_BLANK) #define or(n) (RULE_ACT_OR | n) #define and(n) (RULE_ACT_AND | n) #define one_or_more (RULE_ACT_LIST | 2) @@ -181,7 +184,7 @@ void mp_parse_node_free(mp_parse_node_t pn) { if (rule_id == RULE_string) { m_del(char, (char*)pns->nodes[0], (mp_uint_t)pns->nodes[1]); } else { - bool adjust = ADD_BLANK_NODE(rule_id); + bool adjust = ADD_BLANK_NODE(rules[rule_id]); if (adjust) { n--; } @@ -573,15 +576,17 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p emit_rule = 
true; } - // never emit these rules if they have only 1 argument - // NOTE: can't put atom_paren here because we need it to distinguisg, for example, [a,b] from [(a,b)] - // TODO possibly put varargslist_name, varargslist_equal here as well - if (rule->rule_id == RULE_else_stmt || rule->rule_id == RULE_testlist_comp_3b || rule->rule_id == RULE_import_as_names_paren || rule->rule_id == RULE_typedargslist_name || rule->rule_id == RULE_typedargslist_colon || rule->rule_id == RULE_typedargslist_equal || rule->rule_id == RULE_dictorsetmaker_colon || rule->rule_id == RULE_classdef_2 || rule->rule_id == RULE_with_item_as || rule->rule_id == RULE_assert_stmt_extra || rule->rule_id == RULE_as_name || rule->rule_id == RULE_raise_stmt_from || rule->rule_id == RULE_vfpdef || rule->rule_id == RULE_funcdefrettype) { + // if a rule has the RULE_ACT_ALLOW_IDENT bit set then this + // rule should not be emitted if it has only 1 argument + // NOTE: can't set this flag for atom_paren because we need it + // to distinguish, for example, [a,b] from [(a,b)] + // TODO possibly set for: varargslist_name, varargslist_equal + if (rule->act & RULE_ACT_ALLOW_IDENT) { emit_rule = false; } // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data) - if (ADD_BLANK_NODE(rule->rule_id)) { + if (ADD_BLANK_NODE(rule)) { emit_rule = true; push_result_node(&parser, MP_PARSE_NODE_NULL); i += 1;