[Python-checkins] Refactor parser compilation units into specific components (GH-29676)

pablogsal webhook-mailer at python.org
Sat Nov 20 20:09:03 EST 2021


https://github.com/python/cpython/commit/c9c4444d9f11ae80c2c4cc7d40b6718419d81a97
commit: c9c4444d9f11ae80c2c4cc7d40b6718419d81a97
branch: main
author: Pablo Galindo Salgado <Pablogsal at gmail.com>
committer: pablogsal <Pablogsal at gmail.com>
date: 2021-11-21T01:08:50Z
summary:

Refactor parser compilation units into specific components (GH-29676)

files:
A Parser/action_helpers.c
A Parser/pegen_errors.c
M Makefile.pre.in
M PCbuild/_freeze_module.vcxproj
M PCbuild/pythoncore.vcxproj
M PCbuild/pythoncore.vcxproj.filters
M Parser/pegen.c
M Parser/pegen.h
M Tools/peg_generator/Makefile
M Tools/peg_generator/pegen/build.py

diff --git a/Makefile.pre.in b/Makefile.pre.in
index 11ffdaabc617b..fc4def8f5d12f 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -331,6 +331,8 @@ LIBFFI_INCLUDEDIR=	@LIBFFI_INCLUDEDIR@
 
 PEGEN_OBJS=		\
 		Parser/pegen.o \
+		Parser/pegen_errors.o \
+		Parser/action_helpers.o \
 		Parser/parser.o \
 		Parser/string_parser.o \
 		Parser/peg_api.o
diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj
index d33e07c54b8c9..6a91776b9d8e9 100644
--- a/PCbuild/_freeze_module.vcxproj
+++ b/PCbuild/_freeze_module.vcxproj
@@ -163,6 +163,8 @@
     <ClCompile Include="..\Parser\parser.c" />
     <ClCompile Include="..\Parser\peg_api.c" />
     <ClCompile Include="..\Parser\pegen.c" />
+    <ClCompile Include="..\Parser\pegen_errors.c" />
+    <ClCompile Include="..\Parser\action_helpers.c" />
     <ClCompile Include="..\Parser\string_parser.c" />
     <ClCompile Include="..\Parser\token.c" />
     <ClCompile Include="..\Parser\tokenizer.c" />
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index b65998186927b..70f05563fa391 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -433,6 +433,8 @@
     <ClCompile Include="..\Parser\tokenizer.c" />
     <ClCompile Include="..\Parser\token.c" />
     <ClCompile Include="..\Parser\pegen.c" />
+    <ClCompile Include="..\Parser\pegen_errors.c" />
+    <ClCompile Include="..\Parser\action_helpers.c" />
     <ClCompile Include="..\Parser\parser.c" />
     <ClCompile Include="..\Parser\string_parser.c" />
     <ClCompile Include="..\Parser\peg_api.c" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 62aab5bccf9ef..b19f0279ec311 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -1205,6 +1205,12 @@
     <ClCompile Include="..\Parser\pegen.c">
       <Filter>Parser</Filter>
     </ClCompile>
+    <ClCompile Include="..\Parser\pegen_errors.c">
+      <Filter>Parser</Filter>
+    </ClCompile>
+    <ClCompile Include="..\Parser\action_helpers.c">
+      <Filter>Parser</Filter>
+    </ClCompile>
     <ClCompile Include="..\Parser\peg_api.c">
       <Filter>Parser</Filter>
     </ClCompile>
diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c
new file mode 100644
index 0000000000000..e5d7b667f7f5e
--- /dev/null
+++ b/Parser/action_helpers.c
@@ -0,0 +1,1289 @@
+#include <Python.h>
+
+#include "pegen.h"
+#include "string_parser.h"
+
+/* Builds the empty-string identifier used by the placeholder Name node. */
+static PyObject *
+_create_dummy_identifier(Parser *p)
+{
+    PyObject *empty_id = _PyPegen_new_identifier(p, "");
+    return empty_id;
+}
+
+/* Returns a placeholder Name node, building it on first use.
+ *
+ * The node is created once with _PyAST_Name in p->arena and the pointer is
+ * kept in a function-local static, so later calls return that same pointer
+ * whichever Parser is passed in.  Returns NULL if the identifier (or the
+ * node) cannot be created; in that case nothing is cached.
+ *
+ * NOTE(review): the cached node lives in the arena of the first parser that
+ * gets here -- confirm that callers never dereference it once that arena
+ * has been released. */
+void *
+_PyPegen_dummy_name(Parser *p, ...)
+{
+    static void *cache = NULL;
+
+    if (cache == NULL) {
+        PyObject *id = _create_dummy_identifier(p);
+        if (id == NULL) {
+            return NULL;
+        }
+        cache = _PyAST_Name(id, Load, 1, 0, 1, 0, p->arena);
+    }
+    return cache;
+}
+
+/* Creates a one-element asdl_seq* whose only item is `a`.
+ * Returns NULL if the sequence cannot be allocated. */
+asdl_seq *
+_PyPegen_singleton_seq(Parser *p, void *a)
+{
+    assert(a != NULL);
+    asdl_seq *result = (asdl_seq*)_Py_asdl_generic_seq_new(1, p->arena);
+    if (result != NULL) {
+        asdl_seq_SET_UNTYPED(result, 0, a);
+    }
+    return result;
+}
+
+/* Creates a copy of seq with `a` placed before its first element.
+ * A NULL seq yields a single-element sequence holding `a`.
+ * Returns NULL if the new sequence cannot be allocated. */
+asdl_seq *
+_PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq)
+{
+    assert(a != NULL);
+    if (seq == NULL) {
+        return _PyPegen_singleton_seq(p, a);
+    }
+
+    Py_ssize_t old_len = asdl_seq_LEN(seq);
+    asdl_seq *result = (asdl_seq*)_Py_asdl_generic_seq_new(old_len + 1, p->arena);
+    if (result == NULL) {
+        return NULL;
+    }
+
+    asdl_seq_SET_UNTYPED(result, 0, a);
+    /* Shift every original element one slot to the right. */
+    for (Py_ssize_t src = 0; src < old_len; src++) {
+        asdl_seq_SET_UNTYPED(result, src + 1, asdl_seq_GET_UNTYPED(seq, src));
+    }
+    return result;
+}
+
+/* Creates a copy of seq with `a` added after its last element.
+ * A NULL seq yields a single-element sequence holding `a`.
+ * Returns NULL if the new sequence cannot be allocated. */
+asdl_seq *
+_PyPegen_seq_append_to_end(Parser *p, asdl_seq *seq, void *a)
+{
+    assert(a != NULL);
+    if (seq == NULL) {
+        return _PyPegen_singleton_seq(p, a);
+    }
+
+    Py_ssize_t old_len = asdl_seq_LEN(seq);
+    asdl_seq *result = (asdl_seq*)_Py_asdl_generic_seq_new(old_len + 1, p->arena);
+    if (result == NULL) {
+        return NULL;
+    }
+
+    for (Py_ssize_t pos = 0; pos < old_len; pos++) {
+        asdl_seq_SET_UNTYPED(result, pos, asdl_seq_GET_UNTYPED(seq, pos));
+    }
+    /* The extra slot at the end receives the appended item. */
+    asdl_seq_SET_UNTYPED(result, old_len, a);
+    return result;
+}
+
+/* Sums the lengths of all the inner sequences stored in seqs. */
+static Py_ssize_t
+_get_flattened_seq_size(asdl_seq *seqs)
+{
+    Py_ssize_t total = 0;
+    Py_ssize_t outer_len = asdl_seq_LEN(seqs);
+    for (Py_ssize_t idx = 0; idx < outer_len; idx++) {
+        asdl_seq *inner = asdl_seq_GET_UNTYPED(seqs, idx);
+        total += asdl_seq_LEN(inner);
+    }
+    return total;
+}
+
+/* Flattens an asdl_seq* of asdl_seq*s into one newly allocated asdl_seq*.
+ *
+ * The elements of each inner sequence are copied, in order, into a single
+ * sequence allocated in p->arena.  The combined size is asserted to be
+ * greater than zero.  Returns NULL if the new sequence cannot be allocated. */
+asdl_seq *
+_PyPegen_seq_flatten(Parser *p, asdl_seq *seqs)
+{
+    Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs);
+    assert(flattened_seq_size > 0);
+
+    asdl_seq *flattened_seq = (asdl_seq*)_Py_asdl_generic_seq_new(flattened_seq_size, p->arena);
+    if (!flattened_seq) {
+        return NULL;
+    }
+
+    /* Py_ssize_t rather than int: this index counts up to (and is compared
+     * with) flattened_seq_size, which is a Py_ssize_t. */
+    Py_ssize_t flattened_seq_idx = 0;
+    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
+        asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
+        for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) {
+            asdl_seq_SET_UNTYPED(flattened_seq, flattened_seq_idx++, asdl_seq_GET_UNTYPED(inner_seq, j));
+        }
+    }
+    assert(flattened_seq_idx == flattened_seq_size);
+
+    return flattened_seq;
+}
+
+/* Returns the element stored at the last index (length - 1) of seq. */
+void *
+_PyPegen_seq_last_item(asdl_seq *seq)
+{
+    Py_ssize_t last_idx = asdl_seq_LEN(seq) - 1;
+    return asdl_seq_GET_UNTYPED(seq, last_idx);
+}
+
+/* Returns the element stored at index 0 of seq. */
+void *
+_PyPegen_seq_first_item(asdl_seq *seq)
+{
+    void *head = asdl_seq_GET_UNTYPED(seq, 0);
+    return head;
+}
+
+/* Creates a new name of the form <first_name>.<second_name>.
+ *
+ * Both arguments must be non-NULL Name nodes.  The joined identifier is
+ * formatted directly from the two identifier objects, interned, and
+ * registered with p->arena; if registration fails the reference is dropped.
+ * The resulting Name node has Load context and spans from the start of
+ * first_name to the end of second_name.  Returns NULL on failure.
+ *
+ * PyUnicode_FromFormat replaces the earlier hand-rolled bytes buffer, which
+ * recomputed strlen() several times and had an unreachable NULL check that
+ * would have leaked the temporary bytes object. */
+expr_ty
+_PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
+{
+    assert(first_name != NULL && second_name != NULL);
+
+    PyObject *uni = PyUnicode_FromFormat("%U.%U",
+                                         first_name->v.Name.id,
+                                         second_name->v.Name.id);
+    if (!uni) {
+        return NULL;
+    }
+    PyUnicode_InternInPlace(&uni);
+    if (_PyArena_AddPyObject(p->arena, uni) < 0) {
+        Py_DECREF(uni);
+        return NULL;
+    }
+
+    return _PyAST_Name(uni, Load, EXTRA_EXPR(first_name, second_name));
+}
+
+/* Counts the total number of dots in seq's tokens: an ELLIPSIS token adds
+ * three, a DOT token adds one.  Any other token type is treated as
+ * unreachable. */
+int
+_PyPegen_seq_count_dots(asdl_seq *seq)
+{
+    int dots = 0;
+    Py_ssize_t count = asdl_seq_LEN(seq);
+    for (Py_ssize_t idx = 0; idx < count; idx++) {
+        Token *tok = asdl_seq_GET_UNTYPED(seq, idx);
+        if (tok->type == ELLIPSIS) {
+            dots += 3;
+        }
+        else if (tok->type == DOT) {
+            dots += 1;
+        }
+        else {
+            Py_UNREACHABLE();
+        }
+    }
+
+    return dots;
+}
+
+/* Creates an alias node whose name is the interned string "*" and whose
+ * asname is NULL.  The string is registered with p->arena, while the alias
+ * itself is built with the `arena` argument.  Returns NULL on failure. */
+alias_ty
+_PyPegen_alias_for_star(Parser *p, int lineno, int col_offset, int end_lineno,
+                        int end_col_offset, PyArena *arena) {
+    PyObject *star = PyUnicode_InternFromString("*");
+    if (star == NULL) {
+        return NULL;
+    }
+    int status = _PyArena_AddPyObject(p->arena, star);
+    if (status < 0) {
+        Py_DECREF(star);
+        return NULL;
+    }
+    return _PyAST_alias(star, NULL, lineno, col_offset, end_lineno, end_col_offset, arena);
+}
+
+/* Creates a new identifier sequence holding the v.Name.id of every
+ * expression in seq.  seq is asserted to be non-empty.
+ * Returns NULL if the new sequence cannot be allocated. */
+asdl_identifier_seq *
+_PyPegen_map_names_to_ids(Parser *p, asdl_expr_seq *seq)
+{
+    Py_ssize_t count = asdl_seq_LEN(seq);
+    assert(count > 0);
+
+    asdl_identifier_seq *ids = _Py_asdl_identifier_seq_new(count, p->arena);
+    if (ids == NULL) {
+        return NULL;
+    }
+    for (Py_ssize_t idx = 0; idx < count; idx++) {
+        expr_ty name = asdl_seq_GET(seq, idx);
+        asdl_seq_SET(ids, idx, name->v.Name.id);
+    }
+    return ids;
+}
+
+/* Allocates a CmpopExprPair in p->arena and fills in its cmpop and expr
+ * fields.  Returns NULL if the allocation fails. */
+CmpopExprPair *
+_PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr)
+{
+    assert(expr != NULL);
+    CmpopExprPair *pair = _PyArena_Malloc(p->arena, sizeof(CmpopExprPair));
+    if (pair != NULL) {
+        pair->cmpop = cmpop;
+        pair->expr = expr;
+    }
+    return pair;
+}
+
+/* Collects the cmpop field of every CmpopExprPair* in seq into a new int
+ * sequence.  seq is asserted to be non-empty.
+ * Returns NULL if the new sequence cannot be allocated. */
+asdl_int_seq *
+_PyPegen_get_cmpops(Parser *p, asdl_seq *seq)
+{
+    Py_ssize_t count = asdl_seq_LEN(seq);
+    assert(count > 0);
+
+    asdl_int_seq *ops = _Py_asdl_int_seq_new(count, p->arena);
+    if (ops == NULL) {
+        return NULL;
+    }
+    for (Py_ssize_t idx = 0; idx < count; idx++) {
+        CmpopExprPair *entry = asdl_seq_GET_UNTYPED(seq, idx);
+        asdl_seq_SET(ops, idx, entry->cmpop);
+    }
+    return ops;
+}
+
+/* Collects the expr field of every CmpopExprPair* in seq into a new
+ * expression sequence.  seq is asserted to be non-empty.
+ * Returns NULL if the new sequence cannot be allocated. */
+asdl_expr_seq *
+_PyPegen_get_exprs(Parser *p, asdl_seq *seq)
+{
+    Py_ssize_t count = asdl_seq_LEN(seq);
+    assert(count > 0);
+
+    asdl_expr_seq *exprs = _Py_asdl_expr_seq_new(count, p->arena);
+    if (exprs == NULL) {
+        return NULL;
+    }
+    for (Py_ssize_t idx = 0; idx < count; idx++) {
+        CmpopExprPair *entry = asdl_seq_GET_UNTYPED(seq, idx);
+        asdl_seq_SET(exprs, idx, entry->expr);
+    }
+    return exprs;
+}
+
+/* Creates a new expression sequence in which every element of seq has been
+ * passed through _PyPegen_set_expr_context with ctx.
+ * Returns NULL for an empty (or NULL) seq, and also when the new sequence
+ * cannot be allocated. */
+static asdl_expr_seq *
+_set_seq_context(Parser *p, asdl_expr_seq *seq, expr_context_ty ctx)
+{
+    Py_ssize_t count = asdl_seq_LEN(seq);
+    if (count == 0) {
+        return NULL;
+    }
+
+    asdl_expr_seq *converted = _Py_asdl_expr_seq_new(count, p->arena);
+    if (converted == NULL) {
+        return NULL;
+    }
+    for (Py_ssize_t idx = 0; idx < count; idx++) {
+        expr_ty original = asdl_seq_GET(seq, idx);
+        expr_ty updated = _PyPegen_set_expr_context(p, original, ctx);
+        asdl_seq_SET(converted, idx, updated);
+    }
+    return converted;
+}
+
+/* Builds a Name node with e's identifier and location but ctx as context. */
+static expr_ty
+_set_name_context(Parser *p, expr_ty e, expr_context_ty ctx)
+{
+    PyObject *id = e->v.Name.id;
+    return _PyAST_Name(id, ctx, EXTRA_EXPR(e, e));
+}
+
+/* Builds a Tuple node at e's location whose elements and own context are
+ * set to ctx. */
+static expr_ty
+_set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx)
+{
+    asdl_expr_seq *elts = _set_seq_context(p, e->v.Tuple.elts, ctx);
+    return _PyAST_Tuple(elts, ctx, EXTRA_EXPR(e, e));
+}
+
+/* Builds a List node at e's location whose elements and own context are
+ * set to ctx. */
+static expr_ty
+_set_list_context(Parser *p, expr_ty e, expr_context_ty ctx)
+{
+    asdl_expr_seq *elts = _set_seq_context(p, e->v.List.elts, ctx);
+    return _PyAST_List(elts, ctx, EXTRA_EXPR(e, e));
+}
+
+/* Builds a Subscript node with e's value, slice and location but ctx as
+ * context.  The value and slice are reused as they are. */
+static expr_ty
+_set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx)
+{
+    expr_ty value = e->v.Subscript.value;
+    expr_ty slice = e->v.Subscript.slice;
+    return _PyAST_Subscript(value, slice, ctx, EXTRA_EXPR(e, e));
+}
+
+/* Builds an Attribute node with e's value, attr and location but ctx as
+ * context.  The value is reused as it is. */
+static expr_ty
+_set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx)
+{
+    expr_ty value = e->v.Attribute.value;
+    PyObject *attr = e->v.Attribute.attr;
+    return _PyAST_Attribute(value, attr, ctx, EXTRA_EXPR(e, e));
+}
+
+/* Builds a Starred node at e's location with ctx as context; the starred
+ * value is itself converted with _PyPegen_set_expr_context. */
+static expr_ty
+_set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx)
+{
+    expr_ty inner = _PyPegen_set_expr_context(p, e->v.Starred.value, ctx);
+    return _PyAST_Starred(inner, ctx, EXTRA_EXPR(e, e));
+}
+
+/* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context.
+ *
+ * Name, Tuple, List, Subscript, Attribute and Starred nodes are rebuilt by
+ * the matching _set_*_context helper; any other kind of node is returned
+ * unchanged. */
+expr_ty
+_PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx)
+{
+    assert(expr != NULL);
+
+    switch (expr->kind) {
+        case Name_kind:
+            return _set_name_context(p, expr, ctx);
+        case Tuple_kind:
+            return _set_tuple_context(p, expr, ctx);
+        case List_kind:
+            return _set_list_context(p, expr, ctx);
+        case Subscript_kind:
+            return _set_subscript_context(p, expr, ctx);
+        case Attribute_kind:
+            return _set_attribute_context(p, expr, ctx);
+        case Starred_kind:
+            return _set_starred_context(p, expr, ctx);
+        default:
+            return expr;
+    }
+}
+
+/* Allocates a KeyValuePair (used when parsing a dict's key value pairs) in
+ * p->arena and stores key and value in it.
+ * Returns NULL if the allocation fails. */
+KeyValuePair *
+_PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value)
+{
+    KeyValuePair *pair = _PyArena_Malloc(p->arena, sizeof(KeyValuePair));
+    if (pair != NULL) {
+        pair->key = key;
+        pair->value = value;
+    }
+    return pair;
+}
+
+/* Builds a new expression sequence from the key field of every
+ * KeyValuePair* in seq.  Returns NULL if it cannot be allocated. */
+asdl_expr_seq *
+_PyPegen_get_keys(Parser *p, asdl_seq *seq)
+{
+    Py_ssize_t count = asdl_seq_LEN(seq);
+    asdl_expr_seq *keys = _Py_asdl_expr_seq_new(count, p->arena);
+    if (keys == NULL) {
+        return NULL;
+    }
+    for (Py_ssize_t idx = 0; idx < count; idx++) {
+        KeyValuePair *entry = asdl_seq_GET_UNTYPED(seq, idx);
+        asdl_seq_SET(keys, idx, entry->key);
+    }
+    return keys;
+}
+
+/* Builds a new expression sequence from the value field of every
+ * KeyValuePair* in seq.  Returns NULL if it cannot be allocated. */
+asdl_expr_seq *
+_PyPegen_get_values(Parser *p, asdl_seq *seq)
+{
+    Py_ssize_t count = asdl_seq_LEN(seq);
+    asdl_expr_seq *values = _Py_asdl_expr_seq_new(count, p->arena);
+    if (values == NULL) {
+        return NULL;
+    }
+    for (Py_ssize_t idx = 0; idx < count; idx++) {
+        KeyValuePair *entry = asdl_seq_GET_UNTYPED(seq, idx);
+        asdl_seq_SET(values, idx, entry->value);
+    }
+    return values;
+}
+
+/* Allocates a KeyPatternPair (used when parsing mapping & class patterns)
+ * in p->arena and stores key and pattern in it.
+ * Returns NULL if the allocation fails. */
+KeyPatternPair *
+_PyPegen_key_pattern_pair(Parser *p, expr_ty key, pattern_ty pattern)
+{
+    KeyPatternPair *pair = _PyArena_Malloc(p->arena, sizeof(KeyPatternPair));
+    if (pair != NULL) {
+        pair->key = key;
+        pair->pattern = pattern;
+    }
+    return pair;
+}
+
+/* Builds a new expression sequence from the key field of every
+ * KeyPatternPair* in seq.  Returns NULL if it cannot be allocated. */
+asdl_expr_seq *
+_PyPegen_get_pattern_keys(Parser *p, asdl_seq *seq)
+{
+    Py_ssize_t count = asdl_seq_LEN(seq);
+    asdl_expr_seq *keys = _Py_asdl_expr_seq_new(count, p->arena);
+    if (keys == NULL) {
+        return NULL;
+    }
+    for (Py_ssize_t idx = 0; idx < count; idx++) {
+        KeyPatternPair *entry = asdl_seq_GET_UNTYPED(seq, idx);
+        asdl_seq_SET(keys, idx, entry->key);
+    }
+    return keys;
+}
+
+/* Builds a new pattern sequence from the pattern field of every
+ * KeyPatternPair* in seq.  Returns NULL if it cannot be allocated. */
+asdl_pattern_seq *
+_PyPegen_get_patterns(Parser *p, asdl_seq *seq)
+{
+    Py_ssize_t count = asdl_seq_LEN(seq);
+    asdl_pattern_seq *patterns = _Py_asdl_pattern_seq_new(count, p->arena);
+    if (patterns == NULL) {
+        return NULL;
+    }
+    for (Py_ssize_t idx = 0; idx < count; idx++) {
+        KeyPatternPair *entry = asdl_seq_GET_UNTYPED(seq, idx);
+        asdl_seq_SET(patterns, idx, entry->pattern);
+    }
+    return patterns;
+}
+
+/* Allocates a NameDefaultPair in p->arena.  Its arg field is the result of
+ * _PyPegen_add_type_comment_to_arg(p, arg, tc) and its value field is
+ * `value`.  Returns NULL if the allocation fails. */
+NameDefaultPair *
+_PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value, Token *tc)
+{
+    NameDefaultPair *pair = _PyArena_Malloc(p->arena, sizeof(NameDefaultPair));
+    if (pair != NULL) {
+        pair->arg = _PyPegen_add_type_comment_to_arg(p, arg, tc);
+        pair->value = value;
+    }
+    return pair;
+}
+
+/* Allocates a SlashWithDefault in p->arena and stores the two given
+ * sequences in it.  Returns NULL if the allocation fails. */
+SlashWithDefault *
+_PyPegen_slash_with_default(Parser *p, asdl_arg_seq *plain_names, asdl_seq *names_with_defaults)
+{
+    SlashWithDefault *result = _PyArena_Malloc(p->arena, sizeof(SlashWithDefault));
+    if (result != NULL) {
+        result->plain_names = plain_names;
+        result->names_with_defaults = names_with_defaults;
+    }
+    return result;
+}
+
+/* Allocates a StarEtc in p->arena and stores vararg, kwonlyargs and kwarg
+ * in it.  Returns NULL if the allocation fails. */
+StarEtc *
+_PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg)
+{
+    StarEtc *result = _PyArena_Malloc(p->arena, sizeof(StarEtc));
+    if (result != NULL) {
+        result->vararg = vararg;
+        result->kwonlyargs = kwonlyargs;
+        result->kwarg = kwarg;
+    }
+    return result;
+}
+
+/* Creates a new asdl_seq* holding the elements of a followed by the
+ * elements of b.  The new sequence is allocated in p->arena; NULL is
+ * returned if that allocation fails. */
+asdl_seq *
+_PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b)
+{
+    Py_ssize_t first_len = asdl_seq_LEN(a);
+    Py_ssize_t second_len = asdl_seq_LEN(b);
+    asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(first_len + second_len, p->arena);
+    if (!new_seq) {
+        return NULL;
+    }
+
+    /* Destination indices are derived from the Py_ssize_t loop counters, so
+     * no separate (narrower) int counter is needed. */
+    for (Py_ssize_t i = 0; i < first_len; i++) {
+        asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(a, i));
+    }
+    for (Py_ssize_t i = 0; i < second_len; i++) {
+        asdl_seq_SET_UNTYPED(new_seq, first_len + i, asdl_seq_GET_UNTYPED(b, i));
+    }
+
+    return new_seq;
+}
+
+/* Builds a new arg sequence from the arg field of every NameDefaultPair*
+ * in names_with_defaults.  Returns NULL if it cannot be allocated. */
+static asdl_arg_seq*
+_get_names(Parser *p, asdl_seq *names_with_defaults)
+{
+    Py_ssize_t count = asdl_seq_LEN(names_with_defaults);
+    asdl_arg_seq *names = _Py_asdl_arg_seq_new(count, p->arena);
+    if (names == NULL) {
+        return NULL;
+    }
+    for (Py_ssize_t idx = 0; idx < count; idx++) {
+        NameDefaultPair *entry = asdl_seq_GET_UNTYPED(names_with_defaults, idx);
+        asdl_seq_SET(names, idx, entry->arg);
+    }
+    return names;
+}
+
+/* Builds a new expression sequence from the value field of every
+ * NameDefaultPair* in names_with_defaults.  Returns NULL if it cannot be
+ * allocated. */
+static asdl_expr_seq *
+_get_defaults(Parser *p, asdl_seq *names_with_defaults)
+{
+    Py_ssize_t count = asdl_seq_LEN(names_with_defaults);
+    asdl_expr_seq *defaults = _Py_asdl_expr_seq_new(count, p->arena);
+    if (defaults == NULL) {
+        return NULL;
+    }
+    for (Py_ssize_t idx = 0; idx < count; idx++) {
+        NameDefaultPair *entry = asdl_seq_GET_UNTYPED(names_with_defaults, idx);
+        asdl_seq_SET(defaults, idx, entry->value);
+    }
+    return defaults;
+}
+
+/* Stores the positional-only argument sequence in *posonlyargs.
+ *
+ * slash_without_default is used directly when given.  Otherwise, when
+ * slash_with_default is given, its plain names are joined with the names
+ * taken from its name/default pairs.  With neither, an empty sequence is
+ * created.  Returns 0 on success and -1 on failure. */
+static int
+_make_posonlyargs(Parser *p,
+                  asdl_arg_seq *slash_without_default,
+                  SlashWithDefault *slash_with_default,
+                  asdl_arg_seq **posonlyargs) {
+    if (slash_without_default != NULL) {
+        *posonlyargs = slash_without_default;
+        return 0;
+    }
+
+    if (slash_with_default == NULL) {
+        *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
+    }
+    else {
+        asdl_arg_seq *default_names =
+                _get_names(p, slash_with_default->names_with_defaults);
+        if (default_names == NULL) {
+            return -1;
+        }
+        *posonlyargs = (asdl_arg_seq*)_PyPegen_join_sequences(
+                p,
+                (asdl_seq*)slash_with_default->plain_names,
+                (asdl_seq*)default_names);
+    }
+
+    if (*posonlyargs == NULL) {
+        return -1;
+    }
+    return 0;
+}
+
+/* Stores the positional argument sequence in *posargs.
+ *
+ * The result is plain_names, the names taken from names_with_default, the
+ * two joined (plain names first), or an empty sequence, depending on which
+ * of the two inputs are non-NULL.  Returns 0 on success and -1 on failure. */
+static int
+_make_posargs(Parser *p,
+              asdl_arg_seq *plain_names,
+              asdl_seq *names_with_default,
+              asdl_arg_seq **posargs) {
+    if (names_with_default == NULL) {
+        if (plain_names != NULL) {
+            *posargs = plain_names;
+        }
+        else {
+            *posargs = _Py_asdl_arg_seq_new(0, p->arena);
+        }
+    }
+    else {
+        asdl_arg_seq *default_names = _get_names(p, names_with_default);
+        if (plain_names == NULL) {
+            *posargs = default_names;
+        }
+        else {
+            if (default_names == NULL) {
+                return -1;
+            }
+            *posargs = (asdl_arg_seq*)_PyPegen_join_sequences(
+                    p, (asdl_seq*)plain_names, (asdl_seq*)default_names);
+        }
+    }
+
+    if (*posargs == NULL) {
+        return -1;
+    }
+    return 0;
+}
+
+/* Stores the sequence of positional default values in *posdefaults.
+ *
+ * Defaults are taken from slash_with_default's name/default pairs and from
+ * names_with_default; when both are present the former come first.  With
+ * neither, an empty sequence is created.
+ * Returns 0 on success and -1 on failure. */
+static int
+_make_posdefaults(Parser *p,
+                  SlashWithDefault *slash_with_default,
+                  asdl_seq *names_with_default,
+                  asdl_expr_seq **posdefaults) {
+    if (slash_with_default == NULL && names_with_default == NULL) {
+        *posdefaults = _Py_asdl_expr_seq_new(0, p->arena);
+    }
+    else if (slash_with_default == NULL) {
+        *posdefaults = _get_defaults(p, names_with_default);
+    }
+    else if (names_with_default == NULL) {
+        *posdefaults = _get_defaults(p, slash_with_default->names_with_defaults);
+    }
+    else {
+        asdl_expr_seq *slash_values =
+                _get_defaults(p, slash_with_default->names_with_defaults);
+        if (slash_values == NULL) {
+            return -1;
+        }
+        asdl_expr_seq *plain_values = _get_defaults(p, names_with_default);
+        if (plain_values == NULL) {
+            return -1;
+        }
+        *posdefaults = (asdl_expr_seq*)_PyPegen_join_sequences(
+                p,
+                (asdl_seq*)slash_values,
+                (asdl_seq*)plain_values);
+    }
+
+    if (*posdefaults == NULL) {
+        return -1;
+    }
+    return 0;
+}
+
+/* Stores the keyword-only argument names in *kwonlyargs and their default
+ * values in *kwdefaults.
+ *
+ * Both are taken from star_etc->kwonlyargs when star_etc and that field are
+ * non-NULL; otherwise both become empty sequences.
+ * Returns 0 on success and -1 on failure. */
+static int
+_make_kwargs(Parser *p, StarEtc *star_etc,
+             asdl_arg_seq **kwonlyargs,
+             asdl_expr_seq **kwdefaults) {
+    asdl_seq *kwonly_pairs = (star_etc != NULL) ? star_etc->kwonlyargs : NULL;
+
+    *kwonlyargs = (kwonly_pairs != NULL)
+        ? _get_names(p, kwonly_pairs)
+        : _Py_asdl_arg_seq_new(0, p->arena);
+    if (*kwonlyargs == NULL) {
+        return -1;
+    }
+
+    *kwdefaults = (kwonly_pairs != NULL)
+        ? _get_defaults(p, kwonly_pairs)
+        : _Py_asdl_expr_seq_new(0, p->arena);
+    if (*kwdefaults == NULL) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Constructs an arguments_ty object out of all the parsed constructs in the
+ * parameters rule.
+ *
+ * The positional-only, positional, default, keyword-only and keyword-default
+ * sequences are assembled by the _make_* helpers; vararg and kwarg are read
+ * from star_etc when it is given.  Returns NULL if any helper fails. */
+arguments_ty
+_PyPegen_make_arguments(Parser *p, asdl_arg_seq *slash_without_default,
+                        SlashWithDefault *slash_with_default, asdl_arg_seq *plain_names,
+                        asdl_seq *names_with_default, StarEtc *star_etc)
+{
+    asdl_arg_seq *posonlyargs;
+    asdl_arg_seq *posargs;
+    asdl_expr_seq *posdefaults;
+    asdl_arg_seq *kwonlyargs;
+    asdl_expr_seq *kwdefaults;
+
+    if (_make_posonlyargs(p, slash_without_default, slash_with_default, &posonlyargs) == -1) {
+        return NULL;
+    }
+    if (_make_posargs(p, plain_names, names_with_default, &posargs) == -1) {
+        return NULL;
+    }
+    if (_make_posdefaults(p, slash_with_default, names_with_default, &posdefaults) == -1) {
+        return NULL;
+    }
+    if (_make_kwargs(p, star_etc, &kwonlyargs, &kwdefaults) == -1) {
+        return NULL;
+    }
+
+    /* Without a star_etc there is neither *args nor **kwargs. */
+    arg_ty vararg = (star_etc != NULL) ? star_etc->vararg : NULL;
+    arg_ty kwarg = (star_etc != NULL) ? star_etc->kwarg : NULL;
+
+    return _PyAST_arguments(posonlyargs, posargs, vararg, kwonlyargs,
+                            kwdefaults, kwarg, posdefaults, p->arena);
+}
+
+
+/* Constructs an arguments_ty whose five sequences are all empty and whose
+ * vararg and kwarg are NULL; it gets used when a function accepts no
+ * arguments.  Returns NULL if any of the sequences cannot be allocated. */
+arguments_ty
+_PyPegen_empty_arguments(Parser *p)
+{
+    PyArena *arena = p->arena;
+
+    asdl_arg_seq *posonlyargs = _Py_asdl_arg_seq_new(0, arena);
+    if (posonlyargs == NULL) {
+        return NULL;
+    }
+    asdl_arg_seq *posargs = _Py_asdl_arg_seq_new(0, arena);
+    if (posargs == NULL) {
+        return NULL;
+    }
+    asdl_expr_seq *posdefaults = _Py_asdl_expr_seq_new(0, arena);
+    if (posdefaults == NULL) {
+        return NULL;
+    }
+    asdl_arg_seq *kwonlyargs = _Py_asdl_arg_seq_new(0, arena);
+    if (kwonlyargs == NULL) {
+        return NULL;
+    }
+    asdl_expr_seq *kwdefaults = _Py_asdl_expr_seq_new(0, arena);
+    if (kwdefaults == NULL) {
+        return NULL;
+    }
+
+    return _PyAST_arguments(posonlyargs, posargs, NULL, kwonlyargs,
+                            kwdefaults, NULL, posdefaults, arena);
+}
+
+/* Wraps an operator_ty value in an AugOperator struct allocated in
+ * p->arena.  Returns NULL if the allocation fails. */
+AugOperator *
+_PyPegen_augoperator(Parser *p, operator_ty kind)
+{
+    AugOperator *op = _PyArena_Malloc(p->arena, sizeof(AugOperator));
+    if (op != NULL) {
+        op->kind = kind;
+    }
+    return op;
+}
+
+/* Construct a FunctionDef (or AsyncFunctionDef) equivalent to function_def,
+ * but with `decorators` as its decorator list.
+ *
+ * NOTE(review): the fields are read through v.FunctionDef for async
+ * definitions as well -- this presumably relies on FunctionDef and
+ * AsyncFunctionDef having the same member layout; confirm against the AST
+ * definitions. */
+stmt_ty
+_PyPegen_function_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty function_def)
+{
+    assert(function_def != NULL);
+
+    PyObject *name = function_def->v.FunctionDef.name;
+    arguments_ty args = function_def->v.FunctionDef.args;
+    asdl_stmt_seq *body = function_def->v.FunctionDef.body;
+    expr_ty returns = function_def->v.FunctionDef.returns;
+    PyObject *type_comment = function_def->v.FunctionDef.type_comment;
+
+    if (function_def->kind == AsyncFunctionDef_kind) {
+        return _PyAST_AsyncFunctionDef(
+            name, args, body, decorators, returns, type_comment,
+            function_def->lineno, function_def->col_offset,
+            function_def->end_lineno, function_def->end_col_offset,
+            p->arena);
+    }
+
+    return _PyAST_FunctionDef(
+        name, args, body, decorators, returns, type_comment,
+        function_def->lineno, function_def->col_offset,
+        function_def->end_lineno, function_def->end_col_offset,
+        p->arena);
+}
+
+/* Construct a ClassDef with class_def's name, bases, keywords, body and
+ * location, but with `decorators` as its decorator list. */
+stmt_ty
+_PyPegen_class_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty class_def)
+{
+    assert(class_def != NULL);
+
+    PyObject *name = class_def->v.ClassDef.name;
+    asdl_expr_seq *bases = class_def->v.ClassDef.bases;
+    asdl_keyword_seq *keywords = class_def->v.ClassDef.keywords;
+    asdl_stmt_seq *body = class_def->v.ClassDef.body;
+
+    return _PyAST_ClassDef(
+        name, bases, keywords, body, decorators,
+        class_def->lineno, class_def->col_offset,
+        class_def->end_lineno, class_def->end_col_offset,
+        p->arena);
+}
+
+/* Allocates a KeywordOrStarred in p->arena that records `element` together
+ * with the is_keyword flag.  Returns NULL if the allocation fails. */
+KeywordOrStarred *
+_PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword)
+{
+    KeywordOrStarred *item = _PyArena_Malloc(p->arena, sizeof(KeywordOrStarred));
+    if (item != NULL) {
+        item->element = element;
+        item->is_keyword = is_keyword;
+    }
+    return item;
+}
+
+/* Counts the entries of an asdl_seq* of KeywordOrStarred*s whose
+ * is_keyword flag is zero, i.e. the starred expressions. */
+static int
+_seq_number_of_starred_exprs(asdl_seq *seq)
+{
+    int starred = 0;
+    Py_ssize_t total = asdl_seq_LEN(seq);
+    for (Py_ssize_t idx = 0; idx < total; idx++) {
+        KeywordOrStarred *item = asdl_seq_GET_UNTYPED(seq, idx);
+        if (item->is_keyword) {
+            continue;
+        }
+        starred++;
+    }
+    return starred;
+}
+
+/* Extract the starred expressions (entries with is_keyword == 0) of an
+ * asdl_seq* of KeywordOrStarred*s into a new expression sequence.
+ * Returns NULL when there are no starred expressions, and also when the new
+ * sequence cannot be allocated. */
+asdl_expr_seq *
+_PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs)
+{
+    int starred_count = _seq_number_of_starred_exprs(kwargs);
+    if (starred_count == 0) {
+        return NULL;
+    }
+    asdl_expr_seq *starred = _Py_asdl_expr_seq_new(starred_count, p->arena);
+    if (starred == NULL) {
+        return NULL;
+    }
+
+    int out = 0;
+    Py_ssize_t total = asdl_seq_LEN(kwargs);
+    for (Py_ssize_t in = 0; in < total; in++) {
+        KeywordOrStarred *item = asdl_seq_GET_UNTYPED(kwargs, in);
+        if (item->is_keyword) {
+            continue;
+        }
+        asdl_seq_SET(starred, out, item->element);
+        out++;
+    }
+    return starred;
+}
+
+/* Return a new keyword sequence with only the keyword entries (is_keyword
+ * != 0) of kwargs, in their original order.
+ * Returns NULL when kwargs holds no keywords, and also when the new sequence
+ * cannot be allocated. */
+asdl_keyword_seq*
+_PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
+{
+    Py_ssize_t total = asdl_seq_LEN(kwargs);
+    Py_ssize_t keyword_count = total - _seq_number_of_starred_exprs(kwargs);
+    if (keyword_count == 0) {
+        return NULL;
+    }
+    asdl_keyword_seq *keywords = _Py_asdl_keyword_seq_new(keyword_count, p->arena);
+    if (keywords == NULL) {
+        return NULL;
+    }
+
+    int out = 0;
+    for (Py_ssize_t in = 0; in < total; in++) {
+        KeywordOrStarred *item = asdl_seq_GET_UNTYPED(kwargs, in);
+        if (!item->is_keyword) {
+            continue;
+        }
+        asdl_seq_SET(keywords, out, item->element);
+        out++;
+    }
+    return keywords;
+}
+
+/* Concatenates a non-empty sequence of string tokens into one expression.
+ *
+ * Each token is parsed with _PyPegen_parsestr.  Bytes literals are joined
+ * into a single bytes object and returned as a Constant spanning from the
+ * first to the last token; everything else is fed to the FstringParser
+ * state, whose result is returned by _PyPegen_FstringParser_Finish.
+ * Mixing bytes and non-bytes literals raises a syntax error.
+ * Returns NULL on failure. */
+expr_ty
+_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings)
+{
+    Py_ssize_t len = asdl_seq_LEN(strings);
+    assert(len > 0);
+
+    /* The first and last tokens supply the location of the result. */
+    Token *first = asdl_seq_GET_UNTYPED(strings, 0);
+    Token *last = asdl_seq_GET_UNTYPED(strings, len - 1);
+
+    int bytesmode = 0;
+    PyObject *bytes_str = NULL;
+
+    FstringParser state;
+    _PyPegen_FstringParser_Init(&state);
+
+    for (Py_ssize_t i = 0; i < len; i++) {
+        Token *t = asdl_seq_GET_UNTYPED(strings, i);
+
+        int this_bytesmode;
+        int this_rawmode;
+        PyObject *s;
+        const char *fstr;
+        Py_ssize_t fstrlen = -1;
+
+        if (_PyPegen_parsestr(p, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen, t) != 0) {
+            goto error;
+        }
+
+        /* Check that we are not mixing bytes with unicode. */
+        if (i != 0 && bytesmode != this_bytesmode) {
+            RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
+            /* s has not been handed over to anything yet, so drop it here. */
+            Py_XDECREF(s);
+            goto error;
+        }
+        bytesmode = this_bytesmode;
+
+        if (fstr != NULL) {
+            /* f-string: s is unused, the raw text is in fstr/fstrlen. */
+            assert(s == NULL && !bytesmode);
+
+            int result = _PyPegen_FstringParser_ConcatFstring(p, &state, &fstr, fstr + fstrlen,
+                                                     this_rawmode, 0, first, t, last);
+            if (result < 0) {
+                goto error;
+            }
+        }
+        else {
+            /* String or byte string. */
+            assert(s != NULL && fstr == NULL);
+            assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));
+
+            if (bytesmode) {
+                if (i == 0) {
+                    /* The first bytes literal becomes the accumulator. */
+                    bytes_str = s;
+                }
+                else {
+                    PyBytes_ConcatAndDel(&bytes_str, s);
+                    if (!bytes_str) {
+                        goto error;
+                    }
+                }
+            }
+            else {
+                /* This is a regular string. Concatenate it. */
+                if (_PyPegen_FstringParser_ConcatAndDel(&state, s) < 0) {
+                    goto error;
+                }
+            }
+        }
+    }
+
+    if (bytesmode) {
+        /* Register the accumulated bytes object with the arena before
+         * wrapping it in a Constant node. */
+        if (_PyArena_AddPyObject(p->arena, bytes_str) < 0) {
+            goto error;
+        }
+        return _PyAST_Constant(bytes_str, NULL, first->lineno,
+                               first->col_offset, last->end_lineno,
+                               last->end_col_offset, p->arena);
+    }
+
+    return _PyPegen_FstringParser_Finish(p, &state, first, last);
+
+error:
+    /* Shared failure path: release the partial bytes result and the
+     * f-string parser state. */
+    Py_XDECREF(bytes_str);
+    _PyPegen_FstringParser_Dealloc(&state);
+    if (PyErr_Occurred()) {
+        _Pypegen_raise_decode_error(p);
+    }
+    return NULL;
+}
+
+/* Returns exp if it is a Constant whose value is exactly a complex object;
+ * otherwise raises a syntax error located at exp and returns NULL. */
+expr_ty
+_PyPegen_ensure_imaginary(Parser *p, expr_ty exp)
+{
+    if (exp->kind == Constant_kind && PyComplex_CheckExact(exp->v.Constant.value)) {
+        return exp;
+    }
+    RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "imaginary number required in complex literal");
+    return NULL;
+}
+
+/* Returns exp if it is a Constant whose value is not exactly a complex
+ * object; otherwise raises a syntax error located at exp and returns NULL. */
+expr_ty
+_PyPegen_ensure_real(Parser *p, expr_ty exp)
+{
+    if (exp->kind == Constant_kind && !PyComplex_CheckExact(exp->v.Constant.value)) {
+        return exp;
+    }
+    RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "real number required in complex literal");
+    return NULL;
+}
+
+/* Build the Module node for a completely parsed file.
+ *
+ * `a` is the sequence of statements forming the module body.  Each entry of
+ * p->type_ignore_comments is converted into a TypeIgnore node and the
+ * resulting sequence is handed to _PyAST_Module together with `a`; when no
+ * such comments were recorded the sequence argument is NULL.
+ *
+ * Returns NULL as soon as creating the sequence, a tag or a TypeIgnore node
+ * fails. */
+mod_ty
+_PyPegen_make_module(Parser *p, asdl_stmt_seq *a) {
+    asdl_type_ignore_seq *type_ignores = NULL;
+    Py_ssize_t num = p->type_ignore_comments.num_items;
+    if (num > 0) {
+        // Turn the raw (comment, lineno) pairs into TypeIgnore objects in the arena
+        type_ignores = _Py_asdl_type_ignore_seq_new(num, p->arena);
+        if (type_ignores == NULL) {
+            return NULL;
+        }
+        // The counter has the same type as `num`, so it cannot overflow
+        // before it reaches the bound.
+        for (Py_ssize_t i = 0; i < num; i++) {
+            PyObject *tag = _PyPegen_new_type_comment(p, p->type_ignore_comments.items[i].comment);
+            if (tag == NULL) {
+                return NULL;
+            }
+            type_ignore_ty ti = _PyAST_TypeIgnore(p->type_ignore_comments.items[i].lineno,
+                                                  tag, p->arena);
+            if (ti == NULL) {
+                return NULL;
+            }
+            asdl_seq_SET(type_ignores, i, ti);
+        }
+    }
+    return _PyAST_Module(a, type_ignores, p->arena);
+}
+
+/* Create the str object for a type comment.
+ *
+ * `s` is decoded as NUL-terminated UTF-8 and the new object is registered
+ * with p->arena.  Returns the object, or NULL when decoding or the arena
+ * registration fails; in the latter case the new object is released
+ * before returning. */
+PyObject *
+_PyPegen_new_type_comment(Parser *p, const char *s)
+{
+    PyObject *comment = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
+    if (comment != NULL && _PyArena_AddPyObject(p->arena, comment) < 0) {
+        Py_DECREF(comment);
+        comment = NULL;
+    }
+    return comment;
+}
+
+/* Attach a type comment to a function argument.
+ *
+ * When `tc` is NULL the argument `a` is returned untouched.  Otherwise the
+ * bytes of the token are turned into a type comment object with
+ * _PyPegen_new_type_comment() and a new arg node is built that copies the
+ * name, annotation and location of `a`.  Returns NULL when the token bytes
+ * cannot be read or the type comment cannot be created. */
+arg_ty
+_PyPegen_add_type_comment_to_arg(Parser *p, arg_ty a, Token *tc)
+{
+    if (tc == NULL) {
+        return a;
+    }
+
+    const char *raw = PyBytes_AsString(tc->bytes);
+    PyObject *type_comment = (raw != NULL) ? _PyPegen_new_type_comment(p, raw) : NULL;
+    if (type_comment == NULL) {
+        return NULL;
+    }
+
+    return _PyAST_arg(a->arg, a->annotation, type_comment,
+                      a->lineno, a->col_offset, a->end_lineno, a->end_col_offset,
+                      p->arena);
+}
+
+/* Check that the spelling of a NOTEQUAL token matches the parser flags.
+ *
+ * With PyPARSE_BARRY_AS_BDFL set only "<>" is accepted; any other spelling
+ * raises a SyntaxError and yields -1.  Without the flag the result is that
+ * of comparing the token text with "!=".
+ *
+ * Returns 0 on success and nonzero on failure (an exception may be set). */
+int
+_PyPegen_check_barry_as_flufl(Parser *p, Token* t) {
+    assert(t->bytes != NULL);
+    assert(t->type == NOTEQUAL);
+
+    const char *spelling = PyBytes_AS_STRING(t->bytes);
+    int barry_mode = (p->flags & PyPARSE_BARRY_AS_BDFL) != 0;
+
+    if (!barry_mode) {
+        return strcmp(spelling, "!=");
+    }
+    if (strcmp(spelling, "<>") == 0) {
+        return 0;
+    }
+    RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='");
+    return -1;
+}
+
+/* Tell whether `name` is a Name node spelled "print" or "exec".
+ *
+ * Returns 1 for those two identifiers and 0 for everything else, including
+ * expressions that are not Name nodes. */
+int
+_PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
+    if (name->kind != Name_kind) {
+        return 0;
+    }
+    PyObject *id = name->v.Name.id;
+    return PyUnicode_CompareWithASCIIString(id, "print") == 0
+        || PyUnicode_CompareWithASCIIString(id, "exec") == 0;
+}
+
+/* Return a human readable name for the kind of expression `e`.
+ *
+ * The result is a static string.  For an expression kind that has no entry
+ * here a SystemError is set and NULL is returned. */
+const char *
+_PyPegen_get_expr_name(expr_ty e)
+{
+    assert(e != NULL);
+
+    // Constants holding one of the singletons are named after that singleton.
+    if (e->kind == Constant_kind) {
+        PyObject *value = e->v.Constant.value;
+        if (value == Py_None) {
+            return "None";
+        }
+        else if (value == Py_False) {
+            return "False";
+        }
+        else if (value == Py_True) {
+            return "True";
+        }
+        else if (value == Py_Ellipsis) {
+            return "ellipsis";
+        }
+        return "literal";
+    }
+
+    switch (e->kind) {
+        /* Plain targets and containers */
+        case Name_kind:
+            return "name";
+        case Attribute_kind:
+            return "attribute";
+        case Subscript_kind:
+            return "subscript";
+        case Starred_kind:
+            return "starred";
+        case List_kind:
+            return "list";
+        case Tuple_kind:
+            return "tuple";
+        case Dict_kind:
+            return "dict literal";
+        case Set_kind:
+            return "set display";
+
+        /* Comprehensions */
+        case GeneratorExp_kind:
+            return "generator expression";
+        case ListComp_kind:
+            return "list comprehension";
+        case SetComp_kind:
+            return "set comprehension";
+        case DictComp_kind:
+            return "dict comprehension";
+
+        /* Operators and calls */
+        case BoolOp_kind:
+        case BinOp_kind:
+        case UnaryOp_kind:
+            return "expression";
+        case Compare_kind:
+            return "comparison";
+        case IfExp_kind:
+            return "conditional expression";
+        case NamedExpr_kind:
+            return "named expression";
+        case Lambda_kind:
+            return "lambda";
+        case Call_kind:
+            return "function call";
+
+        /* Suspension points */
+        case Yield_kind:
+        case YieldFrom_kind:
+            return "yield expression";
+        case Await_kind:
+            return "await expression";
+
+        /* f-strings */
+        case JoinedStr_kind:
+        case FormattedValue_kind:
+            return "f-string expression";
+
+        default:
+            PyErr_Format(PyExc_SystemError,
+                         "unexpected expression in assignment %d (line %d)",
+                         e->kind, e->lineno);
+            return NULL;
+    }
+}
+
+/* Return the last expression of a comprehension clause: its last `if`
+ * condition when it has any, otherwise the expression being iterated. */
+static inline expr_ty
+_PyPegen_get_last_comprehension_item(comprehension_ty comprehension) {
+    int has_conditions = (comprehension->ifs != NULL
+                          && asdl_seq_LEN(comprehension->ifs) != 0);
+    if (has_conditions) {
+        return PyPegen_last_item(comprehension->ifs, expr_ty);
+    }
+    return comprehension->iter;
+}
+
+/* Build the Call node for a parsed argument list.
+ *
+ * `a` holds the leading positional arguments.  `b`, when not NULL, is the
+ * sequence that _PyPegen_seq_extract_starred_exprs() and
+ * _PyPegen_seq_delete_starred_exprs() split into starred expressions and
+ * keywords; the starred expressions are appended after the elements of
+ * `a`.  The callee of the node is the value of _PyPegen_dummy_name().
+ *
+ * Returns NULL when the combined argument sequence cannot be allocated. */
+expr_ty _PyPegen_collect_call_seqs(Parser *p, asdl_expr_seq *a, asdl_seq *b,
+                     int lineno, int col_offset, int end_lineno,
+                     int end_col_offset, PyArena *arena) {
+    Py_ssize_t args_len = asdl_seq_LEN(a);
+    Py_ssize_t total_len = args_len;
+
+    if (b == NULL) {
+        return _PyAST_Call(_PyPegen_dummy_name(p), a, NULL, lineno, col_offset,
+                        end_lineno, end_col_offset, arena);
+    }
+
+    asdl_expr_seq *starreds = _PyPegen_seq_extract_starred_exprs(p, b);
+    asdl_keyword_seq *keywords = _PyPegen_seq_delete_starred_exprs(p, b);
+
+    if (starreds) {
+        total_len += asdl_seq_LEN(starreds);
+    }
+
+    asdl_expr_seq *args = _Py_asdl_expr_seq_new(total_len, arena);
+    if (args == NULL) {
+        // Without this check the stores below would write through NULL.
+        return NULL;
+    }
+
+    Py_ssize_t i = 0;
+    for (i = 0; i < args_len; i++) {
+        asdl_seq_SET(args, i, asdl_seq_GET(a, i));
+    }
+    // Only entered when `starreds` is not NULL: otherwise total_len == args_len.
+    for (; i < total_len; i++) {
+        asdl_seq_SET(args, i, asdl_seq_GET(starreds, i - args_len));
+    }
+
+    return _PyAST_Call(_PyPegen_dummy_name(p), args, keywords, lineno,
+                       col_offset, end_lineno, end_col_offset, arena);
+}
+
+// AST Error reporting helpers
+
+/* Find the first sub-expression of `e` that is not a valid target.
+ *
+ * `targets_type` selects the special cases: a starred expression is itself
+ * invalid for DEL_TARGETS, and for FOR_TARGETS a comparison is searched
+ * through its left operand when its first operator is `in`.
+ *
+ * Returns the offending expression, or NULL when none is found (also when
+ * `e` is NULL). */
+expr_ty
+_PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type)
+{
+    if (e == NULL) {
+        return NULL;
+    }
+
+    // We only need to visit List and Tuple nodes recursively as those
+    // are the only ones that can contain valid names in targets when
+    // they are parsed as expressions. Any other kind of expression
+    // that is a container (like Sets or Dicts) is directly invalid and
+    // we don't need to visit it recursively.
+    asdl_expr_seq *elts = NULL;
+
+    switch (e->kind) {
+        case Name_kind:
+        case Subscript_kind:
+        case Attribute_kind:
+            return NULL;
+        case List_kind:
+            elts = e->v.List.elts;
+            break;
+        case Tuple_kind:
+            elts = e->v.Tuple.elts;
+            break;
+        case Starred_kind:
+            if (targets_type == DEL_TARGETS) {
+                return e;
+            }
+            return _PyPegen_get_invalid_target(e->v.Starred.value, targets_type);
+        case Compare_kind: {
+            // This is needed, because the `a in b` in `for a in b` gets parsed
+            // as a comparison, and so we need to search the left side of the comparison
+            // for invalid targets.
+            if (targets_type != FOR_TARGETS) {
+                return e;
+            }
+            cmpop_ty first_op = (cmpop_ty) asdl_seq_GET(e->v.Compare.ops, 0);
+            if (first_op != In) {
+                return NULL;
+            }
+            return _PyPegen_get_invalid_target(e->v.Compare.left, targets_type);
+        }
+        default:
+            return e;
+    }
+
+    // Only List and Tuple nodes get here: report the first invalid element.
+    Py_ssize_t count = asdl_seq_LEN(elts);
+    for (Py_ssize_t idx = 0; idx < count; idx++) {
+        expr_ty element = asdl_seq_GET(elts, idx);
+        expr_ty invalid = _PyPegen_get_invalid_target(element, targets_type);
+        if (invalid != NULL) {
+            return invalid;
+        }
+    }
+    return NULL;
+}
+
+/* Raise the SyntaxError for a positional argument that follows keyword
+ * arguments in the call `e`.
+ *
+ * The message mentions "keyword argument unpacking" when at least one
+ * keyword of the call has no name.  Returns the value of
+ * RAISE_SYNTAX_ERROR. */
+void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) {
+    asdl_keyword_seq *keywords = e->v.Call.keywords;
+    Py_ssize_t count = asdl_seq_LEN(keywords);
+
+    int saw_unpacking = 0;
+    for (Py_ssize_t idx = 0; idx < count; idx++) {
+        keyword_ty kw = asdl_seq_GET(keywords, idx);
+        if (kw->arg == NULL) {
+            saw_unpacking = 1;
+        }
+    }
+
+    const char *msg = saw_unpacking
+        ? "positional argument follows keyword argument unpacking"
+        : "positional argument follows keyword argument";
+
+    return RAISE_SYNTAX_ERROR(msg);
+}
+
+/* Report a generator expression that is passed unparenthesized next to
+ * other arguments.
+ *
+ * The rule that calls this function is 'args for_if_clauses'.  For the
+ * input f(L, x for x in y), L and x are in args and the for is parsed as a
+ * for_if_clause.  A call with at most one argument, like
+ * dict((a, b) for a, b in x), is fine and yields NULL without an error.
+ * Otherwise the error range starts at the last argument (x in the example
+ * above) and ends at the last item of the last comprehension clause. */
+void *
+_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions)
+{
+    Py_ssize_t nargs = asdl_seq_LEN(args->v.Call.args);
+    if (nargs <= 1) {
+        return NULL;
+    }
+
+    comprehension_ty last_clause = PyPegen_last_item(comprehensions, comprehension_ty);
+    expr_ty range_start = (expr_ty) asdl_seq_GET(args->v.Call.args, nargs - 1);
+    expr_ty range_end = _PyPegen_get_last_comprehension_item(last_clause);
+
+    return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
+        range_start,
+        range_end,
+        "Generator expression must be parenthesized"
+    );
+}
\ No newline at end of file
diff --git a/Parser/pegen.c b/Parser/pegen.c
index b760730189073..4f51c63c44353 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -1,432 +1,21 @@
 #include <Python.h>
 #include "pycore_ast.h"           // _PyAST_Validate(),
 #include <errcode.h>
-#include "tokenizer.h"
 
+#include "tokenizer.h"
 #include "pegen.h"
-#include "string_parser.h"
-
-PyObject *
-_PyPegen_new_type_comment(Parser *p, const char *s)
-{
-    PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
-    if (res == NULL) {
-        return NULL;
-    }
-    if (_PyArena_AddPyObject(p->arena, res) < 0) {
-        Py_DECREF(res);
-        return NULL;
-    }
-    return res;
-}
-
-arg_ty
-_PyPegen_add_type_comment_to_arg(Parser *p, arg_ty a, Token *tc)
-{
-    if (tc == NULL) {
-        return a;
-    }
-    const char *bytes = PyBytes_AsString(tc->bytes);
-    if (bytes == NULL) {
-        return NULL;
-    }
-    PyObject *tco = _PyPegen_new_type_comment(p, bytes);
-    if (tco == NULL) {
-        return NULL;
-    }
-    return _PyAST_arg(a->arg, a->annotation, tco,
-                      a->lineno, a->col_offset, a->end_lineno, a->end_col_offset,
-                      p->arena);
-}
-
-static int
-init_normalization(Parser *p)
-{
-    if (p->normalize) {
-        return 1;
-    }
-    PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
-    if (!m)
-    {
-        return 0;
-    }
-    p->normalize = PyObject_GetAttrString(m, "normalize");
-    Py_DECREF(m);
-    if (!p->normalize)
-    {
-        return 0;
-    }
-    return 1;
-}
-
-/* Checks if the NOTEQUAL token is valid given the current parser flags
-0 indicates success and nonzero indicates failure (an exception may be set) */
-int
-_PyPegen_check_barry_as_flufl(Parser *p, Token* t) {
-    assert(t->bytes != NULL);
-    assert(t->type == NOTEQUAL);
-
-    const char* tok_str = PyBytes_AS_STRING(t->bytes);
-    if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>") != 0) {
-        RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='");
-        return -1;
-    }
-    if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) {
-        return strcmp(tok_str, "!=");
-    }
-    return 0;
-}
-
-int
-_PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
-    if (name->kind != Name_kind) {
-        return 0;
-    }
-    const char* candidates[2] = {"print", "exec"};
-    for (int i=0; i<2; i++) {
-        if (PyUnicode_CompareWithASCIIString(name->v.Name.id, candidates[i]) == 0) {
-            return 1;
-        }
-    }
-    return 0;
-}
-
-PyObject *
-_PyPegen_new_identifier(Parser *p, const char *n)
-{
-    PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
-    if (!id) {
-        goto error;
-    }
-    /* PyUnicode_DecodeUTF8 should always return a ready string. */
-    assert(PyUnicode_IS_READY(id));
-    /* Check whether there are non-ASCII characters in the
-       identifier; if so, normalize to NFKC. */
-    if (!PyUnicode_IS_ASCII(id))
-    {
-        PyObject *id2;
-        if (!init_normalization(p))
-        {
-            Py_DECREF(id);
-            goto error;
-        }
-        PyObject *form = PyUnicode_InternFromString("NFKC");
-        if (form == NULL)
-        {
-            Py_DECREF(id);
-            goto error;
-        }
-        PyObject *args[2] = {form, id};
-        id2 = _PyObject_FastCall(p->normalize, args, 2);
-        Py_DECREF(id);
-        Py_DECREF(form);
-        if (!id2) {
-            goto error;
-        }
-        if (!PyUnicode_Check(id2))
-        {
-            PyErr_Format(PyExc_TypeError,
-                         "unicodedata.normalize() must return a string, not "
-                         "%.200s",
-                         _PyType_Name(Py_TYPE(id2)));
-            Py_DECREF(id2);
-            goto error;
-        }
-        id = id2;
-    }
-    PyUnicode_InternInPlace(&id);
-    if (_PyArena_AddPyObject(p->arena, id) < 0)
-    {
-        Py_DECREF(id);
-        goto error;
-    }
-    return id;
-
-error:
-    p->error_indicator = 1;
-    return NULL;
-}
-
-static PyObject *
-_create_dummy_identifier(Parser *p)
-{
-    return _PyPegen_new_identifier(p, "");
-}
-
-const char *
-_PyPegen_get_expr_name(expr_ty e)
-{
-    assert(e != NULL);
-    switch (e->kind) {
-        case Attribute_kind:
-            return "attribute";
-        case Subscript_kind:
-            return "subscript";
-        case Starred_kind:
-            return "starred";
-        case Name_kind:
-            return "name";
-        case List_kind:
-            return "list";
-        case Tuple_kind:
-            return "tuple";
-        case Lambda_kind:
-            return "lambda";
-        case Call_kind:
-            return "function call";
-        case BoolOp_kind:
-        case BinOp_kind:
-        case UnaryOp_kind:
-            return "expression";
-        case GeneratorExp_kind:
-            return "generator expression";
-        case Yield_kind:
-        case YieldFrom_kind:
-            return "yield expression";
-        case Await_kind:
-            return "await expression";
-        case ListComp_kind:
-            return "list comprehension";
-        case SetComp_kind:
-            return "set comprehension";
-        case DictComp_kind:
-            return "dict comprehension";
-        case Dict_kind:
-            return "dict literal";
-        case Set_kind:
-            return "set display";
-        case JoinedStr_kind:
-        case FormattedValue_kind:
-            return "f-string expression";
-        case Constant_kind: {
-            PyObject *value = e->v.Constant.value;
-            if (value == Py_None) {
-                return "None";
-            }
-            if (value == Py_False) {
-                return "False";
-            }
-            if (value == Py_True) {
-                return "True";
-            }
-            if (value == Py_Ellipsis) {
-                return "ellipsis";
-            }
-            return "literal";
-        }
-        case Compare_kind:
-            return "comparison";
-        case IfExp_kind:
-            return "conditional expression";
-        case NamedExpr_kind:
-            return "named expression";
-        default:
-            PyErr_Format(PyExc_SystemError,
-                         "unexpected expression in assignment %d (line %d)",
-                         e->kind, e->lineno);
-            return NULL;
-    }
-}
-
-static int
-raise_decode_error(Parser *p)
-{
-    assert(PyErr_Occurred());
-    const char *errtype = NULL;
-    if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
-        errtype = "unicode error";
-    }
-    else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
-        errtype = "value error";
-    }
-    if (errtype) {
-        PyObject *type;
-        PyObject *value;
-        PyObject *tback;
-        PyObject *errstr;
-        PyErr_Fetch(&type, &value, &tback);
-        errstr = PyObject_Str(value);
-        if (errstr) {
-            RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr);
-            Py_DECREF(errstr);
-        }
-        else {
-            PyErr_Clear();
-            RAISE_SYNTAX_ERROR("(%s) unknown error", errtype);
-        }
-        Py_XDECREF(type);
-        Py_XDECREF(value);
-        Py_XDECREF(tback);
-    }
-
-    return -1;
-}
-
-static inline void
-raise_unclosed_parentheses_error(Parser *p) {
-       int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
-       int error_col = p->tok->parencolstack[p->tok->level-1];
-       RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
-                                  error_lineno, error_col, error_lineno, -1,
-                                  "'%c' was never closed",
-                                  p->tok->parenstack[p->tok->level-1]);
-}
-
-static void
-raise_tokenizer_init_error(PyObject *filename)
-{
-    if (!(PyErr_ExceptionMatches(PyExc_LookupError)
-          || PyErr_ExceptionMatches(PyExc_SyntaxError)
-          || PyErr_ExceptionMatches(PyExc_ValueError)
-          || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
-        return;
-    }
-    PyObject *errstr = NULL;
-    PyObject *tuple = NULL;
-    PyObject *type;
-    PyObject *value;
-    PyObject *tback;
-    PyErr_Fetch(&type, &value, &tback);
-    errstr = PyObject_Str(value);
-    if (!errstr) {
-        goto error;
-    }
-
-    PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
-    if (!tmp) {
-        goto error;
-    }
-
-    tuple = PyTuple_Pack(2, errstr, tmp);
-    Py_DECREF(tmp);
-    if (!value) {
-        goto error;
-    }
-    PyErr_SetObject(PyExc_SyntaxError, tuple);
-
-error:
-    Py_XDECREF(type);
-    Py_XDECREF(value);
-    Py_XDECREF(tback);
-    Py_XDECREF(errstr);
-    Py_XDECREF(tuple);
-}
-
-static int
-tokenizer_error(Parser *p)
-{
-    if (PyErr_Occurred()) {
-        return -1;
-    }
-
-    const char *msg = NULL;
-    PyObject* errtype = PyExc_SyntaxError;
-    Py_ssize_t col_offset = -1;
-    switch (p->tok->done) {
-        case E_TOKEN:
-            msg = "invalid token";
-            break;
-        case E_EOF:
-            if (p->tok->level) {
-                raise_unclosed_parentheses_error(p);
-            } else {
-                RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
-            }
-            return -1;
-        case E_DEDENT:
-            RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
-            return -1;
-        case E_INTR:
-            if (!PyErr_Occurred()) {
-                PyErr_SetNone(PyExc_KeyboardInterrupt);
-            }
-            return -1;
-        case E_NOMEM:
-            PyErr_NoMemory();
-            return -1;
-        case E_TABSPACE:
-            errtype = PyExc_TabError;
-            msg = "inconsistent use of tabs and spaces in indentation";
-            break;
-        case E_TOODEEP:
-            errtype = PyExc_IndentationError;
-            msg = "too many levels of indentation";
-            break;
-        case E_LINECONT: {
-            col_offset = p->tok->cur - p->tok->buf - 1;
-            msg = "unexpected character after line continuation character";
-            break;
-        }
-        default:
-            msg = "unknown parsing error";
-    }
 
-    RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno,
-                               col_offset >= 0 ? col_offset : 0,
-                               p->tok->lineno, -1, msg);
-    return -1;
-}
+// Internal parser functions
 
-void *
-_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
+asdl_stmt_seq*
+_PyPegen_interactive_exit(Parser *p)
 {
-    if (p->fill == 0) {
-        va_list va;
-        va_start(va, errmsg);
-        _PyPegen_raise_error_known_location(p, errtype, 0, 0, 0, -1, errmsg, va);
-        va_end(va);
-        return NULL;
-    }
-
-    Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
-    Py_ssize_t col_offset;
-    Py_ssize_t end_col_offset = -1;
-    if (t->col_offset == -1) {
-        if (p->tok->cur == p->tok->buf) {
-            col_offset = 0;
-        } else {
-            const char* start = p->tok->buf  ? p->tok->line_start : p->tok->buf;
-            col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int);
-        }
-    } else {
-        col_offset = t->col_offset + 1;
-    }
-
-    if (t->end_col_offset != -1) {
-        end_col_offset = t->end_col_offset + 1;
+    if (p->errcode) {
+        *(p->errcode) = E_EOF;
     }
-
-    va_list va;
-    va_start(va, errmsg);
-    _PyPegen_raise_error_known_location(p, errtype, t->lineno, col_offset, t->end_lineno, end_col_offset, errmsg, va);
-    va_end(va);
-
     return NULL;
 }
 
-static PyObject *
-get_error_line(Parser *p, Py_ssize_t lineno)
-{
-    /* If the file descriptor is interactive, the source lines of the current
-     * (multi-line) statement are stored in p->tok->interactive_src_start.
-     * If not, we're parsing from a string, which means that the whole source
-     * is stored in p->tok->str. */
-    assert(p->tok->fp == NULL || p->tok->fp == stdin);
-
-    char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;
-    assert(cur_line != NULL);
-
-    for (int i = 0; i < lineno - 1; i++) {
-        cur_line = strchr(cur_line, '\n') + 1;
-    }
-
-    char *next_newline;
-    if ((next_newline = strchr(cur_line, '\n')) == NULL) { // This is the last line
-        next_newline = cur_line + strlen(cur_line);
-    }
-    return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace");
-}
-
 Py_ssize_t
 _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
 {
@@ -448,127 +37,6 @@ _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
     return size;
 }
 
-void *
-_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
-                                    Py_ssize_t lineno, Py_ssize_t col_offset,
-                                    Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
-                                    const char *errmsg, va_list va)
-{
-    PyObject *value = NULL;
-    PyObject *errstr = NULL;
-    PyObject *error_line = NULL;
-    PyObject *tmp = NULL;
-    p->error_indicator = 1;
-
-    if (end_lineno == CURRENT_POS) {
-        end_lineno = p->tok->lineno;
-    }
-    if (end_col_offset == CURRENT_POS) {
-        end_col_offset = p->tok->cur - p->tok->line_start;
-    }
-
-    if (p->start_rule == Py_fstring_input) {
-        const char *fstring_msg = "f-string: ";
-        Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);
-
-        char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character
-        if (!new_errmsg) {
-            return (void *) PyErr_NoMemory();
-        }
-
-        // Copy both strings into new buffer
-        memcpy(new_errmsg, fstring_msg, strlen(fstring_msg));
-        memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg));
-        new_errmsg[len] = 0;
-        errmsg = new_errmsg;
-    }
-    errstr = PyUnicode_FromFormatV(errmsg, va);
-    if (!errstr) {
-        goto error;
-    }
-
-    if (p->tok->fp_interactive) {
-        error_line = get_error_line(p, lineno);
-    }
-    else if (p->start_rule == Py_file_input) {
-        error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
-                                                     (int) lineno, p->tok->encoding);
-    }
-
-    if (!error_line) {
-        /* PyErr_ProgramTextObject was not called or returned NULL. If it was not called,
-           then we need to find the error line from some other source, because
-           p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly
-           failed or we're parsing from a string or the REPL. There's a third edge case where
-           we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
-           `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
-           does not physically exist */
-        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
-
-        if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
-            Py_ssize_t size = p->tok->inp - p->tok->buf;
-            error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
-        }
-        else if (p->tok->fp == NULL || p->tok->fp == stdin) {
-            error_line = get_error_line(p, lineno);
-        }
-        else {
-            error_line = PyUnicode_FromStringAndSize("", 0);
-        }
-        if (!error_line) {
-            goto error;
-        }
-    }
-
-    if (p->start_rule == Py_fstring_input) {
-        col_offset -= p->starting_col_offset;
-        end_col_offset -= p->starting_col_offset;
-    }
-
-    Py_ssize_t col_number = col_offset;
-    Py_ssize_t end_col_number = end_col_offset;
-
-    if (p->tok->encoding != NULL) {
-        col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset);
-        if (col_number < 0) {
-            goto error;
-        }
-        if (end_col_number > 0) {
-            Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number);
-            if (end_col_offset < 0) {
-                goto error;
-            } else {
-                end_col_number = end_col_offset;
-            }
-        }
-    }
-    tmp = Py_BuildValue("(OiiNii)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);
-    if (!tmp) {
-        goto error;
-    }
-    value = PyTuple_Pack(2, errstr, tmp);
-    Py_DECREF(tmp);
-    if (!value) {
-        goto error;
-    }
-    PyErr_SetObject(errtype, value);
-
-    Py_DECREF(errstr);
-    Py_DECREF(value);
-    if (p->start_rule == Py_fstring_input) {
-        PyMem_Free((void *)errmsg);
-    }
-    return NULL;
-
-error:
-    Py_XDECREF(errstr);
-    Py_XDECREF(error_line);
-    if (p->start_rule == Py_fstring_input) {
-        PyMem_Free((void *)errmsg);
-    }
-    return NULL;
-}
-
 #if 0
 static const char *
 token_name(int type)
@@ -614,39 +82,24 @@ _PyPegen_update_memo(Parser *p, int mark, int type, void *node)
     return _PyPegen_insert_memo(p, mark, type, node);
 }
 
-// Return dummy NAME.
-void *
-_PyPegen_dummy_name(Parser *p, ...)
+static int
+init_normalization(Parser *p)
 {
-    static void *cache = NULL;
-
-    if (cache != NULL) {
-        return cache;
+    if (p->normalize) {
+        return 1;
     }
-
-    PyObject *id = _create_dummy_identifier(p);
-    if (!id) {
-        return NULL;
+    PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
+    if (!m)
+    {
+        return 0;
     }
-    cache = _PyAST_Name(id, Load, 1, 0, 1, 0, p->arena);
-    return cache;
-}
-
-static int
-_get_keyword_or_name_type(Parser *p, const char *name, int name_len)
-{
-    assert(name_len > 0);
-    if (name_len >= p->n_keyword_lists ||
-        p->keywords[name_len] == NULL ||
-        p->keywords[name_len]->type == -1) {
-        return NAME;
-    }
-    for (KeywordToken *k = p->keywords[name_len]; k != NULL && k->type != -1; k++) {
-        if (strncmp(k->str, name, name_len) == 0) {
-            return k->type;
-        }
+    p->normalize = PyObject_GetAttrString(m, "normalize");
+    Py_DECREF(m);
+    if (!p->normalize)
+    {
+        return 0;
     }
-    return NAME;
+    return 1;
 }
 
 static int
@@ -685,6 +138,23 @@ growable_comment_array_deallocate(growable_comment_array *arr) {
     PyMem_Free(arr->items);
 }
 
+/* Classify the first `name_len` bytes of `name` as a keyword or a NAME.
+ *
+ * p->keywords is indexed by keyword length; each non-NULL entry is an array
+ * terminated by an element whose type is -1.  Returns the type of the
+ * matching keyword, or NAME when there is none. */
+static int
+_get_keyword_or_name_type(Parser *p, const char *name, int name_len)
+{
+    assert(name_len > 0);
+    if (name_len >= p->n_keyword_lists) {
+        return NAME;
+    }
+
+    KeywordToken *candidate = p->keywords[name_len];
+    if (candidate == NULL) {
+        return NAME;
+    }
+    while (candidate->type != -1) {
+        if (strncmp(candidate->str, name, name_len) == 0) {
+            return candidate->type;
+        }
+        candidate++;
+    }
+    return NAME;
+}
+
 static int
 initialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) {
     assert(token != NULL);
@@ -715,10 +185,10 @@ initialize_token(Parser *p, Token *token, const char *start, const char *end, in
     p->fill += 1;
 
     if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) {
-        return raise_decode_error(p);
+        return _Pypegen_raise_decode_error(p);
     }
 
-    return (token_type == ERRORTOKEN ? tokenizer_error(p) : 0);
+    return (token_type == ERRORTOKEN ? _Pypegen_tokenizer_error(p) : 0);
 }
 
 static int
@@ -791,7 +261,6 @@ _PyPegen_fill_token(Parser *p)
     return initialize_token(p, t, start, end, type);
 }
 
-
 #if defined(Py_DEBUG)
 // Instrumentation to count the effectiveness of memoization.
 // The array counts the number of tokens skipped by memoization,
@@ -989,6 +458,62 @@ _PyPegen_get_last_nonnwhitespace_token(Parser *p)
     return token;
 }
 
+/* Create the interned str object for the identifier `n`.
+ *
+ * `n` is decoded as NUL-terminated UTF-8.  A non-ASCII identifier is passed
+ * through p->normalize with the form "NFKC" and the result replaces the
+ * decoded string.  The final object is interned and registered with
+ * p->arena.
+ *
+ * Returns the identifier, or NULL after setting p->error_indicator when any
+ * step fails. */
+PyObject *
+_PyPegen_new_identifier(Parser *p, const char *n)
+{
+    PyObject *ident = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
+    if (ident == NULL) {
+        goto error;
+    }
+    /* PyUnicode_DecodeUTF8 should always return a ready string. */
+    assert(PyUnicode_IS_READY(ident));
+
+    /* Check whether there are non-ASCII characters in the
+       identifier; if so, normalize to NFKC. */
+    if (!PyUnicode_IS_ASCII(ident)) {
+        if (!init_normalization(p)) {
+            goto error_decref;
+        }
+        PyObject *form = PyUnicode_InternFromString("NFKC");
+        if (form == NULL) {
+            goto error_decref;
+        }
+        PyObject *call_args[2] = {form, ident};
+        PyObject *normalized = _PyObject_FastCall(p->normalize, call_args, 2);
+        Py_DECREF(ident);
+        Py_DECREF(form);
+        /* From here on `ident` refers to the result of the call. */
+        ident = normalized;
+        if (ident == NULL) {
+            goto error;
+        }
+        if (!PyUnicode_Check(ident)) {
+            PyErr_Format(PyExc_TypeError,
+                         "unicodedata.normalize() must return a string, not "
+                         "%.200s",
+                         _PyType_Name(Py_TYPE(ident)));
+            goto error_decref;
+        }
+    }
+
+    PyUnicode_InternInPlace(&ident);
+    if (_PyArena_AddPyObject(p->arena, ident) < 0) {
+        goto error_decref;
+    }
+    return ident;
+
+error_decref:
+    Py_DECREF(ident);
+error:
+    p->error_indicator = 1;
+    return NULL;
+}
+
 static expr_ty
 _PyPegen_name_from_token(Parser *p, Token* t)
 {
@@ -1009,7 +534,6 @@ _PyPegen_name_from_token(Parser *p, Token* t)
                        t->end_col_offset, p->arena);
 }
 
-
 expr_ty
 _PyPegen_name_token(Parser *p)
 {
@@ -1023,7 +547,6 @@ _PyPegen_string_token(Parser *p)
     return _PyPegen_expect_token(p, STRING);
 }
 
-
 expr_ty _PyPegen_soft_keyword_token(Parser *p) {
     Token *t = _PyPegen_expect_token(p, NAME);
     if (t == NULL) {
@@ -1197,18 +720,6 @@ bad_single_statement(Parser *p)
     }
 }
 
-void
-_PyPegen_Parser_Free(Parser *p)
-{
-    Py_XDECREF(p->normalize);
-    for (int i = 0; i < p->size; i++) {
-        PyMem_Free(p->tokens[i]);
-    }
-    PyMem_Free(p->tokens);
-    growable_comment_array_deallocate(&p->type_ignore_comments);
-    PyMem_Free(p);
-}
-
 static int
 compute_parser_flags(PyCompilerFlags *flags)
 {
@@ -1234,6 +745,8 @@ compute_parser_flags(PyCompilerFlags *flags)
     return parser_flags;
 }
 
+// Parser API
+
 Parser *
 _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
                     int feature_version, int *errcode, PyArena *arena)
@@ -1289,8 +802,20 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
     return p;
 }
 
+void
+_PyPegen_Parser_Free(Parser *p)
+{   /* Frees p->normalize, the tokens, the type-ignore comments, then p. */
+    Py_XDECREF(p->normalize);  /* X variant: p->normalize may be NULL */
+    for (int i = 0; i < p->size; i++) {  /* one PyMem_Free per token slot */
+        PyMem_Free(p->tokens[i]);
+    }
+    PyMem_Free(p->tokens);  /* the slot array itself, after its entries */
+    growable_comment_array_deallocate(&p->type_ignore_comments);
+    PyMem_Free(p);  /* last: p is dangling once this returns */
+}
+
 static void
-reset_parser_state(Parser *p)
+reset_parser_state_for_error_pass(Parser *p)
 {
     for (int i = 0; i < p->fill; i++) {
         p->tokens[i]->memo = NULL;
@@ -1302,60 +827,6 @@ reset_parser_state(Parser *p)
     p->tok->interactive_underflow = IUNDERFLOW_STOP;
 }
 
-static int
-_PyPegen_check_tokenizer_errors(Parser *p) {
-    // Tokenize the whole input to see if there are any tokenization
-    // errors such as mistmatching parentheses. These will get priority
-    // over generic syntax errors only if the line number of the error is
-    // before the one that we had for the generic error.
-
-    // We don't want to tokenize to the end for interactive input
-    if (p->tok->prompt != NULL) {
-        return 0;
-    }
-
-    PyObject *type, *value, *traceback;
-    PyErr_Fetch(&type, &value, &traceback);
-
-    Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
-    Py_ssize_t current_err_line = current_token->lineno;
-
-    int ret = 0;
-
-    for (;;) {
-        const char *start;
-        const char *end;
-        switch (_PyTokenizer_Get(p->tok, &start, &end)) {
-            case ERRORTOKEN:
-                if (p->tok->level != 0) {
-                    int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
-                    if (current_err_line > error_lineno) {
-                        raise_unclosed_parentheses_error(p);
-                        ret = -1;
-                        goto exit;
-                    }
-                }
-                break;
-            case ENDMARKER:
-                break;
-            default:
-                continue;
-        }
-        break;
-    }
-
-
-exit:
-    if (PyErr_Occurred()) {
-        Py_XDECREF(value);
-        Py_XDECREF(type);
-        Py_XDECREF(traceback);
-    } else {
-        PyErr_Restore(type, value, traceback);
-    }
-    return ret;
-}
-
 void *
 _PyPegen_run_parser(Parser *p)
 {
@@ -1364,46 +835,17 @@ _PyPegen_run_parser(Parser *p)
         if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
             return NULL;
         }
+        // Make a second parser pass. In this pass we activate heavier and slower checks
+        // to produce better error messages and more complete diagnostics. Extra "invalid_*"
+        // rules will be active during parsing.
         Token *last_token = p->tokens[p->fill - 1];
-        reset_parser_state(p);
+        reset_parser_state_for_error_pass(p);
         _PyPegen_parse(p);
-        if (PyErr_Occurred()) {
-            // Prioritize tokenizer errors to custom syntax errors raised
-            // on the second phase only if the errors come from the parser.
-            if (p->tok->done == E_DONE && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
-                _PyPegen_check_tokenizer_errors(p);
-            }
-            return NULL;
-        }
-        if (p->fill == 0) {
-            RAISE_SYNTAX_ERROR("error at start before reading any input");
-        }
-        else if (p->tok->done == E_EOF) {
-            if (p->tok->level) {
-                raise_unclosed_parentheses_error(p);
-            } else {
-                RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
-            }
-        }
-        else {
-            if (p->tokens[p->fill-1]->type == INDENT) {
-                RAISE_INDENTATION_ERROR("unexpected indent");
-            }
-            else if (p->tokens[p->fill-1]->type == DEDENT) {
-                RAISE_INDENTATION_ERROR("unexpected unindent");
-            }
-            else {
-                // Use the last token we found on the first pass to avoid reporting
-                // incorrect locations for generic syntax errors just because we reached
-                // further away when trying to find specific syntax errors in the second
-                // pass.
-                RAISE_SYNTAX_ERROR_KNOWN_LOCATION(last_token, "invalid syntax");
-                // _PyPegen_check_tokenizer_errors will override the existing
-                // generic SyntaxError we just raised if errors are found.
-                _PyPegen_check_tokenizer_errors(p);
-            }
-        }
-        return NULL;
+
+        // Set the appropriate SyntaxError based on the parser/tokenizer state at the
+        // failure point.
+        _Pypegen_set_syntax_error(p, last_token);
+       return NULL;
     }
 
     if (p->start_rule == Py_single_input && bad_single_statement(p)) {
@@ -1433,7 +875,7 @@ _PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filena
     struct tok_state *tok = _PyTokenizer_FromFile(fp, enc, ps1, ps2);
     if (tok == NULL) {
         if (PyErr_Occurred()) {
-            raise_tokenizer_init_error(filename_ob);
+            _PyPegen_raise_tokenizer_init_error(filename_ob);
             return NULL;
         }
         return NULL;
@@ -1478,7 +920,7 @@ _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filen
     }
     if (tok == NULL) {
         if (PyErr_Occurred()) {
-            raise_tokenizer_init_error(filename_ob);
+            _PyPegen_raise_tokenizer_init_error(filename_ob);
         }
         return NULL;
     }
@@ -1504,1138 +946,4 @@ _PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filen
 error:
     _PyTokenizer_Free(tok);
     return result;
-}
-
-asdl_stmt_seq*
-_PyPegen_interactive_exit(Parser *p)
-{
-    if (p->errcode) {
-        *(p->errcode) = E_EOF;
-    }
-    return NULL;
-}
-
-/* Creates a single-element asdl_seq* that contains a */
-asdl_seq *
-_PyPegen_singleton_seq(Parser *p, void *a)
-{
-    assert(a != NULL);
-    asdl_seq *seq = (asdl_seq*)_Py_asdl_generic_seq_new(1, p->arena);
-    if (!seq) {
-        return NULL;
-    }
-    asdl_seq_SET_UNTYPED(seq, 0, a);
-    return seq;
-}
-
-/* Creates a copy of seq and prepends a to it */
-asdl_seq *
-_PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq)
-{
-    assert(a != NULL);
-    if (!seq) {
-        return _PyPegen_singleton_seq(p, a);
-    }
-
-    asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
-    if (!new_seq) {
-        return NULL;
-    }
-
-    asdl_seq_SET_UNTYPED(new_seq, 0, a);
-    for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) {
-        asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i - 1));
-    }
-    return new_seq;
-}
-
-/* Creates a copy of seq and appends a to it */
-asdl_seq *
-_PyPegen_seq_append_to_end(Parser *p, asdl_seq *seq, void *a)
-{
-    assert(a != NULL);
-    if (!seq) {
-        return _PyPegen_singleton_seq(p, a);
-    }
-
-    asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
-    if (!new_seq) {
-        return NULL;
-    }
-
-    for (Py_ssize_t i = 0, l = asdl_seq_LEN(new_seq); i + 1 < l; i++) {
-        asdl_seq_SET_UNTYPED(new_seq, i, asdl_seq_GET_UNTYPED(seq, i));
-    }
-    asdl_seq_SET_UNTYPED(new_seq, asdl_seq_LEN(new_seq) - 1, a);
-    return new_seq;
-}
-
-static Py_ssize_t
-_get_flattened_seq_size(asdl_seq *seqs)
-{
-    Py_ssize_t size = 0;
-    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
-        asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
-        size += asdl_seq_LEN(inner_seq);
-    }
-    return size;
-}
-
-/* Flattens an asdl_seq* of asdl_seq*s */
-asdl_seq *
-_PyPegen_seq_flatten(Parser *p, asdl_seq *seqs)
-{
-    Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs);
-    assert(flattened_seq_size > 0);
-
-    asdl_seq *flattened_seq = (asdl_seq*)_Py_asdl_generic_seq_new(flattened_seq_size, p->arena);
-    if (!flattened_seq) {
-        return NULL;
-    }
-
-    int flattened_seq_idx = 0;
-    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
-        asdl_seq *inner_seq = asdl_seq_GET_UNTYPED(seqs, i);
-        for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) {
-            asdl_seq_SET_UNTYPED(flattened_seq, flattened_seq_idx++, asdl_seq_GET_UNTYPED(inner_seq, j));
-        }
-    }
-    assert(flattened_seq_idx == flattened_seq_size);
-
-    return flattened_seq;
-}
-
-void *
-_PyPegen_seq_last_item(asdl_seq *seq)
-{
-    Py_ssize_t len = asdl_seq_LEN(seq);
-    return asdl_seq_GET_UNTYPED(seq, len - 1);
-}
-
-void *
-_PyPegen_seq_first_item(asdl_seq *seq)
-{
-    return asdl_seq_GET_UNTYPED(seq, 0);
-}
-
-
-/* Creates a new name of the form <first_name>.<second_name> */
-expr_ty
-_PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
-{
-    assert(first_name != NULL && second_name != NULL);
-    PyObject *first_identifier = first_name->v.Name.id;
-    PyObject *second_identifier = second_name->v.Name.id;
-
-    if (PyUnicode_READY(first_identifier) == -1) {
-        return NULL;
-    }
-    if (PyUnicode_READY(second_identifier) == -1) {
-        return NULL;
-    }
-    const char *first_str = PyUnicode_AsUTF8(first_identifier);
-    if (!first_str) {
-        return NULL;
-    }
-    const char *second_str = PyUnicode_AsUTF8(second_identifier);
-    if (!second_str) {
-        return NULL;
-    }
-    Py_ssize_t len = strlen(first_str) + strlen(second_str) + 1;  // +1 for the dot
-
-    PyObject *str = PyBytes_FromStringAndSize(NULL, len);
-    if (!str) {
-        return NULL;
-    }
-
-    char *s = PyBytes_AS_STRING(str);
-    if (!s) {
-        return NULL;
-    }
-
-    strcpy(s, first_str);
-    s += strlen(first_str);
-    *s++ = '.';
-    strcpy(s, second_str);
-    s += strlen(second_str);
-    *s = '\0';
-
-    PyObject *uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), PyBytes_GET_SIZE(str), NULL);
-    Py_DECREF(str);
-    if (!uni) {
-        return NULL;
-    }
-    PyUnicode_InternInPlace(&uni);
-    if (_PyArena_AddPyObject(p->arena, uni) < 0) {
-        Py_DECREF(uni);
-        return NULL;
-    }
-
-    return _PyAST_Name(uni, Load, EXTRA_EXPR(first_name, second_name));
-}
-
-/* Counts the total number of dots in seq's tokens */
-int
-_PyPegen_seq_count_dots(asdl_seq *seq)
-{
-    int number_of_dots = 0;
-    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
-        Token *current_expr = asdl_seq_GET_UNTYPED(seq, i);
-        switch (current_expr->type) {
-            case ELLIPSIS:
-                number_of_dots += 3;
-                break;
-            case DOT:
-                number_of_dots += 1;
-                break;
-            default:
-                Py_UNREACHABLE();
-        }
-    }
-
-    return number_of_dots;
-}
-
-/* Creates an alias with '*' as the identifier name */
-alias_ty
-_PyPegen_alias_for_star(Parser *p, int lineno, int col_offset, int end_lineno,
-                        int end_col_offset, PyArena *arena) {
-    PyObject *str = PyUnicode_InternFromString("*");
-    if (!str) {
-        return NULL;
-    }
-    if (_PyArena_AddPyObject(p->arena, str) < 0) {
-        Py_DECREF(str);
-        return NULL;
-    }
-    return _PyAST_alias(str, NULL, lineno, col_offset, end_lineno, end_col_offset, arena);
-}
-
-/* Creates a new asdl_seq* with the identifiers of all the names in seq */
-asdl_identifier_seq *
-_PyPegen_map_names_to_ids(Parser *p, asdl_expr_seq *seq)
-{
-    Py_ssize_t len = asdl_seq_LEN(seq);
-    assert(len > 0);
-
-    asdl_identifier_seq *new_seq = _Py_asdl_identifier_seq_new(len, p->arena);
-    if (!new_seq) {
-        return NULL;
-    }
-    for (Py_ssize_t i = 0; i < len; i++) {
-        expr_ty e = asdl_seq_GET(seq, i);
-        asdl_seq_SET(new_seq, i, e->v.Name.id);
-    }
-    return new_seq;
-}
-
-/* Constructs a CmpopExprPair */
-CmpopExprPair *
-_PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr)
-{
-    assert(expr != NULL);
-    CmpopExprPair *a = _PyArena_Malloc(p->arena, sizeof(CmpopExprPair));
-    if (!a) {
-        return NULL;
-    }
-    a->cmpop = cmpop;
-    a->expr = expr;
-    return a;
-}
-
-asdl_int_seq *
-_PyPegen_get_cmpops(Parser *p, asdl_seq *seq)
-{
-    Py_ssize_t len = asdl_seq_LEN(seq);
-    assert(len > 0);
-
-    asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena);
-    if (!new_seq) {
-        return NULL;
-    }
-    for (Py_ssize_t i = 0; i < len; i++) {
-        CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i);
-        asdl_seq_SET(new_seq, i, pair->cmpop);
-    }
-    return new_seq;
-}
-
-asdl_expr_seq *
-_PyPegen_get_exprs(Parser *p, asdl_seq *seq)
-{
-    Py_ssize_t len = asdl_seq_LEN(seq);
-    assert(len > 0);
-
-    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
-    if (!new_seq) {
-        return NULL;
-    }
-    for (Py_ssize_t i = 0; i < len; i++) {
-        CmpopExprPair *pair = asdl_seq_GET_UNTYPED(seq, i);
-        asdl_seq_SET(new_seq, i, pair->expr);
-    }
-    return new_seq;
-}
-
-/* Creates an asdl_seq* where all the elements have been changed to have ctx as context */
-static asdl_expr_seq *
-_set_seq_context(Parser *p, asdl_expr_seq *seq, expr_context_ty ctx)
-{
-    Py_ssize_t len = asdl_seq_LEN(seq);
-    if (len == 0) {
-        return NULL;
-    }
-
-    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
-    if (!new_seq) {
-        return NULL;
-    }
-    for (Py_ssize_t i = 0; i < len; i++) {
-        expr_ty e = asdl_seq_GET(seq, i);
-        asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx));
-    }
-    return new_seq;
-}
-
-static expr_ty
-_set_name_context(Parser *p, expr_ty e, expr_context_ty ctx)
-{
-    return _PyAST_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e));
-}
-
-static expr_ty
-_set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx)
-{
-    return _PyAST_Tuple(
-            _set_seq_context(p, e->v.Tuple.elts, ctx),
-            ctx,
-            EXTRA_EXPR(e, e));
-}
-
-static expr_ty
-_set_list_context(Parser *p, expr_ty e, expr_context_ty ctx)
-{
-    return _PyAST_List(
-            _set_seq_context(p, e->v.List.elts, ctx),
-            ctx,
-            EXTRA_EXPR(e, e));
-}
-
-static expr_ty
-_set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx)
-{
-    return _PyAST_Subscript(e->v.Subscript.value, e->v.Subscript.slice,
-                            ctx, EXTRA_EXPR(e, e));
-}
-
-static expr_ty
-_set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx)
-{
-    return _PyAST_Attribute(e->v.Attribute.value, e->v.Attribute.attr,
-                            ctx, EXTRA_EXPR(e, e));
-}
-
-static expr_ty
-_set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx)
-{
-    return _PyAST_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx),
-                          ctx, EXTRA_EXPR(e, e));
-}
-
-/* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context */
-expr_ty
-_PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx)
-{
-    assert(expr != NULL);
-
-    expr_ty new = NULL;
-    switch (expr->kind) {
-        case Name_kind:
-            new = _set_name_context(p, expr, ctx);
-            break;
-        case Tuple_kind:
-            new = _set_tuple_context(p, expr, ctx);
-            break;
-        case List_kind:
-            new = _set_list_context(p, expr, ctx);
-            break;
-        case Subscript_kind:
-            new = _set_subscript_context(p, expr, ctx);
-            break;
-        case Attribute_kind:
-            new = _set_attribute_context(p, expr, ctx);
-            break;
-        case Starred_kind:
-            new = _set_starred_context(p, expr, ctx);
-            break;
-        default:
-            new = expr;
-    }
-    return new;
-}
-
-/* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */
-KeyValuePair *
-_PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value)
-{
-    KeyValuePair *a = _PyArena_Malloc(p->arena, sizeof(KeyValuePair));
-    if (!a) {
-        return NULL;
-    }
-    a->key = key;
-    a->value = value;
-    return a;
-}
-
-/* Extracts all keys from an asdl_seq* of KeyValuePair*'s */
-asdl_expr_seq *
-_PyPegen_get_keys(Parser *p, asdl_seq *seq)
-{
-    Py_ssize_t len = asdl_seq_LEN(seq);
-    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
-    if (!new_seq) {
-        return NULL;
-    }
-    for (Py_ssize_t i = 0; i < len; i++) {
-        KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i);
-        asdl_seq_SET(new_seq, i, pair->key);
-    }
-    return new_seq;
-}
-
-/* Extracts all values from an asdl_seq* of KeyValuePair*'s */
-asdl_expr_seq *
-_PyPegen_get_values(Parser *p, asdl_seq *seq)
-{
-    Py_ssize_t len = asdl_seq_LEN(seq);
-    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
-    if (!new_seq) {
-        return NULL;
-    }
-    for (Py_ssize_t i = 0; i < len; i++) {
-        KeyValuePair *pair = asdl_seq_GET_UNTYPED(seq, i);
-        asdl_seq_SET(new_seq, i, pair->value);
-    }
-    return new_seq;
-}
-
-/* Constructs a KeyPatternPair that is used when parsing mapping & class patterns */
-KeyPatternPair *
-_PyPegen_key_pattern_pair(Parser *p, expr_ty key, pattern_ty pattern)
-{
-    KeyPatternPair *a = _PyArena_Malloc(p->arena, sizeof(KeyPatternPair));
-    if (!a) {
-        return NULL;
-    }
-    a->key = key;
-    a->pattern = pattern;
-    return a;
-}
-
-/* Extracts all keys from an asdl_seq* of KeyPatternPair*'s */
-asdl_expr_seq *
-_PyPegen_get_pattern_keys(Parser *p, asdl_seq *seq)
-{
-    Py_ssize_t len = asdl_seq_LEN(seq);
-    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(len, p->arena);
-    if (!new_seq) {
-        return NULL;
-    }
-    for (Py_ssize_t i = 0; i < len; i++) {
-        KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i);
-        asdl_seq_SET(new_seq, i, pair->key);
-    }
-    return new_seq;
-}
-
-/* Extracts all patterns from an asdl_seq* of KeyPatternPair*'s */
-asdl_pattern_seq *
-_PyPegen_get_patterns(Parser *p, asdl_seq *seq)
-{
-    Py_ssize_t len = asdl_seq_LEN(seq);
-    asdl_pattern_seq *new_seq = _Py_asdl_pattern_seq_new(len, p->arena);
-    if (!new_seq) {
-        return NULL;
-    }
-    for (Py_ssize_t i = 0; i < len; i++) {
-        KeyPatternPair *pair = asdl_seq_GET_UNTYPED(seq, i);
-        asdl_seq_SET(new_seq, i, pair->pattern);
-    }
-    return new_seq;
-}
-
-/* Constructs a NameDefaultPair */
-NameDefaultPair *
-_PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value, Token *tc)
-{
-    NameDefaultPair *a = _PyArena_Malloc(p->arena, sizeof(NameDefaultPair));
-    if (!a) {
-        return NULL;
-    }
-    a->arg = _PyPegen_add_type_comment_to_arg(p, arg, tc);
-    a->value = value;
-    return a;
-}
-
-/* Constructs a SlashWithDefault */
-SlashWithDefault *
-_PyPegen_slash_with_default(Parser *p, asdl_arg_seq *plain_names, asdl_seq *names_with_defaults)
-{
-    SlashWithDefault *a = _PyArena_Malloc(p->arena, sizeof(SlashWithDefault));
-    if (!a) {
-        return NULL;
-    }
-    a->plain_names = plain_names;
-    a->names_with_defaults = names_with_defaults;
-    return a;
-}
-
-/* Constructs a StarEtc */
-StarEtc *
-_PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg)
-{
-    StarEtc *a = _PyArena_Malloc(p->arena, sizeof(StarEtc));
-    if (!a) {
-        return NULL;
-    }
-    a->vararg = vararg;
-    a->kwonlyargs = kwonlyargs;
-    a->kwarg = kwarg;
-    return a;
-}
-
-asdl_seq *
-_PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b)
-{
-    Py_ssize_t first_len = asdl_seq_LEN(a);
-    Py_ssize_t second_len = asdl_seq_LEN(b);
-    asdl_seq *new_seq = (asdl_seq*)_Py_asdl_generic_seq_new(first_len + second_len, p->arena);
-    if (!new_seq) {
-        return NULL;
-    }
-
-    int k = 0;
-    for (Py_ssize_t i = 0; i < first_len; i++) {
-        asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(a, i));
-    }
-    for (Py_ssize_t i = 0; i < second_len; i++) {
-        asdl_seq_SET_UNTYPED(new_seq, k++, asdl_seq_GET_UNTYPED(b, i));
-    }
-
-    return new_seq;
-}
-
-static asdl_arg_seq*
-_get_names(Parser *p, asdl_seq *names_with_defaults)
-{
-    Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
-    asdl_arg_seq *seq = _Py_asdl_arg_seq_new(len, p->arena);
-    if (!seq) {
-        return NULL;
-    }
-    for (Py_ssize_t i = 0; i < len; i++) {
-        NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i);
-        asdl_seq_SET(seq, i, pair->arg);
-    }
-    return seq;
-}
-
-static asdl_expr_seq *
-_get_defaults(Parser *p, asdl_seq *names_with_defaults)
-{
-    Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
-    asdl_expr_seq *seq = _Py_asdl_expr_seq_new(len, p->arena);
-    if (!seq) {
-        return NULL;
-    }
-    for (Py_ssize_t i = 0; i < len; i++) {
-        NameDefaultPair *pair = asdl_seq_GET_UNTYPED(names_with_defaults, i);
-        asdl_seq_SET(seq, i, pair->value);
-    }
-    return seq;
-}
-
-static int
-_make_posonlyargs(Parser *p,
-                  asdl_arg_seq *slash_without_default,
-                  SlashWithDefault *slash_with_default,
-                  asdl_arg_seq **posonlyargs) {
-    if (slash_without_default != NULL) {
-        *posonlyargs = slash_without_default;
-    }
-    else if (slash_with_default != NULL) {
-        asdl_arg_seq *slash_with_default_names =
-                _get_names(p, slash_with_default->names_with_defaults);
-        if (!slash_with_default_names) {
-            return -1;
-        }
-        *posonlyargs = (asdl_arg_seq*)_PyPegen_join_sequences(
-                p,
-                (asdl_seq*)slash_with_default->plain_names,
-                (asdl_seq*)slash_with_default_names);
-    }
-    else {
-        *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
-    }
-    return *posonlyargs == NULL ? -1 : 0;
-}
-
-static int
-_make_posargs(Parser *p,
-              asdl_arg_seq *plain_names,
-              asdl_seq *names_with_default,
-              asdl_arg_seq **posargs) {
-    if (plain_names != NULL && names_with_default != NULL) {
-        asdl_arg_seq *names_with_default_names = _get_names(p, names_with_default);
-        if (!names_with_default_names) {
-            return -1;
-        }
-        *posargs = (asdl_arg_seq*)_PyPegen_join_sequences(
-                p,(asdl_seq*)plain_names, (asdl_seq*)names_with_default_names);
-    }
-    else if (plain_names == NULL && names_with_default != NULL) {
-        *posargs = _get_names(p, names_with_default);
-    }
-    else if (plain_names != NULL && names_with_default == NULL) {
-        *posargs = plain_names;
-    }
-    else {
-        *posargs = _Py_asdl_arg_seq_new(0, p->arena);
-    }
-    return *posargs == NULL ? -1 : 0;
-}
-
-static int
-_make_posdefaults(Parser *p,
-                  SlashWithDefault *slash_with_default,
-                  asdl_seq *names_with_default,
-                  asdl_expr_seq **posdefaults) {
-    if (slash_with_default != NULL && names_with_default != NULL) {
-        asdl_expr_seq *slash_with_default_values =
-                _get_defaults(p, slash_with_default->names_with_defaults);
-        if (!slash_with_default_values) {
-            return -1;
-        }
-        asdl_expr_seq *names_with_default_values = _get_defaults(p, names_with_default);
-        if (!names_with_default_values) {
-            return -1;
-        }
-        *posdefaults = (asdl_expr_seq*)_PyPegen_join_sequences(
-                p,
-                (asdl_seq*)slash_with_default_values,
-                (asdl_seq*)names_with_default_values);
-    }
-    else if (slash_with_default == NULL && names_with_default != NULL) {
-        *posdefaults = _get_defaults(p, names_with_default);
-    }
-    else if (slash_with_default != NULL && names_with_default == NULL) {
-        *posdefaults = _get_defaults(p, slash_with_default->names_with_defaults);
-    }
-    else {
-        *posdefaults = _Py_asdl_expr_seq_new(0, p->arena);
-    }
-    return *posdefaults == NULL ? -1 : 0;
-}
-
-static int
-_make_kwargs(Parser *p, StarEtc *star_etc,
-             asdl_arg_seq **kwonlyargs,
-             asdl_expr_seq **kwdefaults) {
-    if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
-        *kwonlyargs = _get_names(p, star_etc->kwonlyargs);
-    }
-    else {
-        *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
-    }
-
-    if (*kwonlyargs == NULL) {
-        return -1;
-    }
-
-    if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
-        *kwdefaults = _get_defaults(p, star_etc->kwonlyargs);
-    }
-    else {
-        *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena);
-    }
-
-    if (*kwdefaults == NULL) {
-        return -1;
-    }
-
-    return 0;
-}
-
-/* Constructs an arguments_ty object out of all the parsed constructs in the parameters rule */
-arguments_ty
-_PyPegen_make_arguments(Parser *p, asdl_arg_seq *slash_without_default,
-                        SlashWithDefault *slash_with_default, asdl_arg_seq *plain_names,
-                        asdl_seq *names_with_default, StarEtc *star_etc)
-{
-    asdl_arg_seq *posonlyargs;
-    if (_make_posonlyargs(p, slash_without_default, slash_with_default, &posonlyargs) == -1) {
-        return NULL;
-    }
-
-    asdl_arg_seq *posargs;
-    if (_make_posargs(p, plain_names, names_with_default, &posargs) == -1) {
-        return NULL;
-    }
-
-    asdl_expr_seq *posdefaults;
-    if (_make_posdefaults(p,slash_with_default, names_with_default, &posdefaults) == -1) {
-        return NULL;
-    }
-
-    arg_ty vararg = NULL;
-    if (star_etc != NULL && star_etc->vararg != NULL) {
-        vararg = star_etc->vararg;
-    }
-
-    asdl_arg_seq *kwonlyargs;
-    asdl_expr_seq *kwdefaults;
-    if (_make_kwargs(p, star_etc, &kwonlyargs, &kwdefaults) == -1) {
-        return NULL;
-    }
-
-    arg_ty kwarg = NULL;
-    if (star_etc != NULL && star_etc->kwarg != NULL) {
-        kwarg = star_etc->kwarg;
-    }
-
-    return _PyAST_arguments(posonlyargs, posargs, vararg, kwonlyargs,
-                            kwdefaults, kwarg, posdefaults, p->arena);
-}
-
-
-/* Constructs an empty arguments_ty object, that gets used when a function accepts no
- * arguments. */
-arguments_ty
-_PyPegen_empty_arguments(Parser *p)
-{
-    asdl_arg_seq *posonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
-    if (!posonlyargs) {
-        return NULL;
-    }
-    asdl_arg_seq *posargs = _Py_asdl_arg_seq_new(0, p->arena);
-    if (!posargs) {
-        return NULL;
-    }
-    asdl_expr_seq *posdefaults = _Py_asdl_expr_seq_new(0, p->arena);
-    if (!posdefaults) {
-        return NULL;
-    }
-    asdl_arg_seq *kwonlyargs = _Py_asdl_arg_seq_new(0, p->arena);
-    if (!kwonlyargs) {
-        return NULL;
-    }
-    asdl_expr_seq *kwdefaults = _Py_asdl_expr_seq_new(0, p->arena);
-    if (!kwdefaults) {
-        return NULL;
-    }
-
-    return _PyAST_arguments(posonlyargs, posargs, NULL, kwonlyargs,
-                            kwdefaults, NULL, posdefaults, p->arena);
-}
-
-/* Encapsulates the value of an operator_ty into an AugOperator struct */
-AugOperator *
-_PyPegen_augoperator(Parser *p, operator_ty kind)
-{
-    AugOperator *a = _PyArena_Malloc(p->arena, sizeof(AugOperator));
-    if (!a) {
-        return NULL;
-    }
-    a->kind = kind;
-    return a;
-}
-
-/* Construct a FunctionDef equivalent to function_def, but with decorators */
-stmt_ty
-_PyPegen_function_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty function_def)
-{
-    assert(function_def != NULL);
-    if (function_def->kind == AsyncFunctionDef_kind) {
-        return _PyAST_AsyncFunctionDef(
-            function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
-            function_def->v.FunctionDef.body, decorators, function_def->v.FunctionDef.returns,
-            function_def->v.FunctionDef.type_comment, function_def->lineno,
-            function_def->col_offset, function_def->end_lineno, function_def->end_col_offset,
-            p->arena);
-    }
-
-    return _PyAST_FunctionDef(
-        function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
-        function_def->v.FunctionDef.body, decorators,
-        function_def->v.FunctionDef.returns,
-        function_def->v.FunctionDef.type_comment, function_def->lineno,
-        function_def->col_offset, function_def->end_lineno,
-        function_def->end_col_offset, p->arena);
-}
-
-/* Construct a ClassDef equivalent to class_def, but with decorators */
-stmt_ty
-_PyPegen_class_def_decorators(Parser *p, asdl_expr_seq *decorators, stmt_ty class_def)
-{
-    assert(class_def != NULL);
-    return _PyAST_ClassDef(
-        class_def->v.ClassDef.name, class_def->v.ClassDef.bases,
-        class_def->v.ClassDef.keywords, class_def->v.ClassDef.body, decorators,
-        class_def->lineno, class_def->col_offset, class_def->end_lineno,
-        class_def->end_col_offset, p->arena);
-}
-
-/* Construct a KeywordOrStarred */
-KeywordOrStarred *
-_PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword)
-{
-    KeywordOrStarred *a = _PyArena_Malloc(p->arena, sizeof(KeywordOrStarred));
-    if (!a) {
-        return NULL;
-    }
-    a->element = element;
-    a->is_keyword = is_keyword;
-    return a;
-}
-
-/* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */
-static int
-_seq_number_of_starred_exprs(asdl_seq *seq)
-{
-    int n = 0;
-    for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
-        KeywordOrStarred *k = asdl_seq_GET_UNTYPED(seq, i);
-        if (!k->is_keyword) {
-            n++;
-        }
-    }
-    return n;
-}
-
-/* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s */
-asdl_expr_seq *
-_PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs)
-{
-    int new_len = _seq_number_of_starred_exprs(kwargs);
-    if (new_len == 0) {
-        return NULL;
-    }
-    asdl_expr_seq *new_seq = _Py_asdl_expr_seq_new(new_len, p->arena);
-    if (!new_seq) {
-        return NULL;
-    }
-
-    int idx = 0;
-    for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) {
-        KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i);
-        if (!k->is_keyword) {
-            asdl_seq_SET(new_seq, idx++, k->element);
-        }
-    }
-    return new_seq;
-}
-
-/* Return a new asdl_seq* with only the keywords in kwargs */
-asdl_keyword_seq*
-_PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
-{
-    Py_ssize_t len = asdl_seq_LEN(kwargs);
-    Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs);
-    if (new_len == 0) {
-        return NULL;
-    }
-    asdl_keyword_seq *new_seq = _Py_asdl_keyword_seq_new(new_len, p->arena);
-    if (!new_seq) {
-        return NULL;
-    }
-
-    int idx = 0;
-    for (Py_ssize_t i = 0; i < len; i++) {
-        KeywordOrStarred *k = asdl_seq_GET_UNTYPED(kwargs, i);
-        if (k->is_keyword) {
-            asdl_seq_SET(new_seq, idx++, k->element);
-        }
-    }
-    return new_seq;
-}
-
-expr_ty
-_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings)
-{
-    Py_ssize_t len = asdl_seq_LEN(strings);
-    assert(len > 0);
-
-    Token *first = asdl_seq_GET_UNTYPED(strings, 0);
-    Token *last = asdl_seq_GET_UNTYPED(strings, len - 1);
-
-    int bytesmode = 0;
-    PyObject *bytes_str = NULL;
-
-    FstringParser state;
-    _PyPegen_FstringParser_Init(&state);
-
-    for (Py_ssize_t i = 0; i < len; i++) {
-        Token *t = asdl_seq_GET_UNTYPED(strings, i);
-
-        int this_bytesmode;
-        int this_rawmode;
-        PyObject *s;
-        const char *fstr;
-        Py_ssize_t fstrlen = -1;
-
-        if (_PyPegen_parsestr(p, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen, t) != 0) {
-            goto error;
-        }
-
-        /* Check that we are not mixing bytes with unicode. */
-        if (i != 0 && bytesmode != this_bytesmode) {
-            RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
-            Py_XDECREF(s);
-            goto error;
-        }
-        bytesmode = this_bytesmode;
-
-        if (fstr != NULL) {
-            assert(s == NULL && !bytesmode);
-
-            int result = _PyPegen_FstringParser_ConcatFstring(p, &state, &fstr, fstr + fstrlen,
-                                                     this_rawmode, 0, first, t, last);
-            if (result < 0) {
-                goto error;
-            }
-        }
-        else {
-            /* String or byte string. */
-            assert(s != NULL && fstr == NULL);
-            assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));
-
-            if (bytesmode) {
-                if (i == 0) {
-                    bytes_str = s;
-                }
-                else {
-                    PyBytes_ConcatAndDel(&bytes_str, s);
-                    if (!bytes_str) {
-                        goto error;
-                    }
-                }
-            }
-            else {
-                /* This is a regular string. Concatenate it. */
-                if (_PyPegen_FstringParser_ConcatAndDel(&state, s) < 0) {
-                    goto error;
-                }
-            }
-        }
-    }
-
-    if (bytesmode) {
-        if (_PyArena_AddPyObject(p->arena, bytes_str) < 0) {
-            goto error;
-        }
-        return _PyAST_Constant(bytes_str, NULL, first->lineno,
-                               first->col_offset, last->end_lineno,
-                               last->end_col_offset, p->arena);
-    }
-
-    return _PyPegen_FstringParser_Finish(p, &state, first, last);
-
-error:
-    Py_XDECREF(bytes_str);
-    _PyPegen_FstringParser_Dealloc(&state);
-    if (PyErr_Occurred()) {
-        raise_decode_error(p);
-    }
-    return NULL;
-}
-
-expr_ty
-_PyPegen_ensure_imaginary(Parser *p, expr_ty exp)
-{
-    if (exp->kind != Constant_kind || !PyComplex_CheckExact(exp->v.Constant.value)) {
-        RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "imaginary number required in complex literal");
-        return NULL;
-    }
-    return exp;
-}
-
-expr_ty
-_PyPegen_ensure_real(Parser *p, expr_ty exp)
-{
-    if (exp->kind != Constant_kind || PyComplex_CheckExact(exp->v.Constant.value)) {
-        RAISE_SYNTAX_ERROR_KNOWN_LOCATION(exp, "real number required in complex literal");
-        return NULL;
-    }
-    return exp;
-}
-
-mod_ty
-_PyPegen_make_module(Parser *p, asdl_stmt_seq *a) {
-    asdl_type_ignore_seq *type_ignores = NULL;
-    Py_ssize_t num = p->type_ignore_comments.num_items;
-    if (num > 0) {
-        // Turn the raw (comment, lineno) pairs into TypeIgnore objects in the arena
-        type_ignores = _Py_asdl_type_ignore_seq_new(num, p->arena);
-        if (type_ignores == NULL) {
-            return NULL;
-        }
-        for (int i = 0; i < num; i++) {
-            PyObject *tag = _PyPegen_new_type_comment(p, p->type_ignore_comments.items[i].comment);
-            if (tag == NULL) {
-                return NULL;
-            }
-            type_ignore_ty ti = _PyAST_TypeIgnore(p->type_ignore_comments.items[i].lineno,
-                                                  tag, p->arena);
-            if (ti == NULL) {
-                return NULL;
-            }
-            asdl_seq_SET(type_ignores, i, ti);
-        }
-    }
-    return _PyAST_Module(a, type_ignores, p->arena);
-}
-
-// Error reporting helpers
-
-expr_ty
-_PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type)
-{
-    if (e == NULL) {
-        return NULL;
-    }
-
-#define VISIT_CONTAINER(CONTAINER, TYPE) do { \
-        Py_ssize_t len = asdl_seq_LEN((CONTAINER)->v.TYPE.elts);\
-        for (Py_ssize_t i = 0; i < len; i++) {\
-            expr_ty other = asdl_seq_GET((CONTAINER)->v.TYPE.elts, i);\
-            expr_ty child = _PyPegen_get_invalid_target(other, targets_type);\
-            if (child != NULL) {\
-                return child;\
-            }\
-        }\
-    } while (0)
-
-    // We only need to visit List and Tuple nodes recursively as those
-    // are the only ones that can contain valid names in targets when
-    // they are parsed as expressions. Any other kind of expression
-    // that is a container (like Sets or Dicts) is directly invalid and
-    // we don't need to visit it recursively.
-
-    switch (e->kind) {
-        case List_kind:
-            VISIT_CONTAINER(e, List);
-            return NULL;
-        case Tuple_kind:
-            VISIT_CONTAINER(e, Tuple);
-            return NULL;
-        case Starred_kind:
-            if (targets_type == DEL_TARGETS) {
-                return e;
-            }
-            return _PyPegen_get_invalid_target(e->v.Starred.value, targets_type);
-        case Compare_kind:
-            // This is needed, because the `a in b` in `for a in b` gets parsed
-            // as a comparison, and so we need to search the left side of the comparison
-            // for invalid targets.
-            if (targets_type == FOR_TARGETS) {
-                cmpop_ty cmpop = (cmpop_ty) asdl_seq_GET(e->v.Compare.ops, 0);
-                if (cmpop == In) {
-                    return _PyPegen_get_invalid_target(e->v.Compare.left, targets_type);
-                }
-                return NULL;
-            }
-            return e;
-        case Name_kind:
-        case Subscript_kind:
-        case Attribute_kind:
-            return NULL;
-        default:
-            return e;
-    }
-}
-
-void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) {
-    int kwarg_unpacking = 0;
-    for (Py_ssize_t i = 0, l = asdl_seq_LEN(e->v.Call.keywords); i < l; i++) {
-        keyword_ty keyword = asdl_seq_GET(e->v.Call.keywords, i);
-        if (!keyword->arg) {
-            kwarg_unpacking = 1;
-        }
-    }
-
-    const char *msg = NULL;
-    if (kwarg_unpacking) {
-        msg = "positional argument follows keyword argument unpacking";
-    } else {
-        msg = "positional argument follows keyword argument";
-    }
-
-    return RAISE_SYNTAX_ERROR(msg);
-}
-
-
-static inline expr_ty
-_PyPegen_get_last_comprehension_item(comprehension_ty comprehension) {
-    if (comprehension->ifs == NULL || asdl_seq_LEN(comprehension->ifs) == 0) {
-        return comprehension->iter;
-    }
-    return PyPegen_last_item(comprehension->ifs, expr_ty);
-}
-
-void *
-_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions)
-{
-    /* The rule that calls this function is 'args for_if_clauses'.
-       For the input f(L, x for x in y), L and x are in args and
-       the for is parsed as a for_if_clause. We have to check if
-       len <= 1, so that input like dict((a, b) for a, b in x)
-       gets successfully parsed and then we pass the last
-       argument (x in the above example) as the location of the
-       error */
-    Py_ssize_t len = asdl_seq_LEN(args->v.Call.args);
-    if (len <= 1) {
-        return NULL;
-    }
-
-    comprehension_ty last_comprehension = PyPegen_last_item(comprehensions, comprehension_ty);
-
-    return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
-        (expr_ty) asdl_seq_GET(args->v.Call.args, len - 1),
-        _PyPegen_get_last_comprehension_item(last_comprehension),
-        "Generator expression must be parenthesized"
-    );
-}
-
-
-expr_ty _PyPegen_collect_call_seqs(Parser *p, asdl_expr_seq *a, asdl_seq *b,
-                     int lineno, int col_offset, int end_lineno,
-                     int end_col_offset, PyArena *arena) {
-    Py_ssize_t args_len = asdl_seq_LEN(a);
-    Py_ssize_t total_len = args_len;
-
-    if (b == NULL) {
-        return _PyAST_Call(_PyPegen_dummy_name(p), a, NULL, lineno, col_offset,
-                        end_lineno, end_col_offset, arena);
-
-    }
-
-    asdl_expr_seq *starreds = _PyPegen_seq_extract_starred_exprs(p, b);
-    asdl_keyword_seq *keywords = _PyPegen_seq_delete_starred_exprs(p, b);
-
-    if (starreds) {
-        total_len += asdl_seq_LEN(starreds);
-    }
-
-    asdl_expr_seq *args = _Py_asdl_expr_seq_new(total_len, arena);
-
-    Py_ssize_t i = 0;
-    for (i = 0; i < args_len; i++) {
-        asdl_seq_SET(args, i, asdl_seq_GET(a, i));
-    }
-    for (; i < total_len; i++) {
-        asdl_seq_SET(args, i, asdl_seq_GET(starreds, i - args_len));
-    }
-
-    return _PyAST_Call(_PyPegen_dummy_name(p), args, keywords, lineno,
-                       col_offset, end_lineno, end_col_offset, arena);
-}
+}
\ No newline at end of file
diff --git a/Parser/pegen.h b/Parser/pegen.h
index 8721d7e891005..e5e712ab26b87 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -23,6 +23,8 @@
 #define PyPARSE_TYPE_COMMENTS 0x0040
 #define PyPARSE_ASYNC_HACKS   0x0080
 
+#define CURRENT_POS (-5)
+
 typedef struct _memo {
     int type;
     void *node;
@@ -114,6 +116,7 @@ typedef struct {
     int is_keyword;
 } KeywordOrStarred;
 
+// Internal parser functions
 #if defined(Py_DEBUG)
 void _PyPegen_clear_memo_statistics(void);
 PyObject *_PyPegen_get_memo_statistics(void);
@@ -123,7 +126,6 @@ int _PyPegen_insert_memo(Parser *p, int mark, int type, void *node);
 int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
 int _PyPegen_is_memoized(Parser *p, int type, void *pres);
 
-
 int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
 int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
 int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*);
@@ -139,23 +141,24 @@ int _PyPegen_fill_token(Parser *p);
 expr_ty _PyPegen_name_token(Parser *p);
 expr_ty _PyPegen_number_token(Parser *p);
 void *_PyPegen_string_token(Parser *p);
-const char *_PyPegen_get_expr_name(expr_ty);
 Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
+
+// Error handling functions and APIs
+typedef enum {
+    STAR_TARGETS,
+    DEL_TARGETS,
+    FOR_TARGETS
+} TARGETS_TYPE;
+
+int _Pypegen_raise_decode_error(Parser *p);
+void _PyPegen_raise_tokenizer_init_error(PyObject *filename);
+int _Pypegen_tokenizer_error(Parser *p);
 void *_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...);
 void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
                                           Py_ssize_t lineno, Py_ssize_t col_offset,
                                           Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
                                           const char *errmsg, va_list va);
-void *_PyPegen_dummy_name(Parser *p, ...);
-
-void * _PyPegen_seq_last_item(asdl_seq *seq);
-#define PyPegen_last_item(seq, type) ((type)_PyPegen_seq_last_item((asdl_seq*)seq))
-
-void * _PyPegen_seq_first_item(asdl_seq *seq);
-#define PyPegen_first_item(seq, type) ((type)_PyPegen_seq_first_item((asdl_seq*)seq))
-
-#define CURRENT_POS (-5)
-
+void _Pypegen_set_syntax_error(Parser* p, Token* last_token);
 Py_LOCAL_INLINE(void *)
 RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype,
                            Py_ssize_t lineno, Py_ssize_t col_offset,
@@ -170,10 +173,6 @@ RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype,
     va_end(va);
     return NULL;
 }
-
-#define UNUSED(expr) do { (void)(expr); } while (0)
-#define EXTRA_EXPR(head, tail) head->lineno, (head)->col_offset, (tail)->end_lineno, (tail)->end_col_offset, p->arena
-#define EXTRA _start_lineno, _start_col_offset, _end_lineno, _end_col_offset, p->arena
 #define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, msg, ##__VA_ARGS__)
 #define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, msg, ##__VA_ARGS__)
 #define RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, msg, ...) \
@@ -182,6 +181,7 @@ RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype,
     RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, (a)->end_lineno, (a)->end_col_offset, msg, ##__VA_ARGS__)
 #define RAISE_SYNTAX_ERROR_STARTING_FROM(a, msg, ...) \
     RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, CURRENT_POS, CURRENT_POS, msg, ##__VA_ARGS__)
+#define RAISE_SYNTAX_ERROR_INVALID_TARGET(type, e) _RAISE_SYNTAX_ERROR_INVALID_TARGET(p, type, e)
 
 Py_LOCAL_INLINE(void *)
 CHECK_CALL(Parser *p, void *result)
@@ -207,6 +207,39 @@ CHECK_CALL_NULL_ALLOWED(Parser *p, void *result)
 #define CHECK(type, result) ((type) CHECK_CALL(p, result))
 #define CHECK_NULL_ALLOWED(type, result) ((type) CHECK_CALL_NULL_ALLOWED(p, result))
 
+expr_ty _PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type);
+const char *_PyPegen_get_expr_name(expr_ty);
+/* Raise the SyntaxError for an invalid assignment/deletion target.
+ *
+ * `e` is searched with _PyPegen_get_invalid_target() for the offending
+ * sub-expression. If one is found, the error is reported at that node with
+ * "cannot assign to %s" (STAR_TARGETS, FOR_TARGETS) or "cannot delete %s"
+ * (any other target type), where %s is the name returned by
+ * _PyPegen_get_expr_name(). If none is found a plain "invalid syntax"
+ * error is raised instead. Returns whatever the raising macro returns.
+ */
+Py_LOCAL_INLINE(void *)
+_RAISE_SYNTAX_ERROR_INVALID_TARGET(Parser *p, TARGETS_TYPE type, void *e)
+{
+    expr_ty invalid_target = CHECK_NULL_ALLOWED(expr_ty, _PyPegen_get_invalid_target(e, type));
+    if (invalid_target != NULL) {
+        const char *msg;
+        if (type == STAR_TARGETS || type == FOR_TARGETS) {
+            msg = "cannot assign to %s";
+        }
+        else {
+            msg = "cannot delete %s";
+        }
+        return RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
+            invalid_target,
+            msg,
+            _PyPegen_get_expr_name(invalid_target)
+        );
+    }
+    return RAISE_SYNTAX_ERROR("invalid syntax");
+}
+
+// Action utility functions
+
+void *_PyPegen_dummy_name(Parser *p, ...);
+void * _PyPegen_seq_last_item(asdl_seq *seq);
+#define PyPegen_last_item(seq, type) ((type)_PyPegen_seq_last_item((asdl_seq*)seq))
+void * _PyPegen_seq_first_item(asdl_seq *seq);
+#define PyPegen_first_item(seq, type) ((type)_PyPegen_seq_first_item((asdl_seq*)seq))
+#define UNUSED(expr) do { (void)(expr); } while (0)
+#define EXTRA_EXPR(head, tail) head->lineno, (head)->col_offset, (tail)->end_lineno, (tail)->end_col_offset, p->arena
+#define EXTRA _start_lineno, _start_col_offset, _end_lineno, _end_col_offset, p->arena
 PyObject *_PyPegen_new_type_comment(Parser *, const char *);
 
 Py_LOCAL_INLINE(PyObject *)
@@ -248,13 +281,6 @@ INVALID_VERSION_CHECK(Parser *p, int version, char *msg, void *node)
 
 arg_ty _PyPegen_add_type_comment_to_arg(Parser *, arg_ty, Token *);
 PyObject *_PyPegen_new_identifier(Parser *, const char *);
-Parser *_PyPegen_Parser_New(struct tok_state *, int, int, int, int *, PyArena *);
-void _PyPegen_Parser_Free(Parser *);
-mod_ty _PyPegen_run_parser_from_file_pointer(FILE *, int, PyObject *, const char *,
-                                    const char *, const char *, PyCompilerFlags *, int *, PyArena *);
-void *_PyPegen_run_parser(Parser *);
-mod_ty _PyPegen_run_parser_from_string(const char *, int, PyObject *, PyCompilerFlags *, PyArena *);
-asdl_stmt_seq *_PyPegen_interactive_exit(Parser *);
 asdl_seq *_PyPegen_singleton_seq(Parser *, void *);
 asdl_seq *_PyPegen_seq_insert_in_front(Parser *, void *, asdl_seq *);
 asdl_seq *_PyPegen_seq_append_to_end(Parser *, asdl_seq *, void *);
@@ -295,40 +321,18 @@ asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
 int _PyPegen_check_barry_as_flufl(Parser *, Token *);
 int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
 mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
-
-// Error reporting helpers
-typedef enum {
-    STAR_TARGETS,
-    DEL_TARGETS,
-    FOR_TARGETS
-} TARGETS_TYPE;
-expr_ty _PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type);
-#define RAISE_SYNTAX_ERROR_INVALID_TARGET(type, e) _RAISE_SYNTAX_ERROR_INVALID_TARGET(p, type, e)
-
-Py_LOCAL_INLINE(void *)
-_RAISE_SYNTAX_ERROR_INVALID_TARGET(Parser *p, TARGETS_TYPE type, void *e)
-{
-    expr_ty invalid_target = CHECK_NULL_ALLOWED(expr_ty, _PyPegen_get_invalid_target(e, type));
-    if (invalid_target != NULL) {
-        const char *msg;
-        if (type == STAR_TARGETS || type == FOR_TARGETS) {
-            msg = "cannot assign to %s";
-        }
-        else {
-            msg = "cannot delete %s";
-        }
-        return RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
-            invalid_target,
-            msg,
-            _PyPegen_get_expr_name(invalid_target)
-        );
-    }
-    return RAISE_SYNTAX_ERROR("invalid syntax");
-}
-
 void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
 void *_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions);
 
+// Parser API
+
+Parser *_PyPegen_Parser_New(struct tok_state *, int, int, int, int *, PyArena *);
+void _PyPegen_Parser_Free(Parser *);
+mod_ty _PyPegen_run_parser_from_file_pointer(FILE *, int, PyObject *, const char *,
+                                    const char *, const char *, PyCompilerFlags *, int *, PyArena *);
+void *_PyPegen_run_parser(Parser *);
+mod_ty _PyPegen_run_parser_from_string(const char *, int, PyObject *, PyCompilerFlags *, PyArena *);
+asdl_stmt_seq *_PyPegen_interactive_exit(Parser *);
 
 // Generated function in parse.c - function definition in python.gram
 void *_PyPegen_parse(Parser *);
diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c
new file mode 100644
index 0000000000000..6eeab0a97226f
--- /dev/null
+++ b/Parser/pegen_errors.c
@@ -0,0 +1,425 @@
+#include <Python.h>
+#include <errcode.h>
+
+#include "tokenizer.h"
+#include "pegen.h"
+
+// TOKENIZER ERRORS
+
+/* Re-raise a tokenizer initialization failure as a SyntaxError.
+ *
+ * If the pending exception is a LookupError, SyntaxError, ValueError or
+ * UnicodeDecodeError, it is replaced by a SyntaxError whose message is the
+ * str() of the original exception value and whose location tuple is
+ * (filename, 0, -1, None). Any other pending exception is left untouched.
+ */
+void
+_PyPegen_raise_tokenizer_init_error(PyObject *filename)
+{
+    if (!(PyErr_ExceptionMatches(PyExc_LookupError)
+          || PyErr_ExceptionMatches(PyExc_SyntaxError)
+          || PyErr_ExceptionMatches(PyExc_ValueError)
+          || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
+        return;
+    }
+    PyObject *errstr = NULL;
+    PyObject *tuple = NULL;
+    PyObject *type;
+    PyObject *value;
+    PyObject *tback;
+    PyErr_Fetch(&type, &value, &tback);
+    errstr = PyObject_Str(value);
+    if (!errstr) {
+        goto error;
+    }
+
+    PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
+    if (!tmp) {
+        goto error;
+    }
+
+    tuple = PyTuple_Pack(2, errstr, tmp);
+    Py_DECREF(tmp);
+    // Check the tuple that was just built, not the fetched exception value:
+    // a failed PyTuple_Pack() must not reach PyErr_SetObject() with NULL.
+    if (!tuple) {
+        goto error;
+    }
+    PyErr_SetObject(PyExc_SyntaxError, tuple);
+
+error:
+    // The fetched exception is owned by us; release it on every path.
+    Py_XDECREF(type);
+    Py_XDECREF(value);
+    Py_XDECREF(tback);
+    Py_XDECREF(errstr);
+    Py_XDECREF(tuple);
+}
+
+/* Raise "'<bracket>' was never closed", located at the innermost bracket
+ * still open in the tokenizer's parenthesis stacks. */
+static inline void
+raise_unclosed_parentheses_error(Parser *p) {
+    // Index of the most recently opened, still unclosed bracket.
+    const int innermost = p->tok->level - 1;
+    const int open_lineno = p->tok->parenlinenostack[innermost];
+    const int open_col = p->tok->parencolstack[innermost];
+
+    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
+                               open_lineno, open_col, open_lineno, -1,
+                               "'%c' was never closed",
+                               p->tok->parenstack[innermost]);
+}
+
+/* Turn the tokenizer status in p->tok->done into a Python exception.
+ *
+ * Does nothing if an exception is already pending. Always returns -1.
+ */
+int
+_Pypegen_tokenizer_error(Parser *p)
+{
+    // An exception that is already pending takes precedence.
+    if (PyErr_Occurred()) {
+        return -1;
+    }
+
+    PyObject *exc_class = PyExc_SyntaxError;
+    const char *text = NULL;
+    Py_ssize_t column = 0;
+
+    switch (p->tok->done) {
+        /* Status codes that raise on their own and return right away. */
+        case E_EOF:
+            if (p->tok->level == 0) {
+                RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+            }
+            else {
+                raise_unclosed_parentheses_error(p);
+            }
+            return -1;
+        case E_DEDENT:
+            RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
+            return -1;
+        case E_INTR:
+            if (!PyErr_Occurred()) {
+                PyErr_SetNone(PyExc_KeyboardInterrupt);
+            }
+            return -1;
+        case E_NOMEM:
+            PyErr_NoMemory();
+            return -1;
+
+        /* Status codes that only pick the type, message and column; the
+           exception itself is raised once, after the switch. */
+        case E_TOKEN:
+            text = "invalid token";
+            break;
+        case E_TABSPACE:
+            exc_class = PyExc_TabError;
+            text = "inconsistent use of tabs and spaces in indentation";
+            break;
+        case E_TOODEEP:
+            exc_class = PyExc_IndentationError;
+            text = "too many levels of indentation";
+            break;
+        case E_LINECONT:
+            column = p->tok->cur - p->tok->buf - 1;
+            text = "unexpected character after line continuation character";
+            break;
+        default:
+            text = "unknown parsing error";
+            break;
+    }
+
+    // A negative column is reported as column 0.
+    RAISE_ERROR_KNOWN_LOCATION(p, exc_class, p->tok->lineno,
+                               column >= 0 ? column : 0,
+                               p->tok->lineno, -1, text);
+    return -1;
+}
+
+/* Convert a pending UnicodeError/ValueError into a SyntaxError.
+ *
+ * The new message is "(unicode error) <text>" or "(value error) <text>",
+ * where <text> is the str() of the original exception value, or
+ * "unknown error" if that str() fails. Any other pending exception is
+ * left in place. Always returns -1.
+ */
+int
+_Pypegen_raise_decode_error(Parser *p)
+{
+    assert(PyErr_Occurred());
+
+    const char *label;
+    if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
+        label = "unicode error";
+    }
+    else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
+        label = "value error";
+    }
+    else {
+        // Not a decoding problem: keep the pending exception as it is.
+        return -1;
+    }
+
+    PyObject *exc_type;
+    PyObject *exc_value;
+    PyObject *exc_tb;
+    PyErr_Fetch(&exc_type, &exc_value, &exc_tb);
+
+    PyObject *text = PyObject_Str(exc_value);
+    if (text == NULL) {
+        // str() itself failed; discard that failure and use a fixed message.
+        PyErr_Clear();
+        RAISE_SYNTAX_ERROR("(%s) unknown error", label);
+    }
+    else {
+        RAISE_SYNTAX_ERROR("(%s) %U", label, text);
+        Py_DECREF(text);
+    }
+
+    Py_XDECREF(exc_type);
+    Py_XDECREF(exc_value);
+    Py_XDECREF(exc_tb);
+    return -1;
+}
+
+/* Scan the rest of the input for an unclosed bracket that should be
+ * reported instead of the exception that is currently pending.
+ *
+ * Returns 0 without doing anything for interactive input. Otherwise the
+ * pending exception is set aside and tokens are consumed until an
+ * ERRORTOKEN or ENDMARKER is produced. Returns -1 if the unclosed-bracket
+ * error was raised here, 0 otherwise.
+ */
+static int
+_PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) {
+    // Tokenize the whole input to see if there are any tokenization
+    // errors such as mismatching parentheses. These will get priority
+    // over generic syntax errors only if the line number of the error is
+    // before the one that we had for the generic error.
+
+    // We don't want to tokenize to the end for interactive input
+    if (p->tok->prompt != NULL) {
+        return 0;
+    }
+
+    // Set the pending exception aside; it is restored at `exit` unless a
+    // new exception has been raised in the meantime.
+    PyObject *type, *value, *traceback;
+    PyErr_Fetch(&type, &value, &traceback);
+
+    Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
+    Py_ssize_t current_err_line = current_token->lineno;
+
+    int ret = 0;
+
+    for (;;) {
+        const char *start;
+        const char *end;
+        switch (_PyTokenizer_Get(p->tok, &start, &end)) {
+            case ERRORTOKEN:
+                if (p->tok->level != 0) {
+                    // Only prefer the bracket error when the bracket was
+                    // opened on a line before the already known error.
+                    int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
+                    if (current_err_line > error_lineno) {
+                        raise_unclosed_parentheses_error(p);
+                        ret = -1;
+                        goto exit;
+                    }
+                }
+                break;
+            case ENDMARKER:
+                break;
+            default:
+                // Any other token: keep scanning.
+                continue;
+        }
+        // Reached only from the ERRORTOKEN/ENDMARKER cases: stop scanning.
+        break;
+    }
+
+
+exit:
+    if (PyErr_Occurred()) {
+        // A newer exception is pending: drop the one that was set aside.
+        Py_XDECREF(value);
+        Py_XDECREF(type);
+        Py_XDECREF(traceback);
+    } else {
+        PyErr_Restore(type, value, traceback);
+    }
+    return ret;
+}
+
+// PARSER ERRORS
+
+/* Raise `errtype` with a printf-style message, located at the token stored
+ * in p->known_err_token or, if that is NULL, at the last token read.
+ *
+ * If no token has been read yet (p->fill == 0) the location is line 0,
+ * column 0. Always returns NULL.
+ */
+void *
+_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
+{
+    // Location reported when no token has been read yet.
+    Py_ssize_t lineno = 0;
+    Py_ssize_t col_offset = 0;
+    Py_ssize_t end_lineno = 0;
+    Py_ssize_t end_col_offset = -1;
+
+    if (p->fill != 0) {
+        Token *err_token = p->known_err_token;
+        if (err_token == NULL) {
+            err_token = p->tokens[p->fill - 1];
+        }
+        lineno = err_token->lineno;
+        end_lineno = err_token->end_lineno;
+
+        if (err_token->col_offset != -1) {
+            col_offset = err_token->col_offset + 1;
+        }
+        else if (p->tok->cur != p->tok->buf) {
+            // The token has no column: derive one from the tokenizer position.
+            const char *start = p->tok->buf ? p->tok->line_start : p->tok->buf;
+            col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int);
+        }
+
+        if (err_token->end_col_offset != -1) {
+            end_col_offset = err_token->end_col_offset + 1;
+        }
+    }
+
+    va_list va;
+    va_start(va, errmsg);
+    _PyPegen_raise_error_known_location(p, errtype, lineno, col_offset,
+                                        end_lineno, end_col_offset, errmsg, va);
+    va_end(va);
+
+    return NULL;
+}
+
+/* Return source line number `lineno` (1-based) as a new str object, decoded
+ * as UTF-8 with the "replace" error handler, or NULL if decoding fails.
+ *
+ * If the buffer holds fewer than `lineno` lines, the last available line is
+ * returned instead of reading past the end of the buffer.
+ */
+static PyObject *
+get_error_line_from_tokenizer_buffers(Parser *p, Py_ssize_t lineno)
+{
+    /* If the file descriptor is interactive, the source lines of the current
+     * (multi-line) statement are stored in p->tok->interactive_src_start.
+     * If not, we're parsing from a string, which means that the whole source
+     * is stored in p->tok->str. */
+    assert(p->tok->fp == NULL || p->tok->fp == stdin);
+
+    char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;
+    assert(cur_line != NULL);
+
+    for (Py_ssize_t i = 0; i < lineno - 1; i++) {
+        char *newline = strchr(cur_line, '\n');
+        if (newline == NULL) {
+            // Fewer lines than expected: stop here rather than computing
+            // NULL + 1 and scanning from an invalid address.
+            break;
+        }
+        cur_line = newline + 1;
+    }
+
+    char *next_newline;
+    if ((next_newline = strchr(cur_line, '\n')) == NULL) { // This is the last line
+        next_newline = cur_line + strlen(cur_line);
+    }
+    return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace");
+}
+
+/* Set an exception of type `errtype` that carries full location details.
+ *
+ * The exception value is the pair
+ *     (message, (filename, lineno, col, error_line, end_lineno, end_col))
+ * where the message is `errmsg` formatted with `va`. Passing CURRENT_POS
+ * as `end_lineno` / `end_col_offset` means "the tokenizer's current
+ * position". While parsing an f-string expression the message is prefixed
+ * with "f-string: " and both columns are shifted by
+ * p->starting_col_offset. When the tokenizer has an encoding, byte offsets
+ * are converted to character offsets before being reported.
+ *
+ * Sets p->error_indicator and returns NULL on every path.
+ */
+void *
+_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
+                                    Py_ssize_t lineno, Py_ssize_t col_offset,
+                                    Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
+                                    const char *errmsg, va_list va)
+{
+    PyObject *value = NULL;
+    PyObject *errstr = NULL;
+    PyObject *error_line = NULL;
+    PyObject *tmp = NULL;
+    p->error_indicator = 1;
+
+    if (end_lineno == CURRENT_POS) {
+        end_lineno = p->tok->lineno;
+    }
+    if (end_col_offset == CURRENT_POS) {
+        end_col_offset = p->tok->cur - p->tok->line_start;
+    }
+
+    if (p->start_rule == Py_fstring_input) {
+        const char *fstring_msg = "f-string: ";
+        Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);
+
+        char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character
+        if (!new_errmsg) {
+            return (void *) PyErr_NoMemory();
+        }
+
+        // Copy both strings into new buffer
+        memcpy(new_errmsg, fstring_msg, strlen(fstring_msg));
+        memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg));
+        new_errmsg[len] = 0;
+        // From here on errmsg is heap-allocated and is freed before returning.
+        errmsg = new_errmsg;
+    }
+    errstr = PyUnicode_FromFormatV(errmsg, va);
+    if (!errstr) {
+        goto error;
+    }
+
+    if (p->tok->fp_interactive) {
+        error_line = get_error_line_from_tokenizer_buffers(p, lineno);
+    }
+    else if (p->start_rule == Py_file_input) {
+        error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
+                                                     (int) lineno, p->tok->encoding);
+    }
+
+    if (!error_line) {
+        /* PyErr_ProgramTextObject was not called or returned NULL. If it was not called,
+           then we need to find the error line from some other source, because
+           p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly
+           failed or we're parsing from a string or the REPL. There's a third edge case where
+           we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
+           `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
+           does not physically exist */
+        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
+
+        if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
+            Py_ssize_t size = p->tok->inp - p->tok->buf;
+            error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
+        }
+        else if (p->tok->fp == NULL || p->tok->fp == stdin) {
+            error_line = get_error_line_from_tokenizer_buffers(p, lineno);
+        }
+        else {
+            error_line = PyUnicode_FromStringAndSize("", 0);
+        }
+        if (!error_line) {
+            goto error;
+        }
+    }
+
+    if (p->start_rule == Py_fstring_input) {
+        col_offset -= p->starting_col_offset;
+        end_col_offset -= p->starting_col_offset;
+    }
+
+    Py_ssize_t col_number = col_offset;
+    Py_ssize_t end_col_number = end_col_offset;
+
+    if (p->tok->encoding != NULL) {
+        col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset);
+        if (col_number < 0) {
+            goto error;
+        }
+        if (end_col_number > 0) {
+            // Named so that it does not shadow the end_col_offset parameter.
+            Py_ssize_t end_char_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number);
+            if (end_char_offset < 0) {
+                goto error;
+            } else {
+                end_col_number = end_char_offset;
+            }
+        }
+    }
+    // "n" is the format unit for Py_ssize_t arguments. "O" (rather than "N")
+    // lets the tuple take its own reference to error_line, so this function
+    // keeps exactly one reference to release on both the success and the
+    // error path.
+    tmp = Py_BuildValue("(OnnOnn)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);
+    if (!tmp) {
+        goto error;
+    }
+    value = PyTuple_Pack(2, errstr, tmp);
+    Py_DECREF(tmp);
+    if (!value) {
+        goto error;
+    }
+    PyErr_SetObject(errtype, value);
+
+    Py_DECREF(errstr);
+    Py_DECREF(error_line);
+    Py_DECREF(value);
+    if (p->start_rule == Py_fstring_input) {
+        PyMem_Free((void *)errmsg);
+    }
+    return NULL;
+
+error:
+    Py_XDECREF(errstr);
+    Py_XDECREF(error_line);
+    if (p->start_rule == Py_fstring_input) {
+        PyMem_Free((void *)errmsg);
+    }
+    return NULL;
+}
+
+/* Pick and raise the exception to report after the parser has failed.
+ *
+ * Cases are tried in order and the first that applies wins:
+ *   1. an exception is already pending: it is kept, unless a later scan of
+ *      the remaining input finds an earlier unclosed bracket;
+ *   2. no token was ever read;
+ *   3. the tokenizer reached end of input;
+ *   4. `last_token` is an INDENT or DEDENT;
+ *   5. otherwise a generic "invalid syntax" located at `last_token`.
+ */
+void
+_Pypegen_set_syntax_error(Parser* p, Token* last_token) {
+    // Existing syntax error
+    if (PyErr_Occurred()) {
+        // Prioritize tokenizer errors to custom syntax errors raised
+        // on the second phase only if the errors come from the parser.
+        if (p->tok->done == E_DONE && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
+            _PyPegen_tokenize_full_source_to_check_for_errors(p);
+        }
+        // Propagate the existing syntax error.
+        return;
+    }
+    // Initialization error
+    if (p->fill == 0) {
+        RAISE_SYNTAX_ERROR("error at start before reading any input");
+        // Stop here: with no tokens read, the checks below would replace
+        // this error and dereference `last_token`, which has no token to
+        // point at.
+        return;
+    }
+    // Parser encountered EOF (End of File) unexpectedly
+    if (p->tok->done == E_EOF) {
+        if (p->tok->level) {
+            raise_unclosed_parentheses_error(p);
+        } else {
+            RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
+        }
+        return;
+    }
+    // Indentation error in the tokenizer
+    if (last_token->type == INDENT || last_token->type == DEDENT) {
+        RAISE_INDENTATION_ERROR(last_token->type == INDENT ? "unexpected indent" : "unexpected unindent");
+        return;
+    }
+    // Unknown error (generic case)
+
+    // Use the last token we found on the first pass to avoid reporting
+    // incorrect locations for generic syntax errors just because we reached
+    // further away when trying to find specific syntax errors in the second
+    // pass.
+    RAISE_SYNTAX_ERROR_KNOWN_LOCATION(last_token, "invalid syntax");
+    // _PyPegen_tokenize_full_source_to_check_for_errors will override the existing
+    // generic SyntaxError we just raised if errors are found.
+    _PyPegen_tokenize_full_source_to_check_for_errors(p);
+}
\ No newline at end of file
diff --git a/Tools/peg_generator/Makefile b/Tools/peg_generator/Makefile
index 6ad9c91b985cb..d010f19d58892 100644
--- a/Tools/peg_generator/Makefile
+++ b/Tools/peg_generator/Makefile
@@ -22,7 +22,7 @@ data/xxl.py:
 
 build: peg_extension/parse.c
 
-peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen.c ../../Parser/string_parser.c ../../Parser/*.h pegen/grammar_parser.py
+peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen.c ../../Parser/pegen_errors.c ../../Parser/string_parser.c ../../Parser/action_helpers.c ../../Parser/*.h pegen/grammar_parser.py
 	$(PYTHON) -m pegen -q c $(GRAMMAR) $(TOKENS) -o peg_extension/parse.c --compile-extension
 
 clean:
diff --git a/Tools/peg_generator/pegen/build.py b/Tools/peg_generator/pegen/build.py
index bf01078ff0b4a..c69e5c9a5f26a 100644
--- a/Tools/peg_generator/pegen/build.py
+++ b/Tools/peg_generator/pegen/build.py
@@ -69,6 +69,8 @@ def compile_c_extension(
                 str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
                 str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
                 str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
+                str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
+                str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
                 str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
                 str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
                 generated_source_path,



More information about the Python-checkins mailing list