[Python-checkins] bpo-40939: Clean and adapt the peg_generator directory after deleting the old parser (GH-20822)
Pablo Galindo
webhook-mailer at python.org
Thu Jun 11 20:55:42 EDT 2020
https://github.com/python/cpython/commit/756180b4bfa09bb77394a2b3754d331181d4f28c
commit: 756180b4bfa09bb77394a2b3754d331181d4f28c
branch: master
author: Pablo Galindo <Pablogsal at gmail.com>
committer: GitHub <noreply at github.com>
date: 2020-06-12T01:55:35+01:00
summary:
bpo-40939: Clean and adapt the peg_generator directory after deleting the old parser (GH-20822)
files:
D Tools/peg_generator/scripts/show_parse.py
M Tools/peg_generator/Makefile
M Tools/peg_generator/pegen/keywordgen.py
M Tools/peg_generator/scripts/benchmark.py
M Tools/peg_generator/scripts/find_max_nesting.py
M Tools/peg_generator/scripts/test_parse_directory.py
diff --git a/Tools/peg_generator/Makefile b/Tools/peg_generator/Makefile
index fb727c048b311..6ad9c91b985cb 100644
--- a/Tools/peg_generator/Makefile
+++ b/Tools/peg_generator/Makefile
@@ -22,7 +22,7 @@ data/xxl.py:
build: peg_extension/parse.c
-peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h pegen/grammar_parser.py
+peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen.c ../../Parser/string_parser.c ../../Parser/*.h pegen/grammar_parser.py
$(PYTHON) -m pegen -q c $(GRAMMAR) $(TOKENS) -o peg_extension/parse.c --compile-extension
clean:
@@ -70,18 +70,10 @@ stats: peg_extension/parse.c data/xxl.py
time: time_compile
time_compile: venv data/xxl.py
- $(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl compile
+ $(VENVPYTHON) scripts/benchmark.py --target=xxl compile
time_parse: venv data/xxl.py
- $(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl parse
-
-time_old: time_old_compile
-
-time_old_compile: venv data/xxl.py
- $(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl compile
-
-time_old_parse: venv data/xxl.py
- $(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl parse
+ $(VENVPYTHON) scripts/benchmark.py --target=xxl parse
time_peg_dir: venv
$(VENVPYTHON) scripts/test_parse_directory.py \
diff --git a/Tools/peg_generator/pegen/keywordgen.py b/Tools/peg_generator/pegen/keywordgen.py
index 279c34b6dae5b..8684944096654 100644
--- a/Tools/peg_generator/pegen/keywordgen.py
+++ b/Tools/peg_generator/pegen/keywordgen.py
@@ -41,9 +41,7 @@ def main():
"grammar", type=str, help="The file with the grammar definition in PEG format"
)
parser.add_argument(
- "tokens_file",
- type=argparse.FileType("r"),
- help="The file with the token definitions"
+ "tokens_file", type=argparse.FileType("r"), help="The file with the token definitions"
)
parser.add_argument(
"keyword_file",
@@ -61,9 +59,7 @@ def main():
gen.collect_todo()
with args.keyword_file as thefile:
- all_keywords = sorted(
- list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS
- )
+ all_keywords = sorted(list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS)
keywords = ",\n ".join(map(repr, all_keywords))
thefile.write(TEMPLATE.format(keywords=keywords))
diff --git a/Tools/peg_generator/scripts/benchmark.py b/Tools/peg_generator/scripts/benchmark.py
index af356bed78391..5fbedaa3b0ed1 100644
--- a/Tools/peg_generator/scripts/benchmark.py
+++ b/Tools/peg_generator/scripts/benchmark.py
@@ -6,13 +6,13 @@
import os
from time import time
-import _peg_parser
-
try:
import memory_profiler
except ModuleNotFoundError:
- print("Please run `make venv` to create a virtual environment and install"
- " all the dependencies, before running this script.")
+ print(
+ "Please run `make venv` to create a virtual environment and install"
+ " all the dependencies, before running this script."
+ )
sys.exit(1)
sys.path.insert(0, os.getcwd())
@@ -21,13 +21,6 @@
argparser = argparse.ArgumentParser(
prog="benchmark", description="Reproduce the various pegen benchmarks"
)
-argparser.add_argument(
- "--parser",
- action="store",
- choices=["new", "old"],
- default="pegen",
- help="Which parser to benchmark (default is pegen)",
-)
argparser.add_argument(
"--target",
action="store",
@@ -40,12 +33,7 @@
command_compile = subcommands.add_parser(
"compile", help="Benchmark parsing and compiling to bytecode"
)
-command_parse = subcommands.add_parser(
- "parse", help="Benchmark parsing and generating an ast.AST"
-)
-command_notree = subcommands.add_parser(
- "notree", help="Benchmark parsing and dumping the tree"
-)
+command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST")
def benchmark(func):
@@ -66,59 +54,37 @@ def wrapper(*args):
@benchmark
-def time_compile(source, parser):
- if parser == "old":
- return _peg_parser.compile_string(
- source,
- oldparser=True,
- )
- else:
- return _peg_parser.compile_string(source)
-
-
-@benchmark
-def time_parse(source, parser):
- if parser == "old":
- return _peg_parser.parse_string(source, oldparser=True)
- else:
- return _peg_parser.parse_string(source)
+def time_compile(source):
+ return compile(source, "<string>", "exec")
@benchmark
-def time_notree(source, parser):
- if parser == "old":
- return _peg_parser.parse_string(source, oldparser=True, ast=False)
- else:
- return _peg_parser.parse_string(source, ast=False)
+def time_parse(source):
+ return ast.parse(source)
-def run_benchmark_xxl(subcommand, parser, source):
+def run_benchmark_xxl(subcommand, source):
if subcommand == "compile":
- time_compile(source, parser)
+ time_compile(source)
elif subcommand == "parse":
- time_parse(source, parser)
- elif subcommand == "notree":
- time_notree(source, parser)
+ time_parse(source)
-def run_benchmark_stdlib(subcommand, parser):
- modes = {"compile": 2, "parse": 1, "notree": 0}
+def run_benchmark_stdlib(subcommand):
+ modes = {"compile": 2, "parse": 1}
for _ in range(3):
parse_directory(
"../../Lib",
verbose=False,
excluded_files=["*/bad*", "*/lib2to3/tests/data/*",],
- tree_arg=0,
short=True,
mode=modes[subcommand],
- oldparser=(parser == "old"),
)
def main():
args = argparser.parse_args()
subcommand = args.subcommand
- parser = args.parser
target = args.target
if subcommand is None:
@@ -127,9 +93,9 @@ def main():
if target == "xxl":
with open(os.path.join("data", "xxl.py"), "r") as f:
source = f.read()
- run_benchmark_xxl(subcommand, parser, source)
+ run_benchmark_xxl(subcommand, source)
elif target == "stdlib":
- run_benchmark_stdlib(subcommand, parser)
+ run_benchmark_stdlib(subcommand)
if __name__ == "__main__":
diff --git a/Tools/peg_generator/scripts/find_max_nesting.py b/Tools/peg_generator/scripts/find_max_nesting.py
index f2fdd00bfb7cd..92045c93ff76d 100755
--- a/Tools/peg_generator/scripts/find_max_nesting.py
+++ b/Tools/peg_generator/scripts/find_max_nesting.py
@@ -14,8 +14,7 @@
Usage: python -m scripts.find_max_nesting
"""
import sys
-
-from _peg_parser import parse_string
+import ast
GRAMMAR_FILE = "data/python.gram"
INITIAL_NESTING_DEPTH = 10
@@ -28,9 +27,8 @@
def check_nested_expr(nesting_depth: int) -> bool:
expr = f"{'(' * nesting_depth}0{')' * nesting_depth}"
-
try:
- parse_string(expr)
+ ast.parse(expr)
print(f"Nesting depth of {nesting_depth} is successful")
return True
except Exception as err:
diff --git a/Tools/peg_generator/scripts/show_parse.py b/Tools/peg_generator/scripts/show_parse.py
deleted file mode 100755
index b4ee5a1b357f7..0000000000000
--- a/Tools/peg_generator/scripts/show_parse.py
+++ /dev/null
@@ -1,121 +0,0 @@
-#!/usr/bin/env python3.8
-
-"""Show the parse tree for a given program, nicely formatted.
-
-Example:
-
-$ scripts/show_parse.py a+b
-Module(
- body=[
- Expr(
- value=BinOp(
- left=Name(id="a", ctx=Load()), op=Add(), right=Name(id="b", ctx=Load())
- )
- )
- ],
- type_ignores=[],
-)
-$
-
-Use -v to show line numbers and column offsets.
-
-The formatting is done using black. You can also import this module
-and call one of its functions.
-"""
-
-import argparse
-import ast
-import difflib
-import os
-import sys
-import tempfile
-
-import _peg_parser
-
-from typing import List
-
-sys.path.insert(0, os.getcwd())
-from pegen.ast_dump import ast_dump
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
- "-d", "--diff", action="store_true", help="show diff between grammar and ast (requires -g)"
-)
-parser.add_argument(
- "-p",
- "--parser",
- choices=["new", "old"],
- default="new",
- help="choose the parser to use"
-)
-parser.add_argument(
- "-m",
- "--multiline",
- action="store_true",
- help="concatenate program arguments using newline instead of space",
-)
-parser.add_argument("-v", "--verbose", action="store_true", help="show line/column numbers")
-parser.add_argument("program", nargs="+", help="program to parse (will be concatenated)")
-
-
-def format_tree(tree: ast.AST, verbose: bool = False) -> str:
- with tempfile.NamedTemporaryFile("w+") as tf:
- tf.write(ast_dump(tree, include_attributes=verbose))
- tf.write("\n")
- tf.flush()
- cmd = f"black -q {tf.name}"
- sts = os.system(cmd)
- if sts:
- raise RuntimeError(f"Command {cmd!r} failed with status 0x{sts:x}")
- tf.seek(0)
- return tf.read()
-
-
-def diff_trees(a: ast.AST, b: ast.AST, verbose: bool = False) -> List[str]:
- sa = format_tree(a, verbose)
- sb = format_tree(b, verbose)
- la = sa.splitlines()
- lb = sb.splitlines()
- return list(difflib.unified_diff(la, lb, "a", "b", lineterm=""))
-
-
-def show_parse(source: str, verbose: bool = False) -> str:
- tree = _peg_parser.parse_string(source, oldparser=True)
- return format_tree(tree, verbose).rstrip("\n")
-
-
-def print_parse(source: str, verbose: bool = False) -> None:
- print(show_parse(source, verbose))
-
-
-def main() -> None:
- args = parser.parse_args()
- new_parser = args.parser == "new"
- if args.multiline:
- sep = "\n"
- else:
- sep = " "
- program = sep.join(args.program)
- if new_parser:
- tree = _peg_parser.parse_string(program)
-
- if args.diff:
- a = _peg_parser.parse_string(program, oldparser=True)
- b = tree
- diff = diff_trees(a, b, args.verbose)
- if diff:
- for line in diff:
- print(line)
- else:
- print("# Trees are the same")
- else:
- print("# Parsed using the new parser")
- print(format_tree(tree, args.verbose))
- else:
- tree = _peg_parser.parse_string(program, oldparser=True)
- print("# Parsed using the old parser")
- print(format_tree(tree, args.verbose))
-
-
-if __name__ == "__main__":
- main()
diff --git a/Tools/peg_generator/scripts/test_parse_directory.py b/Tools/peg_generator/scripts/test_parse_directory.py
index 63204ce9dc193..d8f4f0ecd3e05 100755
--- a/Tools/peg_generator/scripts/test_parse_directory.py
+++ b/Tools/peg_generator/scripts/test_parse_directory.py
@@ -7,7 +7,6 @@
import time
import traceback
import tokenize
-import _peg_parser
from glob import glob
from pathlib import PurePath
@@ -16,7 +15,6 @@
sys.path.insert(0, os.getcwd())
from pegen.ast_dump import ast_dump
from pegen.testutil import print_memstats
-from scripts import show_parse
SUCCESS = "\033[92m"
FAIL = "\033[91m"
@@ -40,9 +38,6 @@
argparser.add_argument(
"-v", "--verbose", action="store_true", help="Display detailed errors for failures"
)
-argparser.add_argument(
- "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
-)
def report_status(
@@ -79,66 +74,13 @@ def report_status(
print(f" {str(error.__class__.__name__)}: {error}")
-def compare_trees(
- actual_tree: ast.AST, file: str, verbose: bool, include_attributes: bool = False,
-) -> int:
- with open(file) as f:
- expected_tree = _peg_parser.parse_string(f.read(), oldparser=True)
-
- expected_text = ast_dump(expected_tree, include_attributes=include_attributes)
- actual_text = ast_dump(actual_tree, include_attributes=include_attributes)
- if actual_text == expected_text:
- if verbose:
- print("Tree for {file}:")
- print(show_parse.format_tree(actual_tree, include_attributes))
- return 0
-
- print(f"Diffing ASTs for {file} ...")
-
- expected = show_parse.format_tree(expected_tree, include_attributes)
- actual = show_parse.format_tree(actual_tree, include_attributes)
-
- if verbose:
- print("Expected for {file}:")
- print(expected)
- print("Actual for {file}:")
- print(actual)
- print(f"Diff for {file}:")
-
- diff = show_parse.diff_trees(expected_tree, actual_tree, include_attributes)
- for line in diff:
- print(line)
-
- return 1
-
-
-def parse_file(source: str, file: str, mode: int, oldparser: bool) -> Tuple[Any, float]:
+def parse_file(source: str, file: str) -> Tuple[Any, float]:
t0 = time.time()
- if mode == COMPILE:
- result = _peg_parser.compile_string(
- source,
- filename=file,
- oldparser=oldparser,
- )
- else:
- result = _peg_parser.parse_string(
- source,
- filename=file,
- oldparser=oldparser,
- ast=(mode == PARSE),
- )
+ result = ast.parse(source, filename=file)
t1 = time.time()
return result, t1 - t0
-def is_parsing_failure(source: str) -> bool:
- try:
- _peg_parser.parse_string(source, mode="exec", oldparser=True)
- except SyntaxError:
- return False
- return True
-
-
def generate_time_stats(files, total_seconds) -> None:
total_files = len(files)
total_bytes = 0
@@ -160,27 +102,11 @@ def generate_time_stats(files, total_seconds) -> None:
)
-def parse_directory(
- directory: str,
- verbose: bool,
- excluded_files: List[str],
- tree_arg: int,
- short: bool,
- mode: int,
- oldparser: bool,
-) -> int:
- if tree_arg:
- assert mode == PARSE, "Mode should be 1 (parse), when comparing the generated trees"
-
- if oldparser and tree_arg:
- print("Cannot specify tree argument with the cpython parser.", file=sys.stderr)
- return 1
-
+def parse_directory(directory: str, verbose: bool, excluded_files: List[str], short: bool) -> int:
# For a given directory, traverse files and attempt to parse each one
# - Output success/failure for each file
errors = 0
files = []
- trees = {} # Trees to compare (after everything else is done)
total_seconds = 0
for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
@@ -192,39 +118,20 @@ def parse_directory(
source = f.read()
try:
- result, dt = parse_file(source, file, mode, oldparser)
+ result, dt = parse_file(source, file)
total_seconds += dt
- if tree_arg:
- trees[file] = result
report_status(succeeded=True, file=file, verbose=verbose, short=short)
except SyntaxError as error:
- if is_parsing_failure(source):
- print(f"File {file} cannot be parsed by either parser.")
- else:
- report_status(
- succeeded=False, file=file, verbose=verbose, error=error, short=short
- )
- errors += 1
+ report_status(succeeded=False, file=file, verbose=verbose, error=error, short=short)
+ errors += 1
files.append(file)
- t1 = time.time()
-
generate_time_stats(files, total_seconds)
if short:
print_memstats()
if errors:
print(f"Encountered {errors} failures.", file=sys.stderr)
-
- # Compare trees (the dict is empty unless -t is given)
- compare_trees_errors = 0
- for file, tree in trees.items():
- if not short:
- print("Comparing ASTs for", file)
- if compare_trees(tree, file, verbose, tree_arg >= 2) == 1:
- compare_trees_errors += 1
-
- if errors or compare_trees_errors:
return 1
return 0
@@ -235,20 +142,8 @@ def main() -> None:
directory = args.directory
verbose = args.verbose
excluded_files = args.exclude
- tree = args.tree
short = args.short
- mode = 1 if args.tree else 2
- sys.exit(
- parse_directory(
- directory,
- verbose,
- excluded_files,
- tree,
- short,
- mode,
- oldparser=False,
- )
- )
+ sys.exit(parse_directory(directory, verbose, excluded_files, short))
if __name__ == "__main__":
More information about the Python-checkins
mailing list