[Python-checkins] bpo-36876: [c-analyzer tool] Add a "capi" subcommand to the c-analyzer tool. (gh-23918)

ericsnowcurrently webhook-mailer at python.org
Thu Dec 24 13:04:45 EST 2020


https://github.com/python/cpython/commit/7ec59d8861ef1104c3028678b2cacde4c5693e19
commit: 7ec59d8861ef1104c3028678b2cacde4c5693e19
branch: master
author: Eric Snow <ericsnowcurrently at gmail.com>
committer: ericsnowcurrently <ericsnowcurrently at gmail.com>
date: 2020-12-24T11:04:19-07:00
summary:

bpo-36876: [c-analyzer tool] Add a "capi" subcommand to the c-analyzer tool. (gh-23918)

This will help identify which C-API items will need to be updated for subinterpreter support.

https://bugs.python.org/issue36876
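
As a rough usage sketch (the entry point here is an assumption; the
"cpython" package under Tools/c-analyzer is run with -m):

    $ cd Tools/c-analyzer
    $ python3 -m cpython capi --summary
    $ python3 -m cpython capi --public --func --group-by level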

files:
A Tools/c-analyzer/cpython/_capi.py
A Tools/c-analyzer/cpython/_files.py
M Tools/c-analyzer/c_analyzer/__main__.py
M Tools/c-analyzer/c_common/scriptutil.py
M Tools/c-analyzer/c_common/tables.py
M Tools/c-analyzer/c_parser/__main__.py
M Tools/c-analyzer/c_parser/preprocessor/__main__.py
M Tools/c-analyzer/check-c-globals.py
M Tools/c-analyzer/cpython/__main__.py
M Tools/c-analyzer/cpython/_parser.py

diff --git a/Tools/c-analyzer/c_analyzer/__main__.py b/Tools/c-analyzer/c_analyzer/__main__.py
index 44325f2952e28..24fc6cd182656 100644
--- a/Tools/c-analyzer/c_analyzer/__main__.py
+++ b/Tools/c-analyzer/c_analyzer/__main__.py
@@ -263,7 +263,7 @@ def fmt_full(analysis):
 def add_output_cli(parser, *, default='summary'):
     parser.add_argument('--format', dest='fmt', default=default, choices=tuple(FORMATS))
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         pass
     return process_args
 
@@ -280,7 +280,7 @@ def _cli_check(parser, checks=None, **kwargs):
         process_checks = add_checks_cli(parser)
     elif len(checks) == 1 and type(checks) is not dict and re.match(r'^<.*>$', checks[0]):
         check = checks[0][1:-1]
-        def process_checks(args):
+        def process_checks(args, *, argv=None):
             args.checks = [check]
     else:
         process_checks = add_checks_cli(parser, checks=checks)
@@ -428,9 +428,9 @@ def _cli_data(parser, filenames=None, known=None):
     if known is None:
         sub.add_argument('--known', required=True)
 
-    def process_args(args):
+    def process_args(args, *, argv):
         if args.datacmd == 'dump':
-            process_progress(args)
+            process_progress(args, argv=argv)
     return process_args
 
 
@@ -515,6 +515,7 @@ def parse_args(argv=sys.argv[1:], prog=sys.argv[0], *, subset=None):
 
     verbosity, traceback_cm = process_args_by_key(
         args,
+        argv,
         processors[cmd],
         ['verbosity', 'traceback_cm'],
     )
diff --git a/Tools/c-analyzer/c_common/scriptutil.py b/Tools/c-analyzer/c_common/scriptutil.py
index 50dd754886919..ce69af2b6bdee 100644
--- a/Tools/c-analyzer/c_common/scriptutil.py
+++ b/Tools/c-analyzer/c_common/scriptutil.py
@@ -192,7 +192,7 @@ def add_verbosity_cli(parser):
     parser.add_argument('-q', '--quiet', action='count', default=0)
     parser.add_argument('-v', '--verbose', action='count', default=0)
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         ns = vars(args)
         key = 'verbosity'
         if key in ns:
@@ -208,7 +208,7 @@ def add_traceback_cli(parser):
     parser.add_argument('--no-traceback', '--no-tb', dest='traceback',
                         action='store_const', const=False)
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         ns = vars(args)
         key = 'traceback_cm'
         if key in ns:
@@ -262,7 +262,7 @@ def add_sepval_cli(parser, opt, dest, choices, *, sep=',', **kwargs):
         #kwargs.setdefault('metavar', opt.upper())
         parser.add_argument(opt, dest=dest, action='append', **kwargs)
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         ns = vars(args)
 
         # XXX Use normalize_selection()?
@@ -293,7 +293,7 @@ def add_file_filtering_cli(parser, *, excluded=None):
 
     excluded = tuple(excluded or ())
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         ns = vars(args)
         key = 'iter_filenames'
         if key in ns:
@@ -323,7 +323,7 @@ def add_progress_cli(parser, *, threshold=VERBOSITY, **kwargs):
     parser.add_argument('--no-progress', dest='track_progress', action='store_false')
     parser.set_defaults(track_progress=True)
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         if args.track_progress:
             ns = vars(args)
             verbosity = ns.get('verbosity', VERBOSITY)
@@ -339,7 +339,7 @@ def add_failure_filtering_cli(parser, pool, *, default=False):
                         metavar=f'"{{all|{"|".join(sorted(pool))}}},..."')
     parser.add_argument('--no-fail', dest='fail', action='store_const', const=())
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         ns = vars(args)
 
         fail = ns.pop('fail')
@@ -371,7 +371,7 @@ def ignore_exc(exc):
 def add_kind_filtering_cli(parser, *, default=None):
     parser.add_argument('--kinds', action='append')
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         ns = vars(args)
 
         kinds = []
@@ -486,18 +486,18 @@ def _flatten_processors(processors):
             yield from _flatten_processors(proc)
 
 
-def process_args(args, processors, *, keys=None):
+def process_args(args, argv, processors, *, keys=None):
     processors = _flatten_processors(processors)
     ns = vars(args)
     extracted = {}
     if keys is None:
         for process_args in processors:
-            for key in process_args(args):
+            for key in process_args(args, argv=argv):
                 extracted[key] = ns.pop(key)
     else:
         remainder = set(keys)
         for process_args in processors:
-            hanging = process_args(args)
+            hanging = process_args(args, argv=argv)
             if isinstance(hanging, str):
                 hanging = [hanging]
             for key in hanging or ():
@@ -510,8 +510,8 @@ def process_args(args, processors, *, keys=None):
     return extracted
 
 
-def process_args_by_key(args, processors, keys):
-    extracted = process_args(args, processors, keys=keys)
+def process_args_by_key(args, argv, processors, keys):
+    extracted = process_args(args, argv, processors, keys=keys)
     return [extracted[key] for key in keys]
 
 
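For illustration: every process_args hook now takes a keyword-only argv,
and the original command line is threaded through process_args_by_key().
A minimal sketch of the new calling convention (the parser setup is
hypothetical; the helper names come from this patch):

    import argparse
    from c_common.scriptutil import (
        add_verbosity_cli,
        add_traceback_cli,
        process_args_by_key,
    )

    parser = argparse.ArgumentParser()
    processors = [add_verbosity_cli(parser), add_traceback_cli(parser)]
    argv = ['-v']
    args = parser.parse_args(argv)
    # Each hook is invoked as hook(args, argv=argv); these two hooks
    # leave "verbosity" and "traceback_cm" for extraction, mirroring
    # the parse_args() functions updated in this patch.
    verbosity, traceback_cm = process_args_by_key(
        args,
        argv,
        processors,
        ['verbosity', 'traceback_cm'],
    )
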
diff --git a/Tools/c-analyzer/c_common/tables.py b/Tools/c-analyzer/c_common/tables.py
index 411152e3f9498..85b501925715d 100644
--- a/Tools/c-analyzer/c_common/tables.py
+++ b/Tools/c-analyzer/c_common/tables.py
@@ -1,4 +1,6 @@
 import csv
+import re
+import textwrap
 
 from . import NOT_SET, strutil, fsutil
 
@@ -212,3 +214,177 @@ def _normalize_table_file_props(header, sep):
         else:
             sep = None
     return header, sep
+
+
+##################################
+# stdout tables
+
+WIDTH = 20
+
+
+def resolve_columns(specs):
+    if isinstance(specs, str):
+        specs = specs.replace(',', ' ').strip().split()
+    return _resolve_colspecs(specs)
+
+
+def build_table(specs, *, sep=' ', defaultwidth=None):
+    columns = resolve_columns(specs)
+    return _build_table(columns, sep=sep, defaultwidth=defaultwidth)
+
+
+_COLSPEC_RE = re.compile(textwrap.dedent(r'''
+    ^
+    (?:
+        [[]
+        (
+            (?: [^\s\]] [^\]]* )?
+            [^\s\]]
+        )  # <label>
+        []]
+    )?
+    ( \w+ )  # <field>
+    (?:
+        (?:
+            :
+            ( [<^>] )  # <align>
+            ( \d+ )  # <width1>
+        )
+        |
+        (?:
+            (?:
+                :
+                ( \d+ )  # <width2>
+            )?
+            (?:
+                :
+                ( .*? )  # <fmt>
+            )?
+        )
+    )?
+    $
+'''), re.VERBOSE)
+
+
+def _parse_fmt(fmt):
+    if fmt.startswith(tuple('<^>')):
+        align = fmt[0]
+        width = fmt[1:]
+        if width.isdigit():
+            return int(width), align
+    return None, None
+
+
+def _parse_colspec(raw):
+    m = _COLSPEC_RE.match(raw)
+    if not m:
+        return None
+    label, field, align, width1, width2, fmt = m.groups()
+    if not label:
+        label = field
+    if width1:
+        width = None
+        fmt = f'{align}{width1}'
+    elif width2:
+        width = int(width2)
+        if fmt:
+            _width, _ = _parse_fmt(fmt)
+            if _width == width:
+                width = None
+    else:
+        width = None
+    return field, label, width, fmt
+
+
+def _normalize_colspec(spec):
+    if len(spec) == 1:
+        raw, = spec
+        return _resolve_colspec(raw)
+
+    if len(spec) == 4:
+        label, field, width, fmt = spec
+        if width:
+            fmt = f'{width}:{fmt}' if fmt else width
+    elif len(spec) == 3:
+        label, field, fmt = spec
+        if not field:
+            label, field = None, label
+        elif not isinstance(field, str) or not field.isidentifier():
+            fmt = f'{field}:{fmt}' if fmt else field
+            label, field = None, label
+    elif len(spec) == 2:
+        label = None
+        field, fmt = spec
+        if not field:
+            field, fmt = fmt, None
+        elif not field.isidentifier() or fmt.isidentifier():
+            label, field = field, fmt
+    else:
+        raise NotImplementedError
+
+    fmt = f':{fmt}' if fmt else ''
+    if label:
+        return _parse_colspec(f'[{label}]{field}{fmt}')
+    else:
+        return _parse_colspec(f'{field}{fmt}')
+
+
+def _resolve_colspec(raw):
+    if isinstance(raw, str):
+        spec = _parse_colspec(raw)
+    else:
+        spec = _normalize_colspec(raw)
+    if spec is None:
+        raise ValueError(f'unsupported column spec {raw!r}')
+    return spec
+
+
+def _resolve_colspecs(columns):
+    parsed = []
+    for raw in columns:
+        column = _resolve_colspec(raw)
+        parsed.append(column)
+    return parsed
+
+
+def _resolve_width(spec, defaultwidth):
+    _, label, width, fmt = spec
+    if width:
+        if not isinstance(width, int):
+            raise NotImplementedError
+        return width
+    elif fmt:
+        width, _ = _parse_fmt(fmt)
+        if width:
+            return width
+
+    if not defaultwidth:
+        return WIDTH
+    elif not hasattr(defaultwidth, 'get'):
+        return defaultwidth or WIDTH
+
+    defaultwidths = defaultwidth
+    defaultwidth = defaultwidths.get(None) or WIDTH
+    return defaultwidths.get(label) or defaultwidth
+
+
+def _build_table(columns, *, sep=' ', defaultwidth=None):
+    header = []
+    div = []
+    rowfmt = []
+    for spec in columns:
+        field, label, _, colfmt = spec
+        width = _resolve_width(spec, defaultwidth)
+        if colfmt:
+            colfmt = f':{colfmt}'
+        else:
+            colfmt = f':{width}'
+
+        header.append(f' {{:^{width}}} '.format(label))
+        div.append('-' * (width + 2))
+        rowfmt.append(f' {{{field}{colfmt}}} ')
+    return (
+        sep.join(header),
+        sep.join(div),
+        sep.join(rowfmt),
+    )
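
For illustration, the new stdout-table helpers accept compact column
specs (the "[label]field:width" grammar matched by _COLSPEC_RE above)
and return a header line, a divider, and a row format string. A small
sketch with made-up columns:

    from c_common.tables import build_table

    header, div, rowfmt = build_table('filename:30 name:20')
    print(header)
    print(div)
    print(rowfmt.format(filename='Include/object.h', name='PyObject'))
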
diff --git a/Tools/c-analyzer/c_parser/__main__.py b/Tools/c-analyzer/c_parser/__main__.py
index 539cec509cecb..78f47a1808f50 100644
--- a/Tools/c-analyzer/c_parser/__main__.py
+++ b/Tools/c-analyzer/c_parser/__main__.py
@@ -149,7 +149,7 @@ def add_output_cli(parser):
     parser.add_argument('--showfwd', action='store_true', default=None)
     parser.add_argument('--no-showfwd', dest='showfwd', action='store_false', default=None)
 
-    def process_args(args):
+    def process_args(args, *, argv=None):
         pass
     return process_args
 
@@ -243,6 +243,7 @@ def parse_args(argv=sys.argv[1:], prog=sys.argv[0], *, subset='parse'):
 
     verbosity, traceback_cm = process_args_by_key(
         args,
+        argv,
         processors[cmd],
         ['verbosity', 'traceback_cm'],
     )
diff --git a/Tools/c-analyzer/c_parser/preprocessor/__main__.py b/Tools/c-analyzer/c_parser/preprocessor/__main__.py
index a6054307c2575..bfc61949a76e4 100644
--- a/Tools/c-analyzer/c_parser/preprocessor/__main__.py
+++ b/Tools/c-analyzer/c_parser/preprocessor/__main__.py
@@ -40,10 +40,10 @@ def add_common_cli(parser, *, get_preprocessor=_get_preprocessor):
     parser.add_argument('--same', action='append')
     process_fail_arg = add_failure_filtering_cli(parser, FAIL)
 
-    def process_args(args):
+    def process_args(args, *, argv):
         ns = vars(args)
 
-        process_fail_arg(args)
+        process_fail_arg(args, argv=argv)
         ignore_exc = ns.pop('ignore_exc')
         # We later pass ignore_exc to _get_preprocessor().
 
@@ -174,6 +174,7 @@ def parse_args(argv=sys.argv[1:], prog=sys.argv[0], *,
 
     verbosity, traceback_cm = process_args_by_key(
         args,
+        argv,
         processors[cmd],
         ['verbosity', 'traceback_cm'],
     )
diff --git a/Tools/c-analyzer/check-c-globals.py b/Tools/c-analyzer/check-c-globals.py
index 3fe2bdcae1460..b1364a612bb7d 100644
--- a/Tools/c-analyzer/check-c-globals.py
+++ b/Tools/c-analyzer/check-c-globals.py
@@ -22,6 +22,7 @@ def parse_args():
     cmd = 'check'
     verbosity, traceback_cm = process_args_by_key(
         args,
+        argv,
         processors,
         ['verbosity', 'traceback_cm'],
     )
diff --git a/Tools/c-analyzer/cpython/__main__.py b/Tools/c-analyzer/cpython/__main__.py
index 6d78af299bb6f..9d29b13ed8f9c 100644
--- a/Tools/c-analyzer/cpython/__main__.py
+++ b/Tools/c-analyzer/cpython/__main__.py
@@ -3,11 +3,14 @@
 
 from c_common.fsutil import expand_filenames, iter_files_by_suffix
 from c_common.scriptutil import (
+    VERBOSITY,
     add_verbosity_cli,
     add_traceback_cli,
     add_commands_cli,
     add_kind_filtering_cli,
     add_files_cli,
+    add_progress_cli,
+    main_for_filenames,
     process_args_by_key,
     configure_logger,
     get_prog,
@@ -17,7 +20,7 @@
 import c_analyzer.__main__ as c_analyzer
 import c_analyzer as _c_analyzer
 from c_analyzer.info import UNKNOWN
-from . import _analyzer, _parser, REPO_ROOT
+from . import _analyzer, _capi, _files, _parser, REPO_ROOT
 
 
 logger = logging.getLogger(__name__)
@@ -25,9 +28,9 @@
 
 def _resolve_filenames(filenames):
     if filenames:
-        resolved = (_parser.resolve_filename(f) for f in filenames)
+        resolved = (_files.resolve_filename(f) for f in filenames)
     else:
-        resolved = _parser.iter_filenames()
+        resolved = _files.iter_filenames()
     return resolved
 
 
@@ -204,6 +207,95 @@ def analyze(files, **kwargs):
     )
 
 
+def _cli_capi(parser):
+    parser.add_argument('--levels', action='append', metavar='LEVEL[,...]')
+    parser.add_argument('--public', dest='levels',
+                        action='append_const', const='public')
+    parser.add_argument('--no-public', dest='levels',
+                        action='append_const', const='no-public')
+    for level in _capi.LEVELS:
+        parser.add_argument(f'--{level}', dest='levels',
+                            action='append_const', const=level)
+    def process_levels(args, *, argv=None):
+        levels = []
+        for raw in args.levels or ():
+            for level in raw.replace(',', ' ').strip().split():
+                if level == 'public':
+                    levels.append('stable')
+                    levels.append('cpython')
+                elif level == 'no-public':
+                    levels.append('private')
+                    levels.append('internal')
+                elif level in _capi.LEVELS:
+                    levels.append(level)
+                else:
+                    parser.error(f'expected LEVEL to be one of {sorted(_capi.LEVELS)}, got {level!r}')
+        args.levels = set(levels)
+
+    parser.add_argument('--kinds', action='append', metavar='KIND[,...]')
+    for kind in _capi.KINDS:
+        parser.add_argument(f'--{kind}', dest='kinds',
+                            action='append_const', const=kind)
+    def process_kinds(args, *, argv=None):
+        kinds = []
+        for raw in args.kinds or ():
+            for kind in raw.replace(',', ' ').strip().split():
+                if kind in _capi.KINDS:
+                    kinds.append(kind)
+                else:
+                    parser.error(f'expected KIND to be one of {sorted(_capi.KINDS)}, got {kind!r}')
+        args.kinds = set(kinds)
+
+    parser.add_argument('--group-by', dest='groupby',
+                        choices=['level', 'kind'])
+
+    parser.add_argument('--format', default='brief')
+    parser.add_argument('--summary', dest='format',
+                        action='store_const', const='summary')
+    def process_format(args, *, argv=None):
+        orig = args.format
+        args.format = _capi.resolve_format(args.format)
+        if isinstance(args.format, str):
+            if args.format not in _capi._FORMATS:
+                parser.error(f'unsupported format {orig!r}')
+
+    parser.add_argument('filenames', nargs='*', metavar='FILENAME')
+    process_progress = add_progress_cli(parser)
+
+    return [
+        process_levels,
+        process_kinds,
+        process_format,
+        process_progress,
+    ]
+
+
+def cmd_capi(filenames=None, *,
+             levels=None,
+             kinds=None,
+             groupby='kind',
+             format='brief',
+             track_progress=None,
+             verbosity=VERBOSITY,
+             **kwargs
+             ):
+    render = _capi.get_renderer(format)
+
+    filenames = _files.iter_header_files(filenames, levels=levels)
+    #filenames = (file for file, _ in main_for_filenames(filenames))
+    if track_progress:
+        filenames = track_progress(filenames)
+    items = _capi.iter_capi(filenames)
+    if levels:
+        items = (item for item in items if item.level in levels)
+    if kinds:
+        items = (item for item in items if item.kind in kinds)
+
+    lines = render(items, groupby=groupby, verbose=verbosity > VERBOSITY)
+    print()
+    for line in lines:
+        print(line)
+
+
 # We do not define any other cmd_*() handlers here,
 # favoring those defined elsewhere.
 
@@ -228,6 +320,11 @@ def analyze(files, **kwargs):
         [_cli_data],
         cmd_data,
     ),
+    'capi': (
+        'inspect the C-API',
+        [_cli_capi],
+        cmd_capi,
+    ),
 }
 
 
@@ -263,6 +360,7 @@ def parse_args(argv=sys.argv[1:], prog=None, *, subset=None):
 
     verbosity, traceback_cm = process_args_by_key(
         args,
+        argv,
         processors[cmd],
         ['verbosity', 'traceback_cm'],
     )
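
The new handler can also be driven directly, without argparse. A hedged
sketch (assumes it runs from Tools/c-analyzer inside a CPython checkout;
the argument values are only illustrative):

    from cpython.__main__ import cmd_capi

    # Summarize the public (stable-level) functions in one header.
    cmd_capi(['Include/object.h'],
             levels={'stable'},
             kinds={'func'},
             format='summary')
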
diff --git a/Tools/c-analyzer/cpython/_capi.py b/Tools/c-analyzer/cpython/_capi.py
new file mode 100644
index 0000000000000..38d7cd3c51465
--- /dev/null
+++ b/Tools/c-analyzer/cpython/_capi.py
@@ -0,0 +1,479 @@
+from collections import namedtuple
+import os
+import os.path
+import re
+import textwrap
+
+from c_common.tables import build_table, resolve_columns
+from c_parser.parser._regexes import _ind
+from ._files import iter_header_files, resolve_filename
+from . import REPO_ROOT
+
+
+INCLUDE_ROOT = os.path.join(REPO_ROOT, 'Include')
+INCLUDE_CPYTHON = os.path.join(INCLUDE_ROOT, 'cpython')
+INCLUDE_INTERNAL = os.path.join(INCLUDE_ROOT, 'internal')
+
+_MAYBE_NESTED_PARENS = textwrap.dedent(r'''
+    (?:
+        (?: [^(]* [(] [^()]* [)] )* [^(]*
+    )
+''')
+
+CAPI_FUNC = textwrap.dedent(rf'''
+    (?:
+        ^
+        \s*
+        PyAPI_FUNC \s*
+        [(]
+        {_ind(_MAYBE_NESTED_PARENS, 2)}
+        [)] \s*
+        (\w+)  # <func>
+        \s* [(]
+    )
+''')
+CAPI_DATA = textwrap.dedent(rf'''
+    (?:
+        ^
+        \s*
+        PyAPI_DATA \s*
+        [(]
+        {_ind(_MAYBE_NESTED_PARENS, 2)}
+        [)] \s*
+        (\w+)  # <data>
+        \b [^(]
+    )
+''')
+CAPI_INLINE = textwrap.dedent(r'''
+    (?:
+        ^
+        \s*
+        static \s+ inline \s+
+        .*?
+        \s+
+        ( \w+ )  # <inline>
+        \s* [(]
+    )
+''')
+CAPI_MACRO = textwrap.dedent(r'''
+    (?:
+        (\w+)  # <macro>
+        [(]
+    )
+''')
+CAPI_CONSTANT = textwrap.dedent(r'''
+    (?:
+        (\w+)  # <constant>
+        \s+ [^(]
+    )
+''')
+CAPI_DEFINE = textwrap.dedent(rf'''
+    (?:
+        ^
+        \s* [#] \s* define \s+
+        (?:
+            {_ind(CAPI_MACRO, 3)}
+            |
+            {_ind(CAPI_CONSTANT, 3)}
+            |
+            (?:
+                # ignored
+                \w+   # <defined_name>
+                \s*
+                $
+            )
+        )
+    )
+''')
+CAPI_RE = re.compile(textwrap.dedent(rf'''
+    (?:
+        {_ind(CAPI_FUNC, 2)}
+        |
+        {_ind(CAPI_DATA, 2)}
+        |
+        {_ind(CAPI_INLINE, 2)}
+        |
+        {_ind(CAPI_DEFINE, 2)}
+    )
+'''), re.VERBOSE)
+
+KINDS = [
+    'func',
+    'data',
+    'inline',
+    'macro',
+    'constant',
+]
+
+
+def _parse_line(line, prev=None):
+    last = line
+    if prev:
+        if not prev.endswith(os.linesep):
+            prev += os.linesep
+        line = prev + line
+    m = CAPI_RE.match(line)
+    if not m:
+        if not prev and line.startswith('static inline '):
+            return line  # the new "prev"
+        #if 'PyAPI_' in line or '#define ' in line or ' define ' in line:
+        #    print(line)
+        return None
+    results = zip(KINDS, m.groups())
+    for kind, name in results:
+        if name:
+            clean = last.split('//')[0].strip()
+            if clean.endswith('*/'):
+                clean = clean.split('/*')[0].rstrip()
+            if kind == 'macro' or kind == 'constant':
+                if clean.endswith('\\'):
+                    return line  # the new "prev"
+            elif kind == 'inline':
+                if not prev:
+                    if not clean.endswith('}'):
+                        return line  # the new "prev"
+                elif clean != '}':
+                    return line  # the new "prev"
+            elif not clean.endswith(';'):
+                return line  # the new "prev"
+            return name, kind
+    # It was a plain #define.
+    return None
+
+
+LEVELS = {
+    'stable',
+    'cpython',
+    'private',
+    'internal',
+}
+
+def _get_level(filename, name, *,
+               _cpython=INCLUDE_CPYTHON + os.path.sep,
+               _internal=INCLUDE_INTERNAL + os.path.sep,
+               ):
+    if filename.startswith(_internal):
+        return 'internal'
+    elif name.startswith('_'):
+        return 'private'
+    elif os.path.dirname(filename) == INCLUDE_ROOT:
+        return 'stable'
+    elif filename.startswith(_cpython):
+        return 'cpython'
+    else:
+        raise NotImplementedError
+    #return '???'
+
+
+class CAPIItem(namedtuple('CAPIItem', 'file lno name kind level')):
+
+    @classmethod
+    def from_line(cls, line, filename, lno, prev=None):
+        parsed = _parse_line(line, prev)
+        if not parsed:
+            return None, None
+        if isinstance(parsed, str):
+            # incomplete
+            return None, parsed
+        name, kind = parsed
+        level = _get_level(filename, name)
+        self = cls(filename, lno, name, kind, level)
+        if prev:
+            self._text = (prev + line).rstrip().splitlines()
+        else:
+            self._text = [line.rstrip()]
+        return self, None
+
+    @property
+    def relfile(self):
+        return self.file[len(REPO_ROOT) + 1:]
+
+    @property
+    def text(self):
+        try:
+            return self._text
+        except AttributeError:
+            # XXX Actually read the text from disk?
+            self._text = []
+            if self.kind == 'data':
+                self._text = [
+                    f'PyAPI_DATA(...) {self.name}',
+                ]
+            elif self.kind == 'func':
+                self._text = [
+                    f'PyAPI_FUNC(...) {self.name}(...);',
+                ]
+            elif self.kind == 'inline':
+                self._text = [
+                    f'static inline {self.name}(...);',
+                ]
+            elif self.kind == 'macro':
+                self._text = [
+                    f'#define {self.name}(...) \\',
+                    f'    ...',
+                ]
+            elif self.kind == 'constant':
+                self._text = [
+                    f'#define {self.name} ...',
+                ]
+            else:
+                raise NotImplementedError
+
+            return self._text
+
+
+def _parse_groupby(raw):
+    if not raw:
+        raw = 'kind'
+
+    if isinstance(raw, str):
+        groupby = raw.replace(',', ' ').strip().split()
+    else:
+        raise NotImplementedError
+
+    if not all(v in ('kind', 'level') for v in groupby):
+        raise ValueError(f'invalid groupby value {raw!r}')
+    return groupby
+
+
+def summarize(items, *, groupby='kind'):
+    summary = {}
+
+    groupby = _parse_groupby(groupby)[0]
+    if groupby == 'kind':
+        outers = KINDS
+        inners = LEVELS
+        def increment(item):
+            summary[item.kind][item.level] += 1
+    elif groupby == 'level':
+        outers = LEVELS
+        inners = KINDS
+        def increment(item):
+            summary[item.level][item.kind] += 1
+    else:
+        raise NotImplementedError
+
+    for outer in outers:
+        summary[outer] = _outer = {}
+        for inner in inners:
+            _outer[inner] = 0
+    for item in items:
+        increment(item)
+
+    return summary
+
+
+def _parse_capi(lines, filename):
+    if isinstance(lines, str):
+        lines = lines.splitlines()
+    prev = None
+    for lno, line in enumerate(lines, 1):
+        parsed, prev = CAPIItem.from_line(line, filename, lno, prev)
+        if parsed:
+            yield parsed
+    if prev:
+        parsed, prev = CAPIItem.from_line('', filename, lno, prev)
+        if parsed:
+            yield parsed
+        if prev:
+            print('incomplete match:')
+            print(filename)
+            print(prev)
+            raise Exception
+
+
+def iter_capi(filenames=None):
+    for filename in iter_header_files(filenames):
+        with open(filename) as infile:
+            for item in _parse_capi(infile, filename):
+                yield item
+
+
+def _collate(items, groupby):
+    groupby = _parse_groupby(groupby)[0]
+    maxfilename = maxname = maxkind = maxlevel = 0
+    collated = {}
+    for item in items:
+        key = getattr(item, groupby)
+        if key in collated:
+            collated[key].append(item)
+        else:
+            collated[key] = [item]
+        maxfilename = max(len(item.relfile), maxfilename)
+        maxname = max(len(item.name), maxname)
+        maxkind = max(len(item.kind), maxkind)
+        maxlevel = max(len(item.level), maxlevel)
+    maxextra = {
+        'kind': maxkind,
+        'level': maxlevel,
+    }
+    return collated, groupby, maxfilename, maxname, maxextra
+
+
+##################################
+# CLI rendering
+
+_LEVEL_MARKERS = {
+    'S': 'stable',
+    'C': 'cpython',
+    'P': 'private',
+    'I': 'internal',
+}
+_KIND_MARKERS = {
+    'F': 'func',
+    'D': 'data',
+    'I': 'inline',
+    'M': 'macro',
+    'C': 'constant',
+}
+
+
+def resolve_format(format):
+    if not format:
+        return 'brief'
+    elif isinstance(format, str) and format in _FORMATS:
+        return format
+    else:
+        return resolve_columns(format)
+
+
+def get_renderer(format):
+    format = resolve_format(format)
+    if isinstance(format, str):
+        try:
+            return _FORMATS[format]
+        except KeyError:
+            raise ValueError(f'unsupported format {format!r}')
+    else:
+        def render(items, **kwargs):
+            return render_table(items, columns=format, **kwargs)
+        return render
+
+
+def render_table(items, *, columns=None, groupby='kind', verbose=False):
+    if groupby:
+        collated, groupby, maxfilename, maxname, maxextra = _collate(items, groupby)
+        if groupby == 'kind':
+            groups = KINDS
+            extras = ['level']
+            markers = {'level': _LEVEL_MARKERS}
+        elif groupby == 'level':
+            groups = LEVELS
+            extras = ['kind']
+            markers = {'kind': _KIND_MARKERS}
+        else:
+            raise NotImplementedError
+    else:
+        # XXX Support no grouping?
+        raise NotImplementedError
+
+    if columns:
+        def get_extra(item):
+            return {extra: getattr(item, extra)
+                    for extra in ('kind', 'level')}
+    else:
+        if verbose:
+            maxextra['kind'] = max(len(kind) for kind in KINDS)
+            maxextra['level'] = max(len(level) for level in LEVELS)
+            extracols = [f'{extra}:{maxextra[extra]}'
+                         for extra in extras]
+            def get_extra(item):
+                return {extra: getattr(item, extra)
+                        for extra in extras}
+        elif len(extras) == 1:
+            extra, = extras
+            extracols = [f'{m}:1' for m in markers[extra]]
+            def get_extra(item):
+                return {m: m if getattr(item, extra) == markers[extra][m] else ''
+                        for m in markers[extra]}
+        else:
+            raise NotImplementedError
+            #extracols = [[f'{m}:1' for m in markers[extra]]
+            #             for extra in extras]
+            #def get_extra(item):
+            #    values = {}
+            #    for extra in extras:
+            #        cur = markers[extra]
+            #        for m in cur:
+            #            values[m] = m if getattr(item, m) == cur[m] else ''
+            #    return values
+        columns = [
+            f'filename:{maxfilename}',
+            f'name:{maxname}',
+            *extracols,
+        ]
+    header, div, fmt = build_table(columns)
+
+    total = 0
+    for group in groups:
+        if group not in collated:
+            continue
+        yield ''
+        yield f' === {group} ==='
+        yield ''
+        yield header
+        yield div
+        for item in collated[group]:
+            yield fmt.format(
+                filename=item.relfile,
+                name=item.name,
+                **get_extra(item),
+            )
+        yield div
+        subtotal = len(collated[group])
+        yield f'  sub-total: {subtotal}'
+        total += subtotal
+    yield ''
+    yield f'total: {total}'
+
+
+def render_full(items, *, groupby=None, verbose=False):
+    if groupby:
+        collated, groupby, _, _, _ = _collate(items, groupby)
+        for group, grouped in collated.items():
+            yield '#' * 25
+            yield f'# {group} ({len(grouped)})'
+            yield '#' * 25
+            yield ''
+            if not grouped:
+                continue
+            for item in grouped:
+                yield from _render_item_full(item, groupby, verbose)
+                yield ''
+    else:
+        for item in items:
+            yield from _render_item_full(item, None, verbose)
+            yield ''
+
+
+def _render_item_full(item, groupby, verbose):
+    yield item.name
+    yield f'  {"filename:":10} {item.relfile}'
+    for extra in ('kind', 'level'):
+        #if groupby != extra:
+            yield f'  {extra+":":10} {getattr(item, extra)}'
+    if verbose:
+        print('  ---------------------------------------')
+        for lno, line in enumerate(item.text, item.lno):
+            print(f'  | {lno:3} {line}')
+        print('  ---------------------------------------')
+
+
+def render_summary(items, *, groupby='kind', verbose=False):
+    total = 0
+    summary = summarize(items, groupby=groupby)
+    # XXX Stabilize the sorting to match KINDS/LEVELS.
+    for outer, counts in summary.items():
+        subtotal = sum(c for _, c in counts.items())
+        yield f'{outer + ":":20} ({subtotal})'
+        for inner, count in counts.items():
+            yield f'   {inner + ":":9} {count}'
+        total += subtotal
+    yield f'{"total:":20} ({total})'
+
+
+_FORMATS = {
+    'brief': render_table,
+    'full': render_full,
+    'summary': render_summary,
+}
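
A quick illustration of the parsing path above, assuming a CPython
checkout with Tools/c-analyzer on sys.path (the declaration is a real
one from Include/object.h):

    import os.path
    from cpython import _capi

    filename = os.path.join(_capi.INCLUDE_ROOT, 'object.h')
    line = 'PyAPI_FUNC(PyObject *) PyObject_Repr(PyObject *);'
    item, _ = _capi.CAPIItem.from_line(line, filename, 1)
    print(item.name, item.kind, item.level)  # PyObject_Repr func stable
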
diff --git a/Tools/c-analyzer/cpython/_files.py b/Tools/c-analyzer/cpython/_files.py
new file mode 100644
index 0000000000000..3e397880977ab
--- /dev/null
+++ b/Tools/c-analyzer/cpython/_files.py
@@ -0,0 +1,69 @@
+import os.path
+
+from c_common.fsutil import expand_filenames, iter_files_by_suffix
+from . import REPO_ROOT, INCLUDE_DIRS, SOURCE_DIRS
+
+
+GLOBS = [
+    'Include/*.h',
+    'Include/internal/*.h',
+    'Modules/**/*.h',
+    'Modules/**/*.c',
+    'Objects/**/*.h',
+    'Objects/**/*.c',
+    'Python/**/*.h',
+    'Parser/**/*.c',
+]
+LEVEL_GLOBS = {
+    'stable': 'Include/*.h',
+    'cpython': 'Include/cpython/*.h',
+    'internal': 'Include/internal/*.h',
+}
+
+
+def resolve_filename(filename):
+    orig = filename
+    filename = os.path.normcase(os.path.normpath(filename))
+    if os.path.isabs(filename):
+        if os.path.relpath(filename, REPO_ROOT).startswith('.'):
+            raise Exception(f'{orig!r} is outside the repo ({REPO_ROOT})')
+        return filename
+    else:
+        return os.path.join(REPO_ROOT, filename)
+
+
+def iter_filenames(*, search=False):
+    if search:
+        yield from iter_files_by_suffix(INCLUDE_DIRS, ('.h',))
+        yield from iter_files_by_suffix(SOURCE_DIRS, ('.c',))
+    else:
+        globs = (os.path.join(REPO_ROOT, file) for file in GLOBS)
+        yield from expand_filenames(globs)
+
+
+def iter_header_files(filenames=None, *, levels=None):
+    if not filenames:
+        if levels:
+            levels = set(levels)
+            if 'private' in levels:
+                levels.add('stable')
+                levels.add('cpython')
+            for level, glob in LEVEL_GLOBS.items():
+                if level in levels:
+                    yield from expand_filenames([os.path.join(REPO_ROOT, glob)])
+        else:
+            yield from iter_files_by_suffix(INCLUDE_DIRS, ('.h',))
+        return
+
+    for filename in filenames:
+        orig = filename
+        filename = resolve_filename(filename)
+        if filename.endswith(os.path.sep):
+            yield from iter_files_by_suffix(INCLUDE_DIRS, ('.h',))
+        elif filename.endswith('.h'):
+            yield filename
+        else:
+            # XXX Log it and continue instead?
+            raise ValueError(f'expected .h file, got {orig!r}')
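
For example, header iteration can be limited to one API level (a sketch;
assumes a CPython checkout):

    from cpython._files import iter_header_files

    for path in iter_header_files(levels={'stable'}):
        print(path)  # the Include/*.h headers
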
diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py
index eef758495386c..ef06a9fcb6903 100644
--- a/Tools/c-analyzer/cpython/_parser.py
+++ b/Tools/c-analyzer/cpython/_parser.py
@@ -1,7 +1,6 @@
 import os.path
 import re
 
-from c_common.fsutil import expand_filenames, iter_files_by_suffix
 from c_parser.preprocessor import (
     get_preprocessor as _get_preprocessor,
 )
@@ -9,7 +8,7 @@
     parse_file as _parse_file,
     parse_files as _parse_files,
 )
-from . import REPO_ROOT, INCLUDE_DIRS, SOURCE_DIRS
+from . import REPO_ROOT
 
 
 GLOB_ALL = '**/*'
@@ -43,19 +42,6 @@ def clean_lines(text):
 @end=sh@
 '''
 
-GLOBS = [
-    'Include/*.h',
-    'Include/internal/*.h',
-    'Modules/**/*.h',
-    'Modules/**/*.c',
-    'Objects/**/*.h',
-    'Objects/**/*.c',
-    'Python/**/*.h',
-    'Parser/**/*.c',
-    'Python/**/*.h',
-    'Parser/**/*.c',
-]
-
 EXCLUDED = clean_lines('''
 # @begin=conf@
 
@@ -280,26 +266,6 @@ def clean_lines(text):
 ]
 
 
-def resolve_filename(filename):
-    orig = filename
-    filename = os.path.normcase(os.path.normpath(filename))
-    if os.path.isabs(filename):
-        if os.path.relpath(filename, REPO_ROOT).startswith('.'):
-            raise Exception(f'{orig!r} is outside the repo ({REPO_ROOT})')
-        return filename
-    else:
-        return os.path.join(REPO_ROOT, filename)
-
-
-def iter_filenames(*, search=False):
-    if search:
-        yield from iter_files_by_suffix(INCLUDE_DIRS, ('.h',))
-        yield from iter_files_by_suffix(SOURCE_DIRS, ('.c',))
-    else:
-        globs = (os.path.join(REPO_ROOT, file) for file in GLOBS)
-        yield from expand_filenames(globs)
-
-
 def get_preprocessor(*,
                      file_macros=None,
                      file_incldirs=None,


