[Python-checkins] bpo-45952: Get the C analyzer tool working again. (gh-29882)

ericsnowcurrently webhook-mailer at python.org
Wed Dec 1 13:20:37 EST 2021


https://github.com/python/cpython/commit/ee94aa0850191712e6adfc1f4a9df08ec3240195
commit: ee94aa0850191712e6adfc1f4a9df08ec3240195
branch: main
author: Eric Snow <ericsnowcurrently at gmail.com>
committer: ericsnowcurrently <ericsnowcurrently at gmail.com>
date: 2021-12-01T11:20:20-07:00
summary:

bpo-45952: Get the C analyzer tool working again. (gh-29882)

Not much needed to be done; mostly, a few newly added files had to be accounted for.

https://bugs.python.org/issue45952

files:
M Tools/c-analyzer/TODO
M Tools/c-analyzer/c_parser/__init__.py
M Tools/c-analyzer/c_parser/parser/__init__.py
M Tools/c-analyzer/c_parser/preprocessor/__main__.py
M Tools/c-analyzer/cpython/__main__.py
M Tools/c-analyzer/cpython/_parser.py

diff --git a/Tools/c-analyzer/TODO b/Tools/c-analyzer/TODO
index 1fd8052268be0..4b9b2857e1d1e 100644
--- a/Tools/c-analyzer/TODO
+++ b/Tools/c-analyzer/TODO
@@ -1,3 +1,11 @@
+# For up-to-date results, run:
+#   ./python Tools/c-analyzer/c-analyzer.py check --format summary
+# or
+#   ./python Tools/c-analyzer/c-analyzer.py analyze
+
+
+#######################################
+# non-PyObject (61)
 
 # allocator (16)
 Objects/obmalloc.c:_PyMem                                        static PyMemAllocatorEx _PyMem
@@ -32,12 +40,7 @@ Objects/dictobject.c:empty_keys_struct                           static PyDictKe
 Python/fileutils.c:_Py_open_cloexec_works                        int _Py_open_cloexec_works
 
 
-# freelists
-Objects/dictobject.c:keys_free_list                              static PyDictKeysObject *keys_free_list[PyDict_MAXFREELIST]
-Objects/dictobject.c:numfreekeys                                 static int numfreekeys
-
-
-# other non-object (43)
+# other non-object (40)
 Modules/_tracemalloc.c:allocators                                static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; PyMemAllocatorEx obj; } allocators
 Modules/_tracemalloc.c:tables_lock                               static PyThread_type_lock tables_lock
 Modules/_tracemalloc.c:tracemalloc_filenames                     static _Py_hashtable_t *tracemalloc_filenames
@@ -81,30 +84,7 @@ Python/pylifecycle.c:fatal_error():reentrant                     static int reen
 
 
 #######################################
-# PyObject (960)
-
-# freelists (10 + 10)
-Modules/_collectionsmodule.c:freeblocks                          static block *freeblocks[MAXFREEBLOCKS]
-Modules/_collectionsmodule.c:numfreeblocks                       static Py_ssize_t numfreeblocks
-Objects/dictobject.c:free_list                                   static PyDictObject *free_list[PyDict_MAXFREELIST]
-Objects/dictobject.c:numfree                                     static int numfree
-Objects/exceptions.c:memerrors_freelist                          static PyBaseExceptionObject *memerrors_freelist
-Objects/exceptions.c:memerrors_numfree                           static int memerrors_numfree
-Objects/floatobject.c:free_list                                  static PyFloatObject *free_list
-Objects/floatobject.c:numfree                                    static int numfree
-Objects/frameobject.c:free_list                                  static PyFrameObject *free_list
-Objects/frameobject.c:numfree                                    static int numfree
-Objects/genobject.c:ag_asend_freelist                            static PyAsyncGenASend *ag_asend_freelist[_PyAsyncGen_MAXFREELIST]
-Objects/genobject.c:ag_asend_freelist_free                       static int ag_asend_freelist_free
-Objects/genobject.c:ag_value_freelist                            static _PyAsyncGenWrappedValue *ag_value_freelist[_PyAsyncGen_MAXFREELIST]
-Objects/genobject.c:ag_value_freelist_free                       static int ag_value_freelist_free
-Objects/listobject.c:free_list                                   static PyListObject *free_list[PyList_MAXFREELIST]
-Objects/listobject.c:numfree                                     static int numfree
-Objects/tupleobject.c:free_list                                  static PyTupleObject *free_list[PyTuple_MAXSAVESIZE]
-Objects/tupleobject.c:numfree                                    static int numfree[PyTuple_MAXSAVESIZE]
-Python/context.c:ctx_freelist                                    static PyContext *ctx_freelist
-Python/context.c:ctx_freelist_len                                static int ctx_freelist_len
-
+# PyObject (919)
 
 # singletons (7)
 Objects/boolobject.c:_Py_FalseStruct                             static struct _longobject _Py_FalseStruct
@@ -116,16 +96,8 @@ Objects/object.c:_Py_NotImplementedStruct                        PyObject _Py_No
 Objects/sliceobject.c:_Py_EllipsisObject                         PyObject _Py_EllipsisObject
 
 
-# module vars (9)
-Modules/_functoolsmodule.c:kwd_mark                              static PyObject *kwd_mark
-Modules/_localemodule.c:Error                                    static PyObject *Error
-Modules/_threadmodule.c:ThreadError                              static PyObject *ThreadError
+# module vars (1)
 Modules/_tracemalloc.c:unknown_filename                          static PyObject *unknown_filename
-Modules/signalmodule.c:DefaultHandler                            static PyObject *DefaultHandler
-Modules/signalmodule.c:IgnoreHandler                             static PyObject *IgnoreHandler
-Modules/signalmodule.c:IntHandler                                static PyObject *IntHandler
-Modules/signalmodule.c:ItimerError                               static PyObject *ItimerError
-Objects/exceptions.c:errnomap                                    static PyObject *errnomap
 
 
 # other (non-cache) (5)
@@ -136,26 +108,15 @@ Modules/signalmodule.c:Handlers                                  static volatile
 Objects/setobject.c:_dummy_struct                                static PyObject _dummy_struct
 
 
-# caches (5)
-Modules/posixmodule.c:posix_putenv_garbage                       static PyObject *posix_putenv_garbage
-Objects/sliceobject.c:slice_cache                                static PySliceObject *slice_cache
-Objects/typeobject.c:method_cache                                static struct method_cache_entry method_cache[1 << MCACHE_SIZE_EXP]
-Objects/unicodeobject.c:interned                                 static PyObject *interned
+# caches (1)
 Python/import.c:extensions                                       static PyObject *extensions
 
 
-# cached constants - non-str (15)
+# cached constants - non-str (6)
 Modules/_io/_iomodule.c:_PyIO_empty_bytes                        PyObject *_PyIO_empty_bytes
 Modules/_io/bufferedio.c:_PyIO_trap_eintr():eintr_int            static PyObject *eintr_int
-Modules/posixmodule.c:billion                                    static PyObject *billion
-Modules/posixmodule.c:wait_helper():struct_rusage                static PyObject *struct_rusage
-Objects/bytesobject.c:characters                                 static PyBytesObject *characters[UCHAR_MAX + 1]
-Objects/bytesobject.c:nullstring                                 static PyBytesObject *nullstring
-Objects/codeobject.c:PyCode_NewEmpty():nulltuple                 static PyObject *nulltuple
-Objects/dictobject.c:empty_values                                static PyObject *empty_values[1]
+Objects/dictobject.c:empty_values_struct                         static PyDictValues
 Objects/listobject.c:indexerr                                    static PyObject *indexerr
-Objects/longobject.c:small_ints                                  static PyLongObject small_ints[NSMALLNEGINTS + NSMALLPOSINTS]
-Objects/setobject.c:emptyfrozenset                               static PyObject *emptyfrozenset
 Python/context.c:_token_missing                                  static PyObject *_token_missing
 Python/hamt.c:_empty_hamt                                        static PyHamtObject *_empty_hamt
 
@@ -662,15 +623,6 @@ Modules/itertoolsmodule.c:takewhile_type                         static PyTypeOb
 Modules/itertoolsmodule.c:tee_type                               static PyTypeObject tee_type
 Modules/itertoolsmodule.c:teedataobject_type                     static PyTypeObject teedataobject_type
 Modules/itertoolsmodule.c:ziplongest_type                        static PyTypeObject ziplongest_type
-Modules/posixmodule.c:DirEntryType                               static PyTypeObject DirEntryType
-Modules/posixmodule.c:ScandirIteratorType                        static PyTypeObject ScandirIteratorType
-Modules/posixmodule.c:SchedParamType                             static PyTypeObject* SchedParamType
-Modules/posixmodule.c:StatResultType                             static PyTypeObject* StatResultType
-Modules/posixmodule.c:StatVFSResultType                          static PyTypeObject* StatVFSResultType
-Modules/posixmodule.c:TerminalSizeType                           static PyTypeObject* TerminalSizeType
-Modules/posixmodule.c:TimesResultType                            static PyTypeObject* TimesResultType
-Modules/posixmodule.c:UnameResultType                            static PyTypeObject* UnameResultType
-Modules/posixmodule.c:WaitidResultType                           static PyTypeObject* WaitidResultType
 Modules/signalmodule.c:SiginfoType                               static PyTypeObject SiginfoType
 Modules/timemodule.c:StructTimeType                              static PyTypeObject StructTimeType
 Modules/xxsubtype.c:spamdict_type                                static PyTypeObject spamdict_type
diff --git a/Tools/c-analyzer/c_parser/__init__.py b/Tools/c-analyzer/c_parser/__init__.py
index 39455ddbf1a0c..fc10aff94505d 100644
--- a/Tools/c-analyzer/c_parser/__init__.py
+++ b/Tools/c-analyzer/c_parser/__init__.py
@@ -1,3 +1,4 @@
+from c_common.fsutil import match_glob as _match_glob
 from .parser import parse as _parse
 from .preprocessor import get_preprocessor as _get_preprocessor
 
@@ -5,23 +6,32 @@
 def parse_file(filename, *,
                match_kind=None,
                get_file_preprocessor=None,
+               file_maxsizes=None,
                ):
     if get_file_preprocessor is None:
         get_file_preprocessor = _get_preprocessor()
-    yield from _parse_file(filename, match_kind, get_file_preprocessor)
+    yield from _parse_file(
+            filename, match_kind, get_file_preprocessor, file_maxsizes)
 
 
 def parse_files(filenames, *,
                 match_kind=None,
                 get_file_preprocessor=None,
+                file_maxsizes=None,
                 ):
     if get_file_preprocessor is None:
         get_file_preprocessor = _get_preprocessor()
     for filename in filenames:
-        yield from _parse_file(filename, match_kind, get_file_preprocessor)
+        yield from _parse_file(
+                filename, match_kind, get_file_preprocessor, file_maxsizes)
 
 
-def _parse_file(filename, match_kind, get_file_preprocessor):
+def _parse_file(filename, match_kind, get_file_preprocessor, maxsizes):
+    srckwargs = {}
+    maxsize = _resolve_max_size(filename, maxsizes)
+    if maxsize:
+        srckwargs['maxtext'], srckwargs['maxlines'] = maxsize
+
     # Preprocess the file.
     preprocess = get_file_preprocessor(filename)
     preprocessed = preprocess()
@@ -30,7 +40,7 @@ def _parse_file(filename, match_kind, get_file_preprocessor):
 
     # Parse the lines.
     srclines = ((l.file, l.data) for l in preprocessed if l.kind == 'source')
-    for item in _parse(srclines):
+    for item in _parse(srclines, **srckwargs):
         if match_kind is not None and not match_kind(item.kind):
             continue
         if not item.filename:
@@ -38,6 +48,22 @@ def _parse_file(filename, match_kind, get_file_preprocessor):
         yield item
 
 
+def _resolve_max_size(filename, maxsizes):
+    for pattern, maxsize in (maxsizes.items() if maxsizes else ()):
+        if _match_glob(filename, pattern):
+            break
+    else:
+        return None
+    if not maxsize:
+        return None, None
+    maxtext, maxlines = maxsize
+    if maxtext is not None:
+        maxtext = int(maxtext)
+    if maxlines is not None:
+        maxlines = int(maxlines)
+    return maxtext, maxlines
+
+
 def parse_signature(text):
     raise NotImplementedError
 
diff --git a/Tools/c-analyzer/c_parser/parser/__init__.py b/Tools/c-analyzer/c_parser/parser/__init__.py
index df70aae66b776..b5eae2ed92d0d 100644
--- a/Tools/c-analyzer/c_parser/parser/__init__.py
+++ b/Tools/c-analyzer/c_parser/parser/__init__.py
@@ -120,12 +120,12 @@
 from ._info import SourceInfo
 
 
-def parse(srclines):
+def parse(srclines, **srckwargs):
     if isinstance(srclines, str):  # a filename
         raise NotImplementedError
 
     anon_name = anonymous_names()
-    for result in _parse(srclines, anon_name):
+    for result in _parse(srclines, anon_name, **srckwargs):
         yield ParsedItem.from_raw(result)
 
 
@@ -152,17 +152,19 @@ def anon_name(prefix='anon-'):
 _logger = logging.getLogger(__name__)
 
 
-def _parse(srclines, anon_name):
+def _parse(srclines, anon_name, **srckwargs):
     from ._global import parse_globals
 
-    source = _iter_source(srclines)
-    #source = _iter_source(srclines, showtext=True)
+    source = _iter_source(srclines, **srckwargs)
     for result in parse_globals(source, anon_name):
         # XXX Handle blocks here instead of in parse_globals().
         yield result
 
 
-def _iter_source(lines, *, maxtext=20_000, maxlines=700, showtext=False):
+# We use defaults that cover most files.  Files with bigger declarations
+# are covered elsewhere (MAX_SIZES in cpython/_parser.py).
+
+def _iter_source(lines, *, maxtext=10_000, maxlines=200, showtext=False):
     maxtext = maxtext if maxtext and maxtext > 0 else None
     maxlines = maxlines if maxlines and maxlines > 0 else None
     filestack = []
diff --git a/Tools/c-analyzer/c_parser/preprocessor/__main__.py b/Tools/c-analyzer/c_parser/preprocessor/__main__.py
index bfc61949a76e4..55aa8752dce72 100644
--- a/Tools/c-analyzer/c_parser/preprocessor/__main__.py
+++ b/Tools/c-analyzer/c_parser/preprocessor/__main__.py
@@ -43,7 +43,7 @@ def add_common_cli(parser, *, get_preprocessor=_get_preprocessor):
     def process_args(args, *, argv):
         ns = vars(args)
 
-        process_fail_arg(args, argv)
+        process_fail_arg(args, argv=argv)
         ignore_exc = ns.pop('ignore_exc')
         # We later pass ignore_exc to _get_preprocessor().
 
diff --git a/Tools/c-analyzer/cpython/__main__.py b/Tools/c-analyzer/cpython/__main__.py
index 06ec871ba75e3..be331d50427d5 100644
--- a/Tools/c-analyzer/cpython/__main__.py
+++ b/Tools/c-analyzer/cpython/__main__.py
@@ -111,6 +111,7 @@ def cmd_parse(filenames=None, **kwargs):
     c_parser.cmd_parse(
         filenames,
         relroot=REPO_ROOT,
+        file_maxsizes=_parser.MAX_SIZES,
         **kwargs
     )
 
@@ -127,6 +128,7 @@ def cmd_check(filenames=None, **kwargs):
         relroot=REPO_ROOT,
         _analyze=_analyzer.analyze,
         _CHECKS=CHECKS,
+        file_maxsizes=_parser.MAX_SIZES,
         **kwargs
     )
 
@@ -141,6 +143,7 @@ def cmd_analyze(filenames=None, **kwargs):
         relroot=REPO_ROOT,
         _analyze=_analyzer.analyze,
         formats=formats,
+        file_maxsizes=_parser.MAX_SIZES,
         **kwargs
     )
 
diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py
index 8526b2af15a23..90b470c8196c0 100644
--- a/Tools/c-analyzer/cpython/_parser.py
+++ b/Tools/c-analyzer/cpython/_parser.py
@@ -14,6 +14,10 @@
 GLOB_ALL = '**/*'
 
 
+def _abs(relfile):
+    return os.path.join(REPO_ROOT, relfile)
+
+
 def clean_lines(text):
     """Clear out comments, blank lines, and leading/trailing whitespace."""
     lines = (line.strip() for line in text.splitlines())
@@ -22,7 +26,7 @@ def clean_lines(text):
              if line and not line.startswith('#'))
     glob_all = f'{GLOB_ALL} '
     lines = (re.sub(r'^[*] ', glob_all, line) for line in lines)
-    lines = (os.path.join(REPO_ROOT, line) for line in lines)
+    lines = (_abs(line) for line in lines)
     return list(lines)
 
 
@@ -55,26 +59,31 @@ def clean_lines(text):
 
 # Windows
 Modules/_winapi.c               # windows.h
+Modules/expat/winconfig.h
 Modules/overlapped.c            # winsock.h
 Python/dynload_win.c            # windows.h
-Modules/expat/winconfig.h
 Python/thread_nt.h
 
 # other OS-dependent
+Python/dynload_aix.c            # sys/ldr.h
 Python/dynload_dl.c             # dl.h
 Python/dynload_hpux.c           # dl.h
-Python/dynload_aix.c            # sys/ldr.h
 Python/thread_pthread.h
 
 # only huge constants (safe but parsing is slow)
+Modules/_blake2/impl/blake2-kat.h
 Modules/_ssl_data.h
+Modules/_ssl_data_300.h
+Modules/_ssl_data_111.h
+Modules/cjkcodecs/mappings_*.h
 Modules/unicodedata_db.h
 Modules/unicodename_db.h
-Modules/cjkcodecs/mappings_*.h
 Objects/unicodetype_db.h
-Python/importlib.h
-Python/importlib_external.h
-Python/importlib_zipimport.h
+
+# generated
+Python/frozen_modules/*.h
+Python/opcode_targets.h
+Python/stdlib_module_names.h
 
 # @end=conf@
 ''')
@@ -126,35 +135,40 @@ def clean_lines(text):
 Parser/**/*.c	Py_BUILD_CORE	1
 Objects/**/*.c	Py_BUILD_CORE	1
 
-Modules/faulthandler.c	Py_BUILD_CORE	1
+Modules/_asynciomodule.c	Py_BUILD_CORE	1
+Modules/_collectionsmodule.c	Py_BUILD_CORE	1
+Modules/_ctypes/_ctypes.c	Py_BUILD_CORE	1
+Modules/_ctypes/cfield.c	Py_BUILD_CORE	1
+Modules/_cursesmodule.c	Py_BUILD_CORE	1
+Modules/_datetimemodule.c	Py_BUILD_CORE	1
 Modules/_functoolsmodule.c	Py_BUILD_CORE	1
-Modules/gcmodule.c	Py_BUILD_CORE	1
-Modules/getpath.c	Py_BUILD_CORE	1
+Modules/_heapqmodule.c	Py_BUILD_CORE	1
 Modules/_io/*.c	Py_BUILD_CORE	1
-Modules/itertoolsmodule.c	Py_BUILD_CORE	1
 Modules/_localemodule.c	Py_BUILD_CORE	1
-Modules/main.c	Py_BUILD_CORE	1
-Modules/posixmodule.c	Py_BUILD_CORE	1
-Modules/signalmodule.c	Py_BUILD_CORE	1
+Modules/_operator.c	Py_BUILD_CORE	1
+Modules/_posixsubprocess.c	Py_BUILD_CORE	1
+Modules/_sre.c	Py_BUILD_CORE	1
 Modules/_threadmodule.c	Py_BUILD_CORE	1
 Modules/_tracemalloc.c	Py_BUILD_CORE	1
-Modules/_asynciomodule.c	Py_BUILD_CORE	1
-Modules/mathmodule.c	Py_BUILD_CORE	1
-Modules/cmathmodule.c	Py_BUILD_CORE	1
 Modules/_weakref.c	Py_BUILD_CORE	1
+Modules/_zoneinfo.c	Py_BUILD_CORE	1
+Modules/atexitmodule.c	Py_BUILD_CORE	1
+Modules/cmathmodule.c	Py_BUILD_CORE	1
+Modules/faulthandler.c	Py_BUILD_CORE	1
+Modules/gcmodule.c	Py_BUILD_CORE	1
+Modules/getpath.c	Py_BUILD_CORE	1
+Modules/itertoolsmodule.c	Py_BUILD_CORE	1
+Modules/main.c	Py_BUILD_CORE	1
+Modules/mathmodule.c	Py_BUILD_CORE	1
+Modules/posixmodule.c	Py_BUILD_CORE	1
 Modules/sha256module.c	Py_BUILD_CORE	1
 Modules/sha512module.c	Py_BUILD_CORE	1
-Modules/_datetimemodule.c	Py_BUILD_CORE	1
-Modules/_ctypes/cfield.c	Py_BUILD_CORE	1
-Modules/_heapqmodule.c	Py_BUILD_CORE	1
-Modules/_posixsubprocess.c	Py_BUILD_CORE	1
-Modules/_sre.c	Py_BUILD_CORE	1
-Modules/_collectionsmodule.c	Py_BUILD_CORE	1
-Modules/_zoneinfo.c	Py_BUILD_CORE	1
+Modules/signalmodule.c	Py_BUILD_CORE	1
+Modules/symtablemodule.c	Py_BUILD_CORE	1
+Modules/timemodule.c	Py_BUILD_CORE	1
 Modules/unicodedata.c	Py_BUILD_CORE	1
-Modules/_cursesmodule.c	Py_BUILD_CORE	1
-Modules/_ctypes/_ctypes.c	Py_BUILD_CORE	1
 Objects/stringlib/codecs.h	Py_BUILD_CORE	1
+Objects/stringlib/unicode_format.h	Py_BUILD_CORE	1
 Python/ceval_gil.h	Py_BUILD_CORE	1
 Python/condvar.h	Py_BUILD_CORE	1
 
@@ -244,6 +258,7 @@ def clean_lines(text):
 Modules/sre_lib.h	LOCAL(type)	static inline type
 Modules/sre_lib.h	SRE(F)	sre_ucs2_##F
 Objects/stringlib/codecs.h	STRINGLIB_IS_UNICODE	1
+Include/internal/pycore_bitutils.h	_Py__has_builtin(B)	0
 
 # @end=tsv@
 ''')[1:]
@@ -264,6 +279,18 @@ def clean_lines(text):
     './Include/cpython/',
 ]
 
+MAX_SIZES = {
+    _abs('Include/**/*.h'): (5_000, 500),
+    _abs('Modules/_ctypes/ctypes.h'): (5_000, 500),
+    _abs('Modules/_datetimemodule.c'): (20_000, 300),
+    _abs('Modules/posixmodule.c'): (20_000, 500),
+    _abs('Modules/termios.c'): (10_000, 800),
+    _abs('Modules/_testcapimodule.c'): (20_000, 400),
+    _abs('Modules/expat/expat.h'): (10_000, 400),
+    _abs('Objects/stringlib/unicode_format.h'): (10_000, 400),
+    _abs('Objects/typeobject.c'): (20_000, 200),
+}
+
 
 def get_preprocessor(*,
                      file_macros=None,
@@ -298,6 +325,7 @@ def parse_file(filename, *,
         filename,
         match_kind=match_kind,
         get_file_preprocessor=get_file_preprocessor,
+        file_maxsizes=MAX_SIZES,
     )
 
 
@@ -317,5 +345,6 @@ def parse_files(filenames=None, *,
         filenames,
         match_kind=match_kind,
         get_file_preprocessor=get_file_preprocessor,
+        file_maxsizes=MAX_SIZES,
         **file_kwargs
     )



More information about the Python-checkins mailing list