[Python-checkins] bpo-37958: Adding get_profile_dict to pstats (GH-15495)

Miss Islington (bot) webhook-mailer at python.org
Wed Jan 15 17:52:01 EST 2020


https://github.com/python/cpython/commit/01602ae40321ecdb375ee6d44eaeac3255857879
commit: 01602ae40321ecdb375ee6d44eaeac3255857879
branch: master
author: Daniel Olshansky <olshansky.daniel at gmail.com>
committer: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
date: 2020-01-15T14:51:54-08:00
summary:

bpo-37958: Adding get_profile_dict to pstats (GH-15495)



pstats is really useful or profiling and printing the output of the execution of some block of code, but I've found on multiple occasions when I'd like to access this output directly in an easily usable dictionary on which I can further analyze or manipulate.

The proposal is to add a function called get_profile_dict inside of pstats that'll automatically return this data the data in an easily accessible dict.

The output of the following script:

```
import cProfile, pstats
import pprint
from pstats import func_std_string, f8

def fib(n):
    if n == 0:
        return 0
    if n == 1:
        return 1
    return fib(n-1) + fib(n-2)

pr = cProfile.Profile()
pr.enable()
fib(5)
pr.create_stats()

ps = pstats.Stats(pr).sort_stats('tottime', 'cumtime')

def get_profile_dict(self, keys_filter=None):
    """
        Returns a dict where the key is a function name and the value is a dict
        with the following keys:
            - ncalls
            - tottime
            - percall_tottime
            - cumtime
            - percall_cumtime
            - file_name
            - line_number

        keys_filter can be optionally set to limit the key-value pairs in the
        retrieved dict.
    """
    pstats_dict = {}
    func_list = self.fcn_list[:] if self.fcn_list else list(self.stats.keys())

    if not func_list:
        return pstats_dict

    pstats_dict["total_tt"] = float(f8(self.total_tt))
    for func in func_list:
        cc, nc, tt, ct, callers = self.stats[func]
        file, line, func_name = func
        ncalls = str(nc) if nc == cc else (str(nc) + '/' + str(cc))
        tottime = float(f8(tt))
        percall_tottime = -1 if nc == 0 else float(f8(tt/nc))
        cumtime = float(f8(ct))
        percall_cumtime = -1 if cc == 0 else float(f8(ct/cc))
        func_dict = {
            "ncalls": ncalls,
            "tottime": tottime, # time spent in this function alone
            "percall_tottime": percall_tottime,
            "cumtime": cumtime, # time spent in the function plus all functions that this function called,
            "percall_cumtime": percall_cumtime,
            "file_name": file,
            "line_number": line
        }
        func_dict_filtered = func_dict if not keys_filter else { key: func_dict[key] for key in keys_filter }
        pstats_dict[func_name] = func_dict_filtered

    return pstats_dict

pp = pprint.PrettyPrinter(depth=6)
pp.pprint(get_profile_dict(ps))
```

will produce:

```
{"<method 'disable' of '_lsprof.Profiler' objects>": {'cumtime': 0.0,
                                                      'file_name': '~',
                                                      'line_number': 0,
                                                      'ncalls': '1',
                                                      'percall_cumtime': 0.0,
                                                      'percall_tottime': 0.0,
                                                      'tottime': 0.0},
 'create_stats': {'cumtime': 0.0,
                  'file_name': '/usr/local/Cellar/python/3.7.4/Frameworks/Python.framework/Versions/3.7/lib/python3.7/cProfile.py',
                  'line_number': 50,
                  'ncalls': '1',
                  'percall_cumtime': 0.0,
                  'percall_tottime': 0.0,
                  'tottime': 0.0},
 'fib': {'cumtime': 0.0,
         'file_name': 'get_profile_dict.py',
         'line_number': 5,
         'ncalls': '15/1',
         'percall_cumtime': 0.0,
         'percall_tottime': 0.0,
         'tottime': 0.0},
 'total_tt': 0.0}
 ```

 As an example, this can be used to generate a stacked column chart using various visualization tools which will assist in easily identifying program bottlenecks.



https://bugs.python.org/issue37958



Automerge-Triggered-By: @gpshead

files:
A Misc/NEWS.d/next/Library/2019-08-27-03-57-25.bpo-37958.lRORI3.rst
M Doc/library/profile.rst
M Lib/pstats.py
M Lib/test/test_pstats.py

diff --git a/Doc/library/profile.rst b/Doc/library/profile.rst
index 8d589d247b774..34525a96f55c4 100644
--- a/Doc/library/profile.rst
+++ b/Doc/library/profile.rst
@@ -525,6 +525,17 @@ Analysis of the profiler data is done using the :class:`~pstats.Stats` class.
       ordering are identical to the :meth:`~pstats.Stats.print_callers` method.
 
 
+    .. method:: get_stats_profile()
+
+      This method returns an instance of StatsProfile, which contains a mapping
+      of function names to instances of FunctionProfile. Each FunctionProfile
+      instance holds information related to the function's profile such as how
+      long the function took to run, how many times it was called, etc...
+
+       .. versionadded:: 3.9
+          Added the following dataclasses: StatsProfile, FunctionProfile.
+          Added the following function: get_stats_profile.
+
 .. _deterministic-profiling:
 
 What Is Deterministic Profiling?
diff --git a/Lib/pstats.py b/Lib/pstats.py
index 4b419a8ecdb6c..e781b91c6052c 100644
--- a/Lib/pstats.py
+++ b/Lib/pstats.py
@@ -25,11 +25,13 @@
 import time
 import marshal
 import re
+
 from enum import Enum
 from functools import cmp_to_key
+from dataclasses import dataclass
+from typing import Dict
 
-__all__ = ["Stats", "SortKey"]
-
+__all__ = ["Stats", "SortKey", "FunctionProfile", "StatsProfile"]
 
 class SortKey(str, Enum):
     CALLS = 'calls', 'ncalls'
@@ -52,6 +54,22 @@ def __new__(cls, *values):
         return obj
 
 
+ at dataclass(unsafe_hash=True)
+class FunctionProfile:
+    ncalls: int
+    tottime: float
+    percall_tottime: float
+    cumtime: float
+    percall_cumtime: float
+    file_name: str
+    line_number: int
+
+ at dataclass(unsafe_hash=True)
+class StatsProfile:
+    '''Class for keeping track of an item in inventory.'''
+    total_tt: float
+    func_profiles: Dict[str, FunctionProfile]
+
 class Stats:
     """This class is used for creating reports from data generated by the
     Profile class.  It is a "friend" of that class, and imports data either
@@ -333,6 +351,41 @@ def eval_print_amount(self, sel, list, msg):
 
         return new_list, msg
 
+    def get_stats_profile(self):
+        """This method returns an instance of StatsProfile, which contains a mapping
+        of function names to instances of FunctionProfile. Each FunctionProfile
+        instance holds information related to the function's profile such as how
+        long the function took to run, how many times it was called, etc...
+        """
+        func_list = self.fcn_list[:] if self.fcn_list else list(self.stats.keys())
+        if not func_list:
+            return StatsProfile(0, {})
+
+        total_tt = float(f8(self.total_tt))
+        func_profiles = {}
+        stats_profile = StatsProfile(total_tt, func_profiles)
+
+        for func in func_list:
+            cc, nc, tt, ct, callers = self.stats[func]
+            file_name, line_number, func_name = func
+            ncalls = str(nc) if nc == cc else (str(nc) + '/' + str(cc))
+            tottime = float(f8(tt))
+            percall_tottime = -1 if nc == 0 else float(f8(tt/nc))
+            cumtime = float(f8(ct))
+            percall_cumtime = -1 if cc == 0 else float(f8(ct/cc))
+            func_profile = FunctionProfile(
+                ncalls,
+                tottime, # time spent in this function alone
+                percall_tottime,
+                cumtime, # time spent in the function plus all functions that this function called,
+                percall_cumtime,
+                file_name,
+                line_number
+            )
+            func_profiles[func_name] = func_profile
+
+        return stats_profile
+
     def get_print_list(self, sel_list):
         width = self.max_name_len
         if self.fcn_list:
diff --git a/Lib/test/test_pstats.py b/Lib/test/test_pstats.py
index f835ce309a60e..f3a6e586c3bcc 100644
--- a/Lib/test/test_pstats.py
+++ b/Lib/test/test_pstats.py
@@ -1,10 +1,12 @@
 import unittest
+
 from test import support
 from io import StringIO
-import pstats
 from pstats import SortKey
 
-
+import pstats
+import time
+import cProfile
 
 class AddCallersTestCase(unittest.TestCase):
     """Tests for pstats.add_callers helper."""
@@ -75,6 +77,24 @@ def test_sort_starts_mix(self):
                           SortKey.TIME,
                           'calls')
 
+    def test_get_stats_profile(self):
+        def pass1(): pass
+        def pass2(): pass
+        def pass3(): pass
+
+        pr = cProfile.Profile()
+        pr.enable()
+        pass1()
+        pass2()
+        pass3()
+        pr.create_stats()
+        ps = pstats.Stats(pr)
+
+        stats_profile = ps.get_stats_profile()
+        funcs_called = set(stats_profile.func_profiles.keys())
+        self.assertIn('pass1', funcs_called)
+        self.assertIn('pass2', funcs_called)
+        self.assertIn('pass3', funcs_called)
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2019-08-27-03-57-25.bpo-37958.lRORI3.rst b/Misc/NEWS.d/next/Library/2019-08-27-03-57-25.bpo-37958.lRORI3.rst
new file mode 100644
index 0000000000000..d0b4d6adca4b9
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-08-27-03-57-25.bpo-37958.lRORI3.rst
@@ -0,0 +1,2 @@
+Added the pstats.Stats.get_profile_dict() method to return the profile
+data as a StatsProfile instance.



More information about the Python-checkins mailing list