[pypy-commit] lang-smalltalk storage: Removed parsing of binary logs. Slightly improved outputs. Added command to aggregate a logfile.
anton_gulenko
noreply at buildbot.pypy.org
Mon Jul 7 13:16:31 CEST 2014
Author: Anton Gulenko <anton.gulenko at googlemail.com>
Branch: storage
Changeset: r863:0fbe2a4a6a3b
Date: 2014-07-02 17:54 +0200
http://bitbucket.org/pypy/lang-smalltalk/changeset/0fbe2a4a6a3b/
Log: Removed parsing of binary logs. Slightly improved outputs. Added
command to aggregate a logfile.
diff --git a/spyvm/tool/storagelog_parser.py b/spyvm/tool/storagelog_parser.py
--- a/spyvm/tool/storagelog_parser.py
+++ b/spyvm/tool/storagelog_parser.py
@@ -1,12 +1,10 @@
import re, os, sys, operator
-import spyvm.storage_logger
OPERATIONS = ["Filledin", "Initialized", "Switched"]
-# Reverse the two maps used to encode the byte encoded log-output
-storage_map = {v:k for k, v in spyvm.storage_logger.storage_map.items()}
-operation_map = {v:k for k, v in spyvm.storage_logger.operation_map.items()}
+IMAGE_LOADING_STORAGE = " Image Loading Storage" # Space to be sorted to the beginning
+OBJECT_CREATION_STORAGE = " Object Creation Storage"
# ====================================================================
# ======== Logfile parsing
@@ -19,83 +17,17 @@
else:
opener = lambda: open(filename, 'r', 1)
with opener() as file:
- if flags.binary:
- while True:
- try:
- entry = parse_binary(file)
- if entry == None:
- if flags.verbose:
- if file is sys.stdin:
- print "Stopped after parsing %d entries." % parsed_entries
- else:
- tell = file.tell()
- format = (tell, parsed_entries, os.path.getsize(file.name) - tell)
- print "Stopped parsing after %d bytes (%d entries). Ignoring leftover %d bytes." % format
- break
- else:
- parsed_entries += 1
- callback(entry)
- except:
- tell = 0 if file is sys.stdin else file.tell()
- print "Exception while parsing file, after %d bytes (%d entries)" % (tell, len(entries))
- raise
- else:
- while True:
- line = file.readline()
- if len(line) == 0:
- break
- entry = parse_line(line, flags)
- if entry:
- parsed_entries += 1
- callback(entry)
+ while True:
+ line = file.readline()
+ if len(line) == 0:
+ break
+ entry = parse_line(line, flags)
+ if entry:
+ parsed_entries += 1
+ callback(entry)
return parsed_entries
-def safe_read(file, size):
- result = file.read(size)
- retries = 20
- # Try to work around stdin's unpredictability
- while len(result) < size:
- result += file.read(size - len(result))
- retries -= 1
- if retries < 0:
- return None
- import time
- time.sleep(0.001)
- return result
-
-def parse_binary(file):
- # First 3 bytes: operation, old storage, new storage
- header = safe_read(file, 3)
- if header is None: return None
- operation_byte = ord(header[0])
- old_storage_byte = ord(header[1])
- new_storage_byte = ord(header[2])
- # This is the only way to check if we are reading a correct log entry
- if operation_byte not in operation_map or old_storage_byte not in storage_map or new_storage_byte not in storage_map:
- print "Wrong 3 bytes: %d %d %d" % header
- return None
- operation = operation_map[operation_byte]
- old_storage = storage_map[old_storage_byte]
- new_storage = storage_map[new_storage_byte]
-
-# Next 4 bytes: object size (little endian)
- size_bytes = safe_read(file, 4)
- if size_bytes is None: return None
- size = int(ord(size_bytes[0]) + (ord(size_bytes[1])<<8) + (ord(size_bytes[2])<<16) + (ord(size_bytes[3])<<24))
-
- # Last: classname, nul-terminated
- classname = ""
- while True:
- byte = safe_read(file, 1)
- if byte is None: return None
- if byte == chr(0):
- break
- classname += byte
- if len(classname) == 0:
- classname = None
- return LogEntry(operation, old_storage, new_storage, classname, size)
-
-line_pattern = re.compile("^(?P<operation>\w+) \(((?P<old>\w+) -> )?(?P<new>\w+)\)( of (?P<classname>.+))? size (?P<size>[0-9]+)$")
+line_pattern = re.compile("^(?P<operation>\w+) \(((?P<old>\w+) -> )?(?P<new>\w+)\)( of (?P<classname>.+))? size (?P<size>[0-9]+)( objects (?P<objects>[0-9]+))?$")
def parse_line(line, flags):
result = line_pattern.match(line)
@@ -108,32 +40,42 @@
new_storage = result.group('new')
classname = result.group('classname')
size = result.group('size')
- return LogEntry(operation, old_storage, new_storage, classname, size)
+ objects = result.group('objects')
+ return LogEntry(operation, old_storage, new_storage, classname, size, objects)
class LogEntry(object):
- def __init__(self, operation, old_storage, new_storage, classname, size):
+ def __init__(self, operation, old_storage, new_storage, classname, size, objects):
self.operation = str(operation)
self.new_storage = str(new_storage)
self.classname = str(classname)
- self.size = float(size)
+ self.size = int(size)
+ self.objects = int(objects) if objects else 1
if old_storage is None:
if operation == "Filledin":
- old_storage = " Image Loading Storage" # Space to be sorted to the beginning
+ old_storage = IMAGE_LOADING_STORAGE
elif operation == "Initialized":
- old_storage = " Object Creation Storage"
+ old_storage = OBJECT_CREATION_STORAGE
else:
assert False, "old_storage has to be available in a Switched operation"
self.old_storage = str(old_storage)
+ def clear_old_storage(self):
+ if self.old_storage in (IMAGE_LOADING_STORAGE, OBJECT_CREATION_STORAGE):
+ self.old_storage = None
+
def full_key(self):
return (self.operation, self.old_storage, self.new_storage)
+ def __lt__(self, other):
+ return self.classname < other.classname
+
def __str__(self):
old_storage_string = "%s -> " % self.old_storage if self.old_storage else ""
classname_string = " of %s" % self.classname if self.classname else ""
- return "%s (%s%s)%s size %d" % (self.operation, old_storage_string, self.new_storage, classname_string, self.size)
+ objects_string = " objects %d" % self.objects if self.objects > 1 else ""
+ return "%s (%s%s)%s size %d%s" % (self.operation, old_storage_string, self.new_storage, classname_string, self.size, objects_string)
# ====================================================================
# ======== Graph parsing
@@ -158,8 +100,8 @@
percent_objects = " (%.1f%%)" % (float(self.objects)*100 / total.objects)
else:
percent_objects = ""
- slots = format(self.slots, ",.0f")
- objects = format(self.objects, ",.0f")
+ slots = format(self.slots, ",d")
+ objects = format(self.objects, ",d")
return "%s%s slots in %s%s objects (avg size: %.1f)" % (slots, percent_slots, objects, percent_objects, avg_slots)
def __repr__(self):
@@ -167,7 +109,7 @@
def add_log_entry(self, entry):
self.slots = self.slots + entry.size
- self.objects = self.objects + 1
+ self.objects = self.objects + entry.objects
def __sub__(self, other):
return Operations(self.objects - other.objects, self.slots - other.slots)
@@ -245,6 +187,17 @@
def add_log_entry(self, entry):
self.cls(entry.classname).add_log_entry(entry)
+ def as_log_entries(self):
+ entries = []
+ for classname, ops in self.classes.classes.items():
+ entry = LogEntry(self.operation, self.origin.name, self.target.name, classname, ops.slots, ops.objects)
+ entry.clear_old_storage()
+ entries.append(entry)
+ return entries
+
+ def __lt__(self, other):
+ return self.full_key() < other.full_key()
+
def __str__(self):
return "[%s %s -> %s]" % (self.operation, self.origin, self.target)
@@ -544,14 +497,28 @@
return result
# ====================================================================
-# ======== Main
+# ======== Other commands
# ====================================================================
+def command_aggregate(logfile, flags):
+ graph = make_graph(logfile, flags)
+ edges = graph.edges.values()
+ edges.sort()
+ for edge in edges:
+ logentries = edge.as_log_entries()
+ logentries.sort()
+ for entry in logentries:
+ print entry
+
def command_print_entries(logfile, flags):
def callback(entry):
print entry
parse(logfile, flags, callback)
+# ====================================================================
+# ======== Main
+# ====================================================================
+
class Flags(object):
def __init__(self, flags):
@@ -583,7 +550,6 @@
('allstorage', '-a'),
('detailed', '-d'),
('classes', '-c'),
- ('binary', '-b'),
])
command_prefix = "command_"
More information about the pypy-commit mailing list