[Numpy-svn] r5024 - trunk/numpy/lib
numpy-svn at scipy.org
numpy-svn at scipy.org
Fri Apr 11 03:08:24 EDT 2008
Author: oliphant
Date: 2008-04-11 02:08:13 -0500 (Fri, 11 Apr 2008)
New Revision: 5024
Modified:
trunk/numpy/lib/utils.py
Log:
Add lookfor function from ticket #734
Modified: trunk/numpy/lib/utils.py
===================================================================
--- trunk/numpy/lib/utils.py 2008-04-11 06:59:11 UTC (rev 5023)
+++ trunk/numpy/lib/utils.py 2008-04-11 07:08:13 UTC (rev 5024)
@@ -2,7 +2,10 @@
import os
import sys
import inspect
+import pkgutil
import types
+import re
+import pydoc
from numpy.core.numerictypes import obj2sctype, generic
from numpy.core.multiarray import dtype as _dtype
from numpy.core import product, ndarray
@@ -10,7 +13,7 @@
__all__ = ['issubclass_', 'get_numpy_include', 'issubsctype',
'issubdtype', 'deprecate', 'deprecate_with_doc',
'get_numarray_include',
- 'get_include', 'info', 'source', 'who',
+ 'get_include', 'info', 'source', 'who', 'lookfor',
'byte_bounds', 'may_share_memory', 'safe_eval']
def issubclass_(arg1, arg2):
@@ -471,6 +474,188 @@
except:
print >> output, "Not available for this object."
+
+# Cache for lookfor: {id(module): {name: (docstring, kind, index), ...}...}
+# where kind: "func", "class", "module", "object"
+# and index: index in breadth-first namespace traversal
+# Entries are created by _lookfor_generate_cache, one per searched module.
+_lookfor_caches = {}
+
+# regexp whose match indicates that the string may contain a function signature
+# (letters/underscores followed by parentheses enclosing a "," or "=").
+# lookfor uses it to skip a leading signature line when it picks the short
+# description shown for each result.
+_function_signature_re = re.compile(r"[a-z_]+\(.*[,=].*\)", re.I)
+
+def lookfor(what, module=None, import_modules=True, regenerate=False):
+    """
+    Search for objects whose documentation contains all given words.
+
+    Prints a summary of the matching functions and classes, sorted roughly
+    by relevance.  Modules and plain objects are never listed.
+
+    Parameters
+    ----------
+    what : str
+        String containing words to look for.  It is lower-cased and split
+        on whitespace; an object matches only if every word occurs in its
+        lower-cased docstring.  Nothing is printed if no words are given.
+    module : str, module
+        Module whose docstrings to go through.
+    import_modules : bool
+        Whether to import sub-modules in packages.
+        Will import only modules in __all__
+    regenerate : bool
+        Re-generate the docstring cache
+
+    Returns
+    -------
+    None
+        The listing is printed, or shown through the pager returned by
+        ``pydoc.getpager()`` when it is longer than 10 entries.
+
+    """
+    # Cache
+    cache = _lookfor_generate_cache(module, import_modules, regenerate)
+
+    # Search
+    # XXX: maybe using a real stemming search engine would be better?
+    whats = str(what).lower().split()
+    if not whats:
+        return
+
+    found = []
+    for name, (docstring, kind, index) in cache.items():
+        if kind in ('module', 'object'):
+            # don't show modules or objects
+            continue
+        doc = docstring.lower()
+        for w in whats:
+            if w not in doc:
+                break
+        else:
+            # every word was found in the docstring
+            found.append(name)
+
+    # Relevance sort
+    # XXX: this is full Harrison-Stetson heuristics now,
+    # XXX: it probably could be improved
+
+    kind_relevance = {'func': 1000, 'class': 1000,
+                      'module': -1000, 'object': -1000}
+
+    def relevance(name, docstr, kind, index):
+        r = 0
+        # do the keywords occur within the start of the docstring?
+        first_doc = "\n".join(docstr.lower().strip().split("\n")[:3])
+        r += sum([200 for w in whats if w in first_doc])
+        # do the keywords occur in the function name?
+        # (the keywords are lower-cased, so compare against a lower-cased
+        # name; otherwise names containing capitals never get the bonus)
+        lowered_name = name.lower()
+        r += sum([30 for w in whats if w in lowered_name])
+        # is the full name long?
+        r += -len(name) * 5
+        # is the object of bad type?
+        r += kind_relevance.get(kind, -1000)
+        # is the object deep in namespace hierarchy?
+        r += -name.count('.') * 10
+        # explicit floor division: same result as int "/" on Python 2
+        r += max(-index // 100, -100)
+        return r
+
+    def sort_key(name):
+        # most relevant first, ties broken alphabetically; using a key
+        # evaluates relevance() once per name instead of once per comparison
+        return (-relevance(name, *cache[name]), name)
+    found.sort(key=sort_key)
+
+    # Pretty-print
+    s = "Search results for '%s'" % (' '.join(whats))
+    help_text = [s, "-"*len(s)]
+    for name in found:
+        doc, kind, ix = cache[name]
+
+        doclines = [line.strip() for line in doc.strip().split("\n")
+                    if line.strip()]
+
+        # find a suitable short description: the first non-blank line, or
+        # the second one when the first looks like a function signature
+        try:
+            first_doc = doclines[0]
+            if _function_signature_re.search(first_doc):
+                first_doc = doclines[1]
+        except IndexError:
+            first_doc = ""
+        help_text.append("%s\n    %s" % (name, first_doc))
+
+    # Output
+    if len(help_text) > 10:
+        pager = pydoc.getpager()
+        pager("\n".join(help_text))
+    else:
+        # parenthesised single argument: prints the same on Python 2 and 3
+        print("\n".join(help_text))
+
+def _lookfor_generate_cache(module, import_modules, regenerate):
+    """
+    Generate docstring cache for given module.
+
+    Parameters
+    ----------
+    module : str, None, module
+        Module for which to generate docstring cache.  None means "numpy";
+        a string is imported and may be a dotted name such as "numpy.lib".
+    import_modules : bool
+        Whether to import sub-modules in packages.
+        Will import only modules in __all__
+    regenerate : bool
+        Re-generate the docstring cache
+
+    Returns
+    -------
+    cache : dict {obj_full_name: (docstring, kind, index), ...}
+        Docstring cache for the module, either cached one (regenerate=False)
+        or newly generated.
+
+    Raises
+    ------
+    ImportError
+        If `module` is a string naming a module that cannot be imported.
+
+    """
+    if module is None:
+        module = "numpy"
+
+    if isinstance(module, str):
+        # __import__("a.b") returns the top-level package "a", not "a.b";
+        # the module actually requested is in sys.modules after the import.
+        __import__(module)
+        module = sys.modules[module]
+
+    if id(module) in _lookfor_caches and not regenerate:
+        return _lookfor_caches[id(module)]
+
+    # walk items and collect docstrings
+    cache = {}
+    seen = {}
+    index = 0
+    # first-in first-out queue, so the namespace is walked breadth-first
+    stack = [(module.__name__, module)]
+    while stack:
+        name, item = stack.pop(0)
+        if id(item) in seen:
+            # the same object is reachable under several names; index it
+            # only under the first name encountered
+            continue
+        seen[id(item)] = True
+
+        index += 1
+        kind = "object"
+
+        if inspect.ismodule(item):
+            kind = "module"
+            try:
+                _all = item.__all__
+            except AttributeError:
+                _all = None
+            # import sub-packages
+            if import_modules and hasattr(item, '__path__'):
+                for m in pkgutil.iter_modules(item.__path__):
+                    if _all is not None and m[1] not in _all:
+                        continue
+                    try:
+                        __import__("%s.%s" % (name, m[1]))
+                    except ImportError:
+                        # best effort: skip sub-modules that fail to import
+                        continue
+            for n, v in inspect.getmembers(item):
+                if _all is not None and n not in _all:
+                    continue
+                stack.append(("%s.%s" % (name, n), v))
+        elif inspect.isclass(item):
+            kind = "class"
+            for n, v in inspect.getmembers(item):
+                stack.append(("%s.%s" % (name, n), v))
+        elif callable(item):
+            kind = "func"
+
+        doc = inspect.getdoc(item)
+        if doc is not None:
+            cache[name] = (doc, kind, index)
+
+    # register only a completely built cache, so that an exception during
+    # the walk cannot leave a partial cache behind for later calls
+    _lookfor_caches[id(module)] = cache
+    return cache
+
#-----------------------------------------------------------------------------
# The following SafeEval class and company are adapted from Michael Spencer's
More information about the Numpy-svn
mailing list