[Python-checkins] r52373 - sandbox/trunk/import_in_py/importer.py sandbox/trunk/import_in_py/test_importer.py

brett.cannon python-checkins at python.org
Wed Oct 18 03:30:31 CEST 2006


Author: brett.cannon
Date: Wed Oct 18 03:30:30 2006
New Revision: 52373

Modified:
   sandbox/trunk/import_in_py/importer.py
   sandbox/trunk/import_in_py/test_importer.py
Log:
Rework FileSystemLoader to pass in a path instead of a file object.  This is
because different handlers want to open paths differently (e.g., 'rb' compared
to 'U').  Change PyPycFileHandler accordingly.

Also have a rough version of the import object.  Tests are not fleshed out.
The hope is that once tests are done (and the subsequent bugs have been worked
out), Python 1.4 semantics (i.e., import without packages) will be working.


Modified: sandbox/trunk/import_in_py/importer.py
==============================================================================
--- sandbox/trunk/import_in_py/importer.py	(original)
+++ sandbox/trunk/import_in_py/importer.py	Wed Oct 18 03:30:30 2006
@@ -44,19 +44,21 @@
     + Importing the module being imported in a circular import dependency
       requires that module added to sys.modules stay consistent from the
       point it is added to initialization.
+* Remove any idea of default importer.
+    + Removes None entries from sys.path_importer_cache.
+    + Rely on default importers being in sys.path_hooks or sys.meta_path.
        
 Rejected Py3K improvements:
 * Passing in new module to loaders
     Creating a new module is minimal and loader might want to use a different
     type of object.
-      
+
 PTL use-case:
 * Use filesystem importer and loader.
 * Subclass PyPycFileHandler
     + Set source_handles to '.ptl' and bytecode to '.ptlc'.
     + get_code_from_source()
-        - Handle transforming source to pure Python source.
-        - Return code object and timestamp.
+        - Handle transforming to pure Python code here.
  
 sqlite3 importer use-case:
 + DB
@@ -246,18 +248,21 @@
         try:
             return sys.modules[fullname]
         except KeyError:
-            module = imp.new_module(fullname)
-            sys.modules[fullname] = module
-            module.__loader__ = self
-            module.__file__ = self.file_path
-            module.__name__ = fullname
+            new_module = imp.new_module(fullname)
+            sys.modules[fullname] = new_module
+            # If handler returns a different module these should already be
+            # set.
+            new_module.__file__ = self.file_path
+            new_module.__name__ = fullname
             try:
-                with open(self.file_path) as code_file:
-                    self.handler.handle_code(module, code_file,
-                                                self.chosen_handle)
+                module = self.handler.handle_code(new_module, self.file_path,
+                                                    self.chosen_handle)
             except:
                 del sys.modules[fullname]
                 raise
+            # Needs to be set regardless of whether passed-in module was used
+            # or not.
+            module.__loader__ = self
             return module
 
 
@@ -440,19 +445,7 @@
     
     """Handler for source code and bytecode files.
     
-    All methods that work with opaque objects expect a file-like interface:
-    * read(n=-1)
-        Read n bytes from the file, or all bytes if no argument is given.
-    * close()
-        Close the file.  It is a no-op if called previously.
-    * name
-        Attribute with the location to the file.  If source and bytecode are
-        not both used by an instance of this class then the values does not
-        need to be a valid path, otherwise care needs to be taken to make sure
-        the value is reasonable.
-        
-    The file type by default implements the required interface.  StringIO
-    objects require the 'name' attribute to be set.
+    All methods that work with opaque objects expect file paths.
     
     """
     
@@ -473,21 +466,21 @@
     def find_source_to_read(self, bytecode_file):
         """Return the file object to the corresponding source code."""
         base, ext = os.path.splitext(self.get_location(bytecode_file))
-        return open(base + self.source_handles[-1], 'U')
+        return base + self.source_handles[-1]
 
     def find_bytecode_to_write(self, source_file):
         """Return the file object to the corresponding bytecode."""
         base, ext = os.path.splitext(self.get_location(source_file))
-        return open(base + self.bytecode_handles[-1], 'wb')
+        return base + self.bytecode_handles[-1]
         
-    def get_location(self, file_object):
+    def get_location(self, file_path):
         """Return the path to the file object."""
-        return file_object.name
+        return file_path
         
-    def get_bytecode(self, bytecode_file):
+    def get_bytecode(self, bytecode_path):
         """Return the magic number, timestamp, and bytecode from the bytecode
-        file."""
-        with contextlib.closing(bytecode_file):
+        file path."""
+        with open(bytecode_path, 'rb') as bytecode_file:
             magic = bytecode_file.read(4)
             timestamp = bytecode_file.read(4)
             bytecode = bytecode_file.read()
@@ -495,36 +488,30 @@
         # and timestamp.
         return magic, timestamp, bytecode
         
-    def verify_timestamp(self, bytecode_timestamp, source_file):
+    def verify_timestamp(self, bytecode_timestamp, source_path):
         """Verify that 'timestamp' is newer than the modification time for
         'source_path'."""
         return True
         # XXX Won't work until can unmarshal longs.
-        source_path = self.get_location(source_file)
         source_timestamp = os.stat(source_path).st_mtime
         return source_timestamp <= bytecode_timestamp
         
-    def get_code_from_source(self, source_file):
-        """Return the code object created from the source code file and the
+    def get_code_from_source(self, source_path):
+        """Return the code object created from the source code path and the
         timestamp on the source file."""
-        with contextlib.closing(source_file):
+        with open(source_path, 'U') as source_file:
             source_code = source_file.read()
-        source_location = self.get_location(source_file)
-        timestamp = os.stat(source_location).st_mtime
-        code_object = compile(source_code, self.get_location(source_file),
+        timestamp = os.stat(source_path).st_mtime
+        code_object = compile(source_code, source_path,
                                 'exec')
         return code_object, timestamp
         
-    def write_bytecode(self, code_object, bytecode_file, timestamp):
+    def write_bytecode(self, code_object, bytecode_path, timestamp):
         """Write out code_object to the file location bytecode_path with the
         passed-in timestamp."""
         # XXX w/o being able to marshal longs, we need to use py_compile.
-        with contextlib.closing(bytecode_file):
-            source_file = self.find_source_to_read(bytecode_file)
-            with contextlib.closing(source_file):
-                source_location = self.get_location(source_file)
-            bytecode_location = self.get_location(bytecode_file)
-        py_compile.compile(source_location, bytecode_location, doraise=True)
+        source_path = self.find_source_to_read(bytecode_path)
+        py_compile.compile(source_path, bytecode_path, doraise=True)
         
 
 class ExtensionFileHandler(object):
@@ -536,7 +523,114 @@
         self.handles = tuple(suffix[0] for suffix in imp.get_suffixes()
                                 if suffix[2] == imp.C_EXTENSION)
     
-    def handle_code(self, module, extension_file, to_handle):
+    def handle_code(self, module, extension_path, to_handle):
         """Import an extension module."""
-        return imp.load_dynamic(module.__name__, extension_file.name,
-                                extension_file)
\ No newline at end of file
+        return imp.load_dynamic(module.__name__, extension_path)
+                                
+
+class Importer(object):
+
+    """Class that re-implements __import__."""
+    #XXX How to handle None entries in sys.path_importer_cache?
+
+    def __init__(self, default_importer_factory=None,
+                 default_meta_path=(BuiltinImporter, FrozenImporter)):
+        """Store the built-in importer factory to use when
+        sys.path_importer_cache has a None entry.
+
+        The importer factory should act just like an object that was put on
+        sys.path_hooks.
+
+        """
+        if default_importer_factory:
+            self.default_importer_factory = default_importer_factory
+        else:
+            # Create a handler to deal with extension modules, .py, and .pyc
+            # files.  Built-in and frozen modules are handled by sys.meta_path
+            # entries.
+            handlers = ExtensionFileHandler(), PyPycFileHandler()
+            self.default_importer_factory = FileSystemFactory(*handlers)
+        self.default_meta_path = default_meta_path
+
+    def search_meta_path(self, name):
+        """Check the importers on sys.meta_path for a loader."""
+        for entry in (tuple(sys.meta_path) + tuple(self.default_meta_path)):
+            loader = entry.find_module(name)
+            if loader:
+                return loader
+        else:
+            raise ImportError("%s not found on meta path" % name)
+
+    def sys_path_importer(self, path_entry):
+        """Return the importer for the entry on sys.path."""
+        try:
+            # See if an importer is cached.
+            importer = sys.path_importer_cache[path_entry]
+            # If None was stored, use default importer factory.
+            if importer is None:
+                return self.default_importer_factory(path_entry)
+            else:
+                return importer
+        except KeyError:
+            # No cached importer found; try to get a new one from
+            # sys.path_hooks.
+            for importer_factory in sys.path_hooks:
+                try:
+                    importer = importer_factory(path_entry)
+                    sys.path_importer_cache[path_entry] = importer
+                    return importer
+                except ImportError:
+                    continue
+            else:
+                # No importer factory on sys.path_hooks works; use the default
+                # importer factory.
+                sys.path_importer_cache[path_entry] = None
+                try:
+                    return self.default_importer_factory(path_entry)
+                except ImportError:
+                    raise ImportError("no importer found for %s" % path_entry)
+
+    def search_sys_path(self, name):
+        """Check sys.path for the module and return a loader if found."""
+        for entry in sys.path:
+            try:
+                importer = self.sys_path_importer(entry)
+            except ImportError:
+                continue
+            loader = importer.find_module(name)
+            if loader:
+                return loader
+        else:
+            raise ImportError("%s not found on sys.path" % name)
+
+    def __call__(self, name, globals={}, locals={}, fromlist=[], level=-1):
+        """Import a module.
+
+        'name' is the dotted name of the module/package to import.  'globals' and
+        'locals' are the global and local namespace dictionaries of the caller
+        (only 'globals' is used to introspect the __path__ attribute of the calling
+        module).  fromlist is any specific objects that are to eventually be put
+        into the namespace (e.g., ``from for.bar import baz`` would have baz in the
+        fromlist).  'level' is set to -1 if both relative and absolute imports are
+        supported, 0 if only for absolute, and positive values represent the number
+        of levels up from the directory the calling module is in.
+
+        When 'name' is a dotted name, there are two different situations to
+        consider.  One is when the fromlist is empty.  In this situation the import
+        imports and returns the name up to the first dot.  All subsequent names are
+        imported but set at attributes as needed.  When fromlist is not empty then
+        the module represented by the full dotted name is returned.
+
+        """
+        # XXX Does not handle packages yet, which means no absolute/relative imports
+        # or fromlist worries.
+        # Try meta_path entries.
+        try:
+            # Attempt to find a loader on sys.meta_path.
+            loader = self.search_meta_path(name)
+        except ImportError:
+            # sys.meta_path search failed.  Attempt to find a loader on
+            # sys.path.  If this fails then module cannot be found.
+            loader = self.search_sys_path(name)
+        # A loader was found.
+        return loader.load_module(name)
\ No newline at end of file

Modified: sandbox/trunk/import_in_py/test_importer.py
==============================================================================
--- sandbox/trunk/import_in_py/test_importer.py	(original)
+++ sandbox/trunk/import_in_py/test_importer.py	Wed Oct 18 03:30:30 2006
@@ -443,14 +443,11 @@
         
     def test_get_location(self):
         # Should return the value on the 'name' attribute of its argument.
-        class Tester(object):
-            name = 42
-        self.failUnlessEqual(self.handler.get_location(Tester), Tester.name)
+        self.failUnlessEqual(self.handler.get_location('path'), 'path')
         
     def test_get_code_from_source(self):
         # Should be able to read from a file object and return a code object.
-        with open(self.source_path, 'rU') as source_file:
-            result = self.handler.get_code_from_source(source_file)
+        result = self.handler.get_code_from_source(self.source_path)
         code_object, timestamp = result
         exec code_object in self.module_object.__dict__
         self.verify_module(self.module_object)
@@ -459,24 +456,17 @@
         
     def test_find_source_to_read(self):
         # Should be able to deduce .py file from .pyc file.
-        with open(self.bytecode_path, 'rb') as bytecode_file:
-            source_file = self.handler.find_source_to_read(bytecode_file)
-            with contextlib.closing(source_file):
-                source_file_path = source_file.name
+        source_file_path = self.handler.find_source_to_read(self.bytecode_path)
         self.failUnlessEqual(source_file_path, self.source_path)
         
     def test_find_bytecode_to_write(self):
         # Should be able to deduce .pyc file from .py file.
-        with open(self.source_path, 'U') as source_file:
-            bytecode_file = self.handler.find_bytecode_to_write(source_file)
-            with contextlib.closing(bytecode_file):
-                bytecode_file_path = bytecode_file.name
-        self.failUnlessEqual(bytecode_file_path, self.bytecode_path)
+        bytecode_path = self.handler.find_bytecode_to_write(self.source_path)
+        self.failUnlessEqual(bytecode_path, self.bytecode_path)
         
     def test_get_bytecode(self):
         # Magic number should be good
-        with open(self.bytecode_path, 'rb') as bytecode_file:
-            result = self.handler.get_bytecode(bytecode_file)
+        result = self.handler.get_bytecode(self.bytecode_path)
         magic, timestamp, bytecode = result
         # XXX self.failUnlessEqual(magic, imp.get_magic())
         source_timestamp = os.stat(self.source_path).st_mtime
@@ -487,14 +477,12 @@
         
     def test_verify_timestamp(self):
         source_timestamp = os.stat(self.source_path).st_mtime
-        with open(self.source_path, 'U') as source_file:
-            result = self.handler.verify_timestamp(source_timestamp,
-                                                    source_file)
-            self.failUnless(result)
+        result = self.handler.verify_timestamp(source_timestamp,
+                                                self.source_path)
+        self.failUnless(result)
         
     def test_get_code_from_source(self):
-        with open(self.source_path, 'U') as source_file:
-            result = self.handler.get_code_from_source(source_file)
+        result = self.handler.get_code_from_source(self.source_path)
         code_object, timestamp = result
         exec code_object in self.module_object.__dict__
         self.verify_module(self.module_object)
@@ -506,20 +494,17 @@
         # a timestamp of the source file, and correct bytecode.
         os.remove(self.bytecode_path)
         timestamp = os.stat(self.source_path).st_mtime
-        with open(self.bytecode_path, 'wb') as bytecode_file:
-            self.handler.write_bytecode(self.code_object, bytecode_file,
-                                        timestamp)
+        self.handler.write_bytecode(self.code_object, self.bytecode_path,
+                                    timestamp)
         # Verify bytecode file was created.
         self.failUnless(os.path.exists(self.bytecode_path))
-        with open(self.bytecode_path, 'rb') as bytecode_file:
-            result = self.handler.get_bytecode(bytecode_file)
+        result = self.handler.get_bytecode(self.bytecode_path)
         magic, timestamp, bytecode = result
         # Verify magic number.
         self.failUnless(self.handler.verify_magic(magic))
         # Verify timestamp.
-        with open(self.source_path, 'U') as source_file:
-            self.failUnless(self.handler.verify_timestamp(timestamp,
-                                                            source_file))
+        self.failUnless(self.handler.verify_timestamp(timestamp,
+                                                        self.source_path))
         # Verify bytecode.
         code_object = self.handler.get_code_from_bytecode(bytecode)
         exec code_object in self.module_object.__dict__
@@ -529,8 +514,7 @@
         # Should be able to initialize the module from just using the source.
         os.remove(self.bytecode_path)
         handler = importer.PyPycFileHandler(bytecode_handles=False)
-        with open(self.source_path, 'U') as source_file:
-            handler.handle_code(self.module_object, source_file, '.py')
+        handler.handle_code(self.module_object, self.source_path, '.py')
         self.verify_module(self.module_object)
         self.failUnless(not os.path.exists(self.bytecode_path))
         
@@ -539,46 +523,29 @@
         os.remove(self.source_path)
         bytecode_extension = os.path.splitext(self.bytecode_path)[1]
         handler = importer.PyPycFileHandler(source_handles=False)
-        with open(self.bytecode_path, 'rb') as bytecode_file:
-            handler.handle_code(self.module_object, bytecode_file,
-                                bytecode_extension)
+        handler.handle_code(self.module_object, self.bytecode_path,
+                            bytecode_extension)
         self.verify_module(self.module_object)
         
     def test_handle_code_bad_bytecode_timestamp_good_source(self):
         # If the timestamp fails on the bytecode, use the source and recreate
         # the bytecode.
         class Tester(importer.PyPycFileHandler):
-            """On some platforms the resolution of the last modification time
-            can be too coarse for rewriting the source to pick it up.  Thus
-            force a fail timestamp check."""
-            def verify_timestamp(self, ignore, ignore2):
+            def verify_timestamp(self, timestamp, source_path):
+                bytecode_path = self.find_bytecode_to_write(source_path)
+                os.remove(bytecode_path)
                 return False
-        
+                
         handler = Tester()
         bytecode_extension = os.path.splitext(self.bytecode_path)[1]
-        with open(self.bytecode_path, 'rb') as bytecode_file:
-            bytecode_stringio = StringIO.StringIO(bytecode_file.read())
-        bytecode_stringio.name = self.bytecode_path
-        # Once bytecode has been read, don't need the file.  Deleting it
-        # allows for easy detection that the bytecode was recreated.
-        os.remove(self.bytecode_path)
-        handler.handle_code(self.module_object, bytecode_stringio,
+        handler.handle_code(self.module_object, self.bytecode_path,
                             bytecode_extension)
-        self.verify_module(self.module_object)
-        self.failUnless(os.path.exists(self.bytecode_path))
-
-        with open(self.bytecode_path) as bytecode_file:
-            self.handler.handle_code(self.module_object, bytecode_file,
-                                bytecode_extension)
-        self.verify_module(self.module_object)
-        self.failUnless(os.path.exists(self.bytecode_path))
         
     def test_handle_code_good_source_write_bytecode(self):
         # If the handler is requested to handle source code and bytecode can
         # be written, then do so.
         os.remove(self.bytecode_path)
-        with open(self.source_path, 'U') as source_file:
-            self.handler.handle_code(self.module_object, source_file, '.py')
+        self.handler.handle_code(self.module_object, self.source_path, '.py')
         self.verify_module(self.module_object)
         self.failUnless(os.path.exists(self.bytecode_path))
         
@@ -606,12 +573,99 @@
     def test_handle_code(self):
         # Make sure an extension module can be loaded.
         new_module = new.module(self.module_name)
-        with open(self.ext_path, 'rb') as ext_file:
-            module = self.handler.handle_code(new_module, ext_file,
-                                                self.handler.handles[0])
+        module = self.handler.handle_code(new_module, self.ext_path,
+                                            self.handler.handles[0])
         # There should be at least one attribute that does not start with '_'.
         self.failUnless(any(True for attr in dir(module)
                             if not attr.startswith('_')))
+                            
+
+class SimpleImportTests(unittest.TestCase):
+    
+    """Test Importer class with only direct module imports; no packages."""
+    
+    def setUp(self):
+        """Store a copy of the 'sys' attribute pertaining to imports."""
+        self.old_sys_modules = sys.modules.copy()
+        self.old_meta_path = sys.meta_path[:]
+        self.old_sys_path = sys.path[:]
+        self.old_path_hooks = sys.path_hooks[:]
+        self.old_path_importer_cache = sys.path_importer_cache.copy()
+        self.import_ = importer.Importer()
+        
+    def tearDown(self):
+        """Restore backup of import-related attributes in 'sys'."""
+        sys.modules = self.old_sys_modules
+        sys.meta_path = self.old_meta_path
+        sys.path = self.old_sys_path
+        sys.path_hooks = self.old_path_hooks
+        sys.path_importer_cache = self.old_path_importer_cache
+        
+    def test_default_importer_factory(self):
+        # Make sure that the object passed in during initialization is used
+        # when sys.path_importer_cache has a value of None.
+        pass
+        
+    def test_default_meta_path(self):
+        # Default meta_path entries set during initialization should be
+        # queried after sys.meta_path.
+        pass
+        
+    def test_default_init(self):
+        # The default initialization should work with a None entry for every
+        # sys.path entry in sys.path_importer_cache.  It should also lead to
+        # built-in, frozen, extension, .pyc, and .py files being imported if
+        # desired.
+        sys.path_importer_cache = dict((entry, None) for entry in sys.path)
+        sys.modules = {}
+        # Built-ins.
+        module = self.import_('sys')
+        self.failUnlessEqual(module.__name__, 'sys')
+        self.failUnless(hasattr(sys, 'version'))
+        # Frozen modules.
+        try:
+            sys.stdout = StringIO.StringIO()
+            module = self.import_('__hello__')
+        finally:
+            sys.stdout = sys.__stdout__
+        self.failUnlessEqual(module.__name__, '__hello__')
+        # Extension modules.
+        module = self.import_('time')
+        self.failUnlessEqual(module.__name__, 'time')
+        self.failUnless(hasattr(module, 'sleep'))
+        # .py/.pyc files.
+        module = self.import_('token')
+        self.failUnlessEqual(module.__name__, 'token')
+        self.failUnless(hasattr(module, 'ISTERMINAL'))
+    
+    def test_meta_path(self):
+        # Test meta_path searching for a loader.
+        pass
+        
+    def test_sys_path(self):
+        # Test sys.path searching for a loader.
+        pass
+        
+    def test_importer_cache_preexisting(self):
+        # A pre-existing importer should be returned if it exists in
+        # sys.path_importer_cache.
+        pass
+        
+    def test_importer_cache_None(self):
+        # A entry of None in sys.path_importer_cache should get one back an
+        # importer from the default importer factory.
+        pass
+        
+    def test_importer_cache_from_path_hooks(self):
+        # If an entry does not exist for a sys.path entry in the importer cache
+        # then sys.path_hooks should be searched and if one is found then cache
+        # it.
+        pass
+        
+    def test_importer_cache_no_path_hooks(self):
+        # If an entry does not exist for a sys.path entry in the importer cache
+        # and sys.path_hooks has nothing for the entry, None should be set.
+        pass
 
 
 def test_main():
@@ -623,6 +677,7 @@
                 PyPycBaseHandlerTests,
                 PyPycFileHandlerTests,
                 ExtensionHandlerTests,
+                SimpleImportTests,
             )
 
 


More information about the Python-checkins mailing list