[Python-checkins] cpython (3.1): #11999: sync based on comparing mtimes, not mtime to system clock

r.david.murray python-checkins at python.org
Sat May 7 04:27:25 CEST 2011


http://hg.python.org/cpython/rev/252451fef13f
changeset:   69882:252451fef13f
branch:      3.1
parent:      69869:26da299ca88e
user:        R David Murray <rdmurray at bitdance.com>
date:        Fri May 06 22:07:19 2011 -0400
summary:
  #11999: sync based on comparing mtimes, not mtime to system clock

files:
  Lib/mailbox.py           |  76 ++++++++++++++-------------
  Lib/test/test_mailbox.py |  23 +++----
  Misc/NEWS                |   4 +
  3 files changed, 54 insertions(+), 49 deletions(-)


diff --git a/Lib/mailbox.py b/Lib/mailbox.py
--- a/Lib/mailbox.py
+++ b/Lib/mailbox.py
@@ -224,19 +224,24 @@
     def __init__(self, dirname, factory=None, create=True):
         """Initialize a Maildir instance."""
         Mailbox.__init__(self, dirname, factory, create)
+        self._paths = {
+            'tmp': os.path.join(self._path, 'tmp'),
+            'new': os.path.join(self._path, 'new'),
+            'cur': os.path.join(self._path, 'cur'),
+            }
         if not os.path.exists(self._path):
             if create:
                 os.mkdir(self._path, 0o700)
-                os.mkdir(os.path.join(self._path, 'tmp'), 0o700)
-                os.mkdir(os.path.join(self._path, 'new'), 0o700)
-                os.mkdir(os.path.join(self._path, 'cur'), 0o700)
+                for path in self._paths.values():
+                    os.mkdir(path, 0o700)
             else:
                 raise NoSuchMailboxError(self._path)
         self._toc = {}
-        self._last_read = None          # Records last time we read cur/new
-        # NOTE: we manually invalidate _last_read each time we do any
-        # modifications ourselves, otherwise we might get tripped up by
-        # bogus mtime behaviour on some systems (see issue #6896).
+        self._toc_mtimes = {}
+        for subdir in ('cur', 'new'):
+            self._toc_mtimes[subdir] = os.path.getmtime(self._paths[subdir])
+        self._last_read = time.time()  # Records last time we read cur/new
+        self._skewfactor = 0.1         # Adjust if os/fs clocks are skewing
 
     def add(self, message):
         """Add message and return assigned key."""
@@ -270,15 +275,11 @@
                 raise
         if isinstance(message, MaildirMessage):
             os.utime(dest, (os.path.getatime(dest), message.get_date()))
-        # Invalidate cached toc
-        self._last_read = None
         return uniq
 
     def remove(self, key):
         """Remove the keyed message; raise KeyError if it doesn't exist."""
         os.remove(os.path.join(self._path, self._lookup(key)))
-        # Invalidate cached toc (only on success)
-        self._last_read = None
 
     def discard(self, key):
         """If the keyed message exists, remove it."""
@@ -313,8 +314,6 @@
         if isinstance(message, MaildirMessage):
             os.utime(new_path, (os.path.getatime(new_path),
                                 message.get_date()))
-        # Invalidate cached toc
-        self._last_read = None
 
     def get_message(self, key):
         """Return a Message representation or raise a KeyError."""
@@ -370,8 +369,8 @@
     def flush(self):
         """Write any pending changes to disk."""
         # Maildir changes are always written immediately, so there's nothing
-        # to do except invalidate our cached toc.
-        self._last_read = None
+        # to do.
+        pass
 
     def lock(self):
         """Lock the mailbox."""
@@ -469,34 +468,39 @@
 
     def _refresh(self):
         """Update table of contents mapping."""
-        new_mtime = os.path.getmtime(os.path.join(self._path, 'new'))
-        cur_mtime = os.path.getmtime(os.path.join(self._path, 'cur'))
-
-        if (self._last_read is not None and
-            new_mtime <= self._last_read and cur_mtime <= self._last_read):
-            return
-
+        # If it has been less than two seconds since the last _refresh() call,
+        # we have to unconditionally re-read the mailbox just in case it has
+        # been modified, because os.path.mtime() has a 2 sec resolution in the
+        # most common worst case (FAT) and a 1 sec resolution typically.  This
+        # results in a few unnecessary re-reads when _refresh() is called
+        # multiple times in that interval, but once the clock ticks over, we
+        # will only re-read as needed.  Because the filesystem might be being
+        # served by an independent system with its own clock, we record and
+        # compare with the mtimes from the filesystem.  Because the other
+        # system's clock might be skewing relative to our clock, we add an
+        # extra delta to our wait.  The default is one tenth second, but is an
+        # instance variable and so can be adjusted if dealing with a
+        # particularly skewed or irregular system.
+        if time.time() - self._last_read > 2 + self._skewfactor:
+            refresh = False
+            for subdir in self._toc_mtimes:
+                mtime = os.path.getmtime(self._paths[subdir])
+                if mtime > self._toc_mtimes[subdir]:
+                    refresh = True
+                self._toc_mtimes[subdir] = mtime
+            if not refresh:
+                return
+        # Refresh toc
         self._toc = {}
-        def update_dir (subdir):
-            path = os.path.join(self._path, subdir)
+        for subdir in self._toc_mtimes:
+            path = self._paths[subdir]
             for entry in os.listdir(path):
                 p = os.path.join(path, entry)
                 if os.path.isdir(p):
                     continue
                 uniq = entry.split(self.colon)[0]
                 self._toc[uniq] = os.path.join(subdir, entry)
-
-        update_dir('new')
-        update_dir('cur')
-
-        # We record the current time - 1sec so that, if _refresh() is called
-        # again in the same second, we will always re-read the mailbox
-        # just in case it's been modified.  (os.path.mtime() only has
-        # 1sec resolution.)  This results in a few unnecessary re-reads
-        # when _refresh() is called multiple times in the same second,
-        # but once the clock ticks over, we will only re-read as needed.
-        now = int(time.time() - 1)
-        self._last_read = time.time() - 1
+        self._last_read = time.time()
 
     def _lookup(self, key):
         """Use TOC to return subpath for given key, or raise a KeyError."""
diff --git a/Lib/test/test_mailbox.py b/Lib/test/test_mailbox.py
--- a/Lib/test/test_mailbox.py
+++ b/Lib/test/test_mailbox.py
@@ -742,21 +742,18 @@
 
     def test_reread(self):
 
-        # Initially, the mailbox has not been read and the time is null.
-        assert getattr(self._box, '_last_read', None) is None
-
-        # Refresh mailbox; the times should now be set to something.
-        self._box._refresh()
-        assert getattr(self._box, '_last_read', None) is not None
-
-        # Put the last modified times more than one second into the past
-        # (because mtime has a one second granularity, a refresh is done
-        # unconditionally if called for within the same second, just in case
-        # the mbox has changed).
+        # Put the last modified times more than two seconds into the past
+        # (because mtime may have a two second granularity)
         for subdir in ('cur', 'new'):
             os.utime(os.path.join(self._box._path, subdir),
                      (time.time()-5,)*2)
 
+        # Because mtime has a two second granularity in worst case (FAT), a
+        # refresh is done unconditionally if called for within
+        # two-second-plus-a-bit of the last one, just in case the mbox has
+        # changed; so now we have to wait for that interval to expire.
+        time.sleep(2.01 + self._box._skewfactor)
+
         # Re-reading causes the ._toc attribute to be assigned a new dictionary
         # object, so we'll check that the ._toc attribute isn't a different
         # object.
@@ -765,7 +762,7 @@
             return self._box._toc is not orig_toc
 
         self._box._refresh()
-        assert not refreshed()
+        self.assertFalse(refreshed())
 
         # Now, write something into cur and remove it.  This changes
         # the mtime and should cause a re-read.
@@ -774,7 +771,7 @@
         f.close()
         os.unlink(filename)
         self._box._refresh()
-        assert refreshed()
+        self.assertTrue(refreshed())
 
 class _TestMboxMMDF(TestMailbox):
 
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -66,6 +66,10 @@
 Library
 -------
 
+- Issue 11999: fixed sporadic sync failure mailbox.Maildir due to its trying to
+  detect mtime changes by comparing to the system clock instead of to the
+  previous value of the mtime.
+
 - Issue #10684: shutil.move used to delete a folder on case insensitive
   filesystems when the source and destination name where the same except
   for the case.

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list