[Python-checkins] python/nondist/sandbox/mailbox libmailbox.tex, 1.11, 1.12 mailbox.py, 1.10, 1.11

gregorykjohnson@users.sourceforge.net gregorykjohnson at users.sourceforge.net
Thu Aug 18 19:52:09 CEST 2005


Update of /cvsroot/python/python/nondist/sandbox/mailbox
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27089

Modified Files:
	libmailbox.tex mailbox.py 
Log Message:
* Overhaul existing documentation.
* Minor fixes and tweaks:
    * Fix identifier typos in locking code.
    * Mangle "From " lines for mbox, as claimed.
    * Do tilde expansion on mailbox paths.


Index: libmailbox.tex
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/mailbox/libmailbox.tex,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -d -r1.11 -r1.12
--- libmailbox.tex	16 Aug 2005 23:38:11 -0000	1.11
+++ libmailbox.tex	17 Aug 2005 20:32:36 -0000	1.12
@@ -7,33 +7,36 @@
 \modulesynopsis{Manipulate mailboxes in various formats}
 
 
-The \module{mailbox} module defines objects for accessing and manipulating
-Maildir, mbox, MH, Babyl, and MMDF mailboxes and the messages they contain.
-(These formats are commonly used on \UNIX{} to store email messages on disk.)
+This module defines two classes, \class{Mailbox} and \class{Message}, for
+accessing and manipulating on-disk mailboxes and the messages they contain.
+\class{Mailbox} offers a dictionary-like mapping from keys to messages.
+\class{Message} extends the \module{email.Message} module's \class{Message}
+class with format-specific state and behavior. Supported mailbox formats are
+Maildir, mbox, MH, Babyl, and MMDF.
 
-\class{Mailbox} and \class{Message} are the two main classes offered by the
-module. \class{Mailbox} offers a dictionary-like mapping from keys to
-messages. \class{Message} extends the \module{email.Message} module's
-\class{Message} class with format-specific state and behavior. Both
-\class{Mailbox} and \class{Message} are extended by format-specific subclasses
-and are generally not instantiated directly.
+An example of using the module to sort mail:
 
-A \class{Mailbox} instance's keys are immutable objects, issued by the
-\class{Mailbox} instance, that are used to select messages from it. They
-remain meaningful for the life of the \class{Mailbox} instance, even if the
-mailbox is modified. Each time the value corresponding to a key is requested,
-a new message representation is created. Typically, messages are representated
-as \class{Message} instances, but a custom factory function may be specified.
-A message representation is independent of the particular \class{Mailbox}
-instance that created it (if any). For message modifications to be reflected
-in a mailbox, the modified message representation must be explicitly assigned
-back into the \class{Mailbox} instance's mapping.
+\begin{verbatim}
+>>> import mailbox
+>>> inbox = mailbox.Maildir('~/Maildir', None)
+>>> python_box = mailbox.Maildir('~/email/python-list', None)
+>>> len(inbox)          # Number of messages.
+13
+>>> len(python_box)
+818
+>>> for key, message in inbox.iteritems():
+...     if 'python-list' in message['list-id']:
+...         python_box.add(message)         # Add the message to python_box
+...         del inbox[key]                  # and remove it from inbox.
+...
+>>> len(inbox)
+2
+>>> len(python_box)
+829
+\end{verbatim}
 
 \begin{seealso}
     \seemodule{email}{Represent and manipulate messages.}
-    \seemodule{poplib}{Access mail via POP3.}
-    \seemodule{imaplib}{Access mail via IMAP4.}
-    \seemodule{smtplib}{Transfer mail via SMTP.}
 \end{seealso}
 
 \subsection{\class{Mailbox} objects}
@@ -49,33 +52,29 @@
 instance. A key continues to identify a message even if the corresponding
 message is modified, such as by replacing it with another message. Messages may
 be added to a \class{Mailbox} instance using the set-like method
-\method{add()}. Because keys are issued by a \class{Mailbox} instance rather
-than being chosen, the conventional method for adding an item to a mapping
-(assigning a value to a new key) cannot be used. (\strong{Implementation note:}
-\class{mbox}, \class{MH}, \class{Babyl}, and \class{MMDF} instances use
-integers as keys, and \class{Maildir} instances use short strings.)
+\method{add()} and removed using a \code{del} statement or the set-like methods
+\method{remove()} and \method{discard()}.
 
 \class{Mailbox} interface semantics differ from dictionary semantics in some
-ways. Each time a message is requested, a new message representation (typically
-a \class{Message} instance) is generated based upon the current state of the
-underlying message. The \class{Mailbox} instance does not reuse this
-representation or keep a reference to it. Similarly, when a message
-representation is assigned into a \class{Mailbox} instance's mapping, the
-message representation's contents are copied into the mailbox. In neither case
-is a reference to the message representation kept by the \class{Mailbox}
+noteworthy ways. Each time a message is requested, a new representation
+(typically a \class{Message} instance) is generated, based upon the current
+state of the mailbox. Similarly, when a message is added to a \class{Mailbox}
+instance, the provided message representation's contents are copied. In neither
+case is a reference to the message representation kept by the \class{Mailbox}
 instance.
 
 The default \class{Mailbox} iterator iterates over message representations, not
-keys as dictionaries do. Moreover, modification of a mailbox during iteration
-is safe and well-defined. Messages added to the mailbox after an iterator is
-created will not be seen by the iterator. Messages removed from the mailbox
-before the iterator yields them will be silently skipped, though using a key
-from an iterator may result in a \exception{KeyError} exception if the
-corresponding message is subsequently removed.
+keys as the default dictionary iterator does. Moreover, modification of a
+mailbox during iteration is safe and well-defined. Messages added to the
+mailbox after an iterator is created will not be seen by the iterator. Messages
+removed from the mailbox before the iterator yields them will be silently
+skipped, though using a key from an iterator may result in a
+\exception{KeyError} exception if the corresponding message is subsequently
+removed.
 
 \class{Mailbox} itself is intended to define an interface and to be inherited
 from by format-specific subclasses but is not intended to be instantiated.
-Instead, directly instantiate a subclass.
+Instead, you should instantiate a subclass.
 
 \class{Mailbox} instances have the following methods:
 
@@ -97,16 +96,15 @@
 Delete the message corresponding to \var{key} from the mailbox.
 
 If no such message exists, a \exception{KeyError} exception is raised if the
-method was called as \method{remove()} or \method{__delitem__()} and no
+method was called as \method{remove()} or \method{__delitem__()} but no
 exception is raised if the method was called as \method{discard()}. The
 behavior of \method{discard()} may be preferred if the underlying mailbox
 format supports concurrent modification by other processes.
 \end{methoddesc}
 
 \begin{methoddesc}{__setitem__}{key, message}
-Replace the message corresponding to \var{key} with the message represented by
-\var{message}. Raise a \exception{KeyError} exception if no message already
-corresponds to \var{key}.
+Replace the message corresponding to \var{key} with \var{message}. Raise a
+\exception{KeyError} exception if no message already corresponds to \var{key}.
 
 As with \method{add()}, parameter \var{message} may be a \class{Message}
 instance, an \class{email.Message.Message} instance, a string, or a file-like
@@ -129,9 +127,10 @@
 Return an iterator over representations of all messages if called as
 \method{itervalues()} or \method{__iter__()} or return a list of such
 representations if called as \method{values()}. The messages are represented as
-\class{Message} instances unless a custom message factory was specified when
-the \class{Mailbox} instance was initialized. \note{The behavior of
-\method{__iter__()} is unlike that of dictionaries, which iterate over keys.}
+instances of the appropriate format-specific \class{Message} subclass unless a
+custom message factory was specified when the \class{Mailbox} instance was
+initialized. \note{The behavior of \method{__iter__()} is unlike that of
+dictionaries, which iterate over keys.}
 \end{methoddesc}
 
 \begin{methoddesc}{iteritems}{}
@@ -139,9 +138,9 @@
 Return an iterator over (\var{key}, \var{message}) pairs, where \var{key} is a
 key and \var{message} is a message representation, if called as
 \method{iteritems()} or return a list of such pairs if called as
-\method{items()}. The messages are represented as \class{Message} instances
-unless a custom message factory was specified when the \class{Mailbox} instance
-was initialized.
+\method{items()}. The messages are represented as instances of the appropriate
+format-specific \class{Message} subclass unless a custom message factory was
+specified when the \class{Mailbox} instance was initialized.
 \end{methoddesc}
 
 \begin{methoddesc}{get}{key\optional{, default=None}}
@@ -149,15 +148,16 @@
 Return a representation of the message corresponding to \var{key}. If no such
 message exists, \var{default} is returned if the method was called as
 \method{get()} and a \exception{KeyError} exception is raised if the method was
-called as \method{__getitem__()}. The message is represented as a
-\class{Message} instance unless a custom message factory was specified when the
-\class{Mailbox} instance was initialized.
+called as \method{__getitem__()}. The message is represented as an instance of
+the appropriate format-specific \class{Message} subclass unless a custom
+message factory was specified when the \class{Mailbox} instance was
+initialized.
 \end{methoddesc}
 
 \begin{methoddesc}{get_message}{key}
-Return a \class{Message} representation of the message corresponding to
-\var{key}, or raise a \exception{KeyError} exception if no such message
-exists.
+Return a representation of the message corresponding to \var{key} as an
+instance of the appropriate format-specific \class{Message} subclass, or raise
+a \exception{KeyError} exception if no such message exists.
 \end{methoddesc}
 
 \begin{methoddesc}{get_string}{key}
@@ -171,13 +171,13 @@
 should be closed once it is no longer needed.
 
 \note{Unlike other representations of messages, file-like representations are
-not independent of the \class{Mailbox} instance that created them or of the
-underlying mailbox, although their exact behavior is mailbox-format dependent.
-More specific documentation is provided by each subclass.}
+not necessarily independent of the \class{Mailbox} instance that created them
+or of the underlying mailbox. More specific documentation is provided by each
+subclass.}
 \end{methoddesc}
 
 \begin{methoddesc}{has_key}{key}
-\methodline{__contains__}{}
+\methodline{__contains__}{key}
 Return \code{True} if \var{key} corresponds to a message, \code{False}
 otherwise.
 \end{methoddesc}
@@ -193,32 +193,36 @@
 \begin{methoddesc}{pop}{key\optional{, default}}
 Return a representation of the message corresponding to \var{key} and delete
 the message. If no such message exists, return \var{default} if it was supplied
-or else raise a \exception{KeyError} exception. The message is represented as a
-\class{Message} instance unless a custom message factory was specified when the
-\class{Mailbox} instance was initialized.
+or else raise a \exception{KeyError} exception. The message is represented as
+an instance of the appropriate format-specific \class{Message} subclass unless
+a custom message factory was specified when the \class{Mailbox} instance was
+initialized.
 \end{methoddesc}
 
 \begin{methoddesc}{popitem}{}
 Return an arbitrary (\var{key}, \var{message}) pair, where \var{key} is a key
 and \var{message} is a message representation, and delete the corresponding
 message. If the mailbox is empty, raise a \exception{KeyError} exception. The
-message is represented as a \class{Message} instance unless a custom message
-factory was specified when the \class{Mailbox} instance was initialized.
+message is represented as an instance of the appropriate format-specific
+\class{Message} subclass unless a custom message factory was specified when the
+\class{Mailbox} instance was initialized.
 \end{methoddesc}
 
 \begin{methoddesc}{update}{arg}
 Parameter \var{arg} should be a \var{key}-to-\var{message} mapping or an
 iterable of (\var{key}, \var{message}) pairs. Updates the mailbox so that, for
 each given \var{key} and \var{message}, the message corresponding to \var{key}
-is set to \var{message} as if by using \method{__setitem__()}. Each \var{key}
-must already correspond to a message in the mailbox or a \exception{KeyError}
-exception will be raised. \note{Unlike with dictionaries, keyword arguments
-are not supported.}
+is set to \var{message} as if by using \method{__setitem__()}. As with
+\method{__setitem__()}, each \var{key} must already correspond to a message in
+the mailbox or else a \exception{KeyError} exception will be raised, so in
+general it is incorrect for \var{arg} to be a \class{Mailbox} instance.
+\note{Unlike with dictionaries, keyword arguments are not supported.}
 \end{methoddesc}
 
 \begin{methoddesc}{flush}{}
 Write any pending changes to the filesystem. For some \class{Mailbox}
-subclasses, changes are written immediately and this method does nothing.
+subclasses, changes are always written immediately and this method does
+nothing.
 \end{methoddesc}
 
 \begin{methoddesc}{lock}{}
@@ -229,7 +233,7 @@
 \end{methoddesc}
 
 \begin{methoddesc}{unlock}{}
-Release the advisory lock on the mailbox, if any.
+Release the lock on the mailbox, if any.
 \end{methoddesc}
 
 \begin{methoddesc}{close}{}
@@ -244,30 +248,39 @@
 \begin{classdesc}{Maildir}{dirname\optional{, factory=rfc822.Message\optional{,
 create=True}}}
 A subclass of \class{Mailbox} for mailboxes in Maildir format. Parameter
-\var{factory} is a callable object that accepts a file-like object containing a
-raw message as its parameter and returns a message representation. If
-\var{factory} is \code{None}, \class{MaildirMessage} instances are used.
-If \var{create} is \code{True}, the mailbox is created if it does not exist.
+\var{factory} is a callable object that accepts a file-like message
+representation and returns a custom representation. If \var{factory} is
+\code{None}, \class{MaildirMessage} is used as the default message
+representation. If \var{create} is \code{True}, the mailbox is created if it
+does not exist.
 
 It is for historical reasons that \var{factory} defaults to
 \class{rfc822.Message} and that \var{dirname} is named as such rather than
-\var{path}.
+\var{path}. For a \class{Maildir} instance that behaves like instances of other
+\class{Mailbox} subclasses, set \var{factory} to \code{None}.
 \end{classdesc}
 
-Maildir is a directory-based mailbox format invented for the qmail MTA and now
-widely supported by other programs. Messages in a Maildir mailbox are stored
-in separate files within a shared directory structure. This structure allows
-Maildir mailboxes to be accessed and modified by multiple unrelated programs
-without data corruption, so file locking is unnecessary.
+Maildir is a directory-based mailbox format invented for the qmail mail
+transfer agent and now widely supported by other programs. Messages in a
+Maildir mailbox are stored in separate files within a common directory
+structure. This design allows Maildir mailboxes to be accessed and modified by
+multiple unrelated programs without data corruption, so file locking is
+unnecessary.
 
-Folders, as introduced by the Courier MTA, are supported. Each folder is
-itself a Maildir mailbox. Any subdirectory of the main Maildir directory is
-considered a folder if \character{.} is the first character in its name. Folder
-names are represented without the leading dot. For example, "Sent" would be the
-name of a folder implemented with a directory called ".Sent" on the filesystem.
-Folders should not be nested, i.e., a Maildir mailbox that is itself a folder
-should not contain other folders. Instead, logical nesting may be indicated
-using \character{.} to delimit levels---for example, "Archived.2005.07".
+Maildir mailboxes contain three subdirectories, namely: \file{tmp}, \file{new},
+and \file{cur}. Messages are created momentarily in the \file{tmp} subdirectory
+and then moved to the \file{new} subdirectory to finalize delivery. A mail user
+agent may subsequently move the message to the \file{cur} subdirectory and
+store information about the state of the message in a special "info" section
+appended to its file name.
+
+Folders of the style introduced by the Courier mail transfer agent are also
+supported. Any subdirectory of the main mailbox is considered a folder if
+\character{.} is the first character in its name. Folder names are represented
+by \class{Maildir} without the leading \character{.}. Each folder is itself a
+Maildir mailbox but should not contain other folders. Instead, a logical
+nesting is indicated using \character{.} to delimit levels, e.g.,
+"Archived.2005.07".
 
 \class{Maildir} instances have all of the methods of \class{Mailbox} in
 addition to the following:
@@ -327,16 +340,15 @@
 \end{methoddesc}
 
 \begin{methoddesc}{get_file}{key}
-Depending upon the host platform, it may not be possible to use a
-\class{Maildir} instance to modify or remove the underlying message while the
-returned file remains open.
+Depending upon the host platform, it may not be possible to modify or remove
+the underlying message while the returned file remains open.
 \end{methoddesc}
 
 \begin{seealso}
     \seelink{http://www.qmail.org/man/man5/maildir.html}{maildir man page from
     qmail}{The original specification of the format.}
     \seelink{http://cr.yp.to/proto/maildir.html}{Using maildir format}{Notes
-    on Maildir by it's inventor. Includes an updated name-creation scheme and
+    on Maildir by its inventor. Includes an updated name-creation scheme and
     details on "info" semantics.}
     \seelink{http://www.courier-mta.org/?maildir.html}{maildir man page from
     Courier}{Another specification of the format. Describes a common extension
@@ -348,15 +360,16 @@
 
 \begin{classdesc}{mbox}{path\optional{, factory=None\optional{, create=True}}}
 A subclass of \class{Mailbox} for mailboxes in mbox format. Parameter
-\var{factory} is a callable object that accepts a file-like object containing a
-raw message as its parameter and returns a message representation. If
-\var{factory} is \code{None}, \class{mboxMessage} instances are used. If
-\var{create} is \code{True}, the mailbox is created if it does not exist.
+\var{factory} is a callable object that accepts a file-like message
+representation and returns a custom representation. If \var{factory} is
+\code{None}, \class{mboxMessage} is used as the default message representation.
+If \var{create} is \code{True}, the mailbox is created if it does not exist.
 \end{classdesc}
 
 The mbox format is the classic format for storing mail on \UNIX{} systems. All
 messages in an mbox mailbox are stored in a single file with the beginning of
 each message indicated by a line whose first five characters are "From~".
+
 Several variations of the mbox format exist to address perceived shortcomings.
 In the interest of compatibility, \class{mbox} implements the original format,
 which is sometimes referred to as \dfn{mboxo}. This means that the
@@ -369,7 +382,8 @@
 remarks:
 
 \begin{methoddesc}{get_file}{key}
-XXX
+Using the file after calling \method{flush()} or \method{close()} on the
+\class{mbox} instance may yield unpredictable results or raise an exception.
 \end{methoddesc}
 
 \begin{methoddesc}{lock}{}
@@ -398,29 +412,24 @@
 
 \begin{classdesc}{MH}{path\optional{, factory=None\optional{, create=True}}}
 A subclass of \class{Mailbox} for mailboxes in MH format. Parameter
-\var{factory} is a callable object that accepts a file-like object containing a
-raw message as its parameter and returns a message representation. If
-\var{factory} is \code{None}, \class{MHMessage} instances are used. If
-\var{create} is \code{True}, the mailbox is created if it does not exist.
+\var{factory} is a callable object that accepts a file-like message
+representation and returns a custom representation. If \var{factory} is
+\code{None}, \class{MHMessage} is used as the default message representation.
+If \var{create} is \code{True}, the mailbox is created if it does not exist.
 \end{classdesc}
 
 MH is a directory-based mailbox format invented for the MH Message Handling
-System, a mail reading application. Each message in an MH mailbox resides in
-its own file. An MH mailbox may contain other MH mailboxes (called
-\dfn{folders}) in addition to messages. Folders may be nested indefinitely.
-
-MH mailboxes support \dfn{sequences}, which are named lists used to logically
-group messages without moving them to sub-folders. Sequences are defined in a
-file called \file{.mh_sequences} in each folder. Some mail reading programs
-(although not the standard \program{mh} and \program{nmh} implementations) use
-sequences to the same end as flags are used in other formats: unread messages
-are added to the "unseen" sequence, replied-to messages are added to the
-"replied" sequence, and important messages are added upon request to the
-"flagged" sequence.
+System, a mail user agent. Each message in an MH mailbox resides in its own
+file. An MH mailbox may contain other MH mailboxes (called \dfn{folders}) in
+addition to messages. Folders may be nested indefinitely. MH mailboxes also
+support \dfn{sequences}, which are named lists used to logically group messages
+without moving them to sub-folders. Sequences are defined in a file called
+\file{.mh_sequences} in each folder.
 
-\class{MH} manipulates MH mailboxes, but it does not attempt to emulate
-\program{mh}. In particular, it does not access or modify \file{context} or
-\file{.mh_profile} files.
+The \class{MH} class manipulates MH mailboxes, but it does not attempt to
+emulate all of \program{mh}'s behaviors. In particular, it does not access or
+modify the \file{context} or \file{.mh_profile} files that are used by
+\program{mh} to store its state and configuration.
 
 \class{MH} instances have all of the methods of \class{Mailbox} in addition to
 the following:
@@ -453,14 +462,14 @@
 
 \begin{methoddesc}{set_sequences}{sequences}
 Re-define the sequences that exist in the mailbox based upon \var{sequences}, a
-dictionary of names mapped to key lists like returned by
+dictionary of names mapped to key lists, like that returned by
 \method{get_sequences()}.
 \end{methoddesc}
 
 \begin{methoddesc}{pack}{}
-Renames messages in the mailbox as necessary to eliminate gaps in numbering.
-Entries in the sequences list are updated correspondingly. Already-issued keys
-are invalidated by this operation.
+Rename messages in the mailbox as necessary to eliminate gaps in numbering.
+Entries in the sequences list are updated correspondingly. \note{Already-issued
+keys are invalidated by this operation and should not be subsequently used.}
 \end{methoddesc}
 
 Some \class{Mailbox} methods implemented by \class{MH} deserve special remarks:
@@ -468,8 +477,8 @@
 \begin{methoddesc}{remove}{key}
 \methodline{__delitem__}{key}
 \methodline{discard}{key}
-These methods immediately delete the message. The \program{mh} convention of
-marking a message for deletion by prepending a comma to its name is not used.
+These methods immediately delete the message. The MH convention of marking a
+message for deletion by prepending a comma to its name is not used.
 \end{methoddesc}
 
 \begin{methoddesc}{lock}{}
@@ -482,7 +491,8 @@
 \end{methoddesc}
 
 \begin{methoddesc}{get_file}{key}
-XXX
+Depending upon the host platform, it may not be possible to remove the
+underlying message while the returned file remains open.
 \end{methoddesc}
 
 \begin{methoddesc}{flush}{}
@@ -495,15 +505,6 @@
 to \method{unlock()}.
 \end{methoddesc}
 
-\class{MH} instances have all of the methods of \class{Mailbox} in addition to
-the following:
-
-Some \class{Mailbox} methods implemented by \class{MH} deserve special remarks:
-
-\begin{methoddesc}{get_file}{key}
-XXX
-\end{methoddesc}
-
 \begin{seealso}
 \seelink{http://www.nongnu.org/nmh/}{nmh - Message Handling System}{Home page
 of \program{nmh}, a modern version of the original \program{mh}.}
@@ -517,30 +518,37 @@
 
 \begin{classdesc}{Babyl}{path\optional{, factory=None\optional{, create=True}}}
 A subclass of \class{Mailbox} for mailboxes in Babyl format. Parameter
-\var{factory} is a callable object that accepts a file-like object containing a
-raw message as its parameter and returns a message representation. If
-\var{factory} is \code{None}, \class{BabylMessage} instances are used. If
-\var{create} is \code{True}, the mailbox is created if it does not exist.
+\var{factory} is a callable object that accepts a file-like message
+representation and returns a custom representation. If \var{factory} is
+\code{None}, \class{BabylMessage} is used as the default message
+representation. If \var{create} is \code{True}, the mailbox is created if it
+does not exist.
 \end{classdesc}
 
-Babyl is a single-file mailbox format invented for the \program{Rmail} mail
-reading application included with Emacs. A Babyl mailbox begins with an options
-section that indicates the format of the mailbox and contains a list of
-user-defined labels that appear in the mailbox. Messages follow the options
-section. The beginning of a message is indicated by a line containing exactly
-two control characters, namely Control-Underscore
-(\character{\textbackslash037}) followed by Control-L
-(\character{\textbackslash014}). The end of a message is indicated by the start
-of the next message or, in the case of the last message, a line containing only
-a Control-Underscore (\character{\textbackslash037}) character. Each message in
-a Babyl mailbox has an accompanying list of \dfn{labels}, or short strings that
-record extra information about the message.
+Babyl is a single-file mailbox format invented for the Rmail mail user agent
+included with Emacs. The beginning of a message is indicated by a line
+containing exactly the two characters Control-Underscore
+(\character{\textbackslash037}) and Control-L (\character{\textbackslash014}).
+The end of a message is indicated by the start of the next message or, in the
+case of the last message, a line containing only a Control-Underscore
+(\character{\textbackslash037}) character.
+
+Messages in a Babyl mailbox have two sets of headers, original headers and
+so-called visible headers. Visible headers are typically a subset of the
+original headers that have been reformatted or abridged to be more attractive.
+Each message in a Babyl mailbox also has an accompanying list of \dfn{labels},
+or short strings that record extra information about the message, and a list of
+all user-defined labels found in the mailbox is kept in the Babyl options
+section.
 
 \class{Babyl} instances have all of the methods of \class{Mailbox} in addition
 to the following:
 
 \begin{methoddesc}{get_labels}{}
 Return a list of the names of all user-defined labels used in the mailbox.
+\note{The actual messages are inspected to determine which labels exist in the
+mailbox rather than consulting the list of labels in the Babyl options section,
+but the Babyl options section is updated whenever the mailbox is modified.}
 \end{methoddesc}
 
 Some \class{Mailbox} methods implemented by \class{Babyl} deserve special
@@ -548,9 +556,11 @@
 
 \begin{methoddesc}{get_file}{key}
 In Babyl mailboxes, the headers of a message are not stored contiguously with
-the body of the message. To generate a file-like representation, they are
-copied together into a \class{StringIO} instance (from the \module{StringIO}
-module), which may be used like a file.
+the body of the message. To generate a file-like representation, the headers
+and body are copied together into a \class{StringIO} instance (from the
+\module{StringIO} module), which has an API identical to that of a file. As a
+result, the file-like object is truly independent of the underlying mailbox but
+does not save memory compared to a string representation.
 \end{methoddesc}
 
 \begin{methoddesc}{lock}{}
@@ -571,25 +581,28 @@
 
 \begin{classdesc}{MMDF}{path\optional{, factory=None\optional{, create=True}}}
 A subclass of \class{Mailbox} for mailboxes in MMDF format. Parameter
-\var{factory} is a callable object that accepts a file-like object containing a
-raw message as its parameter and returns a message representation. If
-\var{factory} is \code{None}, \class{MMDFMessage} instances are used. If
-\var{create} is \code{True}, the mailbox is created if it does not exist.
+\var{factory} is a callable object that accepts a file-like message
+representation and returns a custom representation. If \var{factory} is
+\code{None}, \class{MMDFMessage} is used as the default message representation.
+If \var{create} is \code{True}, the mailbox is created if it does not exist.
 \end{classdesc}
 
 MMDF is a single-file mailbox format invented for the Multichannel Memorandum
 Distribution Facility, a mail transfer agent. Each message is in the same form
 as an mbox message but is bracketed before and after by lines containing four
-Control-A characters. As with the mbox format, the beginning of each message
-indicated by a line whose first five characters are "From~", but because of the
-additional message separators it is unnecessary to transform "From~" to
-">From~" when storing messages.
+Control-A (\character{\textbackslash001}) characters. As with the mbox format,
+the beginning of each message is indicated by a line whose first five
+characters are "From~", but additional occurrences of "From~" are not
+transformed to ">From~" when storing messages because the additional message
+separator lines prevent mistaking such occurrences for the starts of subsequent
+messages.
 
 Some \class{Mailbox} methods implemented by \class{MMDF} deserve special
 remarks:
 
 \begin{methoddesc}{get_file}{key}
-XXX
+Using the file after calling \method{flush()} or \method{close()} on the
+\class{MMDF} instance may yield unpredictable results or raise an exception.
 \end{methoddesc}
 
 \begin{methoddesc}{lock}{}
@@ -609,42 +622,33 @@
 \subsection{\class{Message} objects}
 \label{mailbox-message-objects}
 
-The \class{Message} class is an extension of a class of the same name from the
-\module{email.Message} module. In addition, subclasses of \class{Message}
-support mailbox-format-specific state and behavior.
-
 \begin{classdesc}{Message}{\optional{message}}
-A message with mailbox-format-specific properties.
+A subclass of the \module{email.Message} module's \class{Message}. Subclasses
+of \class{mailbox.Message} add mailbox-format-specific state and behavior.
 
 If \var{message} is omitted, the new instance is created in a default, empty
 state. If \var{message} is an \class{email.Message.Message} instance, its
-contents are copied, converting any format-specific information insofar as
-possible if \var{message} is a \class{Message} instance. If \var{message} is a
-string or a file, it should contain an \rfc{2822}-compliant message, which is
-read and parsed.
+contents are copied; furthermore, any format-specific information is converted
+insofar as possible if \var{message} is a \class{Message} instance. If
+\var{message} is a string or a file, it should contain an \rfc{2822}-compliant
+message, which is read and parsed.
 \end{classdesc}
 
 The format-specific state and behaviors offered by subclasses vary, but in
-general it is only the properties that are not specific to a particular
-mailbox that are supported (although presumably the properties are specific to
-a particular mailbox format). For example, file offsets for single-file mailbox
+general it is only the properties that are not specific to a particular mailbox
+that are supported (although presumably the properties are specific to a
+particular mailbox format). For example, file offsets for single-file mailbox
 formats and file names for directory-based mailbox formats are not retained,
-but state such as whether a message has been read or marked as important by the
-user is.
-
-In some situations, the time and memory overhead involved in generating
-\class{Message} representations might not not justified. For such situations,
-\class{Mailbox} instances also offer string and file-like representations, and
-a custom message factory may be specified when a \class{Mailbox} instance is
-initialized. There is no requirement to use the \class{Message} class to
-represent messages from a mailbox.
-
-All of the \class{email.Message.Message} class's methods and members are
-supported by \class{Message}, and subclasses of \class{Message} provide many
-additional format-specific methods. Some functionality supported by all
-\class{Message} subclasses is accessible via the following methods:
+because they are only applicable to the original mailbox. But state such as
+whether a message has been read by the user or marked as important is retained,
+because it applies to the message itself.
 
-XXX
+There is no requirement that \class{Message} instances be used to represent
+messages retrieved using \class{Mailbox} instances. In some situations, the
+time and memory required to generate \class{Message} representations might not
+be acceptable. For such situations, \class{Mailbox} instances also offer
+string and file-like representations, and a custom message factory may be
+specified when a \class{Mailbox} instance is initialized. 
 
 \subsubsection{\class{MaildirMessage}}
 \label{mailbox-maildirmessage}
@@ -654,26 +658,22 @@
 has the same meaning as with the \class{Message} constructor.
 \end{classdesc}
 
-Maildir messages are stored in individual files, in either the \file{new} or
-the \file{cur} subdirectory of the Maildir. Messages are delivered to the
-\file{new} subdirectory. Typically, a mail reading application moves messages
-to the \file{cur} subdirectory after the user opens and closes the mailbox,
-thereby recording that the messages are old whether or not they've actually
-been read.
-
-Each message in \file{cur} has an "info" section added to its file name to
-store information about its state. (Some mail readers may also add an "info"
-section to messages in \file{new}.) The "info" section may take one of two
-forms: it may contain "2," followed by a list of standardized flags (e.g.,
-"2,FR") or it may contain "1," followed by so-called experimental information.
-Standard flags for Maildir messages are as follows:
+Typically, a mail user agent application moves all of the messages in the
+\file{new} subdirectory to the \file{cur} subdirectory after the first time the
+user opens and closes the mailbox, recording that the messages are old whether
+or not they've actually been read. Each message in \file{cur} has an "info"
+section added to its file name to store information about its state. (Some mail
+readers may also add an "info" section to messages in \file{new}.) The "info"
+section may take one of two forms: it may contain "2," followed by a list of
+standardized flags (e.g., "2,FR") or it may contain "1," followed by so-called
+experimental information. Standard flags for Maildir messages are as follows:
 
 \begin{tableiii}{l|l|l}{textrm}{Flag}{Meaning}{Explanation}
 \lineiii{D}{Draft}{Under composition}
-\lineiii{F}{Flagged}{Marked by the user as important}
-\lineiii{P}{Passed}{Forwarded, resent, or bounced by the user}
-\lineiii{R}{Replied}{Responded to}
-\lineiii{S}{Seen}{Read by the user}
+\lineiii{F}{Flagged}{Marked as important}
+\lineiii{P}{Passed}{Forwarded, resent, or bounced}
+\lineiii{R}{Replied}{Replied to}
+\lineiii{S}{Seen}{Read}
 \lineiii{T}{Trashed}{Marked for subsequent deletion}
 \end{tableiii}
 
@@ -684,7 +684,7 @@
 subdirectory) or "cur" (if the message should be stored in the \file{cur}
 subdirectory). \note{A message is typically moved from \file{new} to \file{cur}
 after its mailbox has been accessed, whether or not the message is has been
-read. A message has been read if \code{"S" not in get_flags()}.}
+read. A message has been read if \code{"S" in get_flags()} is \code{True}.}
 \end{methoddesc}
 
 \begin{methoddesc}{set_subdir}{subdir}
@@ -796,11 +796,11 @@
 Conventional flags for mbox messages are as follows:
 
 \begin{tableiii}{l|l|l}{textrm}{Flag}{Meaning}{Explanation}
-\lineiii{R}{Read}{Read by the user}
-\lineiii{O}{Old}{Previously detected by mail reader}
+\lineiii{R}{Read}{Read}
+\lineiii{O}{Old}{Previously detected by MUA}
 \lineiii{D}{Deleted}{Marked for subsequent deletion}
-\lineiii{F}{Flagged}{Marked by the user as important}
-\lineiii{A}{Answered}{Responded to}
+\lineiii{F}{Flagged}{Marked as important}
+\lineiii{A}{Answered}{Replied to}
 \end{tableiii}
 
 The "R" and "O" flags are stored in the \mailheader{Status} header, and the
@@ -819,8 +819,8 @@
 Set the "From~" line to \var{from_}, which should be specified without a
 leading "From~" or trailing newline. For convenience, \var{time_} may be
 specified and will be formatted appropriately and appended to \var{from_}. If
-\var{time_} is specified, it should be a \class{struct_time}, a tuple suitable
-for passing to \method{time.strftime()}, or \code{True} (to use
+\var{time_} is specified, it should be a \class{struct_time} instance, a tuple
+suitable for passing to \method{time.strftime()}, or \code{True} (to use
 \method{time.gmtime()}).
 \end{methoddesc}
 
@@ -907,17 +907,14 @@
 \end{classdesc}
 
 MH messages do not support marks or flags in the traditional sense, but they do
-support sequences, which are logical groupings of arbitrary messages. Because
-sequences are often used to indicate the state of a messsage, they are
-maintained by the \class{MHMessage} class even though they are not, strictly
-speaking, a property of the message itself.
-
-Some mail user agents make use of sequences to record message state as follows:
+support sequences, which are logical groupings of arbitrary messages. Some mail
+reading programs (although not the standard \program{mh} and \program{nmh}) use
+sequences in much the same way flags are used with other formats, as follows:
 
 \begin{tableii}{l|l}{textrm}{Sequence}{Explanation}
-\lineii{unseen}{Previously detected by mail reader but not read}
-\lineii{replied}{Responded to}
-\lineii{flagged}{Marked by the user as important}
+\lineii{unseen}{Not read, but previously detected by MUA}
+\lineii{replied}{Replied to}
+\lineii{flagged}{Marked as important}
 \end{tableii}
 
 \class{MHMessage} instances offer the following methods:
@@ -977,27 +974,23 @@
 meaning as with the \class{Message} constructor.
 \end{classdesc}
 
-Information about Babyl messages is recorded using \dfn{labels}, or short
-strings which are stored in an MH mailbox just before each message. Some
-labels are assigned special meaning and are called \dfn{attributes}. Other
-labels are user-defined. The attributes are as follows:
+Certain message labels, called \dfn{attributes}, are defined by convention to
+have special meanings. The attributes are as follows:
 
 \begin{tableii}{l|l}{textrm}{Label}{Explanation}
-\lineii{unseen}{Previously detected by mail reader but not read}
+\lineii{unseen}{Not read, but previously detected by MUA}
 \lineii{deleted}{Marked for subsequent deletion}
 \lineii{filed}{Copied to another file or mailbox}
-\lineii{answered}{Responded to}
-\lineii{forwarded}{Forwarded by the the user}
-\lineii{edited}{Message content modified by the user}
-\lineii{resent}{Resent by the user}
+\lineii{answered}{Replied to}
+\lineii{forwarded}{Forwarded}
+\lineii{edited}{Modified by the user}
+\lineii{resent}{Resent}
 \end{tableii}
 
-Each message in a Babyl mailbox has two sets of headers, original headers and
-visible headers. Visible headers are typically a subset of the original
-headers reformatted to be more attractive. By default, \program{Rmail} displays
-only visible headers. \class{BabylMessage} uses the original headers because
-they are more complete, though the visible headers may be accessed explicitly
-if desired.
+By default, Rmail displays only
+visible headers. The \class{BabylMessage} class, though, uses the original
+headers because they are more complete. Visible headers may be accessed
+explicitly if desired.
 
 \class{BabylMessage} instances offer the following methods:
 
@@ -1088,11 +1081,11 @@
 are as follows:
 
 \begin{tableiii}{l|l|l}{textrm}{Flag}{Meaning}{Explanation}
-\lineiii{R}{Read}{Read by the user}
-\lineiii{O}{Old}{Previously detected by mail reader}
+\lineiii{R}{Read}{Read}
+\lineiii{O}{Old}{Previously detected by MUA}
 \lineiii{D}{Deleted}{Marked for subsequent deletion}
-\lineiii{F}{Flagged}{Marked by the user as important}
-\lineiii{A}{Answered}{Responded to}
+\lineiii{F}{Flagged}{Marked as important}
+\lineiii{A}{Answered}{Replied to}
 \end{tableiii}
 
 The "R" and "O" flags are stored in the \mailheader{Status} header, and the
@@ -1111,9 +1104,9 @@
 \begin{methoddesc}{set_from}{from_\optional{, time_=None}}
 Set the "From~" line to \var{from_}, which should be specified without a
 leading "From~" or trailing newline. For convenience, \var{time_} may be
-specified to format a time appropriately and append it to \var{from_}. If
-\var{time_} is specified, it should be a \class{struct_time}, a tuple suitable
-for passing to \method{time.strftime()}, or \code{True} (to use
+specified and will be formatted appropriately and appended to \var{from_}. If
+\var{time_} is specified, it should be a \class{struct_time} instance, a tuple
+suitable for passing to \method{time.strftime()}, or \code{True} (to use
 \method{time.gmtime()}).
 \end{methoddesc}
 

Index: mailbox.py
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/mailbox/mailbox.py,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -d -r1.10 -r1.11
--- mailbox.py	16 Aug 2005 23:38:11 -0000	1.10
+++ mailbox.py	17 Aug 2005 20:32:36 -0000	1.11
@@ -29,7 +29,7 @@
 
     def __init__(self, path, factory=None, create=True):
         """Initialize a Mailbox instance."""
-        self._path = os.path.abspath(path)
+        self._path = os.path.abspath(os.path.expanduser(path))
         self._factory = factory
 
     def add(self, message):
@@ -182,19 +182,30 @@
         """Flush and close the mailbox."""
         raise NotImplementedError('Method must be implemented by subclass')
 
-    def _dump_message(self, message, target):
+    def _dump_message(self, message, target, mangle_from_=False):
         """Dump message contents to target file."""
         if isinstance(message, email.Message.Message):
-            generator = email.Generator.Generator(target, False, 0)
+            generator = email.Generator.Generator(target, mangle_from_, 0)
             generator.flatten(message)
         elif isinstance(message, str):
+            if mangle_from_:
+                message = message.replace('\nFrom ', '\n>From ')
             target.write(message)
         elif hasattr(message, 'read'):
-            while True:
-                buffer = message.read(4096)     # Buffer size is arbitrary.
-                if buffer == "":
-                    break
-                target.write(buffer)
+            if mangle_from_:
+                while True:
+                    line = message.readline()
+                    if line == '':
+                        break
+                    if line[:5] == 'From ':
+                        line = '>From ' + s[5:]
+                    target.write(line)
+            else:
+                while True:
+                    buffer = message.read(4096)     # Buffer size is arbitrary.
+                    if buffer == '':
+                        break
+                    target.write(buffer)
         else:
             raise TypeError('Invalid message type: %s' % type(message))
 
@@ -594,6 +605,8 @@
 class _mboxMMDF(_singlefileMailbox):
     """An mbox or MMDF mailbox."""
 
+    _mangle_from_ = True
+
     def get_message(self, key):
         """Return a Message representation or raise a KeyError."""
         start, stop = self._lookup(key)
@@ -638,7 +651,7 @@
             from_line = 'From MAILER-DAEMON %s' % time.asctime(time.gmtime())
         start = self._file.tell()
         self._file.write('%s%s' % (from_line, os.linesep))
-        self._dump_message(message, self._file)
+        self._dump_message(message, self._file, self._mangle_from_)
         stop = self._file.tell()
         return (start, stop)
 
@@ -646,6 +659,8 @@
 class mbox(_mboxMMDF):
     """A classic mbox mailbox."""
 
+    _mangle_from_ = True
+
     def __init__(self, path, factory=None, create=True):
         """Initialize an mbox mailbox."""
         self._message_factory = mboxMessage
@@ -1755,6 +1770,7 @@
 
 def _lock_file(f, dotlock=True):
     """Lock file f using lockf, flock, and dot locking."""
+    dotlock_done = False
     try:
         if fcntl:
             try:
@@ -1786,13 +1802,13 @@
                 if hasattr(os, 'link'):
                     os.link(pre_lock.name, f.name + '.lock')
                     dotlock_done = True
-                    os.unlink(pre_lock)
+                    os.unlink(pre_lock.name)
                 else:
-                    os.rename(pre_lock, f.name + '.lock')
+                    os.rename(pre_lock.name, f.name + '.lock')
                     dotlock_done = True
             except OSError, e:
                 if e.errno == errno.EEXIST:
-                    os.remove(pre_lock)
+                    os.remove(pre_lock.name)
                     raise ExternalClashError('dot lock unavailable: %s' % 
                                              f.name)
                 else:
@@ -1810,7 +1826,7 @@
     if fcntl:
         fcntl.lockf(f, fcntl.LOCK_UN)
         fcntl.flock(f, fcntl.LOCK_UN)
-    if os.path.exists(path + '.lock'):
+    if os.path.exists(f.name + '.lock'):
         os.remove(f.name + '.lock')
 
 def _create_carefully(path):



More information about the Python-checkins mailing list