[Jython-checkins] jython: Comment and formatting change only to stringlib/MarkupIterator.

Sun Jun 29 17:13:11 CEST 2014

http://hg.python.org/jython/rev/d45151484317
changeset:   7340:d45151484317
parent:      7315:44191dd20f5a
user:        Jeff Allen <ja.py at farowl.co.uk>
date:        Fri Jun 27 16:40:58 2014 +0100
summary:
  Comment and formatting change only to stringlib/MarkupIterator.

files:
  src/org/python/core/stringlib/MarkupIterator.java |  127 ++++++++-
  1 files changed, 113 insertions(+), 14 deletions(-)

diff --git a/src/org/python/core/stringlib/MarkupIterator.java b/src/org/python/core/stringlib/MarkupIterator.java
--- a/src/org/python/core/stringlib/MarkupIterator.java
+++ b/src/org/python/core/stringlib/MarkupIterator.java
@@ -9,21 +9,28 @@
 import org.python.expose.ExposedType;
 
 /**
- * Provides an implementation of str._formatter_parser()
+ * Provides an implementation of the object that <code>str._formatter_parser()</code> returns, which
+ * is an iterator returning successive 4-tuples, the sequence being equivalent to the original
+ * string.
  */
 @ExposedType(name = "formatteriterator", base = PyObject.class, isBaseType = false)
 public class MarkupIterator extends PyObject {
 
     public static final PyType TYPE = PyType.fromClass(MarkupIterator.class);
 
+    /** The string from which elements are being returned. */
     private final String markup;
+    /** How far along that string we are. */
     private int index;
+    /** A counter used to auto-number fields when not explicitly numbered in the format. */
     private final FieldNumbering numbering;
 
+    /** Constructor used at top-level to enumerate a format. */
     public MarkupIterator(String markup) {
         this(markup, null);
     }
 
+    /** Variant constructor used when formats are nested. */
     public MarkupIterator(String markup, MarkupIterator enclosingIterator) {
         this.markup = markup;
         if (enclosingIterator != null) {
@@ -48,6 +55,21 @@
         return formatteriterator___iternext__();
     }
 
+    /**
+     * Return the next "chunk" of the format (or return null if ended). A chunk is a 4-tuple
+     * describing
+     * <ol start=0>
+     * <li>the text leading up to the next format field,</li>
+     * <li>the field name or number (as a string) for accessing the value,</li>
+     * <li>the format specifier such as <code>"#12x"</code>, and</li>
+     * <li>any conversion that should be applied (the <code>'s'</code> or <code>'r'</code> codes for
+     * <code>str()</code> and <code>repr()</code>)</li>
+     * </ol>
+     * Elements 1-3 are None if this chunk contains no replacement field. Within a field, a missing
+     * name is auto-numbered, a missing format specifier is "", and a missing conversion is None.
+     * 
+     * @return <code>PyTuple</code> chunk or <code>null</code>
+     */
     @ExposedMethod
     final PyObject formatteriterator___iternext__() {
         Chunk chunk;
@@ -60,16 +82,23 @@
             return null;
         }
         PyObject[] elements = new PyObject[4];
+
+        // Literal text is used verbatim.
         elements[0] = new PyString(chunk.literalText);
-        elements[1] = chunk.fieldName.length() == 0
-                    ? Py.None : new PyString(chunk.fieldName);
+
+        // A field name is empty only if there was no format at all.
+        elements[1] = chunk.fieldName.length() == 0 ? Py.None : new PyString(chunk.fieldName);
         if (chunk.fieldName.length() > 0) {
-            elements[2] = chunk.formatSpec == null
-                    ? Py.EmptyString : new PyString(chunk.formatSpec);
+            elements[2] =
+                    chunk.formatSpec == null ? Py.EmptyString : new PyString(chunk.formatSpec);
         } else {
             elements[2] = Py.None;
         }
+
+        // There may have been a conversion specifier.
         elements[3] = chunk.conversion == null ? Py.None : new PyString(chunk.conversion);
+
+        // And those make up the next answer.
         return new PyTuple(elements);
     }
 
@@ -78,36 +107,50 @@
             return null;
         }
         Chunk result = new Chunk();
+
+        // pos = index is the index of the first text not already chunked
         int pos = index;
+
+        // Advance pos to the first '{' that is not a "{{" (escaped brace), or pos<0 if none such.
         while (true) {
             pos = indexOfFirst(markup, pos, '{', '}');
             if (pos >= 0 && pos < markup.length() - 1
-                && markup.charAt(pos + 1) == markup.charAt(pos)) {
+                    && markup.charAt(pos + 1) == markup.charAt(pos)) {
                 // skip escaped bracket
                 pos += 2;
             } else if (pos >= 0 && markup.charAt(pos) == '}') {
+                // Un-escaped '}' is a syntax error
                 throw new IllegalArgumentException("Single '}' encountered in format string");
             } else {
+                // pos is at an un-escaped '{'
                 break;
             }
         }
+
+        // markup[index:pos] is the literal part of this chunk.
         if (pos < 0) {
+            // ... except pos<0, and there is no further format specifier, only literal text.
             result.literalText = unescapeBraces(markup.substring(index));
             result.fieldName = "";
             index = markup.length();
-        }
-        else {
+
+        } else {
+            // Grab the literal text, dealing with escaped braces.
             result.literalText = unescapeBraces(markup.substring(index, pos));
+            // Scan through the contents of the replacement field, between the braces. Skip one '{'.
             pos++;
             int fieldStart = pos;
             int count = 1;
             while (pos < markup.length()) {
                 if (markup.charAt(pos) == '{') {
+                    // This means the spec we are gathering itself contains nested specifiers.
                     count++;
                     result.formatSpecNeedsExpanding = true;
                 } else if (markup.charAt(pos) == '}') {
+                    // And here is a '}' matching a '{' we already counted.
                     count--;
                     if (count == 0) {
+                        // ... matching the one we began with: parse the replacement field.
                         parseField(result, markup.substring(fieldStart, pos));
                         pos++;
                         break;
@@ -116,6 +159,7 @@
                 pos++;
             }
             if (count > 0) {
+                // Must be end of string without matching '}'.
                 throw new IllegalArgumentException("Single '{' encountered in format string");
             }
             index = pos;
@@ -127,44 +171,75 @@
         return substring.replace("{{", "{").replace("}}", "}");
     }
 
+    /**
+     * Parse a "replacement field" consisting of a name, conversion and format specification.
+     * According to the Python Standard Library documentation, a replacement field has the
+     * structure:
+     * 
+     * <pre>
+     * replacement_field ::=  "{" [field_name] ["!" conversion] [":" format_spec] "}"
+     * field_name        ::=  arg_name ("." attribute_name | "[" element_index "]")*
+     * arg_name          ::=  [identifier | integer]
+     * attribute_name    ::=  identifier
+     * element_index     ::=  integer | index_string
+     * </pre>
+     * 
+     * except at this point, we have already discarded the outer braces.
+     * 
+     * @param result destination chunk
+     * @param fieldMarkup specifying a replacement field, possibly with nesting
+     */
     private void parseField(Chunk result, String fieldMarkup) {
         int pos = indexOfFirst(fieldMarkup, 0, '!', ':');
         if (pos >= 0) {
+            // There's a '!' or a ':', so what precedes the first of them is a field name.
             result.fieldName = fieldMarkup.substring(0, pos);
             if (fieldMarkup.charAt(pos) == '!') {
+                // There's a conversion specifier
                 if (pos == fieldMarkup.length() - 1) {
-                    throw new IllegalArgumentException("end of format while " +
-                            "looking for conversion specifier");
+                    throw new IllegalArgumentException("end of format while "
+                            + "looking for conversion specifier");
                 }
                 result.conversion = fieldMarkup.substring(pos + 1, pos + 2);
                 pos += 2;
+                // And if that's not the end, there ought to be a ':' now.
                 if (pos < fieldMarkup.length()) {
                     if (fieldMarkup.charAt(pos) != ':') {
-                        throw new IllegalArgumentException("expected ':' " +
-                                "after conversion specifier");
+                        throw new IllegalArgumentException("expected ':' "
+                                + "after conversion specifier");
                     }
+                    // So the format specifier is from the ':' to the end.
                     result.formatSpec = fieldMarkup.substring(pos + 1);
                 }
             } else {
+                // No '!', so the format specifier is from the ':' to the end. Or empty.
                 result.formatSpec = fieldMarkup.substring(pos + 1);
             }
         } else {
+            // Neither a '!' nor a ':', the whole thing is a name.
             result.fieldName = fieldMarkup;
         }
+
         if (result.fieldName.isEmpty()) {
+            // The field was empty, so generate a number automatically.
             result.fieldName = numbering.nextAutomaticFieldNumber();
             return;
         }
+
+        // Automatic numbers must also work when there is an .attribute or [index]
         char c = result.fieldName.charAt(0);
         if (c == '.' || c == '[') {
             result.fieldName = numbering.nextAutomaticFieldNumber() + result.fieldName;
             return;
         }
+
+        // Finally, remember the argument number was specified (perhaps complain of mixed use)
         if (Character.isDigit(c)) {
             numbering.useManualFieldNumbering();
         }
     }
 
+    /** Return the index of the first occurrence of either character at or after start, or -1. */
     private int indexOfFirst(String s, int start, char c1, char c2) {
         int i1 = s.indexOf(c1, start);
         int i2 = s.indexOf(c2, start);
@@ -177,32 +252,56 @@
         return Math.min(i1, i2);
     }
 
+    /**
+     * Class used locally to assign indexes to the automatically-numbered arguments (see String
+     * Formatting section of the Python Standard Library).
+     */
     static final class FieldNumbering {
+
         private boolean manualFieldNumberSpecified;
         private int automaticFieldNumber = 0;
 
+        /**
+         * Generate a numeric argument index automatically, or raise an error if already started
+         * numbering manually.
+         * 
+         * @return index as string
+         */
         String nextAutomaticFieldNumber() {
             if (manualFieldNumberSpecified) {
-                throw new IllegalArgumentException("cannot switch from manual field specification to automatic field numbering");
+                throw new IllegalArgumentException(
+                        "cannot switch from manual field specification to automatic field numbering");
             }
             return Integer.toString(automaticFieldNumber++);
         }
+
+        /**
+         * Remember we are numbering manually, and raise an error if already started numbering
+         * automatically.
+         */
         void useManualFieldNumbering() {
             if (manualFieldNumberSpecified) {
                 return;
             }
             if (automaticFieldNumber != 0) {
-                throw new IllegalArgumentException("cannot switch from automatic field numbering to manual field specification");
+                throw new IllegalArgumentException(
+                        "cannot switch from automatic field numbering to manual field specification");
             }
             manualFieldNumberSpecified = true;
         }
     }
 
     public static final class Chunk {
+
+        /** The text leading up to the next format field. */
         public String literalText;
+        /** The field name or number (as a string) for accessing the value. */
         public String fieldName;
+        /** The format specifier such as <code>"#12x"</code>. */
         public String formatSpec;
+        /** Conversion to be applied, e.g. <code>'r'</code> for <code>repr()</code>. */
         public String conversion;
+        /** Signals the <code>formatSpec</code> needs expanding recursively. */
         public boolean formatSpecNeedsExpanding;
     }
 }

-- 
Repository URL: http://hg.python.org/jython