[Python-checkins] r54076 - sandbox/trunk/pep3101/unicodeformat.c

patrick.maupin python-checkins at python.org
Fri Mar 2 06:13:40 CET 2007


Author: patrick.maupin
Date: Fri Mar  2 06:13:36 2007
New Revision: 54076

Modified:
   sandbox/trunk/pep3101/unicodeformat.c
Log:
Reordered code to group into logical sections.
Added format string location to exception messages.


Modified: sandbox/trunk/pep3101/unicodeformat.c
==============================================================================
--- sandbox/trunk/pep3101/unicodeformat.c	(original)
+++ sandbox/trunk/pep3101/unicodeformat.c	Fri Mar  2 06:13:36 2007
@@ -11,9 +11,11 @@
     stringformat.c, to support both unicode and traditional strings.
 */
 
-/*
-    XXX -- todo: insert a fragment of the source string into error messages
-*/
+/************************************************************************/
+/***********    Macros to encapsulate build differences  ****************/
+/************************************************************************/
+
+/* We can build for several Python versions, and for Unicode or strings */
 
 #ifndef COMPILED_FROM_INSIDE_STRINGFORMAT
 #include "Python.h"
@@ -55,6 +57,16 @@
 #define SIZE_MULTIPLIER 2
 #define MAX_SIZE_INCREMENT  3200
 
+#if PYTHON_API_VERSION < 1013
+#define PySet_Discard    PyDict_DelItem
+#define PySet_New        PyDict_Copy
+#define PySet_GET_SIZE   PyDict_Size
+#endif
+
+/************************************************************************/
+/***********   Global data structures and forward declarations  *********/
+/************************************************************************/
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -108,6 +120,8 @@
     MarkupEscapeHandler do_markup;
     /* current position and end of the 'self' string passed to FormatMethod */
     SubString fmtstr;
+    /* Used for error reporting */
+    CH_TYPE *fmtstart;
     /* Output string we are constructing, including current and end pointers*/
     SubStringObj outstr;
     /* Field Specifier, after the colon in {1:{2}}
@@ -143,15 +157,27 @@
 static PyObject *
 recurse_format(FmtState *fs);
 
+/************************************************************************/
+/***********      Error handling and exception generation  **************/
+/************************************************************************/
+
 /*
     Most of our errors are value errors, because to Python, the
     format string is a "value".  Also, it's convenient to return
     a NULL when we are erroring out.
 */
 static void *
-SetError(const char *s)
+SetError(FmtState *fs, const char *s)
 {
-    PyErr_SetString(PyExc_ValueError, s);
+    if (fs->fmtstr.ptr == fs->fmtstr.end)
+        PyErr_Format(PyExc_ValueError, "%s at end of format_string", s);
+    else if ((fs->fmtstr.ptr >= fs->fmtstart) &&
+            (fs->fmtstr.ptr < fs->fmtstr.end))
+        PyErr_Format(PyExc_ValueError, "%s at format_string[%d]",
+            s, fs->fmtstr.ptr - fs->fmtstart);
+    else
+        PyErr_Format(PyExc_ValueError,
+            "%s (apparently in computed format specifier)", s);
     return NULL;
 }
 
@@ -163,55 +189,12 @@
 check_fmtstr(FmtState *fs)
 {
     return (fs->fmtstr.ptr < fs->fmtstr.end) ||
-           SetError("Invalid format string");
-}
-
-/*
-    end_identifier returns true if a character marks
-    the end of an identifier string.
-
-    Although the PEP specifies that identifiers are
-    numbers or valid Python identifiers, we just let
-    getattr/getitem handle that, so the implementation
-    is more flexible than the PEP would indicate.
-*/
-Py_LOCAL_INLINE(int)
-end_identifier(CH_TYPE c)
-{
-    switch (c) {
-        case '.': case '[': case ']': case '}': case ':':
-            return 1;
-        default:
-            return 0;
-    }
+           SetError(fs, "Unexpected end of format_string");
 }
 
-
-/* returns true if this character is a specifier alignment token */
-Py_LOCAL_INLINE(int)
-alignment_token(CH_TYPE c)
-{
-    switch (c) {
-    case '<': case '>': case '=':
-        return 1;
-    default:
-        return 0;
-    }
-}
-
-/* returns true if this character is a sign element */
-Py_LOCAL_INLINE(int)
-sign_element(CH_TYPE c)
-{
-    switch (c) {
-    case ' ': case '+': case '-': case '(':
-        return 1;
-    default:
-        return 0;
-    }
-}
-
-
+/************************************************************************/
+/***********    Output string management functions       ****************/
+/************************************************************************/
 
 /* Fill in a SubStringObj from a Python string */
 Py_LOCAL_INLINE(SubStringObj)
@@ -224,12 +207,6 @@
     return s;
 }
 
-#if PYTHON_API_VERSION < 1013
-#define PySet_Discard    PyDict_DelItem
-#define PySet_New        PyDict_Copy
-#define PySet_GET_SIZE   PyDict_Size
-#endif
-
 /*
     output_allocate reserves space in our output string buffer
 
@@ -264,7 +241,6 @@
     return 1;
 }
 
-/* XXX -- similar function elsewhere ???? */
 /*
     output_data dumps characters into our output string
     buffer.
@@ -284,6 +260,66 @@
     return 1;
 }
 
+/************************************************************************/
+/***********  Format string parsing -- integers and identifiers *********/
+/************************************************************************/
+
+/*
+    end_identifier returns true if a character marks
+    the end of an identifier string.
+
+    Although the PEP specifies that identifiers are
+    numbers or valid Python identifiers, we just let
+    getattr/getitem handle that, so the implementation
+    is more flexible than the PEP would indicate.
+*/
+Py_LOCAL_INLINE(int)
+end_identifier(CH_TYPE c)
+{
+    switch (c) {
+        case '.': case '[': case ']': case '}': case ':':
+            return 1;
+        default:
+            return 0;
+    }
+}
+
+/*
+    get_integer_index consumes 0 or more decimal digit characters
+    from a format string, updates *result with the corresponding
+    positive integer, and returns the number of digits consumed.
+
+    If the value overflows, it sets a ValueError ("Too many digits")
+    and returns 0 without updating *result.
+*/
+static int
+get_integer_index(FmtState *fs, Py_ssize_t *result)
+{
+    Py_ssize_t accumulator, digitval, oldaccumulator;
+    int numdigits;
+    accumulator = numdigits = 0;
+    for (;;fs->fmtstr.ptr++, numdigits++) {
+        if (fs->fmtstr.ptr >= fs->fmtstr.end)
+            break;
+        digitval = CH_TYPE_TODECIMAL(*fs->fmtstr.ptr);
+        if (digitval < 0)
+            break;
+        /*
+           This trick was copied from old Unicode format code.  It's cute,
+           but would really suck on an old machine with a slow divide
+           implementation.  Fortunately, in the normal case we do not
+           expect too many digits.
+        */
+        oldaccumulator = accumulator;
+        accumulator *= 10;
+        if ((accumulator+10)/10 != oldaccumulator+1)
+            return (int)SetError(fs, "Too many digits");
+        accumulator += digitval;
+    }
+    *result = accumulator;
+    return numdigits;
+}
+
 /*
     get_python_identifier is a bit of a misnomer.  It returns
     a value for use with getattr or getindex.  This value
@@ -303,17 +339,18 @@
            lookups and computed attribute names
         */
         if (--fs->max_recursion < 0)
-            return SetError("Max string recursion exceeded");
+            return SetError(fs, "Maximum string recursion limit exceeded");
         result = get_field_object(fs);
         fs->max_recursion++;
         if (result && (*fs->fmtstr.ptr++ != '}'))
-            result = SetError("Expected closing }");
+            result = SetError(fs, "Expected closing }");
         return result;
     }
     if (end_identifier(*fs->fmtstr.ptr))
-        return SetError("Expected attribute or index");
+        return SetError(fs, "Expected attribute or index");
     if ((*fs->fmtstr.ptr == '_') && !fs->allow_leading_under)
-        return SetError("Index/attribute leading underscores disallowed");
+        return SetError(fs,
+             "Leading underscores not allowed in attribute/index strings");
 
     for (startptr = fs->fmtstr.ptr;
          !end_identifier(*fs->fmtstr.ptr);
@@ -345,6 +382,15 @@
     return result;
 }
 
+/************************************************************************/
+/******** Functions to get field objects and specification strings ******/
+/************************************************************************/
+
+/* get_field_and_spec is the main function in this section.  It parses
+   the format string well enough to return a field object to render along
+   with a field specification string.
+*/
+
 /*
     If keywords are supplied as a sequence of dictionaries
     (e.g. locals/globals) then name_mapper will do multiple
@@ -372,42 +418,6 @@
 }
 
 /*
-    get_integer_index consumes 0 or more decimal digit characters
-    from a format string, updates *result with the corresponding
-    positive integer, and returns the number of digits consumed.
-
-    if the isargument parameter is true, it will remove the
-    integer from the arguments bitset.
-*/
-static int
-get_integer_index(FmtState *fs, Py_ssize_t *result)
-{
-    Py_ssize_t accumulator, digitval, oldaccumulator;
-    int numdigits;
-    accumulator = numdigits = 0;
-    for (;;fs->fmtstr.ptr++, numdigits++) {
-        if (fs->fmtstr.ptr >= fs->fmtstr.end)
-            break;
-        digitval = CH_TYPE_TODECIMAL(*fs->fmtstr.ptr);
-        if (digitval < 0)
-            break;
-        /*
-           This trick was copied from old Unicode format code.  It's cute,
-           but would really suck on an old machine with a slow divide
-           implementation.  Fortunately, in the normal case we do not
-           expect too many digits.
-        */
-        oldaccumulator = accumulator;
-        accumulator *= 10;
-        if ((accumulator+10)/10 != oldaccumulator+1)
-            return (int)SetError("field width or index value too large");
-        accumulator += digitval;
-    }
-    *result = accumulator;
-    return numdigits;
-}
-
-/*
     get_specifier retrieves the part of the format string
     between the colon and trailing }.
 */
@@ -471,7 +481,7 @@
     isnumeric = (CH_TYPE_ISDECIMAL(*fs->fmtstr.ptr));
     myobj = isnumeric ? fs->args : fs->keywords;
     if (myobj == NULL)
-        return SetError("No keyword arguments passed");
+        return SetError(fs, "Keyword not specified");
     Py_INCREF(myobj);
 
     for (isindex=1, expectclose=0, isargument=1;;) {
@@ -513,7 +523,7 @@
         myobj = newobj;
         if (expectclose)
             if  ((!check_fmtstr(fs)) || (*fs->fmtstr.ptr++ != ']')) {
-                SetError("Expected ]");
+                SetError(fs, "Expected ]");
                 break;
             }
         if (!check_fmtstr(fs))
@@ -525,13 +535,14 @@
         isargument = 0;
         isindex = expectclose = (c == '[');
         if (!isindex && (c != '.')) {
-           SetError("Expected ., [, :, or }");
+           SetError(fs, "Expected ., [, :, or }");
            break;
         }
     }
     Py_DECREF(myobj);
     return NULL;
 }
+
 /*
     get_field_and_spec calls subfunctions to retrieve the
     field object and optional specification string.
@@ -557,37 +568,21 @@
     return NULL;
 }
 
+/************************************************************************/
+/*****************  Field rendering functions  **************************/
+/************************************************************************/
+
 /*
-    user_format is invoked to format an object with a defined __format__
-    attribute.
+    render_field is the main function in this section.  It takes the field
+    object and field specification string generated by get_field_and_spec,
+    and renders the field into the output string.
+
+    The two main subfunctions of render_field are caller_render (which
+    calls the object-supplied __format__ hook), and internal_render, which
+    renders objects which don't have format hooks.
 */
-static int
-user_format(FmtState *fs, PyObject *__format__)
-{
-    PyObject *myobj;
-    int ok;
 
-    myobj = fs->fieldspec.obj;
-    if (myobj == NULL) {
-        myobj = STROBJ_NEW(fs->fieldspec.ptr,
-                    fs->fieldspec.end - fs->fieldspec.ptr);
-        if (myobj == NULL)
-            return 0;
-        fs->fieldspec.obj = myobj;   /* Owned by our caller now */
-    }
-    /* XXX -- possible optimization to CallFunctionWithArgs */
-    myobj = PyObject_CallFunction(__format__, "(O)", myobj);
-    if (myobj == NULL)
-        return 0;
-    ok = STROBJ_CHECK(myobj);
-    if (!ok)
-        SetError("__format__ method did not return correct string type");
-    else
-        ok = output_data(fs, STROBJ_AS_PTR(myobj),
-                            STROBJ_GET_SIZE(myobj));
-    Py_DECREF(myobj);
-    return ok;
-}
+#if !DUMMY_FORMATTING
 
 typedef struct {
     CH_TYPE fill_char;
@@ -598,12 +593,36 @@
     CH_TYPE type;
 } DefaultFormat;
 
+/* returns true if this character is a specifier alignment token */
+Py_LOCAL_INLINE(int)
+alignment_token(CH_TYPE c)
+{
+    switch (c) {
+    case '<': case '>': case '=':
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/* returns true if this character is a sign element */
+Py_LOCAL_INLINE(int)
+sign_element(CH_TYPE c)
+{
+    switch (c) {
+    case ' ': case '+': case '-': case '(':
+        return 1;
+    default:
+        return 0;
+    }
+}
+
 /*
     parse the default specification
 */
 
 static int
-parse_default_format(FmtState *fs, DefaultFormat *format)
+parse_internal_render(FmtState *fs, DefaultFormat *format)
 {
     Py_ssize_t index = 0;
     Py_ssize_t specified_width;
@@ -682,7 +701,7 @@
     remaining = spec_len - index;
     if (remaining > 1) {
         /* invalid conversion spec */
-        SetError("Invalid conversion specification");
+        SetError(fs, "Invalid conversion specification");
         return 0;
     }
 
@@ -862,11 +881,12 @@
         return NULL;
     }
 }
+#endif
 
 /*
-    default_format -- "Then a miracle occurs"
+    internal_render -- "Then a miracle occurs"
 */
-static int default_format(FmtState *fs, PyObject *fieldobj)
+static int internal_render(FmtState *fs, PyObject *fieldobj)
 {
 #if DUMMY_FORMATTING == 1
     PyObject *myobj;
@@ -900,7 +920,7 @@
     CH_TYPE prefix;
     CH_TYPE suffix;
 
-    if (!parse_default_format(fs, &format)) {
+    if (!parse_internal_render(fs, &format)) {
         return 0;
     }
 
@@ -921,7 +941,7 @@
 
     conversion = conversion_function(format.type);
     if (conversion == NULL) {
-        SetError("Invalid conversion character");
+        SetError(fs, "Invalid conversion character");
         return 0;
     }
 
@@ -974,18 +994,50 @@
 }
 
 /*
-    renderfield determines if the field object has a defined __format__
+    caller_render is invoked to format an object with a defined __format__
+    attribute.
+*/
+static int
+caller_render(FmtState *fs, PyObject *__format__)
+{
+    PyObject *myobj;
+    int ok;
+
+    myobj = fs->fieldspec.obj;
+    if (myobj == NULL) {
+        myobj = STROBJ_NEW(fs->fieldspec.ptr,
+                    fs->fieldspec.end - fs->fieldspec.ptr);
+        if (myobj == NULL)
+            return 0;
+        fs->fieldspec.obj = myobj;   /* Owned by our caller now */
+    }
+    /* XXX -- possible optimization to CallFunctionWithArgs */
+    myobj = PyObject_CallFunction(__format__, "(O)", myobj);
+    if (myobj == NULL)
+        return 0;
+    ok = STROBJ_CHECK(myobj);
+    if (!ok)
+        SetError(fs, "__format__ method did not return correct string type");
+    else
+        ok = output_data(fs, STROBJ_AS_PTR(myobj),
+                            STROBJ_GET_SIZE(myobj));
+    Py_DECREF(myobj);
+    return ok;
+}
+
+/*
+    render_field determines if the field object has a defined __format__
     method, and dispatches to the appropriate subfunction.
 */
 static int
-renderfield(FmtState *fs, PyObject *fieldobj)
+render_field(FmtState *fs, PyObject *fieldobj)
 {
     int result;
     SubString savefmt;
 
     PyObject *__format__ = PyObject_GetAttrString(fieldobj, "__format__");
     if (__format__ != NULL) {
-        result = user_format(fs, __format__);
+        result = caller_render(fs, __format__);
         Py_DECREF(__format__);
     }
     else {
@@ -1000,12 +1052,16 @@
         savefmt = fs->fmtstr;
         fs->fmtstr.ptr = fs->fieldspec.ptr;
         fs->fmtstr.end = fs->fieldspec.end;
-        result = default_format(fs, fieldobj);
+        result = internal_render(fs, fieldobj);
         fs->fmtstr = savefmt;
     }
     return result;
 }
 
+/************************************************************************/
+/******* Output string allocation and escape-to-markup processing  ******/
+/************************************************************************/
+
 /*
     do_markup is the main program loop.  It rummages through
     the format string, looking for escapes to markup, and
@@ -1031,28 +1087,29 @@
             fmtstr.ptr++;
             count--;
         }
+        fmtstr.ptr++;
         count = total - count;
         total -= count;
-        doubled = (total > 1) && (fmtstr.ptr[1] == c);
+        doubled = (total > 1) && (*fmtstr.ptr == c);
         if (doubled) {
             output_data(fs, start, count+1);
-            fmtstr.ptr += 2;
+            fmtstr.ptr++;
             continue;
         } else if (count)
             output_data(fs, start, count);
-        if (total < 2) {
-            ok = !total ||
-                   (int)SetError("Invalid format string -- { or } at end");
-            break;
-        }
+        fs->fmtstr.ptr = fmtstr.ptr;
         if (c == '}') {
-            SetError("Invalid format string -- single } encountered");
+            SetError(fs, "Single } encountered");
             ok = 0;
             break;
         }
-        fs->fmtstr.ptr = fmtstr.ptr + 1;
+        if (total < 2) {
+            ok = !total ||
+                   (int)SetError(fs, "Single { encountered");
+            break;
+        }
         myobj = get_field_and_spec(fs);
-        ok = (myobj != NULL) && renderfield(fs, myobj);
+        ok = (myobj != NULL) && render_field(fs, myobj);
         Py_XDECREF(fs->fieldspec.obj);
         Py_XDECREF(myobj);
         if (!ok)
@@ -1103,7 +1160,7 @@
     SubStringObj saveoutstr = fs->outstr;
     int saveincrement = fs->size_increment;
     if (--(fs->max_recursion) < 0)
-        return SetError("Max string recursion exceeded");
+        return SetError(fs, "Max string recursion exceeded");
     result = do_format(fs);
     fs->max_recursion++;
     fs->outstr = saveoutstr;
@@ -1111,6 +1168,10 @@
     return result;
 }
 
+/************************************************************************/
+/*********** Main function, option processing, setup and teardown  ******/
+/************************************************************************/
+
 static int
 get_options(PyObject *keywords, FmtState *fs)
 {
@@ -1181,7 +1242,7 @@
     else
         fs->arg_param_offset = 0;
     fs->args = args;
-    fs->fmtstr.ptr = STROBJ_AS_PTR(self);
+    fs->fmtstr.ptr = fs->fmtstart = STROBJ_AS_PTR(self);
     fs->fmtstr.end = fs->fmtstr.ptr + STROBJ_GET_SIZE(self);
     return 1;
 }
@@ -1196,7 +1257,7 @@
         ok = (PySet_GET_SIZE(used) <= 1)  && !fs->positional_arg_set;
         if (!ok) {
             Py_DECREF(result);
-            result = SetError("Not all arguments consumed");
+            result = SetError(fs, "Not all arguments consumed");
         }
     }
     Py_XDECREF(used);


More information about the Python-checkins mailing list