[Python-checkins] r54076 - sandbox/trunk/pep3101/unicodeformat.c
patrick.maupin
python-checkins at python.org
Fri Mar 2 06:13:40 CET 2007
Author: patrick.maupin
Date: Fri Mar 2 06:13:36 2007
New Revision: 54076
Modified:
sandbox/trunk/pep3101/unicodeformat.c
Log:
Reordered code to group into logical sections.
Added format string location to exception messages.
Modified: sandbox/trunk/pep3101/unicodeformat.c
==============================================================================
--- sandbox/trunk/pep3101/unicodeformat.c (original)
+++ sandbox/trunk/pep3101/unicodeformat.c Fri Mar 2 06:13:36 2007
@@ -11,9 +11,11 @@
stringformat.c, to support both unicode and traditional strings.
*/
-/*
- XXX -- todo: insert a fragment of the source string into error messages
-*/
+/************************************************************************/
+/*********** Macros to encapsulate build differences ****************/
+/************************************************************************/
+
+/* We can build for several Python versions, and for Unicode or strings */
#ifndef COMPILED_FROM_INSIDE_STRINGFORMAT
#include "Python.h"
@@ -55,6 +57,16 @@
#define SIZE_MULTIPLIER 2
#define MAX_SIZE_INCREMENT 3200
+#if PYTHON_API_VERSION < 1013
+#define PySet_Discard PyDict_DelItem
+#define PySet_New PyDict_Copy
+#define PySet_GET_SIZE PyDict_Size
+#endif
+
+/************************************************************************/
+/*********** Global data structures and forward declarations *********/
+/************************************************************************/
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -108,6 +120,8 @@
MarkupEscapeHandler do_markup;
/* current position and end of the 'self' string passed to FormatMethod */
SubString fmtstr;
+ /* Used for error reporting */
+ CH_TYPE *fmtstart;
/* Output string we are constructing, including current and end pointers*/
SubStringObj outstr;
/* Field Specifier, after the colon in {1:{2}}
@@ -143,15 +157,27 @@
static PyObject *
recurse_format(FmtState *fs);
+/************************************************************************/
+/*********** Error handling and exception generation **************/
+/************************************************************************/
+
/*
Most of our errors are value errors, because to Python, the
format string is a "value". Also, it's convenient to return
a NULL when we are erroring out.
*/
static void *
-SetError(const char *s)
+SetError(FmtState *fs, const char *s)
{
- PyErr_SetString(PyExc_ValueError, s);
+ if (fs->fmtstr.ptr == fs->fmtstr.end)
+ PyErr_Format(PyExc_ValueError, "%s at end of format_string", s);
+ else if ((fs->fmtstr.ptr >= fs->fmtstart) &&
+ (fs->fmtstr.ptr < fs->fmtstr.end))
+ PyErr_Format(PyExc_ValueError, "%s at format_string[%d]",
+ s, fs->fmtstr.ptr - fs->fmtstart);
+ else
+ PyErr_Format(PyExc_ValueError,
+ "%s (apparently in computed format specifier)", s);
return NULL;
}
@@ -163,55 +189,12 @@
check_fmtstr(FmtState *fs)
{
return (fs->fmtstr.ptr < fs->fmtstr.end) ||
- SetError("Invalid format string");
-}
-
-/*
- end_identifier returns true if a character marks
- the end of an identifier string.
-
- Although the PEP specifies that identifiers are
- numbers or valid Python identifiers, we just let
- getattr/getitem handle that, so the implementation
- is more flexible than the PEP would indicate.
-*/
-Py_LOCAL_INLINE(int)
-end_identifier(CH_TYPE c)
-{
- switch (c) {
- case '.': case '[': case ']': case '}': case ':':
- return 1;
- default:
- return 0;
- }
+ SetError(fs, "Unexpected end of format_string");
}
-
-/* returns true if this character is a specifier alignment token */
-Py_LOCAL_INLINE(int)
-alignment_token(CH_TYPE c)
-{
- switch (c) {
- case '<': case '>': case '=':
- return 1;
- default:
- return 0;
- }
-}
-
-/* returns true if this character is a sign element */
-Py_LOCAL_INLINE(int)
-sign_element(CH_TYPE c)
-{
- switch (c) {
- case ' ': case '+': case '-': case '(':
- return 1;
- default:
- return 0;
- }
-}
-
-
+/************************************************************************/
+/*********** Output string management functions ****************/
+/************************************************************************/
/* Fill in a SubStringObj from a Python string */
Py_LOCAL_INLINE(SubStringObj)
@@ -224,12 +207,6 @@
return s;
}
-#if PYTHON_API_VERSION < 1013
-#define PySet_Discard PyDict_DelItem
-#define PySet_New PyDict_Copy
-#define PySet_GET_SIZE PyDict_Size
-#endif
-
/*
output_allocate reserves space in our output string buffer
@@ -264,7 +241,6 @@
return 1;
}
-/* XXX -- similar function elsewhere ???? */
/*
output_data dumps characters into our output string
buffer.
@@ -284,6 +260,66 @@
return 1;
}
+/************************************************************************/
+/*********** Format string parsing -- integers and identifiers *********/
+/************************************************************************/
+
+/*
+ end_identifier returns true if a character marks
+ the end of an identifier string.
+
+ Although the PEP specifies that identifiers are
+ numbers or valid Python identifiers, we just let
+ getattr/getitem handle that, so the implementation
+ is more flexible than the PEP would indicate.
+*/
+Py_LOCAL_INLINE(int)
+end_identifier(CH_TYPE c)
+{
+ switch (c) {
+ case '.': case '[': case ']': case '}': case ':':
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/*
+ get_integer_index consumes 0 or more decimal digit characters
+ from a format string, updates *result with the corresponding
+ positive integer, and returns the number of digits consumed.
+
+ NOTE: this function takes no isargument parameter, so it does not
+ itself remove the integer from the arguments bitset.
+*/
+static int
+get_integer_index(FmtState *fs, Py_ssize_t *result)
+{
+ Py_ssize_t accumulator, digitval, oldaccumulator;
+ int numdigits;
+ accumulator = numdigits = 0;
+ for (;;fs->fmtstr.ptr++, numdigits++) {
+ if (fs->fmtstr.ptr >= fs->fmtstr.end)
+ break;
+ digitval = CH_TYPE_TODECIMAL(*fs->fmtstr.ptr);
+ if (digitval < 0)
+ break;
+ /*
+ This trick was copied from old Unicode format code. It's cute,
+ but would really suck on an old machine with a slow divide
+ implementation. Fortunately, in the normal case we do not
+ expect too many digits.
+ */
+ oldaccumulator = accumulator;
+ accumulator *= 10;
+ if ((accumulator+10)/10 != oldaccumulator+1)
+ return (int)SetError(fs, "Too many digits");
+ accumulator += digitval;
+ }
+ *result = accumulator;
+ return numdigits;
+}
+
/*
get_python_identifier is a bit of a misnomer. It returns
a value for use with getattr or getindex. This value
@@ -303,17 +339,18 @@
lookups and computed attribute names
*/
if (--fs->max_recursion < 0)
- return SetError("Max string recursion exceeded");
+ return SetError(fs, "Maximum string recursion limit exceeded");
result = get_field_object(fs);
fs->max_recursion++;
if (result && (*fs->fmtstr.ptr++ != '}'))
- result = SetError("Expected closing }");
+ result = SetError(fs, "Expected closing }");
return result;
}
if (end_identifier(*fs->fmtstr.ptr))
- return SetError("Expected attribute or index");
+ return SetError(fs, "Expected attribute or index");
if ((*fs->fmtstr.ptr == '_') && !fs->allow_leading_under)
- return SetError("Index/attribute leading underscores disallowed");
+ return SetError(fs,
+ "Leading underscores not allowed in attribute/index strings");
for (startptr = fs->fmtstr.ptr;
!end_identifier(*fs->fmtstr.ptr);
@@ -345,6 +382,15 @@
return result;
}
+/************************************************************************/
+/******** Functions to get field objects and specification strings ******/
+/************************************************************************/
+
+/* get_field_and_spec is the main function in this section. It parses
+ the format string well enough to return a field object to render along
+ with a field specification string.
+*/
+
/*
If keywords are supplied as a sequence of dictionaries
(e.g. locals/globals) then name_mapper will do multiple
@@ -372,42 +418,6 @@
}
/*
- get_integer_index consumes 0 or more decimal digit characters
- from a format string, updates *result with the corresponding
- positive integer, and returns the number of digits consumed.
-
- if the isargument parameter is true, it will remove the
- integer from the arguments bitset.
-*/
-static int
-get_integer_index(FmtState *fs, Py_ssize_t *result)
-{
- Py_ssize_t accumulator, digitval, oldaccumulator;
- int numdigits;
- accumulator = numdigits = 0;
- for (;;fs->fmtstr.ptr++, numdigits++) {
- if (fs->fmtstr.ptr >= fs->fmtstr.end)
- break;
- digitval = CH_TYPE_TODECIMAL(*fs->fmtstr.ptr);
- if (digitval < 0)
- break;
- /*
- This trick was copied from old Unicode format code. It's cute,
- but would really suck on an old machine with a slow divide
- implementation. Fortunately, in the normal case we do not
- expect too many digits.
- */
- oldaccumulator = accumulator;
- accumulator *= 10;
- if ((accumulator+10)/10 != oldaccumulator+1)
- return (int)SetError("field width or index value too large");
- accumulator += digitval;
- }
- *result = accumulator;
- return numdigits;
-}
-
-/*
get_specifier retrieves the part of the format string
between the colon and trailing }.
*/
@@ -471,7 +481,7 @@
isnumeric = (CH_TYPE_ISDECIMAL(*fs->fmtstr.ptr));
myobj = isnumeric ? fs->args : fs->keywords;
if (myobj == NULL)
- return SetError("No keyword arguments passed");
+ return SetError(fs, "Keyword not specified");
Py_INCREF(myobj);
for (isindex=1, expectclose=0, isargument=1;;) {
@@ -513,7 +523,7 @@
myobj = newobj;
if (expectclose)
if ((!check_fmtstr(fs)) || (*fs->fmtstr.ptr++ != ']')) {
- SetError("Expected ]");
+ SetError(fs, "Expected ]");
break;
}
if (!check_fmtstr(fs))
@@ -525,13 +535,14 @@
isargument = 0;
isindex = expectclose = (c == '[');
if (!isindex && (c != '.')) {
- SetError("Expected ., [, :, or }");
+ SetError(fs, "Expected ., [, :, or }");
break;
}
}
Py_DECREF(myobj);
return NULL;
}
+
/*
get_field_and_spec calls subfunctions to retrieve the
field object and optional specification string.
@@ -557,37 +568,21 @@
return NULL;
}
+/************************************************************************/
+/***************** Field rendering functions **************************/
+/************************************************************************/
+
/*
- user_format is invoked to format an object with a defined __format__
- attribute.
+ render_field is the main function in this section. It takes the field
+ object and field specification string generated by get_field_and_spec,
+ and renders the field into the output string.
+
+ The two main subfunctions of render_field are caller_render (which
+ calls the object-supplied __format__ hook), and internal_render, which
+ renders objects which don't have format hooks.
*/
-static int
-user_format(FmtState *fs, PyObject *__format__)
-{
- PyObject *myobj;
- int ok;
- myobj = fs->fieldspec.obj;
- if (myobj == NULL) {
- myobj = STROBJ_NEW(fs->fieldspec.ptr,
- fs->fieldspec.end - fs->fieldspec.ptr);
- if (myobj == NULL)
- return 0;
- fs->fieldspec.obj = myobj; /* Owned by our caller now */
- }
- /* XXX -- possible optimization to CallFunctionWithArgs */
- myobj = PyObject_CallFunction(__format__, "(O)", myobj);
- if (myobj == NULL)
- return 0;
- ok = STROBJ_CHECK(myobj);
- if (!ok)
- SetError("__format__ method did not return correct string type");
- else
- ok = output_data(fs, STROBJ_AS_PTR(myobj),
- STROBJ_GET_SIZE(myobj));
- Py_DECREF(myobj);
- return ok;
-}
+#if !DUMMY_FORMATTING
typedef struct {
CH_TYPE fill_char;
@@ -598,12 +593,36 @@
CH_TYPE type;
} DefaultFormat;
+/* returns true if this character is a specifier alignment token */
+Py_LOCAL_INLINE(int)
+alignment_token(CH_TYPE c)
+{
+ switch (c) {
+ case '<': case '>': case '=':
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/* returns true if this character is a sign element */
+Py_LOCAL_INLINE(int)
+sign_element(CH_TYPE c)
+{
+ switch (c) {
+ case ' ': case '+': case '-': case '(':
+ return 1;
+ default:
+ return 0;
+ }
+}
+
/*
parse the default specification
*/
static int
-parse_default_format(FmtState *fs, DefaultFormat *format)
+parse_internal_render(FmtState *fs, DefaultFormat *format)
{
Py_ssize_t index = 0;
Py_ssize_t specified_width;
@@ -682,7 +701,7 @@
remaining = spec_len - index;
if (remaining > 1) {
/* invalid conversion spec */
- SetError("Invalid conversion specification");
+ SetError(fs, "Invalid conversion specification");
return 0;
}
@@ -862,11 +881,12 @@
return NULL;
}
}
+#endif
/*
- default_format -- "Then a miracle occurs"
+ internal_render -- "Then a miracle occurs"
*/
-static int default_format(FmtState *fs, PyObject *fieldobj)
+static int internal_render(FmtState *fs, PyObject *fieldobj)
{
#if DUMMY_FORMATTING == 1
PyObject *myobj;
@@ -900,7 +920,7 @@
CH_TYPE prefix;
CH_TYPE suffix;
- if (!parse_default_format(fs, &format)) {
+ if (!parse_internal_render(fs, &format)) {
return 0;
}
@@ -921,7 +941,7 @@
conversion = conversion_function(format.type);
if (conversion == NULL) {
- SetError("Invalid conversion character");
+ SetError(fs, "Invalid conversion character");
return 0;
}
@@ -974,18 +994,50 @@
}
/*
- renderfield determines if the field object has a defined __format__
+ caller_render is invoked to format an object with a defined __format__
+ attribute.
+*/
+static int
+caller_render(FmtState *fs, PyObject *__format__)
+{
+ PyObject *myobj;
+ int ok;
+
+ myobj = fs->fieldspec.obj;
+ if (myobj == NULL) {
+ myobj = STROBJ_NEW(fs->fieldspec.ptr,
+ fs->fieldspec.end - fs->fieldspec.ptr);
+ if (myobj == NULL)
+ return 0;
+ fs->fieldspec.obj = myobj; /* Owned by our caller now */
+ }
+ /* XXX -- possible optimization to CallFunctionWithArgs */
+ myobj = PyObject_CallFunction(__format__, "(O)", myobj);
+ if (myobj == NULL)
+ return 0;
+ ok = STROBJ_CHECK(myobj);
+ if (!ok)
+ SetError(fs, "__format__ method did not return correct string type");
+ else
+ ok = output_data(fs, STROBJ_AS_PTR(myobj),
+ STROBJ_GET_SIZE(myobj));
+ Py_DECREF(myobj);
+ return ok;
+}
+
+/*
+ render_field determines if the field object has a defined __format__
method, and dispatches to the appropriate subfunction.
*/
static int
-renderfield(FmtState *fs, PyObject *fieldobj)
+render_field(FmtState *fs, PyObject *fieldobj)
{
int result;
SubString savefmt;
PyObject *__format__ = PyObject_GetAttrString(fieldobj, "__format__");
if (__format__ != NULL) {
- result = user_format(fs, __format__);
+ result = caller_render(fs, __format__);
Py_DECREF(__format__);
}
else {
@@ -1000,12 +1052,16 @@
savefmt = fs->fmtstr;
fs->fmtstr.ptr = fs->fieldspec.ptr;
fs->fmtstr.end = fs->fieldspec.end;
- result = default_format(fs, fieldobj);
+ result = internal_render(fs, fieldobj);
fs->fmtstr = savefmt;
}
return result;
}
+/************************************************************************/
+/******* Output string allocation and escape-to-markup processing ******/
+/************************************************************************/
+
/*
do_markup is the main program loop. It rummages through
the format string, looking for escapes to markup, and
@@ -1031,28 +1087,29 @@
fmtstr.ptr++;
count--;
}
+ fmtstr.ptr++;
count = total - count;
total -= count;
- doubled = (total > 1) && (fmtstr.ptr[1] == c);
+ doubled = (total > 1) && (*fmtstr.ptr == c);
if (doubled) {
output_data(fs, start, count+1);
- fmtstr.ptr += 2;
+ fmtstr.ptr++;
continue;
} else if (count)
output_data(fs, start, count);
- if (total < 2) {
- ok = !total ||
- (int)SetError("Invalid format string -- { or } at end");
- break;
- }
+ fs->fmtstr.ptr = fmtstr.ptr;
if (c == '}') {
- SetError("Invalid format string -- single } encountered");
+ SetError(fs, "Single } encountered");
ok = 0;
break;
}
- fs->fmtstr.ptr = fmtstr.ptr + 1;
+ if (total < 2) {
+ ok = !total ||
+ (int)SetError(fs, "Single { encountered");
+ break;
+ }
myobj = get_field_and_spec(fs);
- ok = (myobj != NULL) && renderfield(fs, myobj);
+ ok = (myobj != NULL) && render_field(fs, myobj);
Py_XDECREF(fs->fieldspec.obj);
Py_XDECREF(myobj);
if (!ok)
@@ -1103,7 +1160,7 @@
SubStringObj saveoutstr = fs->outstr;
int saveincrement = fs->size_increment;
if (--(fs->max_recursion) < 0)
- return SetError("Max string recursion exceeded");
+ return SetError(fs, "Max string recursion exceeded");
result = do_format(fs);
fs->max_recursion++;
fs->outstr = saveoutstr;
@@ -1111,6 +1168,10 @@
return result;
}
+/************************************************************************/
+/*********** Main function, option processing, setup and teardown ******/
+/************************************************************************/
+
static int
get_options(PyObject *keywords, FmtState *fs)
{
@@ -1181,7 +1242,7 @@
else
fs->arg_param_offset = 0;
fs->args = args;
- fs->fmtstr.ptr = STROBJ_AS_PTR(self);
+ fs->fmtstr.ptr = fs->fmtstart = STROBJ_AS_PTR(self);
fs->fmtstr.end = fs->fmtstr.ptr + STROBJ_GET_SIZE(self);
return 1;
}
@@ -1196,7 +1257,7 @@
ok = (PySet_GET_SIZE(used) <= 1) && !fs->positional_arg_set;
if (!ok) {
Py_DECREF(result);
- result = SetError("Not all arguments consumed");
+ result = SetError(fs, "Not all arguments consumed");
}
}
Py_XDECREF(used);
More information about the Python-checkins
mailing list