[Python-3000-checkins] r57556 - in python/branches/py3k: Include/unicodeobject.h Lib/string.py Objects/unicodeobject.c Python/sysmodule.c

eric.smith python-3000-checkins at python.org
Mon Aug 27 13:28:18 CEST 2007


Author: eric.smith
Date: Mon Aug 27 13:28:18 2007
New Revision: 57556

Modified:
   python/branches/py3k/Include/unicodeobject.h
   python/branches/py3k/Lib/string.py
   python/branches/py3k/Objects/unicodeobject.c
   python/branches/py3k/Python/sysmodule.c
Log:
PEP 3101: Removed _formatter_xxx routines from sysmodule, and made them unicode methods instead (per GvR suggestion).

Modified: python/branches/py3k/Include/unicodeobject.h
==============================================================================
--- python/branches/py3k/Include/unicodeobject.h	(original)
+++ python/branches/py3k/Include/unicodeobject.h	Mon Aug 27 13:28:18 2007
@@ -1437,9 +1437,6 @@
     const Py_UNICODE *s, Py_UNICODE c
     );
 
-PyObject *_PyUnicode_FormatterIterator(PyObject *str);
-PyObject *_PyUnicode_FormatterFieldNameSplit(PyObject *field_name);
-
 #ifdef __cplusplus
 }
 #endif

Modified: python/branches/py3k/Lib/string.py
==============================================================================
--- python/branches/py3k/Lib/string.py	(original)
+++ python/branches/py3k/Lib/string.py	Mon Aug 27 13:28:18 2007
@@ -200,10 +200,8 @@
 # exposed here via the sys module.  sys was chosen because it's always
 # available and doesn't have to be dynamically loaded.
 
-# The overall parser is implemented in sys._formatter_parser.
-# The field name parser is implemented in sys._formatter_field_name_split
-
-from sys import _formatter_parser, _formatter_field_name_split
+# The overall parser is implemented in str._formatter_parser.
+# The field name parser is implemented in str._formatter_field_name_split
 
 class Formatter:
     def format(self, format_string, *args, **kwargs):
@@ -213,13 +211,13 @@
         used_args = set()
         result = []
         for (is_markup, literal, field_name, format_spec, conversion) in \
-                _formatter_parser(format_string):
+                format_string._formatter_parser():
             if is_markup:
                 # given the field_name, find the object it references
 
                 # split it into the first part, and and iterator that
                 #  looks over the rest
-                first, rest = _formatter_field_name_split(field_name)
+                first, rest = field_name._formatter_field_name_split()
 
                 used_args.add(first)
                 obj = self.get_value(first, args, kwargs)

Modified: python/branches/py3k/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k/Objects/unicodeobject.c	(original)
+++ python/branches/py3k/Objects/unicodeobject.c	Mon Aug 27 13:28:18 2007
@@ -598,7 +598,7 @@
 			 */
 			if ((*f == 'l' || *f == 'z') &&
 					(f[1] == 'd' || f[1] == 'u'))
-				++f;
+			++f;
 
 			switch (*f) {
 			case 'c':
@@ -7981,1502 +7981,1509 @@
 }
 
 
-static PyObject *
-unicode_getnewargs(PyUnicodeObject *v)
-{
-	return Py_BuildValue("(u#)", v->str, v->length);
-}
-
-
-static PyMethodDef unicode_methods[] = {
+/********************* Formatter Iterator ************************/
 
-    /* Order is according to common usage: often used methods should
-       appear first, since lookup is done sequentially. */
+/* this is used to implement string.Formatter.vparse().  it exists so
+   Formatter can share code with the built in unicode.format()
+   method */
 
-    {"encode", (PyCFunction) unicode_encode, METH_VARARGS, encode__doc__},
-    {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__},
-    {"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__},
-    {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
-    {"join", (PyCFunction) unicode_join, METH_O, join__doc__},
-    {"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__},
-    {"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__},
-    {"center", (PyCFunction) unicode_center, METH_VARARGS, center__doc__},
-    {"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__},
-    {"expandtabs", (PyCFunction) unicode_expandtabs, METH_VARARGS, expandtabs__doc__},
-    {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__},
-    {"partition", (PyCFunction) unicode_partition, METH_O, partition__doc__},
-    {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__},
-    {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__},
-    {"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__},
-    {"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__},
-    {"decode", (PyCFunction) unicode_decode, METH_VARARGS, decode__doc__},
-/*  {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */
-    {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},
-    {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},
-    {"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__},
-    {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__},
-    {"rpartition", (PyCFunction) unicode_rpartition, METH_O, rpartition__doc__},
-    {"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS, splitlines__doc__},
-    {"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__},
-    {"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__},
-    {"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__},
-    {"upper", (PyCFunction) unicode_upper, METH_NOARGS, upper__doc__},
-    {"startswith", (PyCFunction) unicode_startswith, METH_VARARGS, startswith__doc__},
-    {"endswith", (PyCFunction) unicode_endswith, METH_VARARGS, endswith__doc__},
-    {"islower", (PyCFunction) unicode_islower, METH_NOARGS, islower__doc__},
-    {"isupper", (PyCFunction) unicode_isupper, METH_NOARGS, isupper__doc__},
-    {"istitle", (PyCFunction) unicode_istitle, METH_NOARGS, istitle__doc__},
-    {"isspace", (PyCFunction) unicode_isspace, METH_NOARGS, isspace__doc__},
-    {"isdecimal", (PyCFunction) unicode_isdecimal, METH_NOARGS, isdecimal__doc__},
-    {"isdigit", (PyCFunction) unicode_isdigit, METH_NOARGS, isdigit__doc__},
-    {"isnumeric", (PyCFunction) unicode_isnumeric, METH_NOARGS, isnumeric__doc__},
-    {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__},
-    {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__},
-    {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__},
-    {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__},
-    {"format", (PyCFunction) unicode_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
-    {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
-#if 0
-    {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
-#endif
+typedef struct {
+	PyObject_HEAD
 
-#if 0
-    /* This one is just used for debugging the implementation. */
-    {"freelistsize", (PyCFunction) unicode_freelistsize, METH_NOARGS},
-#endif
+        PyUnicodeObject *str;
 
-    {"__getnewargs__",	(PyCFunction)unicode_getnewargs, METH_NOARGS},
-    {NULL, NULL}
-};
+        MarkupIterator it_markup;
+} formatteriterobject;
 
-static PyObject *
-unicode_mod(PyObject *v, PyObject *w)
+static void
+formatteriter_dealloc(formatteriterobject *it)
 {
-       if (!PyUnicode_Check(v)) {
-               Py_INCREF(Py_NotImplemented);
-               return Py_NotImplemented;
-       }
-       return PyUnicode_Format(v, w);
+        Py_XDECREF(it->str);
+	PyObject_FREE(it);
 }
 
-static PyNumberMethods unicode_as_number = {
-	0,				/*nb_add*/
-	0,				/*nb_subtract*/
-	0,				/*nb_multiply*/
-	unicode_mod,			/*nb_remainder*/
-};
-
-static PySequenceMethods unicode_as_sequence = {
-    (lenfunc) unicode_length, 		/* sq_length */
-    PyUnicode_Concat,		 	/* sq_concat */
-    (ssizeargfunc) unicode_repeat, 	/* sq_repeat */
-    (ssizeargfunc) unicode_getitem, 	/* sq_item */
-    (ssizessizeargfunc) unicode_slice, 	/* sq_slice */
-    0, 					/* sq_ass_item */
-    0, 					/* sq_ass_slice */
-    PyUnicode_Contains, 		/* sq_contains */
-};
-
-static PyObject*
-unicode_subscript(PyUnicodeObject* self, PyObject* item)
+/* returns a tuple:
+   (is_markup, literal, field_name, format_spec, conversion)
+   if is_markup == True:
+        literal is None
+        field_name is the string before the ':'
+        format_spec is the string after the ':'
+        conversion is either None, or the string after the '!'
+   if is_markup == False:
+        literal is the literal string
+        field_name is None
+        format_spec is None
+        conversion is None
+*/
+static PyObject *
+formatteriter_next(formatteriterobject *it)
 {
-    if (PyIndex_Check(item)) {
-        Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
-        if (i == -1 && PyErr_Occurred())
-            return NULL;
-        if (i < 0)
-            i += PyUnicode_GET_SIZE(self);
-        return unicode_getitem(self, i);
-    } else if (PySlice_Check(item)) {
-        Py_ssize_t start, stop, step, slicelength, cur, i;
-        Py_UNICODE* source_buf;
-        Py_UNICODE* result_buf;
-        PyObject* result;
-
-        if (PySlice_GetIndicesEx((PySliceObject*)item, PyUnicode_GET_SIZE(self),
-				 &start, &stop, &step, &slicelength) < 0) {
-            return NULL;
-        }
+        SubString literal;
+        SubString field_name;
+        SubString format_spec;
+        Py_UNICODE conversion;
+        int is_markup;
+        int format_spec_needs_expanding;
+        int result = MarkupIterator_next(&it->it_markup, &is_markup, &literal,
+                                         &field_name, &format_spec, &conversion,
+                                         &format_spec_needs_expanding);
 
-        if (slicelength <= 0) {
-            return PyUnicode_FromUnicode(NULL, 0);
+        /* all of the SubString objects point into it->str, so no
+           memory management needs to be done on them */
+        assert(0 <= result && result <= 2);
+        if (result == 0) {
+                /* error has already been set */
+                return NULL;
+        } else if (result == 1) {
+                /* end of iterator */
+                return NULL;
         } else {
-            source_buf = PyUnicode_AS_UNICODE((PyObject*)self);
-            result_buf = (Py_UNICODE *)PyMem_MALLOC(slicelength*
-                                                    sizeof(Py_UNICODE));
-	    
-	    if (result_buf == NULL)
-		    return PyErr_NoMemory();
+                PyObject *is_markup_bool = NULL;
+                PyObject *literal_str = NULL;
+                PyObject *field_name_str = NULL;
+                PyObject *format_spec_str = NULL;
+                PyObject *conversion_str = NULL;
+                PyObject *tuple = NULL;
 
-            for (cur = start, i = 0; i < slicelength; cur += step, i++) {
-                result_buf[i] = source_buf[cur];
-            }
+                is_markup_bool = PyBool_FromLong(is_markup);
+                if (!is_markup_bool)
+                    return NULL;
 
-            result = PyUnicode_FromUnicode(result_buf, slicelength);
-            PyMem_FREE(result_buf);
-            return result;
-        }
-    } else {
-        PyErr_SetString(PyExc_TypeError, "string indices must be integers");
-        return NULL;
-    }
-}
+                if (is_markup) {
+                        /* field_name, format_spec, and conversion are
+                           returned */
+                        literal_str = Py_None;
+                        Py_INCREF(literal_str);
 
-static PyMappingMethods unicode_as_mapping = {
-    (lenfunc)unicode_length,		/* mp_length */
-    (binaryfunc)unicode_subscript,	/* mp_subscript */
-    (objobjargproc)0,			/* mp_ass_subscript */
-};
+                        field_name_str = SubString_new_object(&field_name);
+                        if (field_name_str == NULL)
+                                goto error;
 
+                        format_spec_str = SubString_new_object(&format_spec);
+                        if (format_spec_str == NULL)
+                                goto error;
 
-static int
-unicode_buffer_getbuffer(PyUnicodeObject *self, PyBuffer *view, int flags)
-{
+                        /* if the conversion is not specified, return
+                           a None, otherwise create a one length
+                           string with the conversion character */
+                        if (conversion == '\0') {
+                                conversion_str = Py_None;
+                                Py_INCREF(conversion_str);
+                        } else
+                            conversion_str = PyUnicode_FromUnicode(&conversion,
+                                                                   1);
+                        if (conversion_str == NULL)
+                                goto error;
+                } else {
+                        /* only literal is returned */
+                        literal_str = SubString_new_object(&literal);
+                        if (literal_str == NULL)
+                                goto error;
 
-    if (flags & PyBUF_CHARACTER) {
-        PyObject *str;
-        
-        str = _PyUnicode_AsDefaultEncodedString((PyObject *)self, NULL);
-        if (str == NULL) return -1;
-        return PyBuffer_FillInfo(view, (void *)PyString_AS_STRING(str),
-                                 PyString_GET_SIZE(str), 1, flags);
-    }
-    else {
-        return PyBuffer_FillInfo(view, (void *)self->str, 
-                                 PyUnicode_GET_DATA_SIZE(self), 1, flags);
-    }
-}
+                        field_name_str = Py_None;
+                        format_spec_str = Py_None;
+                        conversion_str = Py_None;
 
+                        Py_INCREF(field_name_str);
+                        Py_INCREF(format_spec_str);
+                        Py_INCREF(conversion_str);
+                }
+                tuple = PyTuple_Pack(5, is_markup_bool, literal_str,
+                                     field_name_str, format_spec_str,
+                                     conversion_str);
+        error:
+                Py_XDECREF(is_markup_bool);
+                Py_XDECREF(literal_str);
+                Py_XDECREF(field_name_str);
+                Py_XDECREF(format_spec_str);
+                Py_XDECREF(conversion_str);
+                return tuple;
+        }
+}
 
-/* Helpers for PyUnicode_Format() */
+static PyMethodDef formatteriter_methods[] = {
+ 	{NULL,		NULL}		/* sentinel */
+};
 
-static PyObject *
-getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
-{
-    Py_ssize_t argidx = *p_argidx;
-    if (argidx < arglen) {
-	(*p_argidx)++;
-	if (arglen < 0)
-	    return args;
-	else
-	    return PyTuple_GetItem(args, argidx);
-    }
-    PyErr_SetString(PyExc_TypeError,
-		    "not enough arguments for format string");
-    return NULL;
-}
-
-#define F_LJUST (1<<0)
-#define F_SIGN	(1<<1)
-#define F_BLANK (1<<2)
-#define F_ALT	(1<<3)
-#define F_ZERO	(1<<4)
+PyTypeObject PyFormatterIter_Type = {
+	PyVarObject_HEAD_INIT(&PyType_Type, 0)
+	"formatteriterator",			/* tp_name */
+	sizeof(formatteriterobject),		/* tp_basicsize */
+	0,					/* tp_itemsize */
+	/* methods */
+	(destructor)formatteriter_dealloc,	/* tp_dealloc */
+	0,					/* tp_print */
+	0,					/* tp_getattr */
+	0,					/* tp_setattr */
+	0,					/* tp_compare */
+	0,					/* tp_repr */
+	0,					/* tp_as_number */
+	0,					/* tp_as_sequence */
+	0,					/* tp_as_mapping */
+	0,					/* tp_hash */
+	0,					/* tp_call */
+	0,					/* tp_str */
+	PyObject_GenericGetAttr,		/* tp_getattro */
+	0,					/* tp_setattro */
+	0,					/* tp_as_buffer */
+	Py_TPFLAGS_DEFAULT,			/* tp_flags */
+	0,					/* tp_doc */
+	0,					/* tp_traverse */
+	0,					/* tp_clear */
+	0,					/* tp_richcompare */
+	0,					/* tp_weaklistoffset */
+	PyObject_SelfIter,			/* tp_iter */
+	(iternextfunc)formatteriter_next,	/* tp_iternext */
+	formatteriter_methods,			/* tp_methods */
+	0,
+};
 
-static Py_ssize_t
-strtounicode(Py_UNICODE *buffer, const char *charbuffer)
+/* unicode_formatter_parser is used to implement
+   string.Formatter.vformat.  it parses a string and returns tuples
+   describing the parsed elements.  It's a wrapper around
+   stringlib/string_format.h's MarkupIterator */
+static PyObject *
+unicode_formatter_parser(PyUnicodeObject *self)
 {
-    register Py_ssize_t i;
-    Py_ssize_t len = strlen(charbuffer);
-    for (i = len - 1; i >= 0; i--)
-	buffer[i] = (Py_UNICODE) charbuffer[i];
+        formatteriterobject *it;
 
-    return len;
-}
+	it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
+	if (it == NULL)
+		return NULL;
 
-static int
-doubletounicode(Py_UNICODE *buffer, size_t len, const char *format, double x)
-{
-    Py_ssize_t result;
+        /* take ownership, give the object to the iterator */
+        Py_INCREF(self);
+        it->str = self;
 
-    PyOS_ascii_formatd((char *)buffer, len, format, x);
-    result = strtounicode(buffer, (char *)buffer);
-    return Py_SAFE_DOWNCAST(result, Py_ssize_t, int);
+        /* initialize the contained MarkupIterator */
+        MarkupIterator_init(&it->it_markup,
+                            PyUnicode_AS_UNICODE(self),
+                            PyUnicode_GET_SIZE(self));
+
+	return (PyObject *)it;
 }
+/***************** end Formatter Iterator ************************/
+/********************* FieldName Iterator ************************/
 
-static int
-longtounicode(Py_UNICODE *buffer, size_t len, const char *format, long x)
-{
-    Py_ssize_t result;
+/* this is used to implement string.Formatter.vparse().  it parses
+   the field name into attribute and item values. */
 
-    PyOS_snprintf((char *)buffer, len, format, x);
-    result = strtounicode(buffer, (char *)buffer);
-    return Py_SAFE_DOWNCAST(result, Py_ssize_t, int);
-}
+typedef struct {
+	PyObject_HEAD
 
-/* XXX To save some code duplication, formatfloat/long/int could have been
-   shared with stringobject.c, converting from 8-bit to Unicode after the
-   formatting is done. */
+        PyUnicodeObject *str;
 
-static int
-formatfloat(Py_UNICODE *buf,
-	    size_t buflen,
-	    int flags,
-	    int prec,
-	    int type,
-	    PyObject *v)
+        FieldNameIterator it_field;
+} fieldnameiterobject;
+
+static void
+fieldnameiter_dealloc(fieldnameiterobject *it)
 {
-    /* fmt = '%#.' + `prec` + `type`
-       worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
-    char fmt[20];
-    double x;
+        Py_XDECREF(it->str);
+	PyObject_FREE(it);
+}
 
-    x = PyFloat_AsDouble(v);
-    if (x == -1.0 && PyErr_Occurred())
-	return -1;
-    if (prec < 0)
-	prec = 6;
-    if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
-	type = 'g';
-    /* Worst case length calc to ensure no buffer overrun:
+/* returns a tuple:
+   (is_attr, value)
+   is_attr is true if we used attribute syntax (e.g., '.foo')
+              false if we used index syntax (e.g., '[foo]')
+   value is an integer or string
+*/
+static PyObject *
+fieldnameiter_next(fieldnameiterobject *it)
+{
+        int result;
+        int is_attr;
+        Py_ssize_t idx;
+        SubString name;
 
-       'g' formats:
-	 fmt = %#.<prec>g
-	 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
-	    for any double rep.)
-	 len = 1 + prec + 1 + 2 + 5 = 9 + prec
+        result = FieldNameIterator_next(&it->it_field, &is_attr,
+                                            &idx, &name);
+        if (result == 0 || result == 1) {
+                /* if 0, error has already been set, if 1, iterator is empty */
+                return NULL;
+        } else {
+                PyObject* result = NULL;
+                PyObject* is_attr_obj = NULL;
+                PyObject* obj = NULL;
 
-       'f' formats:
-	 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
-	 len = 1 + 50 + 1 + prec = 52 + prec
+                is_attr_obj = PyBool_FromLong(is_attr);
+                if (is_attr_obj == NULL)
+                        goto error;
 
-       If prec=0 the effective precision is 1 (the leading digit is
-       always given), therefore increase the length by one.
+                /* either an integer or a string */
+                if (idx != -1)
+                        obj = PyInt_FromSsize_t(idx);
+                else
+                        obj = STRINGLIB_NEW(name.ptr, name.end - name.ptr);
+                if (obj == NULL)
+                        goto error;
 
-    */
-    if (((type == 'g' || type == 'G') && 
-          buflen <= (size_t)10 + (size_t)prec) ||
-	(type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
-	PyErr_SetString(PyExc_OverflowError,
-			"formatted float is too long (precision too large?)");
-	return -1;
-    }
-    PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
-		  (flags&F_ALT) ? "#" : "",
-		  prec, type);
-    return doubletounicode(buf, buflen, fmt, x);
-}
+               /* return a tuple of values */
+                result = PyTuple_Pack(2, is_attr_obj, obj);
+                if (result == NULL)
+                        goto error;
 
-static PyObject*
-formatlong(PyObject *val, int flags, int prec, int type)
-{
-	char *buf;
-	int len;
-	PyObject *str; /* temporary string object. */
-	PyObject *result;
+                return result;
 
-	str = _PyString_FormatLong(val, flags, prec, type, &buf, &len);
-	if (!str)
-		return NULL;
-	result = PyUnicode_FromStringAndSize(buf, len);
-	Py_DECREF(str);
-	return result;
+        error:
+                Py_XDECREF(result);
+                Py_XDECREF(is_attr_obj);
+                Py_XDECREF(obj);
+                return NULL;
+        }
+        return NULL;
 }
 
-static int
-formatint(Py_UNICODE *buf,
-	  size_t buflen,
-	  int flags,
-	  int prec,
-	  int type,
-	  PyObject *v)
+static PyMethodDef fieldnameiter_methods[] = {
+ 	{NULL,		NULL}		/* sentinel */
+};
+
+static PyTypeObject PyFieldNameIter_Type = {
+	PyVarObject_HEAD_INIT(&PyType_Type, 0)
+	"fieldnameiterator",			/* tp_name */
+	sizeof(fieldnameiterobject),		/* tp_basicsize */
+	0,					/* tp_itemsize */
+	/* methods */
+	(destructor)fieldnameiter_dealloc,	/* tp_dealloc */
+	0,					/* tp_print */
+	0,					/* tp_getattr */
+	0,					/* tp_setattr */
+	0,					/* tp_compare */
+	0,					/* tp_repr */
+	0,					/* tp_as_number */
+	0,					/* tp_as_sequence */
+	0,					/* tp_as_mapping */
+	0,					/* tp_hash */
+	0,					/* tp_call */
+	0,					/* tp_str */
+	PyObject_GenericGetAttr,		/* tp_getattro */
+	0,					/* tp_setattro */
+	0,					/* tp_as_buffer */
+	Py_TPFLAGS_DEFAULT,			/* tp_flags */
+	0,					/* tp_doc */
+	0,					/* tp_traverse */
+	0,					/* tp_clear */
+	0,					/* tp_richcompare */
+	0,					/* tp_weaklistoffset */
+	PyObject_SelfIter,			/* tp_iter */
+	(iternextfunc)fieldnameiter_next,	/* tp_iternext */
+	fieldnameiter_methods,			/* tp_methods */
+        0};
+
+/* unicode_formatter_field_name_split is used to implement
+   string.Formatter.vformat.  it takes a PEP 3101 "field name", and
+   returns a tuple of (first, rest): "first", the part before the
+   first '.' or '['; and "rest", an iterator for the rest of the field
+   name.  it's a wrapper around stringlib/string_format.h's
+   field_name_split.  The iterator it returns is a
+   FieldNameIterator */
+static PyObject *
+unicode_formatter_field_name_split(PyUnicodeObject *self)
 {
-    /* fmt = '%#.' + `prec` + 'l' + `type`
-     * worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
-     *                     + 1 + 1
-     *                   = 24
-     */
-    char fmt[64]; /* plenty big enough! */
-    char *sign;
-    long x;
+        SubString first;
+        Py_ssize_t first_idx;
+        fieldnameiterobject *it;
 
-    x = PyInt_AsLong(v);
-    if (x == -1 && PyErr_Occurred())
-        return -1;
-    if (x < 0 && type == 'u') {
-        type = 'd';
-    }
-    if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
-        sign = "-";
-    else
-        sign = "";
-    if (prec < 0)
-        prec = 1;
+        PyObject *first_obj = NULL;
+        PyObject *result = NULL;
 
-    /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
-     * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
-     */
-    if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
-        PyErr_SetString(PyExc_OverflowError,
-    	        "formatted integer is too long (precision too large?)");
-        return -1;
-    }
+        it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
+        if (it == NULL)
+                return NULL;
 
-    if ((flags & F_ALT) &&
-        (type == 'x' || type == 'X' || type == 'o')) {
-        /* When converting under %#o, %#x or %#X, there are a number
-         * of issues that cause pain:
-	 * - for %#o, we want a different base marker than C
-         * - when 0 is being converted, the C standard leaves off
-         *   the '0x' or '0X', which is inconsistent with other
-         *   %#x/%#X conversions and inconsistent with Python's
-         *   hex() function
-         * - there are platforms that violate the standard and
-         *   convert 0 with the '0x' or '0X'
-         *   (Metrowerks, Compaq Tru64)
-         * - there are platforms that give '0x' when converting
-         *   under %#X, but convert 0 in accordance with the
-         *   standard (OS/2 EMX)
-         *
-         * We can achieve the desired consistency by inserting our
-         * own '0x' or '0X' prefix, and substituting %x/%X in place
-         * of %#x/%#X.
-         *
-         * Note that this is the same approach as used in
-         * formatint() in stringobject.c
-         */
-        PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
-                      sign, type, prec, type);
-    }
-    else {
-        PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
-                      sign, (flags&F_ALT) ? "#" : "",
-                      prec, type);
-    }
-    if (sign[0])
-        return longtounicode(buf, buflen, fmt, -x);
-    else
-        return longtounicode(buf, buflen, fmt, x);
-}
+        /* take ownership, give the object to the iterator.  this is
+           just to keep the field_name alive */
+        Py_INCREF(self);
+        it->str = self;
 
-static int
-formatchar(Py_UNICODE *buf,
-           size_t buflen,
-           PyObject *v)
-{
-    /* presume that the buffer is at least 2 characters long */
-    if (PyUnicode_Check(v)) {
-	if (PyUnicode_GET_SIZE(v) != 1)
-	    goto onError;
-	buf[0] = PyUnicode_AS_UNICODE(v)[0];
-    }
+        if (!field_name_split(STRINGLIB_STR(self),
+                              STRINGLIB_LEN(self),
+                              &first, &first_idx, &it->it_field))
+                goto error;
 
-    else if (PyString_Check(v)) {
-	if (PyString_GET_SIZE(v) != 1)
-	    goto onError;
-	buf[0] = (Py_UNICODE)PyString_AS_STRING(v)[0];
-    }
+        /* first becomes an integer, if possible, else a string */
+        if (first_idx != -1)
+                first_obj = PyInt_FromSsize_t(first_idx);
+        else
+                /* convert "first" into a string object */
+                first_obj = STRINGLIB_NEW(first.ptr, first.end - first.ptr);
+        if (first_obj == NULL)
+                goto error;
 
-    else {
-	/* Integer input truncated to a character */
-        long x;
-	x = PyInt_AsLong(v);
-	if (x == -1 && PyErr_Occurred())
-	    goto onError;
-#ifdef Py_UNICODE_WIDE
-	if (x < 0 || x > 0x10ffff) {
-	    PyErr_SetString(PyExc_OverflowError,
-			    "%c arg not in range(0x110000) "
-			    "(wide Python build)");
-	    return -1;
-	}
-#else
-	if (x < 0 || x > 0xffff) {
-	    PyErr_SetString(PyExc_OverflowError,
-			    "%c arg not in range(0x10000) "
-			    "(narrow Python build)");
-	    return -1;
-	}
-#endif
-	buf[0] = (Py_UNICODE) x;
-    }
-    buf[1] = '\0';
-    return 1;
+        /* return a tuple of values */
+        result = PyTuple_Pack(2, first_obj, it);
 
- onError:
-    PyErr_SetString(PyExc_TypeError,
-		    "%c requires int or char");
-    return -1;
+error:
+        Py_XDECREF(it);
+        Py_XDECREF(first_obj);
+        return result;
 }
+/***************** end FieldName Iterator ************************/
 
-/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
-
-   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
-   chars are formatted. XXX This is a magic number. Each formatting
-   routine does bounds checking to ensure no overflow, but a better
-   solution may be to malloc a buffer of appropriate size for each
-   format. For now, the current solution is sufficient.
-*/
-#define FORMATBUFLEN (size_t)120
 
-PyObject *PyUnicode_Format(PyObject *format,
-			   PyObject *args)
+static PyObject *
+unicode_getnewargs(PyUnicodeObject *v)
 {
-    Py_UNICODE *fmt, *res;
-    Py_ssize_t fmtcnt, rescnt, reslen, arglen, argidx;
-    int args_owned = 0;
-    PyUnicodeObject *result = NULL;
-    PyObject *dict = NULL;
-    PyObject *uformat;
+	return Py_BuildValue("(u#)", v->str, v->length);
+}
 
-    if (format == NULL || args == NULL) {
-	PyErr_BadInternalCall();
-	return NULL;
-    }
-    uformat = PyUnicode_FromObject(format);
-    if (uformat == NULL)
-	return NULL;
-    fmt = PyUnicode_AS_UNICODE(uformat);
-    fmtcnt = PyUnicode_GET_SIZE(uformat);
 
-    reslen = rescnt = fmtcnt + 100;
-    result = _PyUnicode_New(reslen);
-    if (result == NULL)
-	goto onError;
-    res = PyUnicode_AS_UNICODE(result);
+static PyMethodDef unicode_methods[] = {
 
-    if (PyTuple_Check(args)) {
-	arglen = PyTuple_Size(args);
-	argidx = 0;
-    }
-    else {
-	arglen = -1;
-	argidx = -2;
-    }
-    if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
-        !PyObject_TypeCheck(args, &PyBaseString_Type))
-	dict = args;
+    /* Order is according to common usage: often used methods should
+       appear first, since lookup is done sequentially. */
 
-    while (--fmtcnt >= 0) {
-	if (*fmt != '%') {
-	    if (--rescnt < 0) {
-		rescnt = fmtcnt + 100;
-		reslen += rescnt;
-		if (_PyUnicode_Resize(&result, reslen) < 0)
-		    goto onError;
-		res = PyUnicode_AS_UNICODE(result) + reslen - rescnt;
-		--rescnt;
-	    }
-	    *res++ = *fmt++;
-	}
-	else {
-	    /* Got a format specifier */
-	    int flags = 0;
-	    Py_ssize_t width = -1;
-	    int prec = -1;
-	    Py_UNICODE c = '\0';
-	    Py_UNICODE fill;
-	    PyObject *v = NULL;
-	    PyObject *temp = NULL;
-	    Py_UNICODE *pbuf;
-	    Py_UNICODE sign;
-	    Py_ssize_t len;
-	    Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
+    {"encode", (PyCFunction) unicode_encode, METH_VARARGS, encode__doc__},
+    {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__},
+    {"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__},
+    {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
+    {"join", (PyCFunction) unicode_join, METH_O, join__doc__},
+    {"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__},
+    {"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__},
+    {"center", (PyCFunction) unicode_center, METH_VARARGS, center__doc__},
+    {"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__},
+    {"expandtabs", (PyCFunction) unicode_expandtabs, METH_VARARGS, expandtabs__doc__},
+    {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__},
+    {"partition", (PyCFunction) unicode_partition, METH_O, partition__doc__},
+    {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__},
+    {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__},
+    {"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__},
+    {"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__},
+    {"decode", (PyCFunction) unicode_decode, METH_VARARGS, decode__doc__},
+/*  {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */
+    {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},
+    {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},
+    {"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__},
+    {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__},
+    {"rpartition", (PyCFunction) unicode_rpartition, METH_O, rpartition__doc__},
+    {"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS, splitlines__doc__},
+    {"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__},
+    {"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__},
+    {"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__},
+    {"upper", (PyCFunction) unicode_upper, METH_NOARGS, upper__doc__},
+    {"startswith", (PyCFunction) unicode_startswith, METH_VARARGS, startswith__doc__},
+    {"endswith", (PyCFunction) unicode_endswith, METH_VARARGS, endswith__doc__},
+    {"islower", (PyCFunction) unicode_islower, METH_NOARGS, islower__doc__},
+    {"isupper", (PyCFunction) unicode_isupper, METH_NOARGS, isupper__doc__},
+    {"istitle", (PyCFunction) unicode_istitle, METH_NOARGS, istitle__doc__},
+    {"isspace", (PyCFunction) unicode_isspace, METH_NOARGS, isspace__doc__},
+    {"isdecimal", (PyCFunction) unicode_isdecimal, METH_NOARGS, isdecimal__doc__},
+    {"isdigit", (PyCFunction) unicode_isdigit, METH_NOARGS, isdigit__doc__},
+    {"isnumeric", (PyCFunction) unicode_isnumeric, METH_NOARGS, isnumeric__doc__},
+    {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__},
+    {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__},
+    {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__},
+    {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__},
+    {"format", (PyCFunction) unicode_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
+    {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
+    {"_formatter_field_name_split", (PyCFunction) unicode_formatter_field_name_split, METH_NOARGS},
+    {"_formatter_parser", (PyCFunction) unicode_formatter_parser, METH_NOARGS},
+#if 0
+    {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
+#endif
 
-	    fmt++;
-	    if (*fmt == '(') {
-		Py_UNICODE *keystart;
-		Py_ssize_t keylen;
-		PyObject *key;
-		int pcount = 1;
+#if 0
+    /* This one is just used for debugging the implementation. */
+    {"freelistsize", (PyCFunction) unicode_freelistsize, METH_NOARGS},
+#endif
 
-		if (dict == NULL) {
-		    PyErr_SetString(PyExc_TypeError,
-				    "format requires a mapping");
-		    goto onError;
-		}
-		++fmt;
-		--fmtcnt;
-		keystart = fmt;
-		/* Skip over balanced parentheses */
-		while (pcount > 0 && --fmtcnt >= 0) {
-		    if (*fmt == ')')
-			--pcount;
-		    else if (*fmt == '(')
-			++pcount;
-		    fmt++;
-		}
-		keylen = fmt - keystart - 1;
-		if (fmtcnt < 0 || pcount > 0) {
-		    PyErr_SetString(PyExc_ValueError,
-				    "incomplete format key");
-		    goto onError;
-		}
-#if 0
-		/* keys are converted to strings using UTF-8 and
-		   then looked up since Python uses strings to hold
-		   variables names etc. in its namespaces and we
-		   wouldn't want to break common idioms. */
-		key = PyUnicode_EncodeUTF8(keystart,
-					   keylen,
-					   NULL);
-#else
-		key = PyUnicode_FromUnicode(keystart, keylen);
-#endif
-		if (key == NULL)
-		    goto onError;
-		if (args_owned) {
-		    Py_DECREF(args);
-		    args_owned = 0;
-		}
-		args = PyObject_GetItem(dict, key);
-		Py_DECREF(key);
-		if (args == NULL) {
-		    goto onError;
-		}
-		args_owned = 1;
-		arglen = -1;
-		argidx = -2;
-	    }
-	    while (--fmtcnt >= 0) {
-		switch (c = *fmt++) {
-		case '-': flags |= F_LJUST; continue;
-		case '+': flags |= F_SIGN; continue;
-		case ' ': flags |= F_BLANK; continue;
-		case '#': flags |= F_ALT; continue;
-		case '0': flags |= F_ZERO; continue;
-		}
-		break;
-	    }
-	    if (c == '*') {
-		v = getnextarg(args, arglen, &argidx);
-		if (v == NULL)
-		    goto onError;
-		if (!PyInt_Check(v)) {
-		    PyErr_SetString(PyExc_TypeError,
-				    "* wants int");
-		    goto onError;
-		}
-		width = PyInt_AsLong(v);
-		if (width == -1 && PyErr_Occurred())
-			goto onError;
-		if (width < 0) {
-		    flags |= F_LJUST;
-		    width = -width;
-		}
-		if (--fmtcnt >= 0)
-		    c = *fmt++;
-	    }
-	    else if (c >= '0' && c <= '9') {
-		width = c - '0';
-		while (--fmtcnt >= 0) {
-		    c = *fmt++;
-		    if (c < '0' || c > '9')
-			break;
-		    if ((width*10) / 10 != width) {
-			PyErr_SetString(PyExc_ValueError,
-					"width too big");
-			goto onError;
-		    }
-		    width = width*10 + (c - '0');
-		}
-	    }
-	    if (c == '.') {
-		prec = 0;
-		if (--fmtcnt >= 0)
-		    c = *fmt++;
-		if (c == '*') {
-		    v = getnextarg(args, arglen, &argidx);
-		    if (v == NULL)
-			goto onError;
-		    if (!PyInt_Check(v)) {
-			PyErr_SetString(PyExc_TypeError,
-					"* wants int");
-			goto onError;
-		    }
-		    prec = PyInt_AsLong(v);
-		    if (prec == -1 && PyErr_Occurred())
-			goto onError;
-		    if (prec < 0)
-			prec = 0;
-		    if (--fmtcnt >= 0)
-			c = *fmt++;
-		}
-		else if (c >= '0' && c <= '9') {
-		    prec = c - '0';
-		    while (--fmtcnt >= 0) {
-			c = Py_CHARMASK(*fmt++);
-			if (c < '0' || c > '9')
-			    break;
-			if ((prec*10) / 10 != prec) {
-			    PyErr_SetString(PyExc_ValueError,
-					    "prec too big");
-			    goto onError;
-			}
-			prec = prec*10 + (c - '0');
-		    }
-		}
-	    } /* prec */
-	    if (fmtcnt >= 0) {
-		if (c == 'h' || c == 'l' || c == 'L') {
-		    if (--fmtcnt >= 0)
-			c = *fmt++;
-		}
-	    }
-	    if (fmtcnt < 0) {
-		PyErr_SetString(PyExc_ValueError,
-				"incomplete format");
-		goto onError;
-	    }
-	    if (c != '%') {
-		v = getnextarg(args, arglen, &argidx);
-		if (v == NULL)
-		    goto onError;
-	    }
-	    sign = 0;
-	    fill = ' ';
-	    switch (c) {
-
-	    case '%':
-		pbuf = formatbuf;
-		/* presume that buffer length is at least 1 */
-		pbuf[0] = '%';
-		len = 1;
-		break;
-
-	    case 's':
-	    case 'r':
-		if (PyUnicode_Check(v) && c == 's') {
-		    temp = v;
-		    Py_INCREF(temp);
-		}
-		else {
-		    PyObject *unicode;
-		    if (c == 's')
-			temp = PyObject_Unicode(v);
-		    else
-			temp = PyObject_Repr(v);
-		    if (temp == NULL)
-			goto onError;
-                    if (PyUnicode_Check(temp))
-                        /* nothing to do */;
-                    else if (PyString_Check(temp)) {
-                        /* convert to string to Unicode */
-		        unicode = PyUnicode_Decode(PyString_AS_STRING(temp),
-						   PyString_GET_SIZE(temp),
-						   NULL,
-						   "strict");
-		        Py_DECREF(temp);
-		        temp = unicode;
-		        if (temp == NULL)
-			    goto onError;
-		    }
-		    else {
-			Py_DECREF(temp);
-			PyErr_SetString(PyExc_TypeError,
-					"%s argument has non-string str()");
-			goto onError;
-		    }
-		}
-		pbuf = PyUnicode_AS_UNICODE(temp);
-		len = PyUnicode_GET_SIZE(temp);
-		if (prec >= 0 && len > prec)
-		    len = prec;
-		break;
-
-	    case 'i':
-	    case 'd':
-	    case 'u':
-	    case 'o':
-	    case 'x':
-	    case 'X':
-		if (c == 'i')
-		    c = 'd';
-		if (PyLong_Check(v)) {
-		    temp = formatlong(v, flags, prec, c);
-		    if (!temp)
-			goto onError;
-		    pbuf = PyUnicode_AS_UNICODE(temp);
-		    len = PyUnicode_GET_SIZE(temp);
-		    sign = 1;
-		}
-		else {
-		    pbuf = formatbuf;
-		    len = formatint(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
-				    flags, prec, c, v);
-		    if (len < 0)
-			goto onError;
-		    sign = 1;
-		}
-		if (flags & F_ZERO)
-		    fill = '0';
-		break;
-
-	    case 'e':
-	    case 'E':
-	    case 'f':
-	    case 'F':
-	    case 'g':
-	    case 'G':
-		if (c == 'F')
-			c = 'f';
-		pbuf = formatbuf;
-		len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
-			flags, prec, c, v);
-		if (len < 0)
-		    goto onError;
-		sign = 1;
-		if (flags & F_ZERO)
-		    fill = '0';
-		break;
-
-	    case 'c':
-		pbuf = formatbuf;
-		len = formatchar(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE), v);
-		if (len < 0)
-		    goto onError;
-		break;
-
-	    default:
-		PyErr_Format(PyExc_ValueError,
-			     "unsupported format character '%c' (0x%x) "
-			     "at index %zd",
-			     (31<=c && c<=126) ? (char)c : '?',
-                             (int)c,
-			     (Py_ssize_t)(fmt - 1 -
-					  PyUnicode_AS_UNICODE(uformat)));
-		goto onError;
-	    }
-	    if (sign) {
-		if (*pbuf == '-' || *pbuf == '+') {
-		    sign = *pbuf++;
-		    len--;
-		}
-		else if (flags & F_SIGN)
-		    sign = '+';
-		else if (flags & F_BLANK)
-		    sign = ' ';
-		else
-		    sign = 0;
-	    }
-	    if (width < len)
-		width = len;
-	    if (rescnt - (sign != 0) < width) {
-		reslen -= rescnt;
-		rescnt = width + fmtcnt + 100;
-		reslen += rescnt;
-		if (reslen < 0) {
-		    Py_XDECREF(temp);
-		    PyErr_NoMemory();
-		    goto onError;
-		}
-		if (_PyUnicode_Resize(&result, reslen) < 0) {
-		    Py_XDECREF(temp);
-		    goto onError;
-		}
-		res = PyUnicode_AS_UNICODE(result)
-		    + reslen - rescnt;
-	    }
-	    if (sign) {
-		if (fill != ' ')
-		    *res++ = sign;
-		rescnt--;
-		if (width > len)
-		    width--;
-	    }
-	    if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
-		assert(pbuf[0] == '0');
-		assert(pbuf[1] == c);
-		if (fill != ' ') {
-		    *res++ = *pbuf++;
-		    *res++ = *pbuf++;
-		}
-		rescnt -= 2;
-		width -= 2;
-		if (width < 0)
-		    width = 0;
-		len -= 2;
-	    }
-	    if (width > len && !(flags & F_LJUST)) {
-		do {
-		    --rescnt;
-		    *res++ = fill;
-		} while (--width > len);
-	    }
-	    if (fill == ' ') {
-		if (sign)
-		    *res++ = sign;
-		if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
-		    assert(pbuf[0] == '0');
-		    assert(pbuf[1] == c);
-		    *res++ = *pbuf++;
-		    *res++ = *pbuf++;
-		}
-	    }
-	    Py_UNICODE_COPY(res, pbuf, len);
-	    res += len;
-	    rescnt -= len;
-	    while (--width >= len) {
-		--rescnt;
-		*res++ = ' ';
-	    }
-	    if (dict && (argidx < arglen) && c != '%') {
-		PyErr_SetString(PyExc_TypeError,
-				"not all arguments converted during string formatting");
-                Py_XDECREF(temp);
-		goto onError;
-	    }
-	    Py_XDECREF(temp);
-	} /* '%' */
-    } /* until end */
-    if (argidx < arglen && !dict) {
-	PyErr_SetString(PyExc_TypeError,
-			"not all arguments converted during string formatting");
-	goto onError;
-    }
+    {"__getnewargs__",	(PyCFunction)unicode_getnewargs, METH_NOARGS},
+    {NULL, NULL}
+};
 
-    if (_PyUnicode_Resize(&result, reslen - rescnt) < 0)
-	goto onError;
-    if (args_owned) {
-	Py_DECREF(args);
-    }
-    Py_DECREF(uformat);
-    return (PyObject *)result;
+static PyObject *
+unicode_mod(PyObject *v, PyObject *w)
+{
+       if (!PyUnicode_Check(v)) {
+               Py_INCREF(Py_NotImplemented);
+               return Py_NotImplemented;
+       }
+       return PyUnicode_Format(v, w);
+}
 
- onError:
-    Py_XDECREF(result);
-    Py_DECREF(uformat);
-    if (args_owned) {
-	Py_DECREF(args);
+static PyNumberMethods unicode_as_number = {
+	0,				/*nb_add*/
+	0,				/*nb_subtract*/
+	0,				/*nb_multiply*/
+	unicode_mod,			/*nb_remainder*/
+};
+
+static PySequenceMethods unicode_as_sequence = {
+    (lenfunc) unicode_length, 		/* sq_length */
+    PyUnicode_Concat,		 	/* sq_concat */
+    (ssizeargfunc) unicode_repeat, 	/* sq_repeat */
+    (ssizeargfunc) unicode_getitem, 	/* sq_item */
+    (ssizessizeargfunc) unicode_slice, 	/* sq_slice */
+    0, 					/* sq_ass_item */
+    0, 					/* sq_ass_slice */
+    PyUnicode_Contains, 		/* sq_contains */
+};
+
+static PyObject*
+unicode_subscript(PyUnicodeObject* self, PyObject* item)
+{
+    if (PyIndex_Check(item)) {
+        Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
+        if (i == -1 && PyErr_Occurred())
+            return NULL;
+        if (i < 0)
+            i += PyUnicode_GET_SIZE(self);
+        return unicode_getitem(self, i);
+    } else if (PySlice_Check(item)) {
+        Py_ssize_t start, stop, step, slicelength, cur, i;
+        Py_UNICODE* source_buf;
+        Py_UNICODE* result_buf;
+        PyObject* result;
+
+        if (PySlice_GetIndicesEx((PySliceObject*)item, PyUnicode_GET_SIZE(self),
+				 &start, &stop, &step, &slicelength) < 0) {
+            return NULL;
+        }
+
+        if (slicelength <= 0) {
+            return PyUnicode_FromUnicode(NULL, 0);
+        } else {
+            source_buf = PyUnicode_AS_UNICODE((PyObject*)self);
+            result_buf = (Py_UNICODE *)PyMem_MALLOC(slicelength*
+                                                    sizeof(Py_UNICODE));
+	    
+	    if (result_buf == NULL)
+		    return PyErr_NoMemory();
+
+            for (cur = start, i = 0; i < slicelength; cur += step, i++) {
+                result_buf[i] = source_buf[cur];
+            }
+
+            result = PyUnicode_FromUnicode(result_buf, slicelength);
+            PyMem_FREE(result_buf);
+            return result;
+        }
+    } else {
+        PyErr_SetString(PyExc_TypeError, "string indices must be integers");
+        return NULL;
     }
-    return NULL;
 }
 
-static PyBufferProcs unicode_as_buffer = {
-    (getbufferproc) unicode_buffer_getbuffer,
-    NULL,
+static PyMappingMethods unicode_as_mapping = {
+    (lenfunc)unicode_length,		/* mp_length */
+    (binaryfunc)unicode_subscript,	/* mp_subscript */
+    (objobjargproc)0,			/* mp_ass_subscript */
 };
 
-static PyObject *
-unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
 
-static PyObject *
-unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+static int
+unicode_buffer_getbuffer(PyUnicodeObject *self, PyBuffer *view, int flags)
 {
-        PyObject *x = NULL;
-	static char *kwlist[] = {"object", "encoding", "errors", 0};
-	char *encoding = NULL;
-	char *errors = NULL;
 
-	if (type != &PyUnicode_Type)
-		return unicode_subtype_new(type, args, kwds);
-	if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:unicode",
-					  kwlist, &x, &encoding, &errors))
-	    return NULL;
-	if (x == NULL)
-		return (PyObject *)_PyUnicode_New(0);
-	if (encoding == NULL && errors == NULL)
-	    return PyObject_Unicode(x);
-	else
-	return PyUnicode_FromEncodedObject(x, encoding, errors);
+    if (flags & PyBUF_CHARACTER) {
+        PyObject *str;
+        
+        str = _PyUnicode_AsDefaultEncodedString((PyObject *)self, NULL);
+        if (str == NULL) return -1;
+        return PyBuffer_FillInfo(view, (void *)PyString_AS_STRING(str),
+                                 PyString_GET_SIZE(str), 1, flags);
+    }
+    else {
+        return PyBuffer_FillInfo(view, (void *)self->str, 
+                                 PyUnicode_GET_DATA_SIZE(self), 1, flags);
+    }
 }
 
+
+/* Helpers for PyUnicode_Format() */
+
 static PyObject *
-unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
 {
-	PyUnicodeObject *tmp, *pnew;
-	Py_ssize_t n;
+    Py_ssize_t argidx = *p_argidx;
+    if (argidx < arglen) {
+	(*p_argidx)++;
+	if (arglen < 0)
+	    return args;
+	else
+	    return PyTuple_GetItem(args, argidx);
+    }
+    PyErr_SetString(PyExc_TypeError,
+		    "not enough arguments for format string");
+    return NULL;
+}
 
-	assert(PyType_IsSubtype(type, &PyUnicode_Type));
-	tmp = (PyUnicodeObject *)unicode_new(&PyUnicode_Type, args, kwds);
-	if (tmp == NULL)
-		return NULL;
-	assert(PyUnicode_Check(tmp));
-	pnew = (PyUnicodeObject *) type->tp_alloc(type, n = tmp->length);
-	if (pnew == NULL) {
-		Py_DECREF(tmp);
-		return NULL;
-	}
-	pnew->str = PyMem_NEW(Py_UNICODE, n+1);
-	if (pnew->str == NULL) {
-		_Py_ForgetReference((PyObject *)pnew);
-		PyObject_Del(pnew);
-		Py_DECREF(tmp);
-		return PyErr_NoMemory();
-	}
-	Py_UNICODE_COPY(pnew->str, tmp->str, n+1);
-	pnew->length = n;
-	pnew->hash = tmp->hash;
-	Py_DECREF(tmp);
-	return (PyObject *)pnew;
+#define F_LJUST (1<<0)
+#define F_SIGN	(1<<1)
+#define F_BLANK (1<<2)
+#define F_ALT	(1<<3)
+#define F_ZERO	(1<<4)
+
+static Py_ssize_t
+strtounicode(Py_UNICODE *buffer, const char *charbuffer)
+{
+    register Py_ssize_t i;
+    Py_ssize_t len = strlen(charbuffer);
+    for (i = len - 1; i >= 0; i--)
+	buffer[i] = (Py_UNICODE) charbuffer[i];
+
+    return len;
 }
 
-PyDoc_STRVAR(unicode_doc,
-"str(string [, encoding[, errors]]) -> object\n\
-\n\
-Create a new string object from the given encoded string.\n\
-encoding defaults to the current default string encoding.\n\
-errors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'.");
+static int
+doubletounicode(Py_UNICODE *buffer, size_t len, const char *format, double x)
+{
+    Py_ssize_t result;
 
-static PyObject *unicode_iter(PyObject *seq);
+    PyOS_ascii_formatd((char *)buffer, len, format, x);
+    result = strtounicode(buffer, (char *)buffer);
+    return Py_SAFE_DOWNCAST(result, Py_ssize_t, int);
+}
 
-PyTypeObject PyUnicode_Type = {
-    PyVarObject_HEAD_INIT(&PyType_Type, 0)
-    "str", 				/* tp_name */
-    sizeof(PyUnicodeObject), 		/* tp_size */
-    0, 					/* tp_itemsize */
-    /* Slots */
-    (destructor)unicode_dealloc, 	/* tp_dealloc */
-    0, 					/* tp_print */
-    0,				 	/* tp_getattr */
-    0, 					/* tp_setattr */
-    0, 					/* tp_compare */
-    unicode_repr, 			/* tp_repr */
-    &unicode_as_number, 		/* tp_as_number */
-    &unicode_as_sequence, 		/* tp_as_sequence */
-    &unicode_as_mapping, 		/* tp_as_mapping */
-    (hashfunc) unicode_hash, 		/* tp_hash*/
-    0, 					/* tp_call*/
-    (reprfunc) unicode_str,	 	/* tp_str */
-    PyObject_GenericGetAttr, 		/* tp_getattro */
-    0,			 		/* tp_setattro */
-    &unicode_as_buffer,			/* tp_as_buffer */
-    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | 
-        Py_TPFLAGS_UNICODE_SUBCLASS,	/* tp_flags */
-    unicode_doc,			/* tp_doc */
-    0,					/* tp_traverse */
-    0,					/* tp_clear */
-    PyUnicode_RichCompare,		/* tp_richcompare */
-    0,					/* tp_weaklistoffset */
-    unicode_iter,			/* tp_iter */
-    0,					/* tp_iternext */
-    unicode_methods,			/* tp_methods */
-    0,					/* tp_members */
-    0,					/* tp_getset */
-    &PyBaseString_Type,			/* tp_base */
-    0,					/* tp_dict */
-    0,					/* tp_descr_get */
-    0,					/* tp_descr_set */
-    0,					/* tp_dictoffset */
-    0,					/* tp_init */
-    0,					/* tp_alloc */
-    unicode_new,			/* tp_new */
-    PyObject_Del,      		/* tp_free */
-};
+static int
+longtounicode(Py_UNICODE *buffer, size_t len, const char *format, long x)
+{
+    Py_ssize_t result;
 
-/* Initialize the Unicode implementation */
+    PyOS_snprintf((char *)buffer, len, format, x);
+    result = strtounicode(buffer, (char *)buffer);
+    return Py_SAFE_DOWNCAST(result, Py_ssize_t, int);
+}
 
-void _PyUnicode_Init(void)
+/* XXX To save some code duplication, formatfloat/long/int could have been
+   shared with stringobject.c, converting from 8-bit to Unicode after the
+   formatting is done. */
+
+static int
+formatfloat(Py_UNICODE *buf,
+	    size_t buflen,
+	    int flags,
+	    int prec,
+	    int type,
+	    PyObject *v)
 {
-    int i;
+    /* fmt = '%#.' + `prec` + `type`
+       worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
+    char fmt[20];
+    double x;
 
-    /* XXX - move this array to unicodectype.c ? */
-    Py_UNICODE linebreak[] = {
-        0x000A, /* LINE FEED */
-        0x000D, /* CARRIAGE RETURN */
-        0x001C, /* FILE SEPARATOR */
-        0x001D, /* GROUP SEPARATOR */
-        0x001E, /* RECORD SEPARATOR */
-        0x0085, /* NEXT LINE */
-        0x2028, /* LINE SEPARATOR */
-        0x2029, /* PARAGRAPH SEPARATOR */
-    };
+    x = PyFloat_AsDouble(v);
+    if (x == -1.0 && PyErr_Occurred())
+	return -1;
+    if (prec < 0)
+	prec = 6;
+    if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
+	type = 'g';
+    /* Worst case length calc to ensure no buffer overrun:
 
-    /* Init the implementation */
-    unicode_freelist = NULL;
-    unicode_freelist_size = 0;
-    unicode_empty = _PyUnicode_New(0);
-    if (!unicode_empty)
-	return;
+       'g' formats:
+	 fmt = %#.<prec>g
+	 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
+	    for any double rep.)
+	 len = 1 + prec + 1 + 2 + 5 = 9 + prec
 
-    for (i = 0; i < 256; i++)
-	unicode_latin1[i] = NULL;
-    if (PyType_Ready(&PyUnicode_Type) < 0)
-	Py_FatalError("Can't initialize 'unicode'");
+       'f' formats:
+	 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
+	 len = 1 + 50 + 1 + prec = 52 + prec
 
-    /* initialize the linebreak bloom filter */
-    bloom_linebreak = make_bloom_mask(
-        linebreak, sizeof(linebreak) / sizeof(linebreak[0])
-        );
+       If prec=0 the effective precision is 1 (the leading digit is
+       always given), therefore increase the length by one.
 
-    PyType_Ready(&EncodingMapType);
+    */
+    if (((type == 'g' || type == 'G') && 
+          buflen <= (size_t)10 + (size_t)prec) ||
+	(type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
+	PyErr_SetString(PyExc_OverflowError,
+			"formatted float is too long (precision too large?)");
+	return -1;
+    }
+    PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
+		  (flags&F_ALT) ? "#" : "",
+		  prec, type);
+    return doubletounicode(buf, buflen, fmt, x);
 }
 
-/* Finalize the Unicode implementation */
+static PyObject*
+formatlong(PyObject *val, int flags, int prec, int type)
+{
+	char *buf;
+	int len;
+	PyObject *str; /* temporary string object. */
+	PyObject *result;
 
-void
-_PyUnicode_Fini(void)
+	str = _PyString_FormatLong(val, flags, prec, type, &buf, &len);
+	if (!str)
+		return NULL;
+	result = PyUnicode_FromStringAndSize(buf, len);
+	Py_DECREF(str);
+	return result;
+}
+
+static int
+formatint(Py_UNICODE *buf,
+	  size_t buflen,
+	  int flags,
+	  int prec,
+	  int type,
+	  PyObject *v)
 {
-    PyUnicodeObject *u;
-    int i;
+    /* fmt = '%#.' + `prec` + 'l' + `type`
+     * worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
+     *                     + 1 + 1
+     *                   = 24
+     */
+    char fmt[64]; /* plenty big enough! */
+    char *sign;
+    long x;
 
-    Py_XDECREF(unicode_empty);
-    unicode_empty = NULL;
+    x = PyInt_AsLong(v);
+    if (x == -1 && PyErr_Occurred())
+        return -1;
+    if (x < 0 && type == 'u') {
+        type = 'd';
+    }
+    if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
+        sign = "-";
+    else
+        sign = "";
+    if (prec < 0)
+        prec = 1;
 
-    for (i = 0; i < 256; i++) {
-	if (unicode_latin1[i]) {
-	    Py_DECREF(unicode_latin1[i]);
-	    unicode_latin1[i] = NULL;
-	}
+    /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
+     * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
+     */
+    if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
+        PyErr_SetString(PyExc_OverflowError,
+    	        "formatted integer is too long (precision too large?)");
+        return -1;
     }
 
-    for (u = unicode_freelist; u != NULL;) {
-	PyUnicodeObject *v = u;
-	u = *(PyUnicodeObject **)u;
-	if (v->str)
-	    PyMem_DEL(v->str);
-	Py_XDECREF(v->defenc);
-	PyObject_Del(v);
+    if ((flags & F_ALT) &&
+        (type == 'x' || type == 'X' || type == 'o')) {
+        /* When converting under %#o, %#x or %#X, there are a number
+         * of issues that cause pain:
+	 * - for %#o, we want a different base marker than C
+         * - when 0 is being converted, the C standard leaves off
+         *   the '0x' or '0X', which is inconsistent with other
+         *   %#x/%#X conversions and inconsistent with Python's
+         *   hex() function
+         * - there are platforms that violate the standard and
+         *   convert 0 with the '0x' or '0X'
+         *   (Metrowerks, Compaq Tru64)
+         * - there are platforms that give '0x' when converting
+         *   under %#X, but convert 0 in accordance with the
+         *   standard (OS/2 EMX)
+         *
+         * We can achieve the desired consistency by inserting our
+         * own '0x' or '0X' prefix, and substituting %x/%X in place
+         * of %#x/%#X.
+         *
+         * Note that this is the same approach as used in
+         * formatint() in stringobject.c
+         */
+        PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
+                      sign, type, prec, type);
     }
-    unicode_freelist = NULL;
-    unicode_freelist_size = 0;
+    else {
+        PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
+                      sign, (flags&F_ALT) ? "#" : "",
+                      prec, type);
+    }
+    if (sign[0])
+        return longtounicode(buf, buflen, fmt, -x);
+    else
+        return longtounicode(buf, buflen, fmt, x);
 }
 
-void
-PyUnicode_InternInPlace(PyObject **p)
+static int
+formatchar(Py_UNICODE *buf,
+           size_t buflen,
+           PyObject *v)
 {
-	register PyUnicodeObject *s = (PyUnicodeObject *)(*p);
-	PyObject *t;
-	if (s == NULL || !PyUnicode_Check(s))
-		Py_FatalError(
-		    "PyUnicode_InternInPlace: unicode strings only please!");
-	/* If it's a subclass, we don't really know what putting
-	   it in the interned dict might do. */
-	if (!PyUnicode_CheckExact(s))
-		return;
-	if (PyUnicode_CHECK_INTERNED(s))
-		return;
-	if (interned == NULL) {
-		interned = PyDict_New();
-		if (interned == NULL) {
-			PyErr_Clear(); /* Don't leave an exception */
-			return;
-		}
-	}
-	/* It might be that the GetItem call fails even
-	   though the key is present in the dictionary,
-	   namely when this happens during a stack overflow. */
-	Py_ALLOW_RECURSION
-	t = PyDict_GetItem(interned, (PyObject *)s);
-	Py_END_ALLOW_RECURSION
+    /* presume that the buffer is at least 2 characters long */
+    if (PyUnicode_Check(v)) {
+	if (PyUnicode_GET_SIZE(v) != 1)
+	    goto onError;
+	buf[0] = PyUnicode_AS_UNICODE(v)[0];
+    }
 
-	if (t) {
-		Py_INCREF(t);
-		Py_DECREF(*p);
-		*p = t;
-		return;
-	}
+    else if (PyString_Check(v)) {
+	if (PyString_GET_SIZE(v) != 1)
+	    goto onError;
+	buf[0] = (Py_UNICODE)PyString_AS_STRING(v)[0];
+    }
 
-	PyThreadState_GET()->recursion_critical = 1;
-	if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
-		PyErr_Clear();
-		PyThreadState_GET()->recursion_critical = 0;
-		return;
+    else {
+	/* Integer input truncated to a character */
+        long x;
+	x = PyInt_AsLong(v);
+	if (x == -1 && PyErr_Occurred())
+	    goto onError;
+#ifdef Py_UNICODE_WIDE
+	if (x < 0 || x > 0x10ffff) {
+	    PyErr_SetString(PyExc_OverflowError,
+			    "%c arg not in range(0x110000) "
+			    "(wide Python build)");
+	    return -1;
 	}
-	PyThreadState_GET()->recursion_critical = 0;
-	/* The two references in interned are not counted by refcnt.
-	   The deallocator will take care of this */
-	Py_Refcnt(s) -= 2;
-	PyUnicode_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
+#else
+	if (x < 0 || x > 0xffff) {
+	    PyErr_SetString(PyExc_OverflowError,
+			    "%c arg not in range(0x10000) "
+			    "(narrow Python build)");
+	    return -1;
+	}
+#endif
+	buf[0] = (Py_UNICODE) x;
+    }
+    buf[1] = '\0';
+    return 1;
+
+ onError:
+    PyErr_SetString(PyExc_TypeError,
+		    "%c requires int or char");
+    return -1;
 }
 
-void
-PyUnicode_InternImmortal(PyObject **p)
-{
-	PyUnicode_InternInPlace(p);
-	if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
-		PyUnicode_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
-		Py_INCREF(*p);
-	}
-}
+/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
 
-PyObject *
-PyUnicode_InternFromString(const char *cp)
-{
-	PyObject *s = PyUnicode_FromString(cp);
-	if (s == NULL)
-		return NULL;
-	PyUnicode_InternInPlace(&s);
-	return s;
-}
+   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
+   chars are formatted. XXX This is a magic number. Each formatting
+   routine does bounds checking to ensure no overflow, but a better
+   solution may be to malloc a buffer of appropriate size for each
+   format. For now, the current solution is sufficient.
+*/
+#define FORMATBUFLEN (size_t)120
 
-void _Py_ReleaseInternedUnicodeStrings(void)
+PyObject *PyUnicode_Format(PyObject *format,
+			   PyObject *args)
 {
-	PyObject *keys;
-	PyUnicodeObject *s;
-	Py_ssize_t i, n;
-	Py_ssize_t immortal_size = 0, mortal_size = 0;
-
-	if (interned == NULL || !PyDict_Check(interned))
-		return;
-	keys = PyDict_Keys(interned);
-	if (keys == NULL || !PyList_Check(keys)) {
-		PyErr_Clear();
-		return;
-	}
+    Py_UNICODE *fmt, *res;
+    Py_ssize_t fmtcnt, rescnt, reslen, arglen, argidx;
+    int args_owned = 0;
+    PyUnicodeObject *result = NULL;
+    PyObject *dict = NULL;
+    PyObject *uformat;
 
-	/* Since _Py_ReleaseInternedUnicodeStrings() is intended to help a leak
-	   detector, interned unicode strings are not forcibly deallocated;
-	   rather, we give them their stolen references back, and then clear
-	   and DECREF the interned dict. */
+    if (format == NULL || args == NULL) {
+	PyErr_BadInternalCall();
+	return NULL;
+    }
+    uformat = PyUnicode_FromObject(format);
+    if (uformat == NULL)
+	return NULL;
+    fmt = PyUnicode_AS_UNICODE(uformat);
+    fmtcnt = PyUnicode_GET_SIZE(uformat);
 
-	n = PyList_GET_SIZE(keys);
-	fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
-		n);
-	for (i = 0; i < n; i++) {
-		s = (PyUnicodeObject *) PyList_GET_ITEM(keys, i);
-		switch (s->state) {
-		case SSTATE_NOT_INTERNED:
-			/* XXX Shouldn't happen */
-			break;
-		case SSTATE_INTERNED_IMMORTAL:
-			Py_Refcnt(s) += 1;
-			immortal_size += s->length;
-			break;
-		case SSTATE_INTERNED_MORTAL:
-			Py_Refcnt(s) += 2;
-			mortal_size += s->length;
-			break;
-		default:
-			Py_FatalError("Inconsistent interned string state.");
-		}
-		s->state = SSTATE_NOT_INTERNED;
-	}
-	fprintf(stderr, "total size of all interned strings: "
-			"%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
-			"mortal/immortal\n", mortal_size, immortal_size);
-	Py_DECREF(keys);
-	PyDict_Clear(interned);
-	Py_DECREF(interned);
-	interned = NULL;
-}
+    reslen = rescnt = fmtcnt + 100;
+    result = _PyUnicode_New(reslen);
+    if (result == NULL)
+	goto onError;
+    res = PyUnicode_AS_UNICODE(result);
 
+    if (PyTuple_Check(args)) {
+	arglen = PyTuple_Size(args);
+	argidx = 0;
+    }
+    else {
+	arglen = -1;
+	argidx = -2;
+    }
+    if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
+        !PyObject_TypeCheck(args, &PyBaseString_Type))
+	dict = args;
 
-/********************* Formatter Iterator ************************/
+    while (--fmtcnt >= 0) {
+	if (*fmt != '%') {
+	    if (--rescnt < 0) {
+		rescnt = fmtcnt + 100;
+		reslen += rescnt;
+		if (_PyUnicode_Resize(&result, reslen) < 0)
+		    goto onError;
+		res = PyUnicode_AS_UNICODE(result) + reslen - rescnt;
+		--rescnt;
+	    }
+	    *res++ = *fmt++;
+	}
+	else {
+	    /* Got a format specifier */
+	    int flags = 0;
+	    Py_ssize_t width = -1;
+	    int prec = -1;
+	    Py_UNICODE c = '\0';
+	    Py_UNICODE fill;
+	    PyObject *v = NULL;
+	    PyObject *temp = NULL;
+	    Py_UNICODE *pbuf;
+	    Py_UNICODE sign;
+	    Py_ssize_t len;
+	    Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
 
-/* this is used to implement string.Formatter.vparse().  it exists so
-   Formatter can share code with the built in unicode.format()
-   method */
+	    fmt++;
+	    if (*fmt == '(') {
+		Py_UNICODE *keystart;
+		Py_ssize_t keylen;
+		PyObject *key;
+		int pcount = 1;
 
-typedef struct {
-	PyObject_HEAD
+		if (dict == NULL) {
+		    PyErr_SetString(PyExc_TypeError,
+				    "format requires a mapping");
+		    goto onError;
+		}
+		++fmt;
+		--fmtcnt;
+		keystart = fmt;
+		/* Skip over balanced parentheses */
+		while (pcount > 0 && --fmtcnt >= 0) {
+		    if (*fmt == ')')
+			--pcount;
+		    else if (*fmt == '(')
+			++pcount;
+		    fmt++;
+		}
+		keylen = fmt - keystart - 1;
+		if (fmtcnt < 0 || pcount > 0) {
+		    PyErr_SetString(PyExc_ValueError,
+				    "incomplete format key");
+		    goto onError;
+		}
+#if 0
+		/* keys are converted to strings using UTF-8 and
+		   then looked up since Python uses strings to hold
+		   variable names etc. in its namespaces and we
+		   wouldn't want to break common idioms. */
+		key = PyUnicode_EncodeUTF8(keystart,
+					   keylen,
+					   NULL);
+#else
+		key = PyUnicode_FromUnicode(keystart, keylen);
+#endif
+		if (key == NULL)
+		    goto onError;
+		if (args_owned) {
+		    Py_DECREF(args);
+		    args_owned = 0;
+		}
+		args = PyObject_GetItem(dict, key);
+		Py_DECREF(key);
+		if (args == NULL) {
+		    goto onError;
+		}
+		args_owned = 1;
+		arglen = -1;
+		argidx = -2;
+	    }
+	    while (--fmtcnt >= 0) {
+		switch (c = *fmt++) {
+		case '-': flags |= F_LJUST; continue;
+		case '+': flags |= F_SIGN; continue;
+		case ' ': flags |= F_BLANK; continue;
+		case '#': flags |= F_ALT; continue;
+		case '0': flags |= F_ZERO; continue;
+		}
+		break;
+	    }
+	    if (c == '*') {
+		v = getnextarg(args, arglen, &argidx);
+		if (v == NULL)
+		    goto onError;
+		if (!PyInt_Check(v)) {
+		    PyErr_SetString(PyExc_TypeError,
+				    "* wants int");
+		    goto onError;
+		}
+		width = PyInt_AsLong(v);
+		if (width == -1 && PyErr_Occurred())
+			goto onError;
+		if (width < 0) {
+		    flags |= F_LJUST;
+		    width = -width;
+		}
+		if (--fmtcnt >= 0)
+		    c = *fmt++;
+	    }
+	    else if (c >= '0' && c <= '9') {
+		width = c - '0';
+		while (--fmtcnt >= 0) {
+		    c = *fmt++;
+		    if (c < '0' || c > '9')
+			break;
+		    if ((width*10) / 10 != width) {
+			PyErr_SetString(PyExc_ValueError,
+					"width too big");
+			goto onError;
+		    }
+		    width = width*10 + (c - '0');
+		}
+	    }
+	    if (c == '.') {
+		prec = 0;
+		if (--fmtcnt >= 0)
+		    c = *fmt++;
+		if (c == '*') {
+		    v = getnextarg(args, arglen, &argidx);
+		    if (v == NULL)
+			goto onError;
+		    if (!PyInt_Check(v)) {
+			PyErr_SetString(PyExc_TypeError,
+					"* wants int");
+			goto onError;
+		    }
+		    prec = PyInt_AsLong(v);
+		    if (prec == -1 && PyErr_Occurred())
+			goto onError;
+		    if (prec < 0)
+			prec = 0;
+		    if (--fmtcnt >= 0)
+			c = *fmt++;
+		}
+		else if (c >= '0' && c <= '9') {
+		    prec = c - '0';
+		    while (--fmtcnt >= 0) {
+			c = Py_CHARMASK(*fmt++);
+			if (c < '0' || c > '9')
+			    break;
+			if ((prec*10) / 10 != prec) {
+			    PyErr_SetString(PyExc_ValueError,
+					    "prec too big");
+			    goto onError;
+			}
+			prec = prec*10 + (c - '0');
+		    }
+		}
+	    } /* prec */
+	    if (fmtcnt >= 0) {
+		if (c == 'h' || c == 'l' || c == 'L') {
+		    if (--fmtcnt >= 0)
+			c = *fmt++;
+		}
+	    }
+	    if (fmtcnt < 0) {
+		PyErr_SetString(PyExc_ValueError,
+				"incomplete format");
+		goto onError;
+	    }
+	    if (c != '%') {
+		v = getnextarg(args, arglen, &argidx);
+		if (v == NULL)
+		    goto onError;
+	    }
+	    sign = 0;
+	    fill = ' ';
+	    switch (c) {
 
-        /* we know this to be a unicode object, but since we just keep
-           it around to keep the object alive, having it as PyObject
-           is okay */
-        PyObject *str;
+	    case '%':
+		pbuf = formatbuf;
+		/* presume that buffer length is at least 1 */
+		pbuf[0] = '%';
+		len = 1;
+		break;
 
-        MarkupIterator it_markup;
-} formatteriterobject;
+	    case 's':
+	    case 'r':
+		if (PyUnicode_Check(v) && c == 's') {
+		    temp = v;
+		    Py_INCREF(temp);
+		}
+		else {
+		    PyObject *unicode;
+		    if (c == 's')
+			temp = PyObject_Unicode(v);
+		    else
+			temp = PyObject_Repr(v);
+		    if (temp == NULL)
+			goto onError;
+                    if (PyUnicode_Check(temp))
+                        /* nothing to do */;
+                    else if (PyString_Check(temp)) {
+                        /* convert the string to Unicode */
+		        unicode = PyUnicode_Decode(PyString_AS_STRING(temp),
+						   PyString_GET_SIZE(temp),
+						   NULL,
+						   "strict");
+		        Py_DECREF(temp);
+		        temp = unicode;
+		        if (temp == NULL)
+			    goto onError;
+		    }
+		    else {
+			Py_DECREF(temp);
+			PyErr_SetString(PyExc_TypeError,
+					"%s argument has non-string str()");
+			goto onError;
+		    }
+		}
+		pbuf = PyUnicode_AS_UNICODE(temp);
+		len = PyUnicode_GET_SIZE(temp);
+		if (prec >= 0 && len > prec)
+		    len = prec;
+		break;
 
-static void
-formatteriter_dealloc(formatteriterobject *it)
-{
-        Py_XDECREF(it->str);
-	PyObject_FREE(it);
-}
+	    case 'i':
+	    case 'd':
+	    case 'u':
+	    case 'o':
+	    case 'x':
+	    case 'X':
+		if (c == 'i')
+		    c = 'd';
+		if (PyLong_Check(v)) {
+		    temp = formatlong(v, flags, prec, c);
+		    if (!temp)
+			goto onError;
+		    pbuf = PyUnicode_AS_UNICODE(temp);
+		    len = PyUnicode_GET_SIZE(temp);
+		    sign = 1;
+		}
+		else {
+		    pbuf = formatbuf;
+		    len = formatint(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
+				    flags, prec, c, v);
+		    if (len < 0)
+			goto onError;
+		    sign = 1;
+		}
+		if (flags & F_ZERO)
+		    fill = '0';
+		break;
 
-/* returns a tuple:
-   (is_markup, literal, field_name, format_spec, conversion)
-   if is_markup == True:
-        literal is None
-        field_name is the string before the ':'
-        format_spec is the string after the ':'
-        conversion is either None, or the string after the '!'
-   if is_markup == False:
-        literal is the literal string
-        field_name is None
-        format_spec is None
-        conversion is None
-*/
-static PyObject *
-formatteriter_next(formatteriterobject *it)
-{
-        SubString literal;
-        SubString field_name;
-        SubString format_spec;
-        Py_UNICODE conversion;
-        int is_markup;
-        int format_spec_needs_expanding;
-        int result = MarkupIterator_next(&it->it_markup, &is_markup, &literal,
-                                         &field_name, &format_spec, &conversion,
-                                         &format_spec_needs_expanding);
+	    case 'e':
+	    case 'E':
+	    case 'f':
+	    case 'F':
+	    case 'g':
+	    case 'G':
+		if (c == 'F')
+			c = 'f';
+		pbuf = formatbuf;
+		len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
+			flags, prec, c, v);
+		if (len < 0)
+		    goto onError;
+		sign = 1;
+		if (flags & F_ZERO)
+		    fill = '0';
+		break;
 
-        /* all of the SubString objects point into it->str, so no
-           memory management needs to be done on them */
-        assert(0 <= result && result <= 2);
-        if (result == 0) {
-                /* error has already been set */
-                return NULL;
-        } else if (result == 1) {
-                /* end of iterator */
-                return NULL;
-        } else {
-                PyObject *is_markup_bool = NULL;
-                PyObject *literal_str = NULL;
-                PyObject *field_name_str = NULL;
-                PyObject *format_spec_str = NULL;
-                PyObject *conversion_str = NULL;
-                PyObject *tuple = NULL;
+	    case 'c':
+		pbuf = formatbuf;
+		len = formatchar(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE), v);
+		if (len < 0)
+		    goto onError;
+		break;
 
-                is_markup_bool = PyBool_FromLong(is_markup);
-                if (!is_markup_bool)
-                    return NULL;
+	    default:
+		PyErr_Format(PyExc_ValueError,
+			     "unsupported format character '%c' (0x%x) "
+			     "at index %zd",
+			     (31<=c && c<=126) ? (char)c : '?',
+                             (int)c,
+			     (Py_ssize_t)(fmt - 1 -
+					  PyUnicode_AS_UNICODE(uformat)));
+		goto onError;
+	    }
+	    if (sign) {
+		if (*pbuf == '-' || *pbuf == '+') {
+		    sign = *pbuf++;
+		    len--;
+		}
+		else if (flags & F_SIGN)
+		    sign = '+';
+		else if (flags & F_BLANK)
+		    sign = ' ';
+		else
+		    sign = 0;
+	    }
+	    if (width < len)
+		width = len;
+	    if (rescnt - (sign != 0) < width) {
+		reslen -= rescnt;
+		rescnt = width + fmtcnt + 100;
+		reslen += rescnt;
+		if (reslen < 0) {
+		    Py_XDECREF(temp);
+		    PyErr_NoMemory();
+		    goto onError;
+		}
+		if (_PyUnicode_Resize(&result, reslen) < 0) {
+		    Py_XDECREF(temp);
+		    goto onError;
+		}
+		res = PyUnicode_AS_UNICODE(result)
+		    + reslen - rescnt;
+	    }
+	    if (sign) {
+		if (fill != ' ')
+		    *res++ = sign;
+		rescnt--;
+		if (width > len)
+		    width--;
+	    }
+	    if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
+		assert(pbuf[0] == '0');
+		assert(pbuf[1] == c);
+		if (fill != ' ') {
+		    *res++ = *pbuf++;
+		    *res++ = *pbuf++;
+		}
+		rescnt -= 2;
+		width -= 2;
+		if (width < 0)
+		    width = 0;
+		len -= 2;
+	    }
+	    if (width > len && !(flags & F_LJUST)) {
+		do {
+		    --rescnt;
+		    *res++ = fill;
+		} while (--width > len);
+	    }
+	    if (fill == ' ') {
+		if (sign)
+		    *res++ = sign;
+		if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
+		    assert(pbuf[0] == '0');
+		    assert(pbuf[1] == c);
+		    *res++ = *pbuf++;
+		    *res++ = *pbuf++;
+		}
+	    }
+	    Py_UNICODE_COPY(res, pbuf, len);
+	    res += len;
+	    rescnt -= len;
+	    while (--width >= len) {
+		--rescnt;
+		*res++ = ' ';
+	    }
+	    if (dict && (argidx < arglen) && c != '%') {
+		PyErr_SetString(PyExc_TypeError,
+				"not all arguments converted during string formatting");
+                Py_XDECREF(temp);
+		goto onError;
+	    }
+	    Py_XDECREF(temp);
+	} /* '%' */
+    } /* until end */
+    if (argidx < arglen && !dict) {
+	PyErr_SetString(PyExc_TypeError,
+			"not all arguments converted during string formatting");
+	goto onError;
+    }
 
-                if (is_markup) {
-                        /* field_name, format_spec, and conversion are
-                           returned */
-                        literal_str = Py_None;
-                        Py_INCREF(literal_str);
+    if (_PyUnicode_Resize(&result, reslen - rescnt) < 0)
+	goto onError;
+    if (args_owned) {
+	Py_DECREF(args);
+    }
+    Py_DECREF(uformat);
+    return (PyObject *)result;
 
-                        field_name_str = SubString_new_object(&field_name);
-                        if (field_name_str == NULL)
-                                goto error;
+ onError:
+    Py_XDECREF(result);
+    Py_DECREF(uformat);
+    if (args_owned) {
+	Py_DECREF(args);
+    }
+    return NULL;
+}
 
-                        format_spec_str = SubString_new_object(&format_spec);
-                        if (format_spec_str == NULL)
-                                goto error;
+static PyBufferProcs unicode_as_buffer = {
+    (getbufferproc) unicode_buffer_getbuffer,
+    NULL,
+};
 
-                        /* if the conversion is not specified, return
-                           a None, otherwise create a one length
-                           string with the conversion characater */
-                        if (conversion == '\0') {
-                                conversion_str = Py_None;
-                                Py_INCREF(conversion_str);
-                        } else
-                            conversion_str = PyUnicode_FromUnicode(&conversion,
-                                                                   1);
-                        if (conversion_str == NULL)
-                                goto error;
-                } else {
-                        /* only literal is returned */
-                        literal_str = SubString_new_object(&literal);
-                        if (literal_str == NULL)
-                                goto error;
+static PyObject *
+unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
 
-                        field_name_str = Py_None;
-                        format_spec_str = Py_None;
-                        conversion_str = Py_None;
+static PyObject *
+unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+        PyObject *x = NULL;
+	static char *kwlist[] = {"object", "encoding", "errors", 0};
+	char *encoding = NULL;
+	char *errors = NULL;
 
-                        Py_INCREF(field_name_str);
-                        Py_INCREF(format_spec_str);
-                        Py_INCREF(conversion_str);
-                }
-                tuple = PyTuple_Pack(5, is_markup_bool, literal_str,
-                                     field_name_str, format_spec_str,
-                                     conversion_str);
-        error:
-                Py_XDECREF(is_markup_bool);
-                Py_XDECREF(literal_str);
-                Py_XDECREF(field_name_str);
-                Py_XDECREF(format_spec_str);
-                Py_XDECREF(conversion_str);
-                return tuple;
-        }
+	if (type != &PyUnicode_Type)
+		return unicode_subtype_new(type, args, kwds);
+	if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:unicode",
+					  kwlist, &x, &encoding, &errors))
+	    return NULL;
+	if (x == NULL)
+		return (PyObject *)_PyUnicode_New(0);
+	if (encoding == NULL && errors == NULL)
+	    return PyObject_Unicode(x);
+	else
+	return PyUnicode_FromEncodedObject(x, encoding, errors);
 }
 
-static PyMethodDef formatteriter_methods[] = {
- 	{NULL,		NULL}		/* sentinel */
-};
-
-PyTypeObject PyFormatterIter_Type = {
-	PyVarObject_HEAD_INIT(&PyType_Type, 0)
-	"formatteriterator",			/* tp_name */
-	sizeof(formatteriterobject),		/* tp_basicsize */
-	0,					/* tp_itemsize */
-	/* methods */
-	(destructor)formatteriter_dealloc,	/* tp_dealloc */
-	0,					/* tp_print */
-	0,					/* tp_getattr */
-	0,					/* tp_setattr */
-	0,					/* tp_compare */
-	0,					/* tp_repr */
-	0,					/* tp_as_number */
-	0,					/* tp_as_sequence */
-	0,					/* tp_as_mapping */
-	0,					/* tp_hash */
-	0,					/* tp_call */
-	0,					/* tp_str */
-	PyObject_GenericGetAttr,		/* tp_getattro */
-	0,					/* tp_setattro */
-	0,					/* tp_as_buffer */
-	Py_TPFLAGS_DEFAULT,			/* tp_flags */
-	0,					/* tp_doc */
-	0,					/* tp_traverse */
-	0,					/* tp_clear */
-	0,					/* tp_richcompare */
-	0,					/* tp_weaklistoffset */
-	PyObject_SelfIter,			/* tp_iter */
-	(iternextfunc)formatteriter_next,	/* tp_iternext */
-	formatteriter_methods,			/* tp_methods */
-	0,
-};
-
-PyObject *
-_PyUnicode_FormatterIterator(PyObject *str)
+static PyObject *
+unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
-        formatteriterobject *it;
+	PyUnicodeObject *tmp, *pnew;
+	Py_ssize_t n;
 
-	assert(PyUnicode_Check(str));
-	it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
-	if (it == NULL)
+	assert(PyType_IsSubtype(type, &PyUnicode_Type));
+	tmp = (PyUnicodeObject *)unicode_new(&PyUnicode_Type, args, kwds);
+	if (tmp == NULL)
+		return NULL;
+	assert(PyUnicode_Check(tmp));
+	pnew = (PyUnicodeObject *) type->tp_alloc(type, n = tmp->length);
+	if (pnew == NULL) {
+		Py_DECREF(tmp);
 		return NULL;
+	}
+	pnew->str = PyMem_NEW(Py_UNICODE, n+1);
+	if (pnew->str == NULL) {
+		_Py_ForgetReference((PyObject *)pnew);
+		PyObject_Del(pnew);
+		Py_DECREF(tmp);
+		return PyErr_NoMemory();
+	}
+	Py_UNICODE_COPY(pnew->str, tmp->str, n+1);
+	pnew->length = n;
+	pnew->hash = tmp->hash;
+	Py_DECREF(tmp);
+	return (PyObject *)pnew;
+}
 
-        /* take ownership, give the object to the iterator */
-        Py_INCREF(str);
-        it->str = str;
+PyDoc_STRVAR(unicode_doc,
+"str(string [, encoding[, errors]]) -> object\n\
+\n\
+Create a new string object from the given encoded string.\n\
+encoding defaults to the current default string encoding.\n\
+errors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'.");
 
-        /* initialize the contained MarkupIterator */
-        MarkupIterator_init(&it->it_markup,
-                            PyUnicode_AS_UNICODE(str),
-                            PyUnicode_GET_SIZE(str));
+static PyObject *unicode_iter(PyObject *seq);
 
-	return (PyObject *)it;
-}
+PyTypeObject PyUnicode_Type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "str", 				/* tp_name */
+    sizeof(PyUnicodeObject), 		/* tp_size */
+    0, 					/* tp_itemsize */
+    /* Slots */
+    (destructor)unicode_dealloc, 	/* tp_dealloc */
+    0, 					/* tp_print */
+    0,				 	/* tp_getattr */
+    0, 					/* tp_setattr */
+    0, 					/* tp_compare */
+    unicode_repr, 			/* tp_repr */
+    &unicode_as_number, 		/* tp_as_number */
+    &unicode_as_sequence, 		/* tp_as_sequence */
+    &unicode_as_mapping, 		/* tp_as_mapping */
+    (hashfunc) unicode_hash, 		/* tp_hash*/
+    0, 					/* tp_call*/
+    (reprfunc) unicode_str,	 	/* tp_str */
+    PyObject_GenericGetAttr, 		/* tp_getattro */
+    0,			 		/* tp_setattro */
+    &unicode_as_buffer,			/* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | 
+        Py_TPFLAGS_UNICODE_SUBCLASS,	/* tp_flags */
+    unicode_doc,			/* tp_doc */
+    0,					/* tp_traverse */
+    0,					/* tp_clear */
+    PyUnicode_RichCompare,		/* tp_richcompare */
+    0,					/* tp_weaklistoffset */
+    unicode_iter,			/* tp_iter */
+    0,					/* tp_iternext */
+    unicode_methods,			/* tp_methods */
+    0,					/* tp_members */
+    0,					/* tp_getset */
+    &PyBaseString_Type,			/* tp_base */
+    0,					/* tp_dict */
+    0,					/* tp_descr_get */
+    0,					/* tp_descr_set */
+    0,					/* tp_dictoffset */
+    0,					/* tp_init */
+    0,					/* tp_alloc */
+    unicode_new,			/* tp_new */
+    PyObject_Del,      		/* tp_free */
+};
 
-/********************* FieldName Iterator ************************/
+/* Initialize the Unicode implementation */
 
-/* this is used to implement string.Formatter.vparse().  it parses
-   the field name into attribute and item values. */
+void _PyUnicode_Init(void)
+{
+    int i;
 
-typedef struct {
-	PyObject_HEAD
+    /* XXX - move this array to unicodectype.c ? */
+    Py_UNICODE linebreak[] = {
+        0x000A, /* LINE FEED */
+        0x000D, /* CARRIAGE RETURN */
+        0x001C, /* FILE SEPARATOR */
+        0x001D, /* GROUP SEPARATOR */
+        0x001E, /* RECORD SEPARATOR */
+        0x0085, /* NEXT LINE */
+        0x2028, /* LINE SEPARATOR */
+        0x2029, /* PARAGRAPH SEPARATOR */
+    };
+
+    /* Init the implementation */
+    unicode_freelist = NULL;
+    unicode_freelist_size = 0;
+    unicode_empty = _PyUnicode_New(0);
+    if (!unicode_empty)
+	return;
 
-        /* we know this to be a unicode object, but since we just keep
-           it around to keep the object alive, having it as PyObject
-           is okay */
-        PyObject *str;
+    for (i = 0; i < 256; i++)
+	unicode_latin1[i] = NULL;
+    if (PyType_Ready(&PyUnicode_Type) < 0)
+	Py_FatalError("Can't initialize 'unicode'");
 
-        FieldNameIterator it_field;
-} fieldnameiterobject;
+    /* initialize the linebreak bloom filter */
+    bloom_linebreak = make_bloom_mask(
+        linebreak, sizeof(linebreak) / sizeof(linebreak[0])
+        );
 
-static void
-fieldnameiter_dealloc(fieldnameiterobject *it)
-{
-        Py_XDECREF(it->str);
-	PyObject_FREE(it);
+    PyType_Ready(&EncodingMapType);
 }
 
-/* returns a tuple:
-   (is_attr, value)
-   is_attr is true if we used attribute syntax (e.g., '.foo')
-              false if we used index syntax (e.g., '[foo]')
-   value is an integer or string
-*/
-static PyObject *
-fieldnameiter_next(fieldnameiterobject *it)
+/* Finalize the Unicode implementation */
+
+void
+_PyUnicode_Fini(void)
 {
-        int result;
-        int is_attr;
-        Py_ssize_t idx;
-        SubString name;
+    PyUnicodeObject *u;
+    int i;
 
-        result = FieldNameIterator_next(&it->it_field, &is_attr,
-                                            &idx, &name);
-        if (result == 0 || result == 1) {
-                /* if 0, error has already been set, if 1, iterator is empty */
-                return NULL;
-        } else {
-                PyObject* result = NULL;
-                PyObject* is_attr_obj = NULL;
-                PyObject* obj = NULL;
+    Py_XDECREF(unicode_empty);
+    unicode_empty = NULL;
 
-                is_attr_obj = PyBool_FromLong(is_attr);
-                if (is_attr_obj == NULL)
-                        goto error;
+    for (i = 0; i < 256; i++) {
+	if (unicode_latin1[i]) {
+	    Py_DECREF(unicode_latin1[i]);
+	    unicode_latin1[i] = NULL;
+	}
+    }
 
-                /* either an integer or a string */
-                if (idx != -1)
-                        obj = PyInt_FromSsize_t(idx);
-                else
-                        obj = STRINGLIB_NEW(name.ptr, name.end - name.ptr);
-                if (obj == NULL)
-                        goto error;
+    for (u = unicode_freelist; u != NULL;) {
+	PyUnicodeObject *v = u;
+	u = *(PyUnicodeObject **)u;
+	if (v->str)
+	    PyMem_DEL(v->str);
+	Py_XDECREF(v->defenc);
+	PyObject_Del(v);
+    }
+    unicode_freelist = NULL;
+    unicode_freelist_size = 0;
+}
 
-               /* return a tuple of values */
-                result = PyTuple_Pack(2, is_attr_obj, obj);
-                if (result == NULL)
-                        goto error;
+void
+PyUnicode_InternInPlace(PyObject **p)
+{
+	register PyUnicodeObject *s = (PyUnicodeObject *)(*p);
+	PyObject *t;
+	if (s == NULL || !PyUnicode_Check(s))
+		Py_FatalError(
+		    "PyUnicode_InternInPlace: unicode strings only please!");
+	/* If it's a subclass, we don't really know what putting
+	   it in the interned dict might do. */
+	if (!PyUnicode_CheckExact(s))
+		return;
+	if (PyUnicode_CHECK_INTERNED(s))
+		return;
+	if (interned == NULL) {
+		interned = PyDict_New();
+		if (interned == NULL) {
+			PyErr_Clear(); /* Don't leave an exception */
+			return;
+		}
+	}
+	/* It might be that the GetItem call fails even
+	   though the key is present in the dictionary,
+	   namely when this happens during a stack overflow. */
+	Py_ALLOW_RECURSION
+	t = PyDict_GetItem(interned, (PyObject *)s);
+	Py_END_ALLOW_RECURSION
 
-                return result;
+	if (t) {
+		Py_INCREF(t);
+		Py_DECREF(*p);
+		*p = t;
+		return;
+	}
 
-        error:
-                Py_XDECREF(result);
-                Py_XDECREF(is_attr_obj);
-                Py_XDECREF(obj);
-                return NULL;
-        }
-        return NULL;
+	PyThreadState_GET()->recursion_critical = 1;
+	if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
+		PyErr_Clear();
+		PyThreadState_GET()->recursion_critical = 0;
+		return;
+	}
+	PyThreadState_GET()->recursion_critical = 0;
+	/* The two references in interned are not counted by refcnt.
+	   The deallocator will take care of this */
+	Py_Refcnt(s) -= 2;
+	PyUnicode_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
 }
 
-static PyMethodDef fieldnameiter_methods[] = {
- 	{NULL,		NULL}		/* sentinel */
-};
-
-static PyTypeObject PyFieldNameIter_Type = {
-	PyVarObject_HEAD_INIT(&PyType_Type, 0)
-	"fieldnameiterator",			/* tp_name */
-	sizeof(fieldnameiterobject),		/* tp_basicsize */
-	0,					/* tp_itemsize */
-	/* methods */
-	(destructor)fieldnameiter_dealloc,	/* tp_dealloc */
-	0,					/* tp_print */
-	0,					/* tp_getattr */
-	0,					/* tp_setattr */
-	0,					/* tp_compare */
-	0,					/* tp_repr */
-	0,					/* tp_as_number */
-	0,					/* tp_as_sequence */
-	0,					/* tp_as_mapping */
-	0,					/* tp_hash */
-	0,					/* tp_call */
-	0,					/* tp_str */
-	PyObject_GenericGetAttr,		/* tp_getattro */
-	0,					/* tp_setattro */
-	0,					/* tp_as_buffer */
-	Py_TPFLAGS_DEFAULT,			/* tp_flags */
-	0,					/* tp_doc */
-	0,					/* tp_traverse */
-	0,					/* tp_clear */
-	0,					/* tp_richcompare */
-	0,					/* tp_weaklistoffset */
-	PyObject_SelfIter,			/* tp_iter */
-	(iternextfunc)fieldnameiter_next,	/* tp_iternext */
-	fieldnameiter_methods,			/* tp_methods */
-        0};
+void
+PyUnicode_InternImmortal(PyObject **p)
+{
+	PyUnicode_InternInPlace(p);
+	if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
+		PyUnicode_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
+		Py_INCREF(*p);
+	}
+}
 
 PyObject *
-_PyUnicode_FormatterFieldNameSplit(PyObject *field_name)
+PyUnicode_InternFromString(const char *cp)
 {
-        SubString first;
-        Py_ssize_t first_idx;
-        fieldnameiterobject *it;
-
-        PyObject *first_obj = NULL;
-        PyObject *result = NULL;
-
-        assert(PyUnicode_Check(field_name));
-        it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
-        if (it == NULL)
-                return NULL;
-
-        /* take ownership, give the object to the iterator.  this is
-           just to keep the field_name alive */
-        Py_INCREF(field_name);
-        it->str = field_name;
+	PyObject *s = PyUnicode_FromString(cp);
+	if (s == NULL)
+		return NULL;
+	PyUnicode_InternInPlace(&s);
+	return s;
+}
 
-        if (!field_name_split(STRINGLIB_STR(field_name),
-                              STRINGLIB_LEN(field_name),
-                              &first, &first_idx, &it->it_field))
-                goto error;
+void _Py_ReleaseInternedUnicodeStrings(void)
+{
+	PyObject *keys;
+	PyUnicodeObject *s;
+	Py_ssize_t i, n;
+	Py_ssize_t immortal_size = 0, mortal_size = 0;
 
-        /* first becomes an integer, if possible, else a string */
-        if (first_idx != -1)
-                first_obj = PyInt_FromSsize_t(first_idx);
-        else
-                /* convert "first" into a string object */
-                first_obj = STRINGLIB_NEW(first.ptr, first.end - first.ptr);
-        if (first_obj == NULL)
-                goto error;
+	if (interned == NULL || !PyDict_Check(interned))
+		return;
+	keys = PyDict_Keys(interned);
+	if (keys == NULL || !PyList_Check(keys)) {
+		PyErr_Clear();
+		return;
+	}
 
-        /* return a tuple of values */
-        result = PyTuple_Pack(2, first_obj, it);
+	/* Since _Py_ReleaseInternedUnicodeStrings() is intended to help a leak
+	   detector, interned unicode strings are not forcibly deallocated;
+	   rather, we give them their stolen references back, and then clear
+	   and DECREF the interned dict. */
 
-error:
-        Py_XDECREF(it);
-        Py_XDECREF(first_obj);
-        return result;
+	n = PyList_GET_SIZE(keys);
+	fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
+		n);
+	for (i = 0; i < n; i++) {
+		s = (PyUnicodeObject *) PyList_GET_ITEM(keys, i);
+		switch (s->state) {
+		case SSTATE_NOT_INTERNED:
+			/* XXX Shouldn't happen */
+			break;
+		case SSTATE_INTERNED_IMMORTAL:
+			Py_Refcnt(s) += 1;
+			immortal_size += s->length;
+			break;
+		case SSTATE_INTERNED_MORTAL:
+			Py_Refcnt(s) += 2;
+			mortal_size += s->length;
+			break;
+		default:
+			Py_FatalError("Inconsistent interned string state.");
+		}
+		s->state = SSTATE_NOT_INTERNED;
+	}
+	fprintf(stderr, "total size of all interned strings: "
+			"%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
+			"mortal/immortal\n", mortal_size, immortal_size);
+	Py_DECREF(keys);
+	PyDict_Clear(interned);
+	Py_DECREF(interned);
+	interned = NULL;
 }
 
+
 /********************* Unicode Iterator **************************/
 
 typedef struct {

Modified: python/branches/py3k/Python/sysmodule.c
==============================================================================
--- python/branches/py3k/Python/sysmodule.c	(original)
+++ python/branches/py3k/Python/sysmodule.c	Mon Aug 27 13:28:18 2007
@@ -660,54 +660,6 @@
 	return _PyThread_CurrentFrames();
 }
 
-/* sys_formatter_iterator is used to implement
-   string.Formatter.vformat.  it parses a string and returns tuples
-   describing the parsed elements.  see unicodeobject.c's
-   _PyUnicode_FormatterIterator for details */
-static PyObject *
-sys_formatter_iterator(PyObject *self, PyObject *args)
-{
-        /* in 2.6, check type and dispatch to unicode or string
-           accordingly */
-        PyObject *str;
-
-        if (!PyArg_ParseTuple(args, "O:_formatter_iterator", &str))
-                return NULL;
-
-        if (!PyUnicode_Check(str)) {
-                PyErr_SetString(PyExc_TypeError,
-                                "_formatter_iterator expects unicode object");
-                return NULL;
-        }
-
-        return _PyUnicode_FormatterIterator(str);
-}
-
-/* sys_formatter_field_name_split is used to implement
-   string.Formatter.vformat.  it takes an PEP 3101 "field name", and
-   returns a tuple of (first, rest): "first", the part before the
-   first '.' or '['; and "rest", an iterator for the rest of the field
-   name.  see unicodeobjects' _PyUnicode_FormatterFieldNameSplit for
-   details */
-static PyObject *
-sys_formatter_field_name_split(PyObject *self, PyObject *args)
-{
-        PyObject *field_name;
-
-        if (!PyArg_ParseTuple(args, "O:_formatter_field_name_split",
-                              &field_name))
-                return NULL;
-
-        if (!PyUnicode_Check(field_name)) {
-                PyErr_SetString(PyExc_TypeError, "_formatter_field_name_split "
-                                "expects unicode object");
-                return NULL;
-        }
-
-        return _PyUnicode_FormatterFieldNameSplit(field_name);
-}
-
-
 PyDoc_STRVAR(call_tracing_doc,
 "call_tracing(func, args) -> object\n\
 \n\
@@ -772,9 +724,6 @@
 	 callstats_doc},
 	{"_current_frames", sys_current_frames, METH_NOARGS,
 	 current_frames_doc},
-        {"_formatter_parser", sys_formatter_iterator, METH_VARARGS},
-        {"_formatter_field_name_split", sys_formatter_field_name_split,
-         METH_VARARGS},
 	{"displayhook",	sys_displayhook, METH_O, displayhook_doc},
 	{"exc_info",	sys_exc_info, METH_NOARGS, exc_info_doc},
 	{"excepthook",	sys_excepthook, METH_VARARGS, excepthook_doc},


More information about the Python-3000-checkins mailing list