[Python-3000-checkins] r57556 - in python/branches/py3k: Include/unicodeobject.h Lib/string.py Objects/unicodeobject.c Python/sysmodule.c
eric.smith
python-3000-checkins at python.org
Mon Aug 27 13:28:18 CEST 2007
Author: eric.smith
Date: Mon Aug 27 13:28:18 2007
New Revision: 57556
Modified:
python/branches/py3k/Include/unicodeobject.h
python/branches/py3k/Lib/string.py
python/branches/py3k/Objects/unicodeobject.c
python/branches/py3k/Python/sysmodule.c
Log:
PEP 3101: Removed _formatter_xxx routines from sysmodule, and made them unicode methods instead (per GvR suggestion).
Modified: python/branches/py3k/Include/unicodeobject.h
==============================================================================
--- python/branches/py3k/Include/unicodeobject.h (original)
+++ python/branches/py3k/Include/unicodeobject.h Mon Aug 27 13:28:18 2007
@@ -1437,9 +1437,6 @@
const Py_UNICODE *s, Py_UNICODE c
);
-PyObject *_PyUnicode_FormatterIterator(PyObject *str);
-PyObject *_PyUnicode_FormatterFieldNameSplit(PyObject *field_name);
-
#ifdef __cplusplus
}
#endif
Modified: python/branches/py3k/Lib/string.py
==============================================================================
--- python/branches/py3k/Lib/string.py (original)
+++ python/branches/py3k/Lib/string.py Mon Aug 27 13:28:18 2007
@@ -200,10 +200,8 @@
# exposed here via the sys module. sys was chosen because it's always
# available and doesn't have to be dynamically loaded.
-# The overall parser is implemented in sys._formatter_parser.
-# The field name parser is implemented in sys._formatter_field_name_split
-
-from sys import _formatter_parser, _formatter_field_name_split
+# The overall parser is implemented in str._formatter_parser.
+# The field name parser is implemented in str._formatter_field_name_split
class Formatter:
def format(self, format_string, *args, **kwargs):
@@ -213,13 +211,13 @@
used_args = set()
result = []
for (is_markup, literal, field_name, format_spec, conversion) in \
- _formatter_parser(format_string):
+ format_string._formatter_parser():
if is_markup:
# given the field_name, find the object it references
# split it into the first part, and an iterator that
# looks over the rest
- first, rest = _formatter_field_name_split(field_name)
+ first, rest = field_name._formatter_field_name_split()
used_args.add(first)
obj = self.get_value(first, args, kwargs)
Modified: python/branches/py3k/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k/Objects/unicodeobject.c (original)
+++ python/branches/py3k/Objects/unicodeobject.c Mon Aug 27 13:28:18 2007
@@ -598,7 +598,7 @@
*/
if ((*f == 'l' || *f == 'z') &&
(f[1] == 'd' || f[1] == 'u'))
- ++f;
+ ++f;
switch (*f) {
case 'c':
@@ -7981,1502 +7981,1509 @@
}
-static PyObject *
-unicode_getnewargs(PyUnicodeObject *v)
-{
- return Py_BuildValue("(u#)", v->str, v->length);
-}
-
-
-static PyMethodDef unicode_methods[] = {
+/********************* Formatter Iterator ************************/
- /* Order is according to common usage: often used methods should
- appear first, since lookup is done sequentially. */
+/* this is used to implement string.Formatter.vparse(). it exists so
+ Formatter can share code with the built in unicode.format()
+ method */
- {"encode", (PyCFunction) unicode_encode, METH_VARARGS, encode__doc__},
- {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__},
- {"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__},
- {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
- {"join", (PyCFunction) unicode_join, METH_O, join__doc__},
- {"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__},
- {"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__},
- {"center", (PyCFunction) unicode_center, METH_VARARGS, center__doc__},
- {"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__},
- {"expandtabs", (PyCFunction) unicode_expandtabs, METH_VARARGS, expandtabs__doc__},
- {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__},
- {"partition", (PyCFunction) unicode_partition, METH_O, partition__doc__},
- {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__},
- {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__},
- {"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__},
- {"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__},
- {"decode", (PyCFunction) unicode_decode, METH_VARARGS, decode__doc__},
-/* {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */
- {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},
- {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},
- {"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__},
- {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__},
- {"rpartition", (PyCFunction) unicode_rpartition, METH_O, rpartition__doc__},
- {"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS, splitlines__doc__},
- {"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__},
- {"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__},
- {"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__},
- {"upper", (PyCFunction) unicode_upper, METH_NOARGS, upper__doc__},
- {"startswith", (PyCFunction) unicode_startswith, METH_VARARGS, startswith__doc__},
- {"endswith", (PyCFunction) unicode_endswith, METH_VARARGS, endswith__doc__},
- {"islower", (PyCFunction) unicode_islower, METH_NOARGS, islower__doc__},
- {"isupper", (PyCFunction) unicode_isupper, METH_NOARGS, isupper__doc__},
- {"istitle", (PyCFunction) unicode_istitle, METH_NOARGS, istitle__doc__},
- {"isspace", (PyCFunction) unicode_isspace, METH_NOARGS, isspace__doc__},
- {"isdecimal", (PyCFunction) unicode_isdecimal, METH_NOARGS, isdecimal__doc__},
- {"isdigit", (PyCFunction) unicode_isdigit, METH_NOARGS, isdigit__doc__},
- {"isnumeric", (PyCFunction) unicode_isnumeric, METH_NOARGS, isnumeric__doc__},
- {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__},
- {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__},
- {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__},
- {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__},
- {"format", (PyCFunction) unicode_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
- {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
-#if 0
- {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
-#endif
+typedef struct {
+ PyObject_HEAD
-#if 0
- /* This one is just used for debugging the implementation. */
- {"freelistsize", (PyCFunction) unicode_freelistsize, METH_NOARGS},
-#endif
+ PyUnicodeObject *str;
- {"__getnewargs__", (PyCFunction)unicode_getnewargs, METH_NOARGS},
- {NULL, NULL}
-};
+ MarkupIterator it_markup;
+} formatteriterobject;
-static PyObject *
-unicode_mod(PyObject *v, PyObject *w)
+static void
+formatteriter_dealloc(formatteriterobject *it)
{
- if (!PyUnicode_Check(v)) {
- Py_INCREF(Py_NotImplemented);
- return Py_NotImplemented;
- }
- return PyUnicode_Format(v, w);
+ Py_XDECREF(it->str);
+ PyObject_FREE(it);
}
-static PyNumberMethods unicode_as_number = {
- 0, /*nb_add*/
- 0, /*nb_subtract*/
- 0, /*nb_multiply*/
- unicode_mod, /*nb_remainder*/
-};
-
-static PySequenceMethods unicode_as_sequence = {
- (lenfunc) unicode_length, /* sq_length */
- PyUnicode_Concat, /* sq_concat */
- (ssizeargfunc) unicode_repeat, /* sq_repeat */
- (ssizeargfunc) unicode_getitem, /* sq_item */
- (ssizessizeargfunc) unicode_slice, /* sq_slice */
- 0, /* sq_ass_item */
- 0, /* sq_ass_slice */
- PyUnicode_Contains, /* sq_contains */
-};
-
-static PyObject*
-unicode_subscript(PyUnicodeObject* self, PyObject* item)
+/* returns a tuple:
+ (is_markup, literal, field_name, format_spec, conversion)
+ if is_markup == True:
+ literal is None
+ field_name is the string before the ':'
+ format_spec is the string after the ':'
+ conversion is either None, or the string after the '!'
+ if is_markup == False:
+ literal is the literal string
+ field_name is None
+ format_spec is None
+ conversion is None
+*/
+static PyObject *
+formatteriter_next(formatteriterobject *it)
{
- if (PyIndex_Check(item)) {
- Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
- if (i == -1 && PyErr_Occurred())
- return NULL;
- if (i < 0)
- i += PyUnicode_GET_SIZE(self);
- return unicode_getitem(self, i);
- } else if (PySlice_Check(item)) {
- Py_ssize_t start, stop, step, slicelength, cur, i;
- Py_UNICODE* source_buf;
- Py_UNICODE* result_buf;
- PyObject* result;
-
- if (PySlice_GetIndicesEx((PySliceObject*)item, PyUnicode_GET_SIZE(self),
- &start, &stop, &step, &slicelength) < 0) {
- return NULL;
- }
+ SubString literal;
+ SubString field_name;
+ SubString format_spec;
+ Py_UNICODE conversion;
+ int is_markup;
+ int format_spec_needs_expanding;
+ int result = MarkupIterator_next(&it->it_markup, &is_markup, &literal,
+ &field_name, &format_spec, &conversion,
+ &format_spec_needs_expanding);
- if (slicelength <= 0) {
- return PyUnicode_FromUnicode(NULL, 0);
+ /* all of the SubString objects point into it->str, so no
+ memory management needs to be done on them */
+ assert(0 <= result && result <= 2);
+ if (result == 0) {
+ /* error has already been set */
+ return NULL;
+ } else if (result == 1) {
+ /* end of iterator */
+ return NULL;
} else {
- source_buf = PyUnicode_AS_UNICODE((PyObject*)self);
- result_buf = (Py_UNICODE *)PyMem_MALLOC(slicelength*
- sizeof(Py_UNICODE));
-
- if (result_buf == NULL)
- return PyErr_NoMemory();
+ PyObject *is_markup_bool = NULL;
+ PyObject *literal_str = NULL;
+ PyObject *field_name_str = NULL;
+ PyObject *format_spec_str = NULL;
+ PyObject *conversion_str = NULL;
+ PyObject *tuple = NULL;
- for (cur = start, i = 0; i < slicelength; cur += step, i++) {
- result_buf[i] = source_buf[cur];
- }
+ is_markup_bool = PyBool_FromLong(is_markup);
+ if (!is_markup_bool)
+ return NULL;
- result = PyUnicode_FromUnicode(result_buf, slicelength);
- PyMem_FREE(result_buf);
- return result;
- }
- } else {
- PyErr_SetString(PyExc_TypeError, "string indices must be integers");
- return NULL;
- }
-}
+ if (is_markup) {
+ /* field_name, format_spec, and conversion are
+ returned */
+ literal_str = Py_None;
+ Py_INCREF(literal_str);
-static PyMappingMethods unicode_as_mapping = {
- (lenfunc)unicode_length, /* mp_length */
- (binaryfunc)unicode_subscript, /* mp_subscript */
- (objobjargproc)0, /* mp_ass_subscript */
-};
+ field_name_str = SubString_new_object(&field_name);
+ if (field_name_str == NULL)
+ goto error;
+ format_spec_str = SubString_new_object(&format_spec);
+ if (format_spec_str == NULL)
+ goto error;
-static int
-unicode_buffer_getbuffer(PyUnicodeObject *self, PyBuffer *view, int flags)
-{
+ /* if the conversion is not specified, return
+ a None, otherwise create a one length
+ string with the conversion character */
+ if (conversion == '\0') {
+ conversion_str = Py_None;
+ Py_INCREF(conversion_str);
+ } else
+ conversion_str = PyUnicode_FromUnicode(&conversion,
+ 1);
+ if (conversion_str == NULL)
+ goto error;
+ } else {
+ /* only literal is returned */
+ literal_str = SubString_new_object(&literal);
+ if (literal_str == NULL)
+ goto error;
- if (flags & PyBUF_CHARACTER) {
- PyObject *str;
-
- str = _PyUnicode_AsDefaultEncodedString((PyObject *)self, NULL);
- if (str == NULL) return -1;
- return PyBuffer_FillInfo(view, (void *)PyString_AS_STRING(str),
- PyString_GET_SIZE(str), 1, flags);
- }
- else {
- return PyBuffer_FillInfo(view, (void *)self->str,
- PyUnicode_GET_DATA_SIZE(self), 1, flags);
- }
-}
+ field_name_str = Py_None;
+ format_spec_str = Py_None;
+ conversion_str = Py_None;
+ Py_INCREF(field_name_str);
+ Py_INCREF(format_spec_str);
+ Py_INCREF(conversion_str);
+ }
+ tuple = PyTuple_Pack(5, is_markup_bool, literal_str,
+ field_name_str, format_spec_str,
+ conversion_str);
+ error:
+ Py_XDECREF(is_markup_bool);
+ Py_XDECREF(literal_str);
+ Py_XDECREF(field_name_str);
+ Py_XDECREF(format_spec_str);
+ Py_XDECREF(conversion_str);
+ return tuple;
+ }
+}
-/* Helpers for PyUnicode_Format() */
+static PyMethodDef formatteriter_methods[] = {
+ {NULL, NULL} /* sentinel */
+};
-static PyObject *
-getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
-{
- Py_ssize_t argidx = *p_argidx;
- if (argidx < arglen) {
- (*p_argidx)++;
- if (arglen < 0)
- return args;
- else
- return PyTuple_GetItem(args, argidx);
- }
- PyErr_SetString(PyExc_TypeError,
- "not enough arguments for format string");
- return NULL;
-}
-
-#define F_LJUST (1<<0)
-#define F_SIGN (1<<1)
-#define F_BLANK (1<<2)
-#define F_ALT (1<<3)
-#define F_ZERO (1<<4)
+PyTypeObject PyFormatterIter_Type = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ "formatteriterator", /* tp_name */
+ sizeof(formatteriterobject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)formatteriter_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ 0, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ PyObject_SelfIter, /* tp_iter */
+ (iternextfunc)formatteriter_next, /* tp_iternext */
+ formatteriter_methods, /* tp_methods */
+ 0,
+};
-static Py_ssize_t
-strtounicode(Py_UNICODE *buffer, const char *charbuffer)
+/* unicode_formatter_parser is used to implement
+ string.Formatter.vformat. it parses a string and returns tuples
+ describing the parsed elements. It's a wrapper around
+ stringlib/string_format.h's MarkupIterator */
+static PyObject *
+unicode_formatter_parser(PyUnicodeObject *self)
{
- register Py_ssize_t i;
- Py_ssize_t len = strlen(charbuffer);
- for (i = len - 1; i >= 0; i--)
- buffer[i] = (Py_UNICODE) charbuffer[i];
+ formatteriterobject *it;
- return len;
-}
+ it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
+ if (it == NULL)
+ return NULL;
-static int
-doubletounicode(Py_UNICODE *buffer, size_t len, const char *format, double x)
-{
- Py_ssize_t result;
+ /* take ownership, give the object to the iterator */
+ Py_INCREF(self);
+ it->str = self;
- PyOS_ascii_formatd((char *)buffer, len, format, x);
- result = strtounicode(buffer, (char *)buffer);
- return Py_SAFE_DOWNCAST(result, Py_ssize_t, int);
+ /* initialize the contained MarkupIterator */
+ MarkupIterator_init(&it->it_markup,
+ PyUnicode_AS_UNICODE(self),
+ PyUnicode_GET_SIZE(self));
+
+ return (PyObject *)it;
}
+/***************** end Formatter Iterator ************************/
+/********************* FieldName Iterator ************************/
-static int
-longtounicode(Py_UNICODE *buffer, size_t len, const char *format, long x)
-{
- Py_ssize_t result;
+/* this is used to implement string.Formatter.vparse(). it parses
+ the field name into attribute and item values. */
- PyOS_snprintf((char *)buffer, len, format, x);
- result = strtounicode(buffer, (char *)buffer);
- return Py_SAFE_DOWNCAST(result, Py_ssize_t, int);
-}
+typedef struct {
+ PyObject_HEAD
-/* XXX To save some code duplication, formatfloat/long/int could have been
- shared with stringobject.c, converting from 8-bit to Unicode after the
- formatting is done. */
+ PyUnicodeObject *str;
-static int
-formatfloat(Py_UNICODE *buf,
- size_t buflen,
- int flags,
- int prec,
- int type,
- PyObject *v)
+ FieldNameIterator it_field;
+} fieldnameiterobject;
+
+static void
+fieldnameiter_dealloc(fieldnameiterobject *it)
{
- /* fmt = '%#.' + `prec` + `type`
- worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
- char fmt[20];
- double x;
+ Py_XDECREF(it->str);
+ PyObject_FREE(it);
+}
- x = PyFloat_AsDouble(v);
- if (x == -1.0 && PyErr_Occurred())
- return -1;
- if (prec < 0)
- prec = 6;
- if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
- type = 'g';
- /* Worst case length calc to ensure no buffer overrun:
+/* returns a tuple:
+ (is_attr, value)
+ is_attr is true if we used attribute syntax (e.g., '.foo')
+ false if we used index syntax (e.g., '[foo]')
+ value is an integer or string
+*/
+static PyObject *
+fieldnameiter_next(fieldnameiterobject *it)
+{
+ int result;
+ int is_attr;
+ Py_ssize_t idx;
+ SubString name;
- 'g' formats:
- fmt = %#.<prec>g
- buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
- for any double rep.)
- len = 1 + prec + 1 + 2 + 5 = 9 + prec
+ result = FieldNameIterator_next(&it->it_field, &is_attr,
+ &idx, &name);
+ if (result == 0 || result == 1) {
+ /* if 0, error has already been set, if 1, iterator is empty */
+ return NULL;
+ } else {
+ PyObject* result = NULL;
+ PyObject* is_attr_obj = NULL;
+ PyObject* obj = NULL;
- 'f' formats:
- buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
- len = 1 + 50 + 1 + prec = 52 + prec
+ is_attr_obj = PyBool_FromLong(is_attr);
+ if (is_attr_obj == NULL)
+ goto error;
- If prec=0 the effective precision is 1 (the leading digit is
- always given), therefore increase the length by one.
+ /* either an integer or a string */
+ if (idx != -1)
+ obj = PyInt_FromSsize_t(idx);
+ else
+ obj = STRINGLIB_NEW(name.ptr, name.end - name.ptr);
+ if (obj == NULL)
+ goto error;
- */
- if (((type == 'g' || type == 'G') &&
- buflen <= (size_t)10 + (size_t)prec) ||
- (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
- PyErr_SetString(PyExc_OverflowError,
- "formatted float is too long (precision too large?)");
- return -1;
- }
- PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
- (flags&F_ALT) ? "#" : "",
- prec, type);
- return doubletounicode(buf, buflen, fmt, x);
-}
+ /* return a tuple of values */
+ result = PyTuple_Pack(2, is_attr_obj, obj);
+ if (result == NULL)
+ goto error;
-static PyObject*
-formatlong(PyObject *val, int flags, int prec, int type)
-{
- char *buf;
- int len;
- PyObject *str; /* temporary string object. */
- PyObject *result;
+ return result;
- str = _PyString_FormatLong(val, flags, prec, type, &buf, &len);
- if (!str)
- return NULL;
- result = PyUnicode_FromStringAndSize(buf, len);
- Py_DECREF(str);
- return result;
+ error:
+ Py_XDECREF(result);
+ Py_XDECREF(is_attr_obj);
+ Py_XDECREF(obj);
+ return NULL;
+ }
+ return NULL;
}
-static int
-formatint(Py_UNICODE *buf,
- size_t buflen,
- int flags,
- int prec,
- int type,
- PyObject *v)
+static PyMethodDef fieldnameiter_methods[] = {
+ {NULL, NULL} /* sentinel */
+};
+
+static PyTypeObject PyFieldNameIter_Type = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ "fieldnameiterator", /* tp_name */
+ sizeof(fieldnameiterobject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)fieldnameiter_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ 0, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ PyObject_SelfIter, /* tp_iter */
+ (iternextfunc)fieldnameiter_next, /* tp_iternext */
+ fieldnameiter_methods, /* tp_methods */
+ 0};
+
+/* unicode_formatter_field_name_split is used to implement
+ string.Formatter.vformat. it takes a PEP 3101 "field name", and
+ returns a tuple of (first, rest): "first", the part before the
+ first '.' or '['; and "rest", an iterator for the rest of the field
+ name. it's a wrapper around stringlib/string_format.h's
+ field_name_split. The iterator it returns is a
+ FieldNameIterator */
+static PyObject *
+unicode_formatter_field_name_split(PyUnicodeObject *self)
{
- /* fmt = '%#.' + `prec` + 'l' + `type`
- * worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
- * + 1 + 1
- * = 24
- */
- char fmt[64]; /* plenty big enough! */
- char *sign;
- long x;
+ SubString first;
+ Py_ssize_t first_idx;
+ fieldnameiterobject *it;
- x = PyInt_AsLong(v);
- if (x == -1 && PyErr_Occurred())
- return -1;
- if (x < 0 && type == 'u') {
- type = 'd';
- }
- if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
- sign = "-";
- else
- sign = "";
- if (prec < 0)
- prec = 1;
+ PyObject *first_obj = NULL;
+ PyObject *result = NULL;
- /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
- * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
- */
- if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
- PyErr_SetString(PyExc_OverflowError,
- "formatted integer is too long (precision too large?)");
- return -1;
- }
+ it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
+ if (it == NULL)
+ return NULL;
- if ((flags & F_ALT) &&
- (type == 'x' || type == 'X' || type == 'o')) {
- /* When converting under %#o, %#x or %#X, there are a number
- * of issues that cause pain:
- * - for %#o, we want a different base marker than C
- * - when 0 is being converted, the C standard leaves off
- * the '0x' or '0X', which is inconsistent with other
- * %#x/%#X conversions and inconsistent with Python's
- * hex() function
- * - there are platforms that violate the standard and
- * convert 0 with the '0x' or '0X'
- * (Metrowerks, Compaq Tru64)
- * - there are platforms that give '0x' when converting
- * under %#X, but convert 0 in accordance with the
- * standard (OS/2 EMX)
- *
- * We can achieve the desired consistency by inserting our
- * own '0x' or '0X' prefix, and substituting %x/%X in place
- * of %#x/%#X.
- *
- * Note that this is the same approach as used in
- * formatint() in stringobject.c
- */
- PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
- sign, type, prec, type);
- }
- else {
- PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
- sign, (flags&F_ALT) ? "#" : "",
- prec, type);
- }
- if (sign[0])
- return longtounicode(buf, buflen, fmt, -x);
- else
- return longtounicode(buf, buflen, fmt, x);
-}
+ /* take ownership, give the object to the iterator. this is
+ just to keep the field_name alive */
+ Py_INCREF(self);
+ it->str = self;
-static int
-formatchar(Py_UNICODE *buf,
- size_t buflen,
- PyObject *v)
-{
- /* presume that the buffer is at least 2 characters long */
- if (PyUnicode_Check(v)) {
- if (PyUnicode_GET_SIZE(v) != 1)
- goto onError;
- buf[0] = PyUnicode_AS_UNICODE(v)[0];
- }
+ if (!field_name_split(STRINGLIB_STR(self),
+ STRINGLIB_LEN(self),
+ &first, &first_idx, &it->it_field))
+ goto error;
- else if (PyString_Check(v)) {
- if (PyString_GET_SIZE(v) != 1)
- goto onError;
- buf[0] = (Py_UNICODE)PyString_AS_STRING(v)[0];
- }
+ /* first becomes an integer, if possible, else a string */
+ if (first_idx != -1)
+ first_obj = PyInt_FromSsize_t(first_idx);
+ else
+ /* convert "first" into a string object */
+ first_obj = STRINGLIB_NEW(first.ptr, first.end - first.ptr);
+ if (first_obj == NULL)
+ goto error;
- else {
- /* Integer input truncated to a character */
- long x;
- x = PyInt_AsLong(v);
- if (x == -1 && PyErr_Occurred())
- goto onError;
-#ifdef Py_UNICODE_WIDE
- if (x < 0 || x > 0x10ffff) {
- PyErr_SetString(PyExc_OverflowError,
- "%c arg not in range(0x110000) "
- "(wide Python build)");
- return -1;
- }
-#else
- if (x < 0 || x > 0xffff) {
- PyErr_SetString(PyExc_OverflowError,
- "%c arg not in range(0x10000) "
- "(narrow Python build)");
- return -1;
- }
-#endif
- buf[0] = (Py_UNICODE) x;
- }
- buf[1] = '\0';
- return 1;
+ /* return a tuple of values */
+ result = PyTuple_Pack(2, first_obj, it);
- onError:
- PyErr_SetString(PyExc_TypeError,
- "%c requires int or char");
- return -1;
+error:
+ Py_XDECREF(it);
+ Py_XDECREF(first_obj);
+ return result;
}
+/***************** end FieldName Iterator ************************/
-/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
-
- FORMATBUFLEN is the length of the buffer in which the floats, ints, &
- chars are formatted. XXX This is a magic number. Each formatting
- routine does bounds checking to ensure no overflow, but a better
- solution may be to malloc a buffer of appropriate size for each
- format. For now, the current solution is sufficient.
-*/
-#define FORMATBUFLEN (size_t)120
-PyObject *PyUnicode_Format(PyObject *format,
- PyObject *args)
+static PyObject *
+unicode_getnewargs(PyUnicodeObject *v)
{
- Py_UNICODE *fmt, *res;
- Py_ssize_t fmtcnt, rescnt, reslen, arglen, argidx;
- int args_owned = 0;
- PyUnicodeObject *result = NULL;
- PyObject *dict = NULL;
- PyObject *uformat;
+ return Py_BuildValue("(u#)", v->str, v->length);
+}
- if (format == NULL || args == NULL) {
- PyErr_BadInternalCall();
- return NULL;
- }
- uformat = PyUnicode_FromObject(format);
- if (uformat == NULL)
- return NULL;
- fmt = PyUnicode_AS_UNICODE(uformat);
- fmtcnt = PyUnicode_GET_SIZE(uformat);
- reslen = rescnt = fmtcnt + 100;
- result = _PyUnicode_New(reslen);
- if (result == NULL)
- goto onError;
- res = PyUnicode_AS_UNICODE(result);
+static PyMethodDef unicode_methods[] = {
- if (PyTuple_Check(args)) {
- arglen = PyTuple_Size(args);
- argidx = 0;
- }
- else {
- arglen = -1;
- argidx = -2;
- }
- if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
- !PyObject_TypeCheck(args, &PyBaseString_Type))
- dict = args;
+ /* Order is according to common usage: often used methods should
+ appear first, since lookup is done sequentially. */
- while (--fmtcnt >= 0) {
- if (*fmt != '%') {
- if (--rescnt < 0) {
- rescnt = fmtcnt + 100;
- reslen += rescnt;
- if (_PyUnicode_Resize(&result, reslen) < 0)
- goto onError;
- res = PyUnicode_AS_UNICODE(result) + reslen - rescnt;
- --rescnt;
- }
- *res++ = *fmt++;
- }
- else {
- /* Got a format specifier */
- int flags = 0;
- Py_ssize_t width = -1;
- int prec = -1;
- Py_UNICODE c = '\0';
- Py_UNICODE fill;
- PyObject *v = NULL;
- PyObject *temp = NULL;
- Py_UNICODE *pbuf;
- Py_UNICODE sign;
- Py_ssize_t len;
- Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
+ {"encode", (PyCFunction) unicode_encode, METH_VARARGS, encode__doc__},
+ {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__},
+ {"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__},
+ {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
+ {"join", (PyCFunction) unicode_join, METH_O, join__doc__},
+ {"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__},
+ {"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__},
+ {"center", (PyCFunction) unicode_center, METH_VARARGS, center__doc__},
+ {"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__},
+ {"expandtabs", (PyCFunction) unicode_expandtabs, METH_VARARGS, expandtabs__doc__},
+ {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__},
+ {"partition", (PyCFunction) unicode_partition, METH_O, partition__doc__},
+ {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__},
+ {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__},
+ {"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__},
+ {"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__},
+ {"decode", (PyCFunction) unicode_decode, METH_VARARGS, decode__doc__},
+/* {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */
+ {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},
+ {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},
+ {"rjust", (PyCFunction) unicode_rjust, METH_VARARGS, rjust__doc__},
+ {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS, rstrip__doc__},
+ {"rpartition", (PyCFunction) unicode_rpartition, METH_O, rpartition__doc__},
+ {"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS, splitlines__doc__},
+ {"strip", (PyCFunction) unicode_strip, METH_VARARGS, strip__doc__},
+ {"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS, swapcase__doc__},
+ {"translate", (PyCFunction) unicode_translate, METH_O, translate__doc__},
+ {"upper", (PyCFunction) unicode_upper, METH_NOARGS, upper__doc__},
+ {"startswith", (PyCFunction) unicode_startswith, METH_VARARGS, startswith__doc__},
+ {"endswith", (PyCFunction) unicode_endswith, METH_VARARGS, endswith__doc__},
+ {"islower", (PyCFunction) unicode_islower, METH_NOARGS, islower__doc__},
+ {"isupper", (PyCFunction) unicode_isupper, METH_NOARGS, isupper__doc__},
+ {"istitle", (PyCFunction) unicode_istitle, METH_NOARGS, istitle__doc__},
+ {"isspace", (PyCFunction) unicode_isspace, METH_NOARGS, isspace__doc__},
+ {"isdecimal", (PyCFunction) unicode_isdecimal, METH_NOARGS, isdecimal__doc__},
+ {"isdigit", (PyCFunction) unicode_isdigit, METH_NOARGS, isdigit__doc__},
+ {"isnumeric", (PyCFunction) unicode_isnumeric, METH_NOARGS, isnumeric__doc__},
+ {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__},
+ {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__},
+ {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__},
+ {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__},
+ {"format", (PyCFunction) unicode_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
+ {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
+ {"_formatter_field_name_split", (PyCFunction) unicode_formatter_field_name_split, METH_NOARGS},
+ {"_formatter_parser", (PyCFunction) unicode_formatter_parser, METH_NOARGS},
+#if 0
+ {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
+#endif
- fmt++;
- if (*fmt == '(') {
- Py_UNICODE *keystart;
- Py_ssize_t keylen;
- PyObject *key;
- int pcount = 1;
+#if 0
+ /* This one is just used for debugging the implementation. */
+ {"freelistsize", (PyCFunction) unicode_freelistsize, METH_NOARGS},
+#endif
- if (dict == NULL) {
- PyErr_SetString(PyExc_TypeError,
- "format requires a mapping");
- goto onError;
- }
- ++fmt;
- --fmtcnt;
- keystart = fmt;
- /* Skip over balanced parentheses */
- while (pcount > 0 && --fmtcnt >= 0) {
- if (*fmt == ')')
- --pcount;
- else if (*fmt == '(')
- ++pcount;
- fmt++;
- }
- keylen = fmt - keystart - 1;
- if (fmtcnt < 0 || pcount > 0) {
- PyErr_SetString(PyExc_ValueError,
- "incomplete format key");
- goto onError;
- }
-#if 0
- /* keys are converted to strings using UTF-8 and
- then looked up since Python uses strings to hold
- variables names etc. in its namespaces and we
- wouldn't want to break common idioms. */
- key = PyUnicode_EncodeUTF8(keystart,
- keylen,
- NULL);
-#else
- key = PyUnicode_FromUnicode(keystart, keylen);
-#endif
- if (key == NULL)
- goto onError;
- if (args_owned) {
- Py_DECREF(args);
- args_owned = 0;
- }
- args = PyObject_GetItem(dict, key);
- Py_DECREF(key);
- if (args == NULL) {
- goto onError;
- }
- args_owned = 1;
- arglen = -1;
- argidx = -2;
- }
- while (--fmtcnt >= 0) {
- switch (c = *fmt++) {
- case '-': flags |= F_LJUST; continue;
- case '+': flags |= F_SIGN; continue;
- case ' ': flags |= F_BLANK; continue;
- case '#': flags |= F_ALT; continue;
- case '0': flags |= F_ZERO; continue;
- }
- break;
- }
- if (c == '*') {
- v = getnextarg(args, arglen, &argidx);
- if (v == NULL)
- goto onError;
- if (!PyInt_Check(v)) {
- PyErr_SetString(PyExc_TypeError,
- "* wants int");
- goto onError;
- }
- width = PyInt_AsLong(v);
- if (width == -1 && PyErr_Occurred())
- goto onError;
- if (width < 0) {
- flags |= F_LJUST;
- width = -width;
- }
- if (--fmtcnt >= 0)
- c = *fmt++;
- }
- else if (c >= '0' && c <= '9') {
- width = c - '0';
- while (--fmtcnt >= 0) {
- c = *fmt++;
- if (c < '0' || c > '9')
- break;
- if ((width*10) / 10 != width) {
- PyErr_SetString(PyExc_ValueError,
- "width too big");
- goto onError;
- }
- width = width*10 + (c - '0');
- }
- }
- if (c == '.') {
- prec = 0;
- if (--fmtcnt >= 0)
- c = *fmt++;
- if (c == '*') {
- v = getnextarg(args, arglen, &argidx);
- if (v == NULL)
- goto onError;
- if (!PyInt_Check(v)) {
- PyErr_SetString(PyExc_TypeError,
- "* wants int");
- goto onError;
- }
- prec = PyInt_AsLong(v);
- if (prec == -1 && PyErr_Occurred())
- goto onError;
- if (prec < 0)
- prec = 0;
- if (--fmtcnt >= 0)
- c = *fmt++;
- }
- else if (c >= '0' && c <= '9') {
- prec = c - '0';
- while (--fmtcnt >= 0) {
- c = Py_CHARMASK(*fmt++);
- if (c < '0' || c > '9')
- break;
- if ((prec*10) / 10 != prec) {
- PyErr_SetString(PyExc_ValueError,
- "prec too big");
- goto onError;
- }
- prec = prec*10 + (c - '0');
- }
- }
- } /* prec */
- if (fmtcnt >= 0) {
- if (c == 'h' || c == 'l' || c == 'L') {
- if (--fmtcnt >= 0)
- c = *fmt++;
- }
- }
- if (fmtcnt < 0) {
- PyErr_SetString(PyExc_ValueError,
- "incomplete format");
- goto onError;
- }
- if (c != '%') {
- v = getnextarg(args, arglen, &argidx);
- if (v == NULL)
- goto onError;
- }
- sign = 0;
- fill = ' ';
- switch (c) {
-
- case '%':
- pbuf = formatbuf;
- /* presume that buffer length is at least 1 */
- pbuf[0] = '%';
- len = 1;
- break;
-
- case 's':
- case 'r':
- if (PyUnicode_Check(v) && c == 's') {
- temp = v;
- Py_INCREF(temp);
- }
- else {
- PyObject *unicode;
- if (c == 's')
- temp = PyObject_Unicode(v);
- else
- temp = PyObject_Repr(v);
- if (temp == NULL)
- goto onError;
- if (PyUnicode_Check(temp))
- /* nothing to do */;
- else if (PyString_Check(temp)) {
- /* convert to string to Unicode */
- unicode = PyUnicode_Decode(PyString_AS_STRING(temp),
- PyString_GET_SIZE(temp),
- NULL,
- "strict");
- Py_DECREF(temp);
- temp = unicode;
- if (temp == NULL)
- goto onError;
- }
- else {
- Py_DECREF(temp);
- PyErr_SetString(PyExc_TypeError,
- "%s argument has non-string str()");
- goto onError;
- }
- }
- pbuf = PyUnicode_AS_UNICODE(temp);
- len = PyUnicode_GET_SIZE(temp);
- if (prec >= 0 && len > prec)
- len = prec;
- break;
-
- case 'i':
- case 'd':
- case 'u':
- case 'o':
- case 'x':
- case 'X':
- if (c == 'i')
- c = 'd';
- if (PyLong_Check(v)) {
- temp = formatlong(v, flags, prec, c);
- if (!temp)
- goto onError;
- pbuf = PyUnicode_AS_UNICODE(temp);
- len = PyUnicode_GET_SIZE(temp);
- sign = 1;
- }
- else {
- pbuf = formatbuf;
- len = formatint(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
- flags, prec, c, v);
- if (len < 0)
- goto onError;
- sign = 1;
- }
- if (flags & F_ZERO)
- fill = '0';
- break;
-
- case 'e':
- case 'E':
- case 'f':
- case 'F':
- case 'g':
- case 'G':
- if (c == 'F')
- c = 'f';
- pbuf = formatbuf;
- len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
- flags, prec, c, v);
- if (len < 0)
- goto onError;
- sign = 1;
- if (flags & F_ZERO)
- fill = '0';
- break;
-
- case 'c':
- pbuf = formatbuf;
- len = formatchar(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE), v);
- if (len < 0)
- goto onError;
- break;
-
- default:
- PyErr_Format(PyExc_ValueError,
- "unsupported format character '%c' (0x%x) "
- "at index %zd",
- (31<=c && c<=126) ? (char)c : '?',
- (int)c,
- (Py_ssize_t)(fmt - 1 -
- PyUnicode_AS_UNICODE(uformat)));
- goto onError;
- }
- if (sign) {
- if (*pbuf == '-' || *pbuf == '+') {
- sign = *pbuf++;
- len--;
- }
- else if (flags & F_SIGN)
- sign = '+';
- else if (flags & F_BLANK)
- sign = ' ';
- else
- sign = 0;
- }
- if (width < len)
- width = len;
- if (rescnt - (sign != 0) < width) {
- reslen -= rescnt;
- rescnt = width + fmtcnt + 100;
- reslen += rescnt;
- if (reslen < 0) {
- Py_XDECREF(temp);
- PyErr_NoMemory();
- goto onError;
- }
- if (_PyUnicode_Resize(&result, reslen) < 0) {
- Py_XDECREF(temp);
- goto onError;
- }
- res = PyUnicode_AS_UNICODE(result)
- + reslen - rescnt;
- }
- if (sign) {
- if (fill != ' ')
- *res++ = sign;
- rescnt--;
- if (width > len)
- width--;
- }
- if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
- assert(pbuf[0] == '0');
- assert(pbuf[1] == c);
- if (fill != ' ') {
- *res++ = *pbuf++;
- *res++ = *pbuf++;
- }
- rescnt -= 2;
- width -= 2;
- if (width < 0)
- width = 0;
- len -= 2;
- }
- if (width > len && !(flags & F_LJUST)) {
- do {
- --rescnt;
- *res++ = fill;
- } while (--width > len);
- }
- if (fill == ' ') {
- if (sign)
- *res++ = sign;
- if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
- assert(pbuf[0] == '0');
- assert(pbuf[1] == c);
- *res++ = *pbuf++;
- *res++ = *pbuf++;
- }
- }
- Py_UNICODE_COPY(res, pbuf, len);
- res += len;
- rescnt -= len;
- while (--width >= len) {
- --rescnt;
- *res++ = ' ';
- }
- if (dict && (argidx < arglen) && c != '%') {
- PyErr_SetString(PyExc_TypeError,
- "not all arguments converted during string formatting");
- Py_XDECREF(temp);
- goto onError;
- }
- Py_XDECREF(temp);
- } /* '%' */
- } /* until end */
- if (argidx < arglen && !dict) {
- PyErr_SetString(PyExc_TypeError,
- "not all arguments converted during string formatting");
- goto onError;
- }
+ {"__getnewargs__", (PyCFunction)unicode_getnewargs, METH_NOARGS},
+ {NULL, NULL}
+};
- if (_PyUnicode_Resize(&result, reslen - rescnt) < 0)
- goto onError;
- if (args_owned) {
- Py_DECREF(args);
- }
- Py_DECREF(uformat);
- return (PyObject *)result;
+static PyObject *
+unicode_mod(PyObject *v, PyObject *w)
+{
+ if (!PyUnicode_Check(v)) {
+ Py_INCREF(Py_NotImplemented);
+ return Py_NotImplemented;
+ }
+ return PyUnicode_Format(v, w);
+}
- onError:
- Py_XDECREF(result);
- Py_DECREF(uformat);
- if (args_owned) {
- Py_DECREF(args);
+static PyNumberMethods unicode_as_number = {
+ 0, /*nb_add*/
+ 0, /*nb_subtract*/
+ 0, /*nb_multiply*/
+ unicode_mod, /*nb_remainder*/
+};
+
+static PySequenceMethods unicode_as_sequence = {
+ (lenfunc) unicode_length, /* sq_length */
+ PyUnicode_Concat, /* sq_concat */
+ (ssizeargfunc) unicode_repeat, /* sq_repeat */
+ (ssizeargfunc) unicode_getitem, /* sq_item */
+ (ssizessizeargfunc) unicode_slice, /* sq_slice */
+ 0, /* sq_ass_item */
+ 0, /* sq_ass_slice */
+ PyUnicode_Contains, /* sq_contains */
+};
+
+static PyObject*
+unicode_subscript(PyUnicodeObject* self, PyObject* item)
+{
+ if (PyIndex_Check(item)) {
+ Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
+ if (i == -1 && PyErr_Occurred())
+ return NULL;
+ if (i < 0)
+ i += PyUnicode_GET_SIZE(self);
+ return unicode_getitem(self, i);
+ } else if (PySlice_Check(item)) {
+ Py_ssize_t start, stop, step, slicelength, cur, i;
+ Py_UNICODE* source_buf;
+ Py_UNICODE* result_buf;
+ PyObject* result;
+
+ if (PySlice_GetIndicesEx((PySliceObject*)item, PyUnicode_GET_SIZE(self),
+ &start, &stop, &step, &slicelength) < 0) {
+ return NULL;
+ }
+
+ if (slicelength <= 0) {
+ return PyUnicode_FromUnicode(NULL, 0);
+ } else {
+ source_buf = PyUnicode_AS_UNICODE((PyObject*)self);
+ result_buf = (Py_UNICODE *)PyMem_MALLOC(slicelength*
+ sizeof(Py_UNICODE));
+
+ if (result_buf == NULL)
+ return PyErr_NoMemory();
+
+ for (cur = start, i = 0; i < slicelength; cur += step, i++) {
+ result_buf[i] = source_buf[cur];
+ }
+
+ result = PyUnicode_FromUnicode(result_buf, slicelength);
+ PyMem_FREE(result_buf);
+ return result;
+ }
+ } else {
+ PyErr_SetString(PyExc_TypeError, "string indices must be integers");
+ return NULL;
}
- return NULL;
}
-static PyBufferProcs unicode_as_buffer = {
- (getbufferproc) unicode_buffer_getbuffer,
- NULL,
+static PyMappingMethods unicode_as_mapping = {
+ (lenfunc)unicode_length, /* mp_length */
+ (binaryfunc)unicode_subscript, /* mp_subscript */
+ (objobjargproc)0, /* mp_ass_subscript */
};
-static PyObject *
-unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
-static PyObject *
-unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+static int
+unicode_buffer_getbuffer(PyUnicodeObject *self, PyBuffer *view, int flags)
{
- PyObject *x = NULL;
- static char *kwlist[] = {"object", "encoding", "errors", 0};
- char *encoding = NULL;
- char *errors = NULL;
- if (type != &PyUnicode_Type)
- return unicode_subtype_new(type, args, kwds);
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:unicode",
- kwlist, &x, &encoding, &errors))
- return NULL;
- if (x == NULL)
- return (PyObject *)_PyUnicode_New(0);
- if (encoding == NULL && errors == NULL)
- return PyObject_Unicode(x);
- else
- return PyUnicode_FromEncodedObject(x, encoding, errors);
+ if (flags & PyBUF_CHARACTER) {
+ PyObject *str;
+
+ str = _PyUnicode_AsDefaultEncodedString((PyObject *)self, NULL);
+ if (str == NULL) return -1;
+ return PyBuffer_FillInfo(view, (void *)PyString_AS_STRING(str),
+ PyString_GET_SIZE(str), 1, flags);
+ }
+ else {
+ return PyBuffer_FillInfo(view, (void *)self->str,
+ PyUnicode_GET_DATA_SIZE(self), 1, flags);
+ }
}
+
+/* Helpers for PyUnicode_Format() */
+
static PyObject *
-unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
{
- PyUnicodeObject *tmp, *pnew;
- Py_ssize_t n;
+ Py_ssize_t argidx = *p_argidx;
+ if (argidx < arglen) {
+ (*p_argidx)++;
+ if (arglen < 0)
+ return args;
+ else
+ return PyTuple_GetItem(args, argidx);
+ }
+ PyErr_SetString(PyExc_TypeError,
+ "not enough arguments for format string");
+ return NULL;
+}
- assert(PyType_IsSubtype(type, &PyUnicode_Type));
- tmp = (PyUnicodeObject *)unicode_new(&PyUnicode_Type, args, kwds);
- if (tmp == NULL)
- return NULL;
- assert(PyUnicode_Check(tmp));
- pnew = (PyUnicodeObject *) type->tp_alloc(type, n = tmp->length);
- if (pnew == NULL) {
- Py_DECREF(tmp);
- return NULL;
- }
- pnew->str = PyMem_NEW(Py_UNICODE, n+1);
- if (pnew->str == NULL) {
- _Py_ForgetReference((PyObject *)pnew);
- PyObject_Del(pnew);
- Py_DECREF(tmp);
- return PyErr_NoMemory();
- }
- Py_UNICODE_COPY(pnew->str, tmp->str, n+1);
- pnew->length = n;
- pnew->hash = tmp->hash;
- Py_DECREF(tmp);
- return (PyObject *)pnew;
+#define F_LJUST (1<<0)
+#define F_SIGN (1<<1)
+#define F_BLANK (1<<2)
+#define F_ALT (1<<3)
+#define F_ZERO (1<<4)
+
+static Py_ssize_t
+strtounicode(Py_UNICODE *buffer, const char *charbuffer)
+{
+ register Py_ssize_t i;
+ Py_ssize_t len = strlen(charbuffer);
+ for (i = len - 1; i >= 0; i--)
+ buffer[i] = (Py_UNICODE) charbuffer[i];
+
+ return len;
}
-PyDoc_STRVAR(unicode_doc,
-"str(string [, encoding[, errors]]) -> object\n\
-\n\
-Create a new string object from the given encoded string.\n\
-encoding defaults to the current default string encoding.\n\
-errors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'.");
+static int
+doubletounicode(Py_UNICODE *buffer, size_t len, const char *format, double x)
+{
+ Py_ssize_t result;
-static PyObject *unicode_iter(PyObject *seq);
+ PyOS_ascii_formatd((char *)buffer, len, format, x);
+ result = strtounicode(buffer, (char *)buffer);
+ return Py_SAFE_DOWNCAST(result, Py_ssize_t, int);
+}
-PyTypeObject PyUnicode_Type = {
- PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "str", /* tp_name */
- sizeof(PyUnicodeObject), /* tp_size */
- 0, /* tp_itemsize */
- /* Slots */
- (destructor)unicode_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_compare */
- unicode_repr, /* tp_repr */
- &unicode_as_number, /* tp_as_number */
- &unicode_as_sequence, /* tp_as_sequence */
- &unicode_as_mapping, /* tp_as_mapping */
- (hashfunc) unicode_hash, /* tp_hash*/
- 0, /* tp_call*/
- (reprfunc) unicode_str, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- 0, /* tp_setattro */
- &unicode_as_buffer, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
- Py_TPFLAGS_UNICODE_SUBCLASS, /* tp_flags */
- unicode_doc, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- PyUnicode_RichCompare, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- unicode_iter, /* tp_iter */
- 0, /* tp_iternext */
- unicode_methods, /* tp_methods */
- 0, /* tp_members */
- 0, /* tp_getset */
- &PyBaseString_Type, /* tp_base */
- 0, /* tp_dict */
- 0, /* tp_descr_get */
- 0, /* tp_descr_set */
- 0, /* tp_dictoffset */
- 0, /* tp_init */
- 0, /* tp_alloc */
- unicode_new, /* tp_new */
- PyObject_Del, /* tp_free */
-};
+static int
+longtounicode(Py_UNICODE *buffer, size_t len, const char *format, long x)
+{
+ Py_ssize_t result;
-/* Initialize the Unicode implementation */
+ PyOS_snprintf((char *)buffer, len, format, x);
+ result = strtounicode(buffer, (char *)buffer);
+ return Py_SAFE_DOWNCAST(result, Py_ssize_t, int);
+}
-void _PyUnicode_Init(void)
+/* XXX To save some code duplication, formatfloat/long/int could have been
+ shared with stringobject.c, converting from 8-bit to Unicode after the
+ formatting is done. */
+
+static int
+formatfloat(Py_UNICODE *buf,
+ size_t buflen,
+ int flags,
+ int prec,
+ int type,
+ PyObject *v)
{
- int i;
+ /* fmt = '%#.' + `prec` + `type`
+ worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
+ char fmt[20];
+ double x;
- /* XXX - move this array to unicodectype.c ? */
- Py_UNICODE linebreak[] = {
- 0x000A, /* LINE FEED */
- 0x000D, /* CARRIAGE RETURN */
- 0x001C, /* FILE SEPARATOR */
- 0x001D, /* GROUP SEPARATOR */
- 0x001E, /* RECORD SEPARATOR */
- 0x0085, /* NEXT LINE */
- 0x2028, /* LINE SEPARATOR */
- 0x2029, /* PARAGRAPH SEPARATOR */
- };
+ x = PyFloat_AsDouble(v);
+ if (x == -1.0 && PyErr_Occurred())
+ return -1;
+ if (prec < 0)
+ prec = 6;
+ if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
+ type = 'g';
+ /* Worst case length calc to ensure no buffer overrun:
- /* Init the implementation */
- unicode_freelist = NULL;
- unicode_freelist_size = 0;
- unicode_empty = _PyUnicode_New(0);
- if (!unicode_empty)
- return;
+ 'g' formats:
+ fmt = %#.<prec>g
+ buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
+ for any double rep.)
+ len = 1 + prec + 1 + 2 + 5 = 9 + prec
- for (i = 0; i < 256; i++)
- unicode_latin1[i] = NULL;
- if (PyType_Ready(&PyUnicode_Type) < 0)
- Py_FatalError("Can't initialize 'unicode'");
+ 'f' formats:
+ buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
+ len = 1 + 50 + 1 + prec = 52 + prec
- /* initialize the linebreak bloom filter */
- bloom_linebreak = make_bloom_mask(
- linebreak, sizeof(linebreak) / sizeof(linebreak[0])
- );
+ If prec=0 the effective precision is 1 (the leading digit is
+ always given), therefore increase the length by one.
- PyType_Ready(&EncodingMapType);
+ */
+ if (((type == 'g' || type == 'G') &&
+ buflen <= (size_t)10 + (size_t)prec) ||
+ (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "formatted float is too long (precision too large?)");
+ return -1;
+ }
+ PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
+ (flags&F_ALT) ? "#" : "",
+ prec, type);
+ return doubletounicode(buf, buflen, fmt, x);
}
-/* Finalize the Unicode implementation */
+static PyObject*
+formatlong(PyObject *val, int flags, int prec, int type)
+{
+ char *buf;
+ int len;
+ PyObject *str; /* temporary string object. */
+ PyObject *result;
-void
-_PyUnicode_Fini(void)
+ str = _PyString_FormatLong(val, flags, prec, type, &buf, &len);
+ if (!str)
+ return NULL;
+ result = PyUnicode_FromStringAndSize(buf, len);
+ Py_DECREF(str);
+ return result;
+}
+
+static int
+formatint(Py_UNICODE *buf,
+ size_t buflen,
+ int flags,
+ int prec,
+ int type,
+ PyObject *v)
{
- PyUnicodeObject *u;
- int i;
+ /* fmt = '%#.' + `prec` + 'l' + `type`
+ * worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
+ * + 1 + 1
+ * = 24
+ */
+ char fmt[64]; /* plenty big enough! */
+ char *sign;
+ long x;
- Py_XDECREF(unicode_empty);
- unicode_empty = NULL;
+ x = PyInt_AsLong(v);
+ if (x == -1 && PyErr_Occurred())
+ return -1;
+ if (x < 0 && type == 'u') {
+ type = 'd';
+ }
+ if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
+ sign = "-";
+ else
+ sign = "";
+ if (prec < 0)
+ prec = 1;
- for (i = 0; i < 256; i++) {
- if (unicode_latin1[i]) {
- Py_DECREF(unicode_latin1[i]);
- unicode_latin1[i] = NULL;
- }
+ /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
+ * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
+ */
+ if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
+ PyErr_SetString(PyExc_OverflowError,
+ "formatted integer is too long (precision too large?)");
+ return -1;
}
- for (u = unicode_freelist; u != NULL;) {
- PyUnicodeObject *v = u;
- u = *(PyUnicodeObject **)u;
- if (v->str)
- PyMem_DEL(v->str);
- Py_XDECREF(v->defenc);
- PyObject_Del(v);
+ if ((flags & F_ALT) &&
+ (type == 'x' || type == 'X' || type == 'o')) {
+ /* When converting under %#o, %#x or %#X, there are a number
+ * of issues that cause pain:
+ * - for %#o, we want a different base marker than C
+ * - when 0 is being converted, the C standard leaves off
+ * the '0x' or '0X', which is inconsistent with other
+ * %#x/%#X conversions and inconsistent with Python's
+ * hex() function
+ * - there are platforms that violate the standard and
+ * convert 0 with the '0x' or '0X'
+ * (Metrowerks, Compaq Tru64)
+ * - there are platforms that give '0x' when converting
+ * under %#X, but convert 0 in accordance with the
+ * standard (OS/2 EMX)
+ *
+ * We can achieve the desired consistency by inserting our
+ * own '0x' or '0X' prefix, and substituting %x/%X in place
+ * of %#x/%#X.
+ *
+ * Note that this is the same approach as used in
+ * formatint() in stringobject.c
+ */
+ PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
+ sign, type, prec, type);
}
- unicode_freelist = NULL;
- unicode_freelist_size = 0;
+ else {
+ PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
+ sign, (flags&F_ALT) ? "#" : "",
+ prec, type);
+ }
+ if (sign[0])
+ return longtounicode(buf, buflen, fmt, -x);
+ else
+ return longtounicode(buf, buflen, fmt, x);
}
-void
-PyUnicode_InternInPlace(PyObject **p)
+static int
+formatchar(Py_UNICODE *buf,
+ size_t buflen,
+ PyObject *v)
{
- register PyUnicodeObject *s = (PyUnicodeObject *)(*p);
- PyObject *t;
- if (s == NULL || !PyUnicode_Check(s))
- Py_FatalError(
- "PyUnicode_InternInPlace: unicode strings only please!");
- /* If it's a subclass, we don't really know what putting
- it in the interned dict might do. */
- if (!PyUnicode_CheckExact(s))
- return;
- if (PyUnicode_CHECK_INTERNED(s))
- return;
- if (interned == NULL) {
- interned = PyDict_New();
- if (interned == NULL) {
- PyErr_Clear(); /* Don't leave an exception */
- return;
- }
- }
- /* It might be that the GetItem call fails even
- though the key is present in the dictionary,
- namely when this happens during a stack overflow. */
- Py_ALLOW_RECURSION
- t = PyDict_GetItem(interned, (PyObject *)s);
- Py_END_ALLOW_RECURSION
+ /* presume that the buffer is at least 2 characters long */
+ if (PyUnicode_Check(v)) {
+ if (PyUnicode_GET_SIZE(v) != 1)
+ goto onError;
+ buf[0] = PyUnicode_AS_UNICODE(v)[0];
+ }
- if (t) {
- Py_INCREF(t);
- Py_DECREF(*p);
- *p = t;
- return;
- }
+ else if (PyString_Check(v)) {
+ if (PyString_GET_SIZE(v) != 1)
+ goto onError;
+ buf[0] = (Py_UNICODE)PyString_AS_STRING(v)[0];
+ }
- PyThreadState_GET()->recursion_critical = 1;
- if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
- PyErr_Clear();
- PyThreadState_GET()->recursion_critical = 0;
- return;
+ else {
+ /* Integer input truncated to a character */
+ long x;
+ x = PyInt_AsLong(v);
+ if (x == -1 && PyErr_Occurred())
+ goto onError;
+#ifdef Py_UNICODE_WIDE
+ if (x < 0 || x > 0x10ffff) {
+ PyErr_SetString(PyExc_OverflowError,
+ "%c arg not in range(0x110000) "
+ "(wide Python build)");
+ return -1;
}
- PyThreadState_GET()->recursion_critical = 0;
- /* The two references in interned are not counted by refcnt.
- The deallocator will take care of this */
- Py_Refcnt(s) -= 2;
- PyUnicode_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
+#else
+ if (x < 0 || x > 0xffff) {
+ PyErr_SetString(PyExc_OverflowError,
+ "%c arg not in range(0x10000) "
+ "(narrow Python build)");
+ return -1;
+ }
+#endif
+ buf[0] = (Py_UNICODE) x;
+ }
+ buf[1] = '\0';
+ return 1;
+
+ onError:
+ PyErr_SetString(PyExc_TypeError,
+ "%c requires int or char");
+ return -1;
}
-void
-PyUnicode_InternImmortal(PyObject **p)
-{
- PyUnicode_InternInPlace(p);
- if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
- PyUnicode_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
- Py_INCREF(*p);
- }
-}
+/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
-PyObject *
-PyUnicode_InternFromString(const char *cp)
-{
- PyObject *s = PyUnicode_FromString(cp);
- if (s == NULL)
- return NULL;
- PyUnicode_InternInPlace(&s);
- return s;
-}
+ FORMATBUFLEN is the length of the buffer in which the floats, ints, &
+ chars are formatted. XXX This is a magic number. Each formatting
+ routine does bounds checking to ensure no overflow, but a better
+ solution may be to malloc a buffer of appropriate size for each
+ format. For now, the current solution is sufficient.
+*/
+#define FORMATBUFLEN (size_t)120
-void _Py_ReleaseInternedUnicodeStrings(void)
+PyObject *PyUnicode_Format(PyObject *format,
+ PyObject *args)
{
- PyObject *keys;
- PyUnicodeObject *s;
- Py_ssize_t i, n;
- Py_ssize_t immortal_size = 0, mortal_size = 0;
-
- if (interned == NULL || !PyDict_Check(interned))
- return;
- keys = PyDict_Keys(interned);
- if (keys == NULL || !PyList_Check(keys)) {
- PyErr_Clear();
- return;
- }
+ Py_UNICODE *fmt, *res;
+ Py_ssize_t fmtcnt, rescnt, reslen, arglen, argidx;
+ int args_owned = 0;
+ PyUnicodeObject *result = NULL;
+ PyObject *dict = NULL;
+ PyObject *uformat;
- /* Since _Py_ReleaseInternedUnicodeStrings() is intended to help a leak
- detector, interned unicode strings are not forcibly deallocated;
- rather, we give them their stolen references back, and then clear
- and DECREF the interned dict. */
+ if (format == NULL || args == NULL) {
+ PyErr_BadInternalCall();
+ return NULL;
+ }
+ uformat = PyUnicode_FromObject(format);
+ if (uformat == NULL)
+ return NULL;
+ fmt = PyUnicode_AS_UNICODE(uformat);
+ fmtcnt = PyUnicode_GET_SIZE(uformat);
- n = PyList_GET_SIZE(keys);
- fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
- n);
- for (i = 0; i < n; i++) {
- s = (PyUnicodeObject *) PyList_GET_ITEM(keys, i);
- switch (s->state) {
- case SSTATE_NOT_INTERNED:
- /* XXX Shouldn't happen */
- break;
- case SSTATE_INTERNED_IMMORTAL:
- Py_Refcnt(s) += 1;
- immortal_size += s->length;
- break;
- case SSTATE_INTERNED_MORTAL:
- Py_Refcnt(s) += 2;
- mortal_size += s->length;
- break;
- default:
- Py_FatalError("Inconsistent interned string state.");
- }
- s->state = SSTATE_NOT_INTERNED;
- }
- fprintf(stderr, "total size of all interned strings: "
- "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
- "mortal/immortal\n", mortal_size, immortal_size);
- Py_DECREF(keys);
- PyDict_Clear(interned);
- Py_DECREF(interned);
- interned = NULL;
-}
+ reslen = rescnt = fmtcnt + 100;
+ result = _PyUnicode_New(reslen);
+ if (result == NULL)
+ goto onError;
+ res = PyUnicode_AS_UNICODE(result);
+ if (PyTuple_Check(args)) {
+ arglen = PyTuple_Size(args);
+ argidx = 0;
+ }
+ else {
+ arglen = -1;
+ argidx = -2;
+ }
+ if (Py_Type(args)->tp_as_mapping && !PyTuple_Check(args) &&
+ !PyObject_TypeCheck(args, &PyBaseString_Type))
+ dict = args;
-/********************* Formatter Iterator ************************/
+ while (--fmtcnt >= 0) {
+ if (*fmt != '%') {
+ if (--rescnt < 0) {
+ rescnt = fmtcnt + 100;
+ reslen += rescnt;
+ if (_PyUnicode_Resize(&result, reslen) < 0)
+ goto onError;
+ res = PyUnicode_AS_UNICODE(result) + reslen - rescnt;
+ --rescnt;
+ }
+ *res++ = *fmt++;
+ }
+ else {
+ /* Got a format specifier */
+ int flags = 0;
+ Py_ssize_t width = -1;
+ int prec = -1;
+ Py_UNICODE c = '\0';
+ Py_UNICODE fill;
+ PyObject *v = NULL;
+ PyObject *temp = NULL;
+ Py_UNICODE *pbuf;
+ Py_UNICODE sign;
+ Py_ssize_t len;
+ Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
-/* this is used to implement string.Formatter.vparse(). it exists so
- Formatter can share code with the built in unicode.format()
- method */
+ fmt++;
+ if (*fmt == '(') {
+ Py_UNICODE *keystart;
+ Py_ssize_t keylen;
+ PyObject *key;
+ int pcount = 1;
-typedef struct {
- PyObject_HEAD
+ if (dict == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "format requires a mapping");
+ goto onError;
+ }
+ ++fmt;
+ --fmtcnt;
+ keystart = fmt;
+ /* Skip over balanced parentheses */
+ while (pcount > 0 && --fmtcnt >= 0) {
+ if (*fmt == ')')
+ --pcount;
+ else if (*fmt == '(')
+ ++pcount;
+ fmt++;
+ }
+ keylen = fmt - keystart - 1;
+ if (fmtcnt < 0 || pcount > 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "incomplete format key");
+ goto onError;
+ }
+#if 0
+ /* keys are converted to strings using UTF-8 and
+ then looked up since Python uses strings to hold
+ variables names etc. in its namespaces and we
+ wouldn't want to break common idioms. */
+ key = PyUnicode_EncodeUTF8(keystart,
+ keylen,
+ NULL);
+#else
+ key = PyUnicode_FromUnicode(keystart, keylen);
+#endif
+ if (key == NULL)
+ goto onError;
+ if (args_owned) {
+ Py_DECREF(args);
+ args_owned = 0;
+ }
+ args = PyObject_GetItem(dict, key);
+ Py_DECREF(key);
+ if (args == NULL) {
+ goto onError;
+ }
+ args_owned = 1;
+ arglen = -1;
+ argidx = -2;
+ }
+ while (--fmtcnt >= 0) {
+ switch (c = *fmt++) {
+ case '-': flags |= F_LJUST; continue;
+ case '+': flags |= F_SIGN; continue;
+ case ' ': flags |= F_BLANK; continue;
+ case '#': flags |= F_ALT; continue;
+ case '0': flags |= F_ZERO; continue;
+ }
+ break;
+ }
+ if (c == '*') {
+ v = getnextarg(args, arglen, &argidx);
+ if (v == NULL)
+ goto onError;
+ if (!PyInt_Check(v)) {
+ PyErr_SetString(PyExc_TypeError,
+ "* wants int");
+ goto onError;
+ }
+ width = PyInt_AsLong(v);
+ if (width == -1 && PyErr_Occurred())
+ goto onError;
+ if (width < 0) {
+ flags |= F_LJUST;
+ width = -width;
+ }
+ if (--fmtcnt >= 0)
+ c = *fmt++;
+ }
+ else if (c >= '0' && c <= '9') {
+ width = c - '0';
+ while (--fmtcnt >= 0) {
+ c = *fmt++;
+ if (c < '0' || c > '9')
+ break;
+ if ((width*10) / 10 != width) {
+ PyErr_SetString(PyExc_ValueError,
+ "width too big");
+ goto onError;
+ }
+ width = width*10 + (c - '0');
+ }
+ }
+ if (c == '.') {
+ prec = 0;
+ if (--fmtcnt >= 0)
+ c = *fmt++;
+ if (c == '*') {
+ v = getnextarg(args, arglen, &argidx);
+ if (v == NULL)
+ goto onError;
+ if (!PyInt_Check(v)) {
+ PyErr_SetString(PyExc_TypeError,
+ "* wants int");
+ goto onError;
+ }
+ prec = PyInt_AsLong(v);
+ if (prec == -1 && PyErr_Occurred())
+ goto onError;
+ if (prec < 0)
+ prec = 0;
+ if (--fmtcnt >= 0)
+ c = *fmt++;
+ }
+ else if (c >= '0' && c <= '9') {
+ prec = c - '0';
+ while (--fmtcnt >= 0) {
+ c = Py_CHARMASK(*fmt++);
+ if (c < '0' || c > '9')
+ break;
+ if ((prec*10) / 10 != prec) {
+ PyErr_SetString(PyExc_ValueError,
+ "prec too big");
+ goto onError;
+ }
+ prec = prec*10 + (c - '0');
+ }
+ }
+ } /* prec */
+ if (fmtcnt >= 0) {
+ if (c == 'h' || c == 'l' || c == 'L') {
+ if (--fmtcnt >= 0)
+ c = *fmt++;
+ }
+ }
+ if (fmtcnt < 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "incomplete format");
+ goto onError;
+ }
+ if (c != '%') {
+ v = getnextarg(args, arglen, &argidx);
+ if (v == NULL)
+ goto onError;
+ }
+ sign = 0;
+ fill = ' ';
+ switch (c) {
- /* we know this to be a unicode object, but since we just keep
- it around to keep the object alive, having it as PyObject
- is okay */
- PyObject *str;
+ case '%':
+ pbuf = formatbuf;
+ /* presume that buffer length is at least 1 */
+ pbuf[0] = '%';
+ len = 1;
+ break;
- MarkupIterator it_markup;
-} formatteriterobject;
+ case 's':
+ case 'r':
+ if (PyUnicode_Check(v) && c == 's') {
+ temp = v;
+ Py_INCREF(temp);
+ }
+ else {
+ PyObject *unicode;
+ if (c == 's')
+ temp = PyObject_Unicode(v);
+ else
+ temp = PyObject_Repr(v);
+ if (temp == NULL)
+ goto onError;
+ if (PyUnicode_Check(temp))
+ /* nothing to do */;
+ else if (PyString_Check(temp)) {
+ /* convert to string to Unicode */
+ unicode = PyUnicode_Decode(PyString_AS_STRING(temp),
+ PyString_GET_SIZE(temp),
+ NULL,
+ "strict");
+ Py_DECREF(temp);
+ temp = unicode;
+ if (temp == NULL)
+ goto onError;
+ }
+ else {
+ Py_DECREF(temp);
+ PyErr_SetString(PyExc_TypeError,
+ "%s argument has non-string str()");
+ goto onError;
+ }
+ }
+ pbuf = PyUnicode_AS_UNICODE(temp);
+ len = PyUnicode_GET_SIZE(temp);
+ if (prec >= 0 && len > prec)
+ len = prec;
+ break;
-static void
-formatteriter_dealloc(formatteriterobject *it)
-{
- Py_XDECREF(it->str);
- PyObject_FREE(it);
-}
+ case 'i':
+ case 'd':
+ case 'u':
+ case 'o':
+ case 'x':
+ case 'X':
+ if (c == 'i')
+ c = 'd';
+ if (PyLong_Check(v)) {
+ temp = formatlong(v, flags, prec, c);
+ if (!temp)
+ goto onError;
+ pbuf = PyUnicode_AS_UNICODE(temp);
+ len = PyUnicode_GET_SIZE(temp);
+ sign = 1;
+ }
+ else {
+ pbuf = formatbuf;
+ len = formatint(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
+ flags, prec, c, v);
+ if (len < 0)
+ goto onError;
+ sign = 1;
+ }
+ if (flags & F_ZERO)
+ fill = '0';
+ break;
-/* returns a tuple:
- (is_markup, literal, field_name, format_spec, conversion)
- if is_markup == True:
- literal is None
- field_name is the string before the ':'
- format_spec is the string after the ':'
- conversion is either None, or the string after the '!'
- if is_markup == False:
- literal is the literal string
- field_name is None
- format_spec is None
- conversion is None
-*/
-static PyObject *
-formatteriter_next(formatteriterobject *it)
-{
- SubString literal;
- SubString field_name;
- SubString format_spec;
- Py_UNICODE conversion;
- int is_markup;
- int format_spec_needs_expanding;
- int result = MarkupIterator_next(&it->it_markup, &is_markup, &literal,
- &field_name, &format_spec, &conversion,
- &format_spec_needs_expanding);
+ case 'e':
+ case 'E':
+ case 'f':
+ case 'F':
+ case 'g':
+ case 'G':
+ if (c == 'F')
+ c = 'f';
+ pbuf = formatbuf;
+ len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
+ flags, prec, c, v);
+ if (len < 0)
+ goto onError;
+ sign = 1;
+ if (flags & F_ZERO)
+ fill = '0';
+ break;
- /* all of the SubString objects point into it->str, so no
- memory management needs to be done on them */
- assert(0 <= result && result <= 2);
- if (result == 0) {
- /* error has already been set */
- return NULL;
- } else if (result == 1) {
- /* end of iterator */
- return NULL;
- } else {
- PyObject *is_markup_bool = NULL;
- PyObject *literal_str = NULL;
- PyObject *field_name_str = NULL;
- PyObject *format_spec_str = NULL;
- PyObject *conversion_str = NULL;
- PyObject *tuple = NULL;
+ case 'c':
+ pbuf = formatbuf;
+ len = formatchar(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE), v);
+ if (len < 0)
+ goto onError;
+ break;
- is_markup_bool = PyBool_FromLong(is_markup);
- if (!is_markup_bool)
- return NULL;
+ default:
+ PyErr_Format(PyExc_ValueError,
+ "unsupported format character '%c' (0x%x) "
+ "at index %zd",
+ (31<=c && c<=126) ? (char)c : '?',
+ (int)c,
+ (Py_ssize_t)(fmt - 1 -
+ PyUnicode_AS_UNICODE(uformat)));
+ goto onError;
+ }
+ if (sign) {
+ if (*pbuf == '-' || *pbuf == '+') {
+ sign = *pbuf++;
+ len--;
+ }
+ else if (flags & F_SIGN)
+ sign = '+';
+ else if (flags & F_BLANK)
+ sign = ' ';
+ else
+ sign = 0;
+ }
+ if (width < len)
+ width = len;
+ if (rescnt - (sign != 0) < width) {
+ reslen -= rescnt;
+ rescnt = width + fmtcnt + 100;
+ reslen += rescnt;
+ if (reslen < 0) {
+ Py_XDECREF(temp);
+ PyErr_NoMemory();
+ goto onError;
+ }
+ if (_PyUnicode_Resize(&result, reslen) < 0) {
+ Py_XDECREF(temp);
+ goto onError;
+ }
+ res = PyUnicode_AS_UNICODE(result)
+ + reslen - rescnt;
+ }
+ if (sign) {
+ if (fill != ' ')
+ *res++ = sign;
+ rescnt--;
+ if (width > len)
+ width--;
+ }
+ if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
+ assert(pbuf[0] == '0');
+ assert(pbuf[1] == c);
+ if (fill != ' ') {
+ *res++ = *pbuf++;
+ *res++ = *pbuf++;
+ }
+ rescnt -= 2;
+ width -= 2;
+ if (width < 0)
+ width = 0;
+ len -= 2;
+ }
+ if (width > len && !(flags & F_LJUST)) {
+ do {
+ --rescnt;
+ *res++ = fill;
+ } while (--width > len);
+ }
+ if (fill == ' ') {
+ if (sign)
+ *res++ = sign;
+ if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
+ assert(pbuf[0] == '0');
+ assert(pbuf[1] == c);
+ *res++ = *pbuf++;
+ *res++ = *pbuf++;
+ }
+ }
+ Py_UNICODE_COPY(res, pbuf, len);
+ res += len;
+ rescnt -= len;
+ while (--width >= len) {
+ --rescnt;
+ *res++ = ' ';
+ }
+ if (dict && (argidx < arglen) && c != '%') {
+ PyErr_SetString(PyExc_TypeError,
+ "not all arguments converted during string formatting");
+ Py_XDECREF(temp);
+ goto onError;
+ }
+ Py_XDECREF(temp);
+ } /* '%' */
+ } /* until end */
+ if (argidx < arglen && !dict) {
+ PyErr_SetString(PyExc_TypeError,
+ "not all arguments converted during string formatting");
+ goto onError;
+ }
- if (is_markup) {
- /* field_name, format_spec, and conversion are
- returned */
- literal_str = Py_None;
- Py_INCREF(literal_str);
+ if (_PyUnicode_Resize(&result, reslen - rescnt) < 0)
+ goto onError;
+ if (args_owned) {
+ Py_DECREF(args);
+ }
+ Py_DECREF(uformat);
+ return (PyObject *)result;
- field_name_str = SubString_new_object(&field_name);
- if (field_name_str == NULL)
- goto error;
+ onError:
+ Py_XDECREF(result);
+ Py_DECREF(uformat);
+ if (args_owned) {
+ Py_DECREF(args);
+ }
+ return NULL;
+}
- format_spec_str = SubString_new_object(&format_spec);
- if (format_spec_str == NULL)
- goto error;
+static PyBufferProcs unicode_as_buffer = {
+ (getbufferproc) unicode_buffer_getbuffer,
+ NULL,
+};
- /* if the conversion is not specified, return
- a None, otherwise create a one length
- string with the conversion characater */
- if (conversion == '\0') {
- conversion_str = Py_None;
- Py_INCREF(conversion_str);
- } else
- conversion_str = PyUnicode_FromUnicode(&conversion,
- 1);
- if (conversion_str == NULL)
- goto error;
- } else {
- /* only literal is returned */
- literal_str = SubString_new_object(&literal);
- if (literal_str == NULL)
- goto error;
+static PyObject *
+unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
- field_name_str = Py_None;
- format_spec_str = Py_None;
- conversion_str = Py_None;
+static PyObject *
+unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+ PyObject *x = NULL;
+ static char *kwlist[] = {"object", "encoding", "errors", 0};
+ char *encoding = NULL;
+ char *errors = NULL;
- Py_INCREF(field_name_str);
- Py_INCREF(format_spec_str);
- Py_INCREF(conversion_str);
- }
- tuple = PyTuple_Pack(5, is_markup_bool, literal_str,
- field_name_str, format_spec_str,
- conversion_str);
- error:
- Py_XDECREF(is_markup_bool);
- Py_XDECREF(literal_str);
- Py_XDECREF(field_name_str);
- Py_XDECREF(format_spec_str);
- Py_XDECREF(conversion_str);
- return tuple;
- }
+ if (type != &PyUnicode_Type)
+ return unicode_subtype_new(type, args, kwds);
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:unicode",
+ kwlist, &x, &encoding, &errors))
+ return NULL;
+ if (x == NULL)
+ return (PyObject *)_PyUnicode_New(0);
+ if (encoding == NULL && errors == NULL)
+ return PyObject_Unicode(x);
+ else
+ return PyUnicode_FromEncodedObject(x, encoding, errors);
}
-static PyMethodDef formatteriter_methods[] = {
- {NULL, NULL} /* sentinel */
-};
-
-PyTypeObject PyFormatterIter_Type = {
- PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "formatteriterator", /* tp_name */
- sizeof(formatteriterobject), /* tp_basicsize */
- 0, /* tp_itemsize */
- /* methods */
- (destructor)formatteriter_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_compare */
- 0, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT, /* tp_flags */
- 0, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- PyObject_SelfIter, /* tp_iter */
- (iternextfunc)formatteriter_next, /* tp_iternext */
- formatteriter_methods, /* tp_methods */
- 0,
-};
-
-PyObject *
-_PyUnicode_FormatterIterator(PyObject *str)
+static PyObject *
+unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
- formatteriterobject *it;
+ PyUnicodeObject *tmp, *pnew;
+ Py_ssize_t n;
- assert(PyUnicode_Check(str));
- it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
- if (it == NULL)
+ assert(PyType_IsSubtype(type, &PyUnicode_Type));
+ tmp = (PyUnicodeObject *)unicode_new(&PyUnicode_Type, args, kwds);
+ if (tmp == NULL)
+ return NULL;
+ assert(PyUnicode_Check(tmp));
+ pnew = (PyUnicodeObject *) type->tp_alloc(type, n = tmp->length);
+ if (pnew == NULL) {
+ Py_DECREF(tmp);
return NULL;
+ }
+ pnew->str = PyMem_NEW(Py_UNICODE, n+1);
+ if (pnew->str == NULL) {
+ _Py_ForgetReference((PyObject *)pnew);
+ PyObject_Del(pnew);
+ Py_DECREF(tmp);
+ return PyErr_NoMemory();
+ }
+ Py_UNICODE_COPY(pnew->str, tmp->str, n+1);
+ pnew->length = n;
+ pnew->hash = tmp->hash;
+ Py_DECREF(tmp);
+ return (PyObject *)pnew;
+}
- /* take ownership, give the object to the iterator */
- Py_INCREF(str);
- it->str = str;
+PyDoc_STRVAR(unicode_doc,
+"str(string [, encoding[, errors]]) -> object\n\
+\n\
+Create a new string object from the given encoded string.\n\
+encoding defaults to the current default string encoding.\n\
+errors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'.");
- /* initialize the contained MarkupIterator */
- MarkupIterator_init(&it->it_markup,
- PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str));
+static PyObject *unicode_iter(PyObject *seq);
- return (PyObject *)it;
-}
+PyTypeObject PyUnicode_Type = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ "str", /* tp_name */
+ sizeof(PyUnicodeObject), /* tp_size */
+ 0, /* tp_itemsize */
+ /* Slots */
+ (destructor)unicode_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ unicode_repr, /* tp_repr */
+ &unicode_as_number, /* tp_as_number */
+ &unicode_as_sequence, /* tp_as_sequence */
+ &unicode_as_mapping, /* tp_as_mapping */
+ (hashfunc) unicode_hash, /* tp_hash*/
+ 0, /* tp_call*/
+ (reprfunc) unicode_str, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ &unicode_as_buffer, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
+ Py_TPFLAGS_UNICODE_SUBCLASS, /* tp_flags */
+ unicode_doc, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ PyUnicode_RichCompare, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ unicode_iter, /* tp_iter */
+ 0, /* tp_iternext */
+ unicode_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ &PyBaseString_Type, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ unicode_new, /* tp_new */
+ PyObject_Del, /* tp_free */
+};
-/********************* FieldName Iterator ************************/
+/* Initialize the Unicode implementation */
-/* this is used to implement string.Formatter.vparse(). it parses
- the field name into attribute and item values. */
+void _PyUnicode_Init(void)
+{
+ int i;
-typedef struct {
- PyObject_HEAD
+ /* XXX - move this array to unicodectype.c ? */
+ Py_UNICODE linebreak[] = {
+ 0x000A, /* LINE FEED */
+ 0x000D, /* CARRIAGE RETURN */
+ 0x001C, /* FILE SEPARATOR */
+ 0x001D, /* GROUP SEPARATOR */
+ 0x001E, /* RECORD SEPARATOR */
+ 0x0085, /* NEXT LINE */
+ 0x2028, /* LINE SEPARATOR */
+ 0x2029, /* PARAGRAPH SEPARATOR */
+ };
+
+ /* Init the implementation */
+ unicode_freelist = NULL;
+ unicode_freelist_size = 0;
+ unicode_empty = _PyUnicode_New(0);
+ if (!unicode_empty)
+ return;
- /* we know this to be a unicode object, but since we just keep
- it around to keep the object alive, having it as PyObject
- is okay */
- PyObject *str;
+ for (i = 0; i < 256; i++)
+ unicode_latin1[i] = NULL;
+ if (PyType_Ready(&PyUnicode_Type) < 0)
+ Py_FatalError("Can't initialize 'unicode'");
- FieldNameIterator it_field;
-} fieldnameiterobject;
+ /* initialize the linebreak bloom filter */
+ bloom_linebreak = make_bloom_mask(
+ linebreak, sizeof(linebreak) / sizeof(linebreak[0])
+ );
-static void
-fieldnameiter_dealloc(fieldnameiterobject *it)
-{
- Py_XDECREF(it->str);
- PyObject_FREE(it);
+ PyType_Ready(&EncodingMapType);
}
-/* returns a tuple:
- (is_attr, value)
- is_attr is true if we used attribute syntax (e.g., '.foo')
- false if we used index syntax (e.g., '[foo]')
- value is an integer or string
-*/
-static PyObject *
-fieldnameiter_next(fieldnameiterobject *it)
+/* Finalize the Unicode implementation */
+
+void
+_PyUnicode_Fini(void)
{
- int result;
- int is_attr;
- Py_ssize_t idx;
- SubString name;
+ PyUnicodeObject *u;
+ int i;
- result = FieldNameIterator_next(&it->it_field, &is_attr,
- &idx, &name);
- if (result == 0 || result == 1) {
- /* if 0, error has already been set, if 1, iterator is empty */
- return NULL;
- } else {
- PyObject* result = NULL;
- PyObject* is_attr_obj = NULL;
- PyObject* obj = NULL;
+ Py_XDECREF(unicode_empty);
+ unicode_empty = NULL;
- is_attr_obj = PyBool_FromLong(is_attr);
- if (is_attr_obj == NULL)
- goto error;
+ for (i = 0; i < 256; i++) {
+ if (unicode_latin1[i]) {
+ Py_DECREF(unicode_latin1[i]);
+ unicode_latin1[i] = NULL;
+ }
+ }
- /* either an integer or a string */
- if (idx != -1)
- obj = PyInt_FromSsize_t(idx);
- else
- obj = STRINGLIB_NEW(name.ptr, name.end - name.ptr);
- if (obj == NULL)
- goto error;
+ for (u = unicode_freelist; u != NULL;) {
+ PyUnicodeObject *v = u;
+ u = *(PyUnicodeObject **)u;
+ if (v->str)
+ PyMem_DEL(v->str);
+ Py_XDECREF(v->defenc);
+ PyObject_Del(v);
+ }
+ unicode_freelist = NULL;
+ unicode_freelist_size = 0;
+}
- /* return a tuple of values */
- result = PyTuple_Pack(2, is_attr_obj, obj);
- if (result == NULL)
- goto error;
+void
+PyUnicode_InternInPlace(PyObject **p)
+{
+ register PyUnicodeObject *s = (PyUnicodeObject *)(*p);
+ PyObject *t;
+ if (s == NULL || !PyUnicode_Check(s))
+ Py_FatalError(
+ "PyUnicode_InternInPlace: unicode strings only please!");
+ /* If it's a subclass, we don't really know what putting
+ it in the interned dict might do. */
+ if (!PyUnicode_CheckExact(s))
+ return;
+ if (PyUnicode_CHECK_INTERNED(s))
+ return;
+ if (interned == NULL) {
+ interned = PyDict_New();
+ if (interned == NULL) {
+ PyErr_Clear(); /* Don't leave an exception */
+ return;
+ }
+ }
+ /* It might be that the GetItem call fails even
+ though the key is present in the dictionary,
+ namely when this happens during a stack overflow. */
+ Py_ALLOW_RECURSION
+ t = PyDict_GetItem(interned, (PyObject *)s);
+ Py_END_ALLOW_RECURSION
- return result;
+ if (t) {
+ Py_INCREF(t);
+ Py_DECREF(*p);
+ *p = t;
+ return;
+ }
- error:
- Py_XDECREF(result);
- Py_XDECREF(is_attr_obj);
- Py_XDECREF(obj);
- return NULL;
- }
- return NULL;
+ PyThreadState_GET()->recursion_critical = 1;
+ if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
+ PyErr_Clear();
+ PyThreadState_GET()->recursion_critical = 0;
+ return;
+ }
+ PyThreadState_GET()->recursion_critical = 0;
+ /* The two references in interned are not counted by refcnt.
+ The deallocator will take care of this */
+ Py_Refcnt(s) -= 2;
+ PyUnicode_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
}
-static PyMethodDef fieldnameiter_methods[] = {
- {NULL, NULL} /* sentinel */
-};
-
-static PyTypeObject PyFieldNameIter_Type = {
- PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "fieldnameiterator", /* tp_name */
- sizeof(fieldnameiterobject), /* tp_basicsize */
- 0, /* tp_itemsize */
- /* methods */
- (destructor)fieldnameiter_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_compare */
- 0, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT, /* tp_flags */
- 0, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- PyObject_SelfIter, /* tp_iter */
- (iternextfunc)fieldnameiter_next, /* tp_iternext */
- fieldnameiter_methods, /* tp_methods */
- 0};
+void
+PyUnicode_InternImmortal(PyObject **p)
+{
+ PyUnicode_InternInPlace(p);
+ if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
+ PyUnicode_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
+ Py_INCREF(*p);
+ }
+}
PyObject *
-_PyUnicode_FormatterFieldNameSplit(PyObject *field_name)
+PyUnicode_InternFromString(const char *cp)
{
- SubString first;
- Py_ssize_t first_idx;
- fieldnameiterobject *it;
-
- PyObject *first_obj = NULL;
- PyObject *result = NULL;
-
- assert(PyUnicode_Check(field_name));
- it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
- if (it == NULL)
- return NULL;
-
- /* take ownership, give the object to the iterator. this is
- just to keep the field_name alive */
- Py_INCREF(field_name);
- it->str = field_name;
+ PyObject *s = PyUnicode_FromString(cp);
+ if (s == NULL)
+ return NULL;
+ PyUnicode_InternInPlace(&s);
+ return s;
+}
- if (!field_name_split(STRINGLIB_STR(field_name),
- STRINGLIB_LEN(field_name),
- &first, &first_idx, &it->it_field))
- goto error;
+void _Py_ReleaseInternedUnicodeStrings(void)
+{
+ PyObject *keys;
+ PyUnicodeObject *s;
+ Py_ssize_t i, n;
+ Py_ssize_t immortal_size = 0, mortal_size = 0;
- /* first becomes an integer, if possible, else a string */
- if (first_idx != -1)
- first_obj = PyInt_FromSsize_t(first_idx);
- else
- /* convert "first" into a string object */
- first_obj = STRINGLIB_NEW(first.ptr, first.end - first.ptr);
- if (first_obj == NULL)
- goto error;
+ if (interned == NULL || !PyDict_Check(interned))
+ return;
+ keys = PyDict_Keys(interned);
+ if (keys == NULL || !PyList_Check(keys)) {
+ PyErr_Clear();
+ return;
+ }
- /* return a tuple of values */
- result = PyTuple_Pack(2, first_obj, it);
+ /* Since _Py_ReleaseInternedUnicodeStrings() is intended to help a leak
+ detector, interned unicode strings are not forcibly deallocated;
+ rather, we give them their stolen references back, and then clear
+ and DECREF the interned dict. */
-error:
- Py_XDECREF(it);
- Py_XDECREF(first_obj);
- return result;
+ n = PyList_GET_SIZE(keys);
+ fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
+ n);
+ for (i = 0; i < n; i++) {
+ s = (PyUnicodeObject *) PyList_GET_ITEM(keys, i);
+ switch (s->state) {
+ case SSTATE_NOT_INTERNED:
+ /* XXX Shouldn't happen */
+ break;
+ case SSTATE_INTERNED_IMMORTAL:
+ Py_Refcnt(s) += 1;
+ immortal_size += s->length;
+ break;
+ case SSTATE_INTERNED_MORTAL:
+ Py_Refcnt(s) += 2;
+ mortal_size += s->length;
+ break;
+ default:
+ Py_FatalError("Inconsistent interned string state.");
+ }
+ s->state = SSTATE_NOT_INTERNED;
+ }
+ fprintf(stderr, "total size of all interned strings: "
+ "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
+ "mortal/immortal\n", mortal_size, immortal_size);
+ Py_DECREF(keys);
+ PyDict_Clear(interned);
+ Py_DECREF(interned);
+ interned = NULL;
}
+
/********************* Unicode Iterator **************************/
typedef struct {
Modified: python/branches/py3k/Python/sysmodule.c
==============================================================================
--- python/branches/py3k/Python/sysmodule.c (original)
+++ python/branches/py3k/Python/sysmodule.c Mon Aug 27 13:28:18 2007
@@ -660,54 +660,6 @@
return _PyThread_CurrentFrames();
}
-/* sys_formatter_iterator is used to implement
- string.Formatter.vformat. it parses a string and returns tuples
- describing the parsed elements. see unicodeobject.c's
- _PyUnicode_FormatterIterator for details */
-static PyObject *
-sys_formatter_iterator(PyObject *self, PyObject *args)
-{
- /* in 2.6, check type and dispatch to unicode or string
- accordingly */
- PyObject *str;
-
- if (!PyArg_ParseTuple(args, "O:_formatter_iterator", &str))
- return NULL;
-
- if (!PyUnicode_Check(str)) {
- PyErr_SetString(PyExc_TypeError,
- "_formatter_iterator expects unicode object");
- return NULL;
- }
-
- return _PyUnicode_FormatterIterator(str);
-}
-
-/* sys_formatter_field_name_split is used to implement
- string.Formatter.vformat. it takes an PEP 3101 "field name", and
- returns a tuple of (first, rest): "first", the part before the
- first '.' or '['; and "rest", an iterator for the rest of the field
- name. see unicodeobjects' _PyUnicode_FormatterFieldNameSplit for
- details */
-static PyObject *
-sys_formatter_field_name_split(PyObject *self, PyObject *args)
-{
- PyObject *field_name;
-
- if (!PyArg_ParseTuple(args, "O:_formatter_field_name_split",
- &field_name))
- return NULL;
-
- if (!PyUnicode_Check(field_name)) {
- PyErr_SetString(PyExc_TypeError, "_formatter_field_name_split "
- "expects unicode object");
- return NULL;
- }
-
- return _PyUnicode_FormatterFieldNameSplit(field_name);
-}
-
-
PyDoc_STRVAR(call_tracing_doc,
"call_tracing(func, args) -> object\n\
\n\
@@ -772,9 +724,6 @@
callstats_doc},
{"_current_frames", sys_current_frames, METH_NOARGS,
current_frames_doc},
- {"_formatter_parser", sys_formatter_iterator, METH_VARARGS},
- {"_formatter_field_name_split", sys_formatter_field_name_split,
- METH_VARARGS},
{"displayhook", sys_displayhook, METH_O, displayhook_doc},
{"exc_info", sys_exc_info, METH_NOARGS, exc_info_doc},
{"excepthook", sys_excepthook, METH_VARARGS, excepthook_doc},
More information about the Python-3000-checkins mailing list