[pypy-commit] pypy default: add missing file
mattip
pypy.commits at gmail.com
Mon Sep 11 15:40:40 EDT 2017
Author: Matti Picus <matti.picus at gmail.com>
Branch:
Changeset: r92375:303d4b69c445
Date: 2017-09-11 22:39 +0300
http://bitbucket.org/pypy/pypy/changeset/303d4b69c445/
Log: add missing file
diff --git a/pypy/module/cpyext/src/unicodeobject.c b/pypy/module/cpyext/src/unicodeobject.c
new file mode 100644
--- /dev/null
+++ b/pypy/module/cpyext/src/unicodeobject.c
@@ -0,0 +1,423 @@
+
+#include "Python.h"
+/* Write the printf conversion "%[0<width>][.<precision>][l | size_t mod]<c>"
+ * into fmt.  zeropad is only honoured when width is non-zero, and a zero
+ * width or precision is left out.  fmt must be big enough for the result. */
+static void
+makefmt(char *fmt, int longflag, int size_tflag, int zeropad, int width, int precision, char c)
+{
+    *fmt++ = '%';
+    if (width) {
+        if (zeropad)
+            *fmt++ = '0';
+        fmt += sprintf(fmt, "%d", width);
+    }
+    if (precision)
+        fmt += sprintf(fmt, ".%d", precision);
+    if (longflag)
+        *fmt++ = 'l';
+    else if (size_tflag)
+        fmt += sprintf(fmt, "%s", PY_FORMAT_SIZE_T);
+    *fmt++ = c;
+    *fmt = '\0';
+}
+
+/* Append the bytes of the NUL-terminated `string` at `s`, one per output
+ * element, advancing `s`; relies on the caller's `copy` and `s` locals. */
+#define appendstring(string) \
+    do { \
+        for (copy = (string); *copy; copy++) \
+            *s++ = (unsigned char)*copy; \
+    } while (0)
+
+/* Build a unicode object from a printf-style format string and a va_list.
+ *
+ * Handled conversions: %%, %c, %d, %u, %i, %x (optional width/precision;
+ * 'l' and 'z' modifiers on d and u), %s (UTF-8 char *), %U (unicode object),
+ * %V (unicode object, or the following char * when the object is NULL),
+ * %S (PyObject_Str), %R (PyObject_Repr) and %p.  An unknown conversion makes
+ * the remainder of the format string be copied to the output verbatim.
+ * Returns the new unicode object, or NULL on failure. */
+PyObject *
+PyUnicode_FromFormatV(const char *format, va_list vargs)
+{
+    va_list count;
+    Py_ssize_t callcount = 0;
+    PyObject **callresults = NULL;
+    PyObject **callresult = NULL;
+    Py_ssize_t n = 0;
+    int width = 0;
+    int precision = 0;
+    int zeropad;
+    const char *f;
+    Py_UNICODE *s;
+    PyObject *string;
+    char buffer[21];        /* used by sprintf */
+    /* abuffer replaces buffer when a width specifier needs more space */
+    char *abuffer = NULL;
+    char *realbuffer;
+    Py_ssize_t abuffersize = 0;
+    char fmt[60]; /* should be enough for %0width.precisionld */
+    const char *copy;
+
+#ifdef VA_LIST_IS_ARRAY
+    Py_MEMCPY(count, vargs, sizeof(va_list));
+#else
+#ifdef __va_copy
+    __va_copy(count, vargs);
+#else
+    count = vargs;
+#endif
+#endif
+    /* step 1: count the %S/%R/%s conversions; each one yields an object in
+     * step 3 (PyObject_Str/PyObject_Repr/PyUnicode_DecodeUTF8) kept for step 4 */
+    for (f = format; *f; f++) {
+        if (*f == '%') {
+            f++;
+            while (*f && *f != '%' && !isalpha((unsigned char)*f))
+                f++;
+            if (!*f)
+                break;
+            if (*f == 's' || *f=='S' || *f=='R')
+                ++callcount;
+        }
+    }
+    /* step 2: allocate the array that holds those results */
+    if (callcount) {
+        callresults = PyObject_Malloc(sizeof(PyObject *)*callcount);
+        if (!callresults) {
+            PyErr_NoMemory();
+            return NULL;
+        }
+        callresult = callresults;
+    }
+    /* step 3: figure out how large a buffer we need */
+    for (f = format; *f; f++) {
+        if (*f == '%') {
+            const char *p = f++;
+            char lenmod = 0;    /* 'l', 'z' or 0 when there is no modifier */
+            width = 0;
+            while (isdigit((unsigned char)*f))
+                width = (width*10) + *f++ - '0';
+            precision = 0;
+            if (*f == '.') {
+                f++;
+                while (isdigit((unsigned char)*f))
+                    precision = (precision*10) + *f++ - '0';
+            }
+            /* remember the 'l' or 'z' in {%ld, %zd, %lu, %zu}: it does not
+             * affect the amount of space we reserve, but it decides which
+             * argument type has to be consumed from the va_list below. */
+            if ((*f == 'l' || *f == 'z') &&
+                (f[1] == 'd' || f[1] == 'u'))
+                lenmod = *f++;
+
+            switch (*f) {
+            case 'c':
+            {
+                int ordinal = va_arg(count, int);
+#ifdef Py_UNICODE_WIDE
+                if (ordinal < 0 || ordinal > 0x10ffff) {
+                    PyErr_SetString(PyExc_OverflowError,
+                                    "%c arg not in range(0x110000) "
+                                    "(wide Python build)");
+                    goto fail;
+                }
+#else
+                if (ordinal < 0 || ordinal > 0xffff) {
+                    PyErr_SetString(PyExc_OverflowError,
+                                    "%c arg not in range(0x10000) "
+                                    "(narrow Python build)");
+                    goto fail;
+                }
+#endif
+                /* fall through... */
+            }
+            case '%':
+                n++;
+                break;
+            case 'd': case 'u': case 'i': case 'x':
+                /* consume the argument with the type step 4 will read */
+                if (lenmod == 'l')
+                    (void) va_arg(count, long);
+                else if (lenmod == 'z')
+                    (void) va_arg(count, size_t);
+                else
+                    (void) va_arg(count, int);
+                if (width < precision)
+                    width = precision;
+                /* 20 bytes is enough to hold a 64-bit integer in decimal,
+                 * the widest base handled here.  A larger width needs
+                 * more, which is allocated before step 4. */
+                if (width < 20)
+                    width = 20;
+                n += width;
+                if (abuffersize < width)
+                    abuffersize = width;
+                break;
+            case 's':
+            {
+                /* UTF-8 */
+                const char *utf8 = va_arg(count, const char*);
+                PyObject *str = PyUnicode_DecodeUTF8(utf8, strlen(utf8), "replace");
+                if (!str)
+                    goto fail;
+                n += PyUnicode_GET_SIZE(str);
+                /* Remember the str and switch to the next slot */
+                *callresult++ = str;
+                break;
+            }
+            case 'U':
+            {
+                PyObject *obj = va_arg(count, PyObject *);
+                assert(obj && PyUnicode_Check(obj));
+                n += PyUnicode_GET_SIZE(obj);
+                break;
+            }
+            case 'V':
+            {
+                PyObject *obj = va_arg(count, PyObject *);
+                const char *str = va_arg(count, const char *);
+                assert(obj || str);
+                assert(!obj || PyUnicode_Check(obj));
+                if (obj)
+                    n += PyUnicode_GET_SIZE(obj);
+                else
+                    n += strlen(str);
+                break;
+            }
+            case 'S':
+            {
+                PyObject *obj = va_arg(count, PyObject *);
+                PyObject *str;
+                assert(obj);
+                str = PyObject_Str(obj);
+                if (!str)
+                    goto fail;
+                n += PyString_GET_SIZE(str);
+                /* Remember the str and switch to the next slot */
+                *callresult++ = str;
+                break;
+            }
+            case 'R':
+            {
+                PyObject *obj = va_arg(count, PyObject *);
+                PyObject *repr;
+                assert(obj);
+                repr = PyObject_Repr(obj);
+                if (!repr)
+                    goto fail;
+                n += PyString_GET_SIZE(repr); /* step 4 reads it as a byte string */
+                /* Remember the repr and switch to the next slot */
+                *callresult++ = repr;
+                break;
+            }
+            case 'p':
+                /* must match the void * that step 4 reads */
+                (void) va_arg(count, void *);
+                /* the longest 64-bit pointer representation,
+                 * 0xffffffffffffffff, is 18 characters, so 19 is enough. */
+                n += 19;
+                break;
+            default:
+                /* unknown formatting code: copy the rest of the format
+                 * string to the output (we cannot just skip the code, since
+                 * there's no way to know what's in the argument list) */
+                n += strlen(p);
+                goto expand;
+            }
+        } else
+            n++;
+    }
+  expand:
+    if (abuffersize > 20) {
+        /* add 1 for sprintf's trailing null byte */
+        abuffer = PyObject_Malloc(abuffersize + 1);
+        if (!abuffer) {
+            PyErr_NoMemory();
+            goto fail;
+        }
+        realbuffer = abuffer;
+    }
+    else
+        realbuffer = buffer;
+    /* step 4: fill the buffer.  The worst-case size was computed above, so
+     * the string never has to grow while it is being filled. */
+    string = PyUnicode_FromUnicode(NULL, n);
+    if (!string)
+        goto fail;
+    s = PyUnicode_AS_UNICODE(string);
+    callresult = callresults;
+
+    for (f = format; *f; f++) {
+        if (*f == '%') {
+            const char *p = f++;
+            int longflag = 0;
+            int size_tflag = 0;
+            zeropad = (*f == '0');
+            /* parse the width.precision part */
+            width = 0;
+            while (isdigit((unsigned char)*f))
+                width = (width*10) + *f++ - '0';
+            precision = 0;
+            if (*f == '.') {
+                f++;
+                while (isdigit((unsigned char)*f))
+                    precision = (precision*10) + *f++ - '0';
+            }
+            /* handle the long flag, but only for %ld and %lu.
+               others can be added when necessary. */
+            if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
+                longflag = 1;
+                ++f;
+            }
+            /* handle the size_t flag. */
+            if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
+                size_tflag = 1;
+                ++f;
+            }
+
+            switch (*f) {
+            case 'c':
+                *s++ = va_arg(vargs, int);
+                break;
+            case 'd':
+                makefmt(fmt, longflag, size_tflag, zeropad, width, precision, 'd');
+                if (longflag)
+                    sprintf(realbuffer, fmt, va_arg(vargs, long));
+                else if (size_tflag)
+                    sprintf(realbuffer, fmt, va_arg(vargs, Py_ssize_t));
+                else
+                    sprintf(realbuffer, fmt, va_arg(vargs, int));
+                appendstring(realbuffer);
+                break;
+            case 'u':
+                makefmt(fmt, longflag, size_tflag, zeropad, width, precision, 'u');
+                if (longflag)
+                    sprintf(realbuffer, fmt, va_arg(vargs, unsigned long));
+                else if (size_tflag)
+                    sprintf(realbuffer, fmt, va_arg(vargs, size_t));
+                else
+                    sprintf(realbuffer, fmt, va_arg(vargs, unsigned int));
+                appendstring(realbuffer);
+                break;
+            case 'i':
+                makefmt(fmt, 0, 0, zeropad, width, precision, 'i');
+                sprintf(realbuffer, fmt, va_arg(vargs, int));
+                appendstring(realbuffer);
+                break;
+            case 'x':
+                makefmt(fmt, 0, 0, zeropad, width, precision, 'x');
+                sprintf(realbuffer, fmt, va_arg(vargs, int));
+                appendstring(realbuffer);
+                break;
+            case 's':
+            {
+                /* unused, since we already have the result */
+                (void) va_arg(vargs, char *);
+                Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult),
+                                PyUnicode_GET_SIZE(*callresult));
+                s += PyUnicode_GET_SIZE(*callresult);
+                /* We're done with the unicode()/repr() => forget it */
+                Py_DECREF(*callresult);
+                /* switch to next unicode()/repr() result */
+                ++callresult;
+                break;
+            }
+            case 'U':
+            {
+                PyObject *obj = va_arg(vargs, PyObject *);
+                Py_ssize_t size = PyUnicode_GET_SIZE(obj);
+                Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size);
+                s += size;
+                break;
+            }
+            case 'V':
+            {
+                PyObject *obj = va_arg(vargs, PyObject *);
+                const char *str = va_arg(vargs, const char *);
+                if (obj) {
+                    Py_ssize_t size = PyUnicode_GET_SIZE(obj);
+                    Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size);
+                    s += size;
+                } else {
+                    appendstring(str);
+                }
+                break;
+            }
+            case 'S':
+            case 'R':
+            {
+                const char *str = PyString_AS_STRING(*callresult);
+                /* unused, since we already have the result */
+                (void) va_arg(vargs, PyObject *);
+                appendstring(str);
+                /* We're done with the unicode()/repr() => forget it */
+                Py_DECREF(*callresult);
+                /* switch to next unicode()/repr() result */
+                ++callresult;
+                break;
+            }
+            case 'p':
+                sprintf(buffer, "%p", va_arg(vargs, void*));
+                /* %p is ill-defined:  ensure leading 0x. */
+                if (buffer[1] == 'X')
+                    buffer[1] = 'x';
+                else if (buffer[1] != 'x') {
+                    memmove(buffer+2, buffer, strlen(buffer)+1);
+                    buffer[0] = '0';
+                    buffer[1] = 'x';
+                }
+                appendstring(buffer);
+                break;
+            case '%':
+                *s++ = '%';
+                break;
+            default:
+                appendstring(p);
+                goto end;
+            }
+        } else
+            *s++ = *f;
+    }
+
+  end:
+    if (callresults)
+        PyObject_Free(callresults);
+    if (abuffer)
+        PyObject_Free(abuffer);
+    PyUnicode_Resize(&string, s - PyUnicode_AS_UNICODE(string));
+    return string;
+  fail:
+    if (callresults) {
+        PyObject **callresult2 = callresults;
+        while (callresult2 < callresult) {
+            Py_DECREF(*callresult2);
+            ++callresult2;
+        }
+        PyObject_Free(callresults);
+    }
+    if (abuffer)
+        PyObject_Free(abuffer);
+    return NULL;
+}
+
+#undef appendstring
+
+/* Variadic front end: forwards its arguments to PyUnicode_FromFormatV(). */
+PyObject *
+PyUnicode_FromFormat(const char *format, ...)
+{
+    va_list args;
+    PyObject *result;
+#ifndef HAVE_STDARG_PROTOTYPES
+    va_start(args);
+#else
+    va_start(args, format);
+#endif
+    result = PyUnicode_FromFormatV(format, args);
+    va_end(args);
+    return result;
+}
+
+
More information about the pypy-commit
mailing list