[Csv] test coverage

Skip Montanaro skip at pobox.com
Mon Feb 3 17:57:04 CET 2003


Attached is the output of running a gcov-instrumented version of Python and
the _csv module against the current test suite.

FYI.

Skip

-------------- next part --------------
                /* TODO:
                   + Add reader() and writer() functions which return CSV
                     reader/writer objects which implement the PEP interface:
                
                     csvreader = csv.reader(file("blah.csv", "rb"), kwargs)
                     for row in csvreader:
                         process(row)
                
                     csvwriter = csv.writer(file("some.csv", "wb"), kwargs)
                     for row in someiter:
                         csvwriter.write(row)
                
                   + Add CsvWriter.writelines(someiter)
                */
                
                #include "Python.h"
                #include "structmember.h"
                
                /* begin 2.2 compatibility macros */
                #ifndef PyDoc_STRVAR
                /* Define macros for inline documentation. */
                #define PyDoc_VAR(name) static char name[]
                #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
                #ifdef WITH_DOC_STRINGS
                #define PyDoc_STR(str) str
                #else
                #define PyDoc_STR(str) ""
                #endif
                #endif /* ifndef PyDoc_STRVAR */
                
                #ifndef PyMODINIT_FUNC
                #       if defined(__cplusplus)
                #               define PyMODINIT_FUNC extern "C" void
                #       else /* __cplusplus */
                #               define PyMODINIT_FUNC void
                #       endif /* __cplusplus */
                #endif
                /* end 2.2 compatibility macros */
                
                static PyObject *error_obj;     /* CSV exception */
                
                /* States of the character-at-a-time parser; the current state
                   lives in ParserObj.state and is advanced by
                   parse_process_char(). */
                typedef enum {
                        START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD, 
                        IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD
                } ParserState;
                
                /* Quoting policies stored in ParserObj.quoting and consulted by
                   join_append_data() when a record is written. */
                typedef enum {
                        QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
                } QuoteStyle;
                
                /* Instance data for the Parser type.  One object holds the
                   dialect settings, the incremental parse state (state, fields,
                   field buffer) and the output buffer used by join(). */
                typedef struct {
                        PyObject_HEAD
                
                        int doublequote;        /* is " represented by ""? */
                        char delimiter;         /* field separator */
                        int have_quotechar;     /* is a quotechar defined */
                        char quotechar;         /* quote character */
                        int have_escapechar;    /* is an escapechar defined */
                        char escapechar;        /* escape character */
                        int skipinitialspace;   /* ignore spaces following delimiter? */
                        PyObject *lineterminator; /* string to write between records */
                        QuoteStyle quoting;     /* style of quoting to write */
                
                        ParserState state;      /* current CSV parse state */
                        PyObject *fields;       /* field list for current record */
                
                        int autoclear;          /* should fields be cleared on next
                                                   parse() after exception? */
                        int strict;             /* raise exception on bad CSV */
                
                        int had_parse_error;    /* did we have a parse error? */
                
                        char *field;            /* build current field in here */
                        int field_size;         /* size of allocated buffer */
                        int field_len;          /* length of current field */
                
                        char *rec;              /* buffer for parser.join */
                        int rec_size;           /* size of allocated record */
                        int rec_len;            /* length of record */
                        int num_fields;         /* number of fields in record */
                } ParserObj;
                
                staticforward PyTypeObject Parser_Type;
                
                /* Format a printf-style message into a fixed 512-byte stack
                   buffer and set it as the value of the error_obj exception.
                   Always returns NULL, so callers can write
                   "return raise_exception(...)". */
                static PyObject *
                raise_exception(char *fmt, ...)
           1    {
           1            va_list ap;
           1            char msg[512];
           1            PyObject *pymsg;
                
           1            va_start(ap, fmt);
                #ifdef _WIN32
                        _vsnprintf(msg, sizeof(msg), fmt, ap);
                #else
           1            vsnprintf(msg, sizeof(msg), fmt, ap);
                #endif
           1            va_end(ap);
                        /* NOTE(review): pymsg is handed to PyErr_SetObject() without
                           a NULL check. */
           1            pymsg = PyString_FromString(msg);
           1            PyErr_SetObject(error_obj, pymsg);
           1            Py_XDECREF(pymsg);
                
           1            return NULL;
                }
                
                /* Turn the bytes accumulated in self->field into a Python string,
                   append it to self->fields, and reset the field length to zero
                   so the buffer can be reused for the next field. */
                static void
                parse_save_field(ParserObj *self)
          39    {
          39            PyObject *field;
                
          39            field = PyString_FromStringAndSize(self->field, self->field_len);
          39            if (field != NULL) {
                                /* NOTE(review): the return value of PyList_Append()
                                   is ignored here. */
          39                    PyList_Append(self->fields, field);
          39                    Py_XDECREF(field);
                        }
                        /* The length is reset even when string creation failed. */
          39            self->field_len = 0;
                }
                
                /* Grow the field buffer: allocate 4096 bytes the first time,
                   double the size on every later call.  Returns 1 on success;
                   on allocation failure sets MemoryError and returns 0. */
                static int
                parse_grow_buff(ParserObj *self)
          12    {
          12            if (self->field_size == 0) {
          12                    self->field_size = 4096;
          12                    self->field = PyMem_Malloc(self->field_size);
                        }
                        else {
                                /* NOTE(review): the realloc result is stored straight
                                   into self->field, so a NULL return overwrites the
                                   only pointer to the old buffer, and field_size has
                                   already been doubled at that point. */
      ######                    self->field_size *= 2;
      ######                    self->field = PyMem_Realloc(self->field, self->field_size);
                        }
          12            if (self->field == NULL) {
      ######                    PyErr_NoMemory();
      ######                    return 0;
                        }
          12            return 1;
                }
                
                /* Append one character to the current field, growing the buffer
                   first when it is full.  If growing fails the character is
                   dropped and the function returns without reporting it; the
                   exception has been set by parse_grow_buff(). */
                static void
                parse_add_char(ParserObj *self, char c)
         192    {
         192            if (self->field_len == self->field_size && !parse_grow_buff(self))
      ######                    return;
         192            self->field[self->field_len++] = c;
                }
                
                /* Insert one character at the front of the current field,
                   growing the buffer first when it is full and shifting the
                   existing bytes up by one.  Returns silently if growing fails.
                   No caller is visible in this part of the file. */
                static void
                parse_prepend_char(ParserObj *self, char c)
      ######    {
      ######            if (self->field_len == self->field_size && !parse_grow_buff(self))
      ######                    return;
                        /* memmove, not memcpy: source and destination overlap. */
      ######            memmove(self->field + 1, self->field, self->field_len);
      ######            self->field[0] = c;
      ######            self->field_len++;
                }
                
                /* Feed one character to the parser state machine.  The caller
                   passes '\0' to signal end of line.  Completed fields are
                   appended to self->fields via parse_save_field(); a finished
                   record leaves the state at START_RECORD.  In strict mode an
                   unexpected character after a closing quote sets
                   had_parse_error and raises the module exception.

                   NOTE(review): apart from the QUOTE_IN_QUOTED_FIELD case, the
                   comparisons against self->quotechar and self->escapechar do
                   not consult have_quotechar / have_escapechar. */
                static void
                parse_process_char(ParserObj *self, char c)
         262    {
         262            switch (self->state) {
                        case START_RECORD:
                                /* start of record */
          17                    if (c == '\0')
                                        /* empty line - return [] */
      ######                            break;
                                /* normal character - handle as START_FIELD */
          17                    self->state = START_FIELD;
                                /* fallthru */
                        case START_FIELD:
                                /* expecting field */
          39                    if (c == '\0') {
                                        /* save empty field - return [fields] */
           3                            parse_save_field(self);
           3                            self->state = START_RECORD;
                                }
          36                    else if (c == self->quotechar) {
                                        /* start quoted field */
          12                            self->state = IN_QUOTED_FIELD;
                                }
          24                    else if (c == self->escapechar) {
                                        /* possible escaped character */
      ######                            self->state = ESCAPED_CHAR;
                                }
          24                    else if (c == self->delimiter) {
                                        /* save empty field */
           2                            parse_save_field(self);
                                }
          22                    else if (c == ' ' && self->skipinitialspace)
                                        /* ignore space at start of field */
                                        ;
                                else {
                                        /* begin new unquoted field */
          22                            parse_add_char(self, c);
          22                            self->state = IN_FIELD;
                                }
          22                    break;
                
                        case ESCAPED_CHAR:
                                /* character after an escapechar in an unquoted field:
                                   the escapechar itself is kept unless it preceded an
                                   escapechar, delimiter or quotechar */
      ######                    if (c != self->escapechar && c != self->delimiter &&
                                    c != self->quotechar)
      ######                            parse_add_char(self, self->escapechar);
      ######                    parse_add_char(self, c);
      ######                    self->state = IN_FIELD;
      ######                    break;
                
                        case IN_FIELD:
                                /* in unquoted field */
          42                    if (c == '\0') {
                                        /* end of line - return [fields] */
           8                            parse_save_field(self);
           8                            self->state = START_RECORD;
                                }
          34                    else if (c == self->escapechar) {
                                        /* possible escaped character */
      ######                            self->state = ESCAPED_CHAR;
                                }
          34                    else if (c == self->delimiter) {
                                        /* save field - wait for new field */
          16                            parse_save_field(self);
          16                            self->state = START_FIELD;
                                }
                                else {
                                        /* normal character - save in field */
          18                            parse_add_char(self, c);
                                }
          18                    break;
                
                        case IN_QUOTED_FIELD:
                                /* in quoted field */
         162                    if (c == '\0') {
                                        /* end of line - save '\n' in field */
           2                            parse_add_char(self, '\n');
                                }
         160                    else if (c == self->escapechar) {
                                        /* Possible escape character */
      ######                            self->state = ESCAPE_IN_QUOTED_FIELD;
                                }
         160                    else if (c == self->quotechar) {
          19                            if (self->doublequote) {
                                                /* doublequote; " represented by "" */
          19                                    self->state = QUOTE_IN_QUOTED_FIELD;
                                        }
                                        else {
                                                /* end of quote part of field */
      ######                                    self->state = IN_FIELD;
                                        }
                                }
                                else {
                                        /* normal character - save in field */
         141                            parse_add_char(self, c);
                                }
         141                    break;
                
                        case ESCAPE_IN_QUOTED_FIELD:
                                /* same rule as ESCAPED_CHAR, but parsing resumes
                                   inside the quoted field */
      ######                    if (c != self->escapechar && c != self->delimiter &&
                                    c != self->quotechar)
      ######                            parse_add_char(self, self->escapechar);
      ######                    parse_add_char(self, c);
      ######                    self->state = IN_QUOTED_FIELD;
      ######                    break;
                
                        case QUOTE_IN_QUOTED_FIELD:
                                /* doublequote - seen a quote in a quoted field */
          19                    if (self->have_quotechar && c == self->quotechar) {
                                        /* save "" as " */
           7                            parse_add_char(self, c);
           7                            self->state = IN_QUOTED_FIELD;
                                }
          12                    else if (c == self->delimiter) {
                                        /* save field - wait for new field */
           4                            parse_save_field(self);
           4                            self->state = START_FIELD;
                                }
           8                    else if (c == '\0') {
                                        /* end of line - return [fields] */
           6                            parse_save_field(self);
           6                            self->state = START_RECORD;
                                }
           2                    else if (!self->strict) {
                                        /* lenient mode: keep the character and carry
                                           on as an unquoted field */
           2                            parse_add_char(self, c);
           2                            self->state = IN_FIELD;
                                }
                                else {
                                        /* illegal */
      ######                            self->had_parse_error = 1;
      ######                            raise_exception("%c expected after %c", 
                                                        self->delimiter, self->quotechar);
                                }
                                break;
                
                        }
                }
                
                /* Discard any partially parsed record: release the current
                   field list, install a fresh empty one, and reset the field
                   length, parser state and had_parse_error flag. */
                static void
                clear_fields_and_status(ParserObj *self)
      ######    {
      ######            if (self->fields) {
      ######                    Py_XDECREF(self->fields);
                        }
                        /* NOTE(review): the result of PyList_New() is not checked. */
      ######            self->fields = PyList_New(0);
      ######            self->field_len = 0;
      ######            self->state = START_RECORD;
                
      ######            self->had_parse_error = 0;
                }
                
                /* ---------------------------------------------------------------- */
                
                PyDoc_STRVAR(Parser_parse_doc,
                "parse(s) -> list of strings\n"
                "\n"
                "CSV parse the single line in the string s and return a\n"
                "list of string fields.  If the CSV record contains multi-line\n"
                "fields, the function will return None until all lines of the\n"
                "record have been parsed.");
                
                /* Implementation of Parser.parse(s).  Feeds the characters of
                   one line to parse_process_char(), followed by a '\0' marking
                   end of line.  A single trailing "\r", "\r\n" or "\n" is
                   accepted; any other text after a line ending sets
                   had_parse_error and raises "newline inside string".

                   Returns the list of fields (ownership passes to the caller and
                   a fresh list is installed) when the record is complete, None
                   when the record continues on the next line, or NULL with an
                   exception set. */
                static PyObject *
                Parser_parse(ParserObj *self, PyObject *args)
          19    {
          19            char *line;
                
          19            if (!PyArg_ParseTuple(args, "s", &line))
      ######                    return NULL;
                
                        /* Recover automatically from an earlier parse error if the
                           autoclear attribute is set. */
          19            if (self->autoclear && self->had_parse_error)
      ######                    clear_fields_and_status(self);
                
                        /* Process line of text - send '\0' to processing code to
                           represent end of line.  End of line which is not at end of
                           string is an error. */
         262            while (*line) {
         246                    char c;
                
         246                    c = *line++;
         246                    if (c == '\r') {
      ######                            c = *line++;
      ######                            if (c == '\0')
                                                /* macintosh end of line */
      ######                                    break;
      ######                            if (c == '\n') {
      ######                                    c = *line++;
      ######                                    if (c == '\0')
                                                        /* DOS end of line */
      ######                                            break;
                                        }
      ######                            self->had_parse_error = 1;
      ######                            return raise_exception("newline inside string");
                                }
         246                    if (c == '\n') {
           3                            c = *line++;
           3                            if (c == '\0')
                                                /* unix end of line */
           3                                    break;
      ######                            self->had_parse_error = 1;
      ######                            return raise_exception("newline inside string");
                                }
         243                    parse_process_char(self, c);
         243                    if (PyErr_Occurred())
      ######                            return NULL;
                        }
                        /* NOTE(review): unlike the call inside the loop, this call is
                           not followed by a PyErr_Occurred() check. */
          19            parse_process_char(self, '\0');
                
          19            if (self->state == START_RECORD) {
                                /* Record complete: hand the list over and start a new
                                   one.  NOTE(review): the PyList_New() result is not
                                   checked. */
          17                    PyObject *fields = self->fields;
          17                    self->fields = PyList_New(0);
          17                    return fields;
                        }
                
                        /* Record continues on the next line. */
           2            Py_INCREF(Py_None);
           2            return Py_None;
                }
                
                /* ---------------------------------------------------------------- */
                
                PyDoc_STRVAR(Parser_clear_doc,
                "clear() -> None\n"
                "\n"
                "Discard partially parsed record.  This must be called to reset\n"
                "parser state after an exception.");
                
                /* Implementation of Parser.clear(): reset the parse state via
                   clear_fields_and_status() and return None. */
                static PyObject *
                Parser_clear(ParserObj *self)
      ######    {
      ######            clear_fields_and_status(self);
                
      ######            Py_INCREF(Py_None);
      ######            return Py_None;
                }
                
                /* ---------------------------------------------------------------- */
                /* Start a new output record: zero the record length and field
                   count.  The record buffer itself is kept for reuse. */
                static void
                join_reset(ParserObj *self)
          11    {
          11            self->rec_len = 0;
          11            self->num_fields = 0;
                }
                
                #define MEM_INCR 32768
                
                /* Calculate new record length or append field to record.  Return new
                 * record length.
                 *
                 * The function is called twice per field (see join_append()):
                 *   copy_phase == 0  only counts the bytes the field will need;
                 *   copy_phase == 1  writes them into self->rec, which the caller
                 *                    must already have sized.
                 * self->rec_len is read but not modified here.
                 *
                 * field       NUL-terminated field text.
                 * quote_empty when true, an empty field is quoted (if a quotechar
                 *             is defined).
                 * quoted      in/out flag: set to 1 as soon as the field turns out
                 *             to need quoting; the value found by the counting
                 *             pass is passed back in for the copy pass.
                 *
                 * Returns -1 with an exception set when a special character can
                 * be neither quoted nor escaped.
                 */
                static int
                join_append_data(ParserObj *self, char *field, int quote_empty,
                                 int *quoted, int copy_phase)
         270    {
         270            int i, rec_len;
                
         270            rec_len = self->rec_len;
                
                        /* If this is not the first field we need a field separator.
                         */
         270            if (self->num_fields > 0) {
         248                    if (copy_phase)
         124                            self->rec[rec_len] = self->delimiter;
         248                    rec_len++;
                        }
                        /* Handle preceding quote.
                         */
         270            switch (self->quoting) {
                        case QUOTE_ALL:
      ######                    *quoted = 1;
      ######                    if (copy_phase)
      ######                            self->rec[rec_len] = self->quotechar;
      ######                    rec_len++;
      ######                    break;
                        case QUOTE_MINIMAL:
                        case QUOTE_NONNUMERIC:
                                /* We only know about quoted in the copy phase.
                                 */
         270                    if (copy_phase && *quoted) {
           3                            self->rec[rec_len] = self->quotechar;
           3                            rec_len++;
                                }
                                break;
                        case QUOTE_NONE:
         270                    break;
                        }
                        /* Copy/count field data.
                         */
        1090            for (i = 0;; i++) {
        1090                    char c = field[i];
                
        1090                    if (c == '\0')
         270                            break;
                                /* If in doublequote mode we escape quote chars with a
                                 * quote.
                                 */
         820                    if (self->have_quotechar
                                    && c == self->quotechar && self->doublequote) {
           4                            if (copy_phase)
           2                                    self->rec[rec_len] = self->quotechar;
           4                            *quoted = 1;
           4                            rec_len++;
                                /* NOTE(review): c is a plain char passed to isdigit();
                                 * where char is signed, bytes above 0x7f become
                                 * negative arguments.
                                 */
         816                    } else if (self->quoting == QUOTE_NONNUMERIC && !*quoted
                                           && !(isdigit(c) || c == '+' || c == '-' || c == '.'))
      ######                            *quoted = 1;
                
                                /* Some special characters need to be escaped.  If we have a
                                 * quote character switch to quoted field instead of escaping
                                 * individual characters.
                                 */
         820                    if (!*quoted
                                    && (c == self->delimiter || c == self->escapechar
                                        || c == '\n' || c == '\r')) {
           2                            if (self->have_quotechar
                                            && self->quoting != QUOTE_NONE)
           2                                    *quoted = 1;
                                        /* NOTE(review): tests the escapechar value itself,
                                         * not the have_escapechar flag.
                                         */
      ######                            else if (self->escapechar) {
      ######                                    if (copy_phase)
      ######                                            self->rec[rec_len] = self->escapechar;
      ######                                    rec_len++;
                                        }
                                        else {
      ######                                    raise_exception("delimiter must be quoted or escaped");
      ######                                    return -1;
                                        }
                                }
                                /* Copy field character into record buffer.
                                 */
         820                    if (copy_phase)
         410                            self->rec[rec_len] = c;
         820                    rec_len++;
                        }
                
                        /* If field is empty check if it needs to be quoted.
                         */
         270            if (i == 0 && quote_empty && self->have_quotechar)
      ######                    *quoted = 1;
                
                        /* Handle final quote character on field.
                         */
         270            if (*quoted) {
           6                    if (copy_phase)
           3                            self->rec[rec_len] = self->quotechar;
                                else
                                        /* Didn't know about leading quote until we found it
                                         * necessary in field data - compensate for it now.
                                         */
           3                            rec_len++;
           6                    rec_len++;
                        }
                
         270            return rec_len;
                }
                
                /* Make sure the record buffer can hold rec_len bytes.  The
                   buffer is only ever grown, and its size is rounded up to the
                   next multiple of MEM_INCR above rec_len.  Returns 1 on
                   success; on allocation failure sets MemoryError and
                   returns 0. */
                static int
                join_check_rec_size(ParserObj *self, int rec_len)
         146    {
         146            if (rec_len > self->rec_size) {
          11                    if (self->rec_size == 0) {
          11                            self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
          11                            self->rec = PyMem_Malloc(self->rec_size);
                                }
                                else {
      ######                            char *old_rec = self->rec;
                
      ######                            self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
      ######                            self->rec = PyMem_Realloc(self->rec, self->rec_size);
                                        /* NOTE(review): the old buffer came from the
                                           PyMem_* allocator but is released with plain
                                           free(); rec_size also keeps the new value
                                           although rec is now NULL. */
      ######                            if (self->rec == NULL)
      ######                                    free(old_rec);
                                }
          11                    if (self->rec == NULL) {
      ######                            PyErr_NoMemory();
      ######                            return 0;
                                }
                        }
         146            return 1;
                }
                
                /* Append one field to the record being built.  A counting pass
                   of join_append_data() determines the space needed, the buffer
                   is grown if necessary, then a copy pass writes the field.
                   Returns 1 on success, 0 with an exception set on failure. */
                static int
                join_append(ParserObj *self, char *field, int quote_empty)
         135    {
         135            int rec_len, quoted;
                
         135            quoted = 0;
         135            rec_len = join_append_data(self, field, quote_empty, &quoted, 0);
         135            if (rec_len < 0)
      ######                    return 0;
                
                        /* grow record buffer if necessary */
         135            if (!join_check_rec_size(self, rec_len))
      ######                    return 0;
                
                        /* quoted now carries the result of the counting pass, so the
                           copy pass knows whether to emit the leading quote. */
         135            self->rec_len = join_append_data(self, field, quote_empty, &quoted, 1);
         135            self->num_fields++;
                
         135            return 1;
                }
                
                /* Append self->lineterminator to the record buffer, growing the
                   buffer first if needed.  Returns 1 on success, 0 with an
                   exception set if the buffer cannot be grown. */
                static int
                join_append_lineterminator(ParserObj *self)
          11    {
          11            int terminator_len;
                
                        /* NOTE(review): the PyString_Size() result is used without an
                           error check - confirm lineterminator is always a string. */
          11            terminator_len = PyString_Size(self->lineterminator);
                
                        /* grow record buffer if necessary */
          11            if (!join_check_rec_size(self, self->rec_len + terminator_len))
      ######                    return 0;
                
          11            memmove(self->rec + self->rec_len,
                                PyString_AsString(self->lineterminator), terminator_len);
          11            self->rec_len += terminator_len;
                
          11            return 1;
                }
                
                /* Return the first rec_len bytes of the record buffer as a new
                   Python string. */
                static PyObject *
                join_string(ParserObj *self)
          11    {
          11            return PyString_FromStringAndSize(self->rec, self->rec_len);
                }
                
                PyDoc_STRVAR(Parser_join_doc,
                "join(sequence) -> string\n"
                "\n"
                "Construct a CSV record from a sequence of fields.  Non-string\n"
                "elements will be converted to string.");
                
                /* Implementation of Parser.join(sequence).  Builds one CSV
                   record in the internal buffer: strings are used as they are,
                   None becomes an empty field, any other object is converted
                   with PyObject_Str().  The line terminator is appended and the
                   result returned as a new string.  Returns NULL with an
                   exception set on failure.

                   Fields are passed on as C strings, so join_append_data() stops
                   at the first NUL byte of a field. */
                static PyObject *
                Parser_join(ParserObj *self, PyObject *seq)
          12    {
          12            int len, i;
                
          12            if (!PySequence_Check(seq))
           1                    return raise_exception("sequence expected");
                
          11            len = PySequence_Length(seq);
          11            if (len < 0)
      ######                    return NULL;
                
                        /* Join all fields in internal buffer.
                         */
          11            join_reset(self);
         146            for (i = 0; i < len; i++) {
         135                    PyObject *field;
         135                    int append_ok;
                
         135                    field = PySequence_GetItem(seq, i);
         135                    if (field == NULL)
      ######                            return NULL;
                
                                /* quote_empty is "len == 1": an empty field is only
                                   force-quoted when it is the sole field. */
         135                    if (PyString_Check(field)) {
          59                            append_ok = join_append(self, PyString_AsString(field), len == 1);
          59                            Py_DECREF(field);
                                }
          76                    else if (field == Py_None) {
      ######                            append_ok = join_append(self, "", len == 1);
      ######                            Py_DECREF(field);
                                }
                                else {
          76                            PyObject *str;
                
          76                            str = PyObject_Str(field);
          76                            Py_DECREF(field);
          76                            if (str == NULL)
      ######                                    return NULL;
                
          76                            append_ok = join_append(self, PyString_AsString(str), len == 1);
          76                            Py_DECREF(str);
                                }
         135                    if (!append_ok)
      ######                            return NULL;
                        }
                
                        /* Add line terminator.
                         */
                        /* NOTE(review): this error path returns 0 where the others
                           return NULL. */
          11            if (!join_append_lineterminator(self))
      ######                    return 0;
                
          11            return join_string(self);
                }
                
                /* Method table for Parser objects; looked up by name in
                   Parser_getattr().  Terminated by a NULL entry. */
                static struct PyMethodDef Parser_methods[] = {
                        { "parse", (PyCFunction)Parser_parse, METH_VARARGS,
                          Parser_parse_doc },
                        { "clear", (PyCFunction)Parser_clear, METH_NOARGS,
                          Parser_clear_doc },
                        { "join", (PyCFunction)Parser_join, METH_O,
                          Parser_join_doc },
                        { NULL, NULL }
                };
                
                /* Destructor: release the field buffer, the fields list, the
                   line terminator and the record buffer, then the object
                   itself. */
                static void
                Parser_dealloc(ParserObj *self)
          30    {
                        /* NOTE(review): self->field is allocated with PyMem_Malloc()
                           in parse_grow_buff() but released with plain free(). */
          30            if (self->field)
          12                    free(self->field);
          30            Py_XDECREF(self->fields);
          30            Py_XDECREF(self->lineterminator);
                
                        /* NOTE(review): same allocator mismatch; self->rec comes from
                           PyMem_Malloc() in join_check_rec_size(). */
          30            if (self->rec)
          11                    free(self->rec);
                
          30            PyMem_DEL(self);
                }
                
                #define OFF(x) offsetof(ParserObj, x)
                
                /* ParserObj fields exposed as Python attributes through
                   PyMember_Get() in Parser_getattr().  had_parse_error is marked
                   read-only (RO).  Terminated by a NULL entry. */
                static struct memberlist Parser_memberlist[] = {
                        { "quotechar",        T_CHAR,   OFF(quotechar) },
                        { "delimiter",        T_CHAR,   OFF(delimiter) },
                        { "escapechar",       T_CHAR,   OFF(escapechar) },
                        { "skipinitialspace", T_INT,    OFF(skipinitialspace) },
                        { "lineterminator",   T_OBJECT, OFF(lineterminator) },
                        { "quoting",          T_INT,    OFF(quoting) },
                        { "doublequote",      T_INT,    OFF(doublequote) },
                        { "fields",           T_OBJECT, OFF(fields) },
                        { "autoclear",        T_INT,    OFF(autoclear) },
                        { "strict",           T_INT,    OFF(strict) },
                        { "had_parse_error",  T_INT,    OFF(had_parse_error), RO },
                        { NULL }
                };
                
                /* tp_getattr handler for parser objects.
                 *
                 * Lookup order:
                 *   1. "quotechar" / "escapechar" return None when the matching
                 *      have_quotechar / have_escapechar flag is clear.
                 *   2. Otherwise the name is looked up in Parser_memberlist.
                 *   3. If that fails, the pending error is cleared and the name is
                 *      looked up in Parser_methods.
                 *
                 * Returns a new reference, or whatever Py_FindMethod returns when the
                 * name matches neither table.
                 */
                static PyObject *
                Parser_getattr(ParserObj *self, char *name)
          48    {
          48            PyObject *rv;
                
          48            if ((strcmp(name, "quotechar") == 0 && !self->have_quotechar)
                            || (strcmp(name, "escapechar") == 0 && !self->have_escapechar)) {
      ######                    Py_INCREF(Py_None);
      ######                    return Py_None;
                        }
                
          48            rv = PyMember_Get((char *)self, Parser_memberlist, name);
          48            if (rv)
      ######                    return rv;
                        /* Not a data member: discard PyMember_Get's error and try methods. */
          48            PyErr_Clear();
          48            return Py_FindMethod(Parser_methods, (PyObject *)self, name);
                }
                
                /* Store an optional single-character setting.
                 *
                 *   attr      - receives the character
                 *   have_attr - receives 1 when a character is set, 0 when it is unset
                 *   v         - NULL, None, or a string of length 1
                 *
                 * v == NULL leaves *attr and *have_attr untouched, None clears both,
                 * and a one-character string stores that character and sets the flag.
                 * Any other value raises via PyErr_BadArgument.
                 *
                 * Returns 0 on success and -1 with an exception set on failure.
                 */
                static int
                _set_char_attr(char *attr, int *have_attr, PyObject *v)
          60    {
                        /* Special case for constructor - NULL == use default.
                         */
          60            if (v == NULL)
      ######                    return 0;
                
          60            if (v == Py_None) {
          30                    *have_attr = 0;
          30                    *attr = 0;
          30                    return 0;
                        }
          30            else if (PyString_Check(v) && PyString_Size(v) == 1) {
          30                    *attr = PyString_AsString(v)[0];
          30                    *have_attr = 1;
          30                    return 0;
                        }
                        else {
      ######                    PyErr_BadArgument();
      ######                    return -1;
                        }
                }
                
                /* tp_setattr handler for parser objects.
                 *
                 * Deleting an attribute (v == NULL) is refused with AttributeError.
                 * "quotechar" and "escapechar" go through _set_char_attr.  "quoting"
                 * given as an int is range-checked against 0..QUOTE_NONE, and
                 * selecting QUOTE_NONE also clears have_quotechar.  "lineterminator"
                 * must be a string.  Everything else, including a non-int "quoting"
                 * and a string "lineterminator", is handed to PyMember_Set with
                 * Parser_memberlist.
                 *
                 * Returns 0 on success and -1 with an exception set on failure.
                 *
                 * NOTE(review): csv_parser forces quoting to QUOTE_NONE when no
                 * quotechar is set; assigning quotechar = None here does not, so the
                 * two paths can leave quoting / have_quotechar in different states.
                 * NOTE(review): every line of this function shows ###### in this
                 * coverage run, i.e. it was never executed by the test suite.
                 */
                static int
                Parser_setattr(ParserObj *self, char *name, PyObject *v)
      ######    {
      ######            if (v == NULL) {
      ######                    PyErr_SetString(PyExc_AttributeError, "Cannot delete attribute");
      ######                    return -1;
                        }
      ######            if (strcmp(name, "quotechar") == 0)
      ######                    return _set_char_attr(&self->quotechar,
                                                      &self->have_quotechar, v);
      ######            else if (strcmp(name, "escapechar") == 0)
      ######                    return _set_char_attr(&self->escapechar,
                                                      &self->have_escapechar, v);
      ######            else if (strcmp(name, "quoting") == 0 && PyInt_Check(v)) {
                                /* NOTE(review): PyInt_AsLong yields a long that is narrowed
                                 * to int before the range check below. */
      ######                    int n = PyInt_AsLong(v);
                
      ######                    if (n < 0 || n > QUOTE_NONE) {
      ######                            PyErr_BadArgument();
      ######                            return -1;
                                }
      ######                    if (n == QUOTE_NONE)
      ######                            self->have_quotechar = 0;
      ######                    self->quoting = n;
      ######                    return 0;
                        }
      ######            else if (strcmp(name, "lineterminator") == 0 && !PyString_Check(v)) {
      ######                    PyErr_BadArgument();
      ######                    return -1;
                        }
                        else
      ######                    return PyMember_Set((char *)self, Parser_memberlist, name, v);
                }
                
                /* Forward declaration: csv_parser is defined after Parser_Type. */
                static PyObject *
                csv_parser(PyObject *module, PyObject *args, PyObject *keyword_args);
                
                PyDoc_STRVAR(Parser_Type_doc, "CSV parser");
                
                /* Type object for parser instances.  Attribute access uses the
                 * name-based tp_getattr / tp_setattr slots (Parser_getattr and
                 * Parser_setattr); the only other slots filled in are tp_dealloc and
                 * the doc string.  init_csv calls PyType_Ready on this object.
                 */
                static PyTypeObject Parser_Type = {
                        PyObject_HEAD_INIT(0)
                        0,                      /*ob_size*/
                        "_csv.parser",          /*tp_name*/
                        sizeof(ParserObj),      /*tp_basicsize*/
                        0,                      /*tp_itemsize*/
                        /* methods */
                        (destructor)Parser_dealloc, /*tp_dealloc*/
                        (printfunc)0,           /*tp_print*/
                        (getattrfunc)Parser_getattr, /*tp_getattr*/
                        (setattrfunc)Parser_setattr, /*tp_setattr*/
                        (cmpfunc)0,             /*tp_compare*/
                        (reprfunc)0,            /*tp_repr*/
                        0,                      /*tp_as_number*/
                        0,                      /*tp_as_sequence*/
                        0,                      /*tp_as_mapping*/
                        (hashfunc)0,            /*tp_hash*/
                        (ternaryfunc)0,         /*tp_call*/
                        (reprfunc)0,            /*tp_str*/
                
                        /* Four zeroed slots between tp_str and the doc string; presumably
                         * tp_getattro, tp_setattro, tp_as_buffer and tp_flags -- confirm
                         * against the PyTypeObject layout of the targeted Python. */
                        0L, 0L, 0L, 0L,
                        Parser_Type_doc
                };
                
                /* Doc string for the module-level parser() function.
                 *
                 * NOTE(review): the signature line below disagrees with csv_parser:
                 *   - it lists delimiter first, while keywords[] in csv_parser starts
                 *     with quotechar, so the positional order differs from the one
                 *     shown here;
                 *   - it omits skipinitialspace, which is accepted and described
                 *     further down;
                 *   - it shows quoting='minimal', but csv_parser parses quoting with
                 *     the "i" format and compares it against the QUOTE_* integers.
                 */
                PyDoc_STRVAR(csv_parser_doc,
                "parser(delimiter=',', quotechar='\"', escapechar=None,\n"
                "       doublequote=1, lineterminator='\\r\\n', quoting='minimal',\n"
                "       autoclear=1, strict=0) -> Parser\n"
                "\n"
                "Constructs a CSV parser object.\n"
                "\n"
                "    delimiter\n"
                "        Defines the character that will be used to separate\n"
                "        fields in the CSV record.\n"
                "\n"
                "    quotechar\n"
                "        Defines the character used to quote fields that\n"
                "        contain the field separator or newlines.  If set to None\n"
                "        special characters will be escaped using the escapechar.\n"
                "\n"
                "    escapechar\n"
                "        Defines the character used to escape special\n"
                "        characters.  Only used if quotechar is None.\n"
                "\n"
                "    doublequote\n"
                "        When True, quotes in a field must be doubled up.\n"
                "\n"
                "    skipinitialspace\n"
                "        When True spaces following the delimiter are ignored.\n"
                "\n"
                "    lineterminator\n"
                "        The string used to terminate records.\n"
                "\n"
                "    quoting\n"
                "        Controls the generation of quotes around fields when writing\n"
                "        records.  This is only used when quotechar is not None.\n"
                "\n"
                "    autoclear\n"
                "        When True, calling parse() will automatically call\n"
                "        the clear() method if the previous call to parse() raised an\n"
                "        exception during parsing.\n"
                "\n"
                "    strict\n"
                "        When True, the parser will raise an exception on\n"
                "        malformed fields rather than attempting to guess the right\n"
                "        behavior.\n");
                
                /* Module-level parser() function: allocate a ParserObj, fill in the
                 * defaults, then override them from the positional / keyword arguments.
                 *
                 * quotechar and escapechar are parsed as generic objects ("O") and
                 * validated by _set_char_attr so that None can mean "unset".  After
                 * parsing, quoting is range-checked and reconciled with the quotechar:
                 * QUOTE_NONE clears have_quotechar, and a missing quotechar forces
                 * quoting to QUOTE_NONE.
                 *
                 * Returns the new parser object, or NULL with an exception set.
                 */
                static PyObject *
                csv_parser(PyObject *module, PyObject *args, PyObject *keyword_args)
          30    {
                        /* Order here defines both the keyword names and the positional
                         * order; it must stay in step with the format string below. */
                        static char *keywords[] = {
                                "quotechar", "delimiter", "escapechar", "skipinitialspace",
                                "lineterminator", "quoting", "doublequote",
                                "autoclear", "strict", 
                                NULL
          30            };
          30            PyObject *quotechar, *escapechar;
          30            ParserObj *self = PyObject_NEW(ParserObj, &Parser_Type);
                
          30            if (self == NULL)
      ######                    return NULL;
                
          30            self->quotechar = '"';
          30            self->have_quotechar = 1;
          30            self->delimiter = ',';
          30            self->escapechar = '\0';
          30            self->have_escapechar = 0;
          30            self->skipinitialspace = 0;
          30            self->lineterminator = NULL;
          30            self->quoting = QUOTE_MINIMAL;
          30            self->doublequote = 1;
          30            self->autoclear = 1;
          30            self->strict = 0;
                
          30            self->state = START_RECORD;
          30            self->fields = PyList_New(0);
                        /* NOTE(review): Py_DECREF(self) below runs Parser_dealloc, which
                         * tests and frees self->field and self->rec, but those are only
                         * assigned further down.  Unless PyObject_NEW zero-fills the
                         * object, this path frees indeterminate pointers; the NULL
                         * assignments should happen before the first possible failure. */
          30            if (self->fields == NULL) {
      ######                    Py_DECREF(self);
      ######                    return NULL;
                        }
                
          30            self->had_parse_error = 0;
          30            self->field = NULL;
          30            self->field_size = 0;
          30            self->field_len = 0;
                
          30            self->rec = NULL;
          30            self->rec_size = 0;
          30            self->rec_len = 0;
          30            self->num_fields = 0;
                
          30            quotechar = escapechar = NULL;
                        /* The "S" unit stores the caller's string object directly into
                         * self->lineterminator without taking a reference; the Py_INCREF
                         * in the success branch below is what turns it into an owned one.
                         * NOTE(review): if parsing fails after lineterminator was stored,
                         * or either _set_char_attr call fails, control reaches the final
                         * Py_DECREF(self) with that pointer still un-INCREF'd, and
                         * Parser_dealloc's Py_XDECREF then releases a reference this
                         * object never owned. */
          30            if (PyArg_ParseTupleAndKeywords(args, keyword_args, "|OcOiSiiii",
                                                        keywords,
                                                        &quotechar, &self->delimiter,
                                                        &escapechar, &self->skipinitialspace,
                                                        &self->lineterminator, &self->quoting,
                                                        &self->doublequote,
                                                        &self->autoclear, &self->strict)
                            && !_set_char_attr(&self->quotechar,
                                               &self->have_quotechar, quotechar)
                            && !_set_char_attr(&self->escapechar,
                                               &self->have_escapechar, escapechar)) {
                                /* NOTE(review): the PyString_FromString result is not
                                 * checked; on failure the function can still return self
                                 * with lineterminator == NULL and an exception pending. */
          30                    if (self->lineterminator == NULL)
      ######                            self->lineterminator = PyString_FromString("\r\n");
                                else {
          30                            Py_INCREF(self->lineterminator);
                                }
                
          30                    if (self->quoting < 0 || self->quoting > QUOTE_NONE)
      ######                            PyErr_SetString(PyExc_ValueError, "bad quoting value");
                                else {
                                        /* Keep quoting and have_quotechar consistent. */
          30                            if (self->quoting == QUOTE_NONE)
      ######                                    self->have_quotechar = 0;
          30                            else if (!self->have_quotechar)
      ######                                    self->quoting = QUOTE_NONE;
          30                            return (PyObject*)self;
                                }
                        }
                
                        /* Shared failure exit: an exception is already set. */
      ######            Py_DECREF(self);
      ######            return NULL;
                }
                
                /* Module-level function table passed to Py_InitModule3 in init_csv.
                 * parser() takes both positional and keyword arguments.
                 */
                static struct PyMethodDef csv_methods[] = {
                        { "parser", (PyCFunction)csv_parser, METH_VARARGS | METH_KEYWORDS,
                          csv_parser_doc },
                        { NULL, NULL }
                };
                
                /* Module doc string, passed to Py_InitModule3 in init_csv.  It
                 * summarises the parser object's clear(), parse() and join() methods
                 * and shows a typical parse loop.
                 */
                PyDoc_STRVAR(csv_module_doc,
                "This module provides class for performing CSV parsing and writing.\n"
                "\n"
                "The CSV parser object (returned by the parser() function) supports the\n"
                "following methods:\n"
                "    clear()\n"
                "        Discards all fields parsed so far.  If autoclear is set to\n"
                "        zero. You should call this after a parser exception.\n"
                "\n"
                "    parse(string) -> list of strings\n"
                "        Extracts fields from the (partial) CSV record in string.\n"
                "        Trailing end of line characters are ignored, so you do not\n"
                "        need to strip the string before passing it to the parser. If\n"
                "        you pass more than a single line of text, a _csv.Error\n"
                "        exception will be raised.\n"
                "\n"
                "    join(sequence) -> string\n"
                "        Construct a CSV record from a sequence of fields. Non-string\n"
                "        elements will be converted to string.\n"
                "\n"
                "Typical usage:\n"
                "\n"
                "    import _csv\n"
                "    p = _csv.parser()\n"
                "    fp = open('afile.csv', 'U')\n"
                "    for line in fp:\n"
                "        fields = p.parse(line)\n"
                "        if not fields:\n"
                "            # multi-line record\n"
                "            continue\n"
                "        # process the fields\n");
                
                /* Module initialisation for _csv.
                 *
                 * Readies Parser_Type, creates the module from csv_methods and
                 * csv_module_doc, then adds two entries to the module dictionary:
                 * __version__ ("1.0") and Error (a new _csv.Error exception class,
                 * also kept in the file-level error_obj pointer).  Each failure
                 * simply returns.
                 */
                PyMODINIT_FUNC
                init_csv(void)
           1    {
           1            PyObject *mod;
           1            PyObject *dict;
           1            PyObject *rev;
                
           1            if (PyType_Ready(&Parser_Type) < 0)
      ######                    return;
                
                        /* Create the module and add the functions */
           1            mod = Py_InitModule3("_csv", csv_methods, csv_module_doc);
           1            if (mod == NULL)
      ######                    return;
                
                        /* Add version to the module. */
           1            dict = PyModule_GetDict(mod);
           1            if (dict == NULL)
      ######                    return;
           1            rev = PyString_FromString("1.0");
           1            if (rev == NULL)
      ######                    return;
                        /* NOTE(review): this return and the error_obj one below leave
                         * without releasing rev, so the version string leaks on those
                         * failure paths. */
           1            if (PyDict_SetItemString(dict, "__version__", rev) < 0)
      ######                    return;
                
                        /* Add the CSV exception object to the module. */
           1            error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
           1            if (error_obj == NULL)
      ######                    return;
                
                        /* NOTE(review): unlike the __version__ insert above, the result of
                         * this PyDict_SetItemString call is not checked. */
           1            PyDict_SetItemString(dict, "Error", error_obj);
                
           1            Py_XDECREF(rev);
                        /* NOTE(review): this drops the reference returned by
                         * PyErr_NewException while the global error_obj pointer stays in
                         * use, so the exception is kept alive only by the module
                         * dictionary entry (and not at all if the insert above failed). */
           1            Py_XDECREF(error_obj);
                }


More information about the Csv mailing list