Skip to content

gh-113732: Fix support of QUOTE_NOTNULL and QUOTE_STRINGS in csv.reader #113738

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Doc/whatsnew/3.12.rst
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,7 @@ csv

* Add :const:`csv.QUOTE_NOTNULL` and :const:`csv.QUOTE_STRINGS` flags to
provide finer grained control of ``None`` and empty strings by
:class:`csv.writer` objects.
:class:`~csv.reader` and :class:`~csv.writer` objects.

dis
---
Expand Down
25 changes: 25 additions & 0 deletions Lib/test/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,17 +392,42 @@ def test_read_quoting(self):
# will this fail where locale uses comma for decimals?
self._read_test([',3,"5",7.3, 9'], [['', 3, '5', 7.3, 9]],
quoting=csv.QUOTE_NONNUMERIC)
self._read_test([',3,"5",7.3, 9'], [[None, '3', '5', '7.3', ' 9']],
quoting=csv.QUOTE_NOTNULL)
self._read_test([',3,"5",7.3, 9'], [[None, 3, '5', 7.3, 9]],
quoting=csv.QUOTE_STRINGS)

self._read_test([',,"",'], [['', '', '', '']])
self._read_test([',,"",'], [['', '', '', '']],
quoting=csv.QUOTE_NONNUMERIC)
self._read_test([',,"",'], [[None, None, '', None]],
quoting=csv.QUOTE_NOTNULL)
self._read_test([',,"",'], [[None, None, '', None]],
quoting=csv.QUOTE_STRINGS)

self._read_test(['"a\nb", 7'], [['a\nb', ' 7']])
self.assertRaises(ValueError, self._read_test,
['abc,3'], [[]],
quoting=csv.QUOTE_NONNUMERIC)
self.assertRaises(ValueError, self._read_test,
['abc,3'], [[]],
quoting=csv.QUOTE_STRINGS)
self._read_test(['1,@,3,@,5'], [['1', ',3,', '5']], quotechar='@')
self._read_test(['1,\0,3,\0,5'], [['1', ',3,', '5']], quotechar='\0')

def test_read_skipinitialspace(self):
# Exercises reading with skipinitialspace=True, alone and combined with
# the QUOTE_NOTNULL and QUOTE_STRINGS quoting modes.
# NOTE(review): _read_test is defined outside this excerpt; presumably it
# parses the given input lines with csv.reader and compares the result
# with the expected rows -- confirm against the helper.
#
# Spaces directly after the delimiter are expected to be dropped, while
# the leading tab in the last field is expected to be kept.
self._read_test(['no space, space, spaces,\ttab'],
[['no space', 'space', 'spaces', '\ttab']],
skipinitialspace=True)
# Fields consisting of a single space are expected to come back as empty
# strings under the default quoting mode.
self._read_test([' , , '],
[['', '', '']],
skipinitialspace=True)
# With QUOTE_NOTNULL the same unquoted, space-only fields are expected to
# come back as None.
self._read_test([' , , '],
[[None, None, None]],
skipinitialspace=True, quoting=csv.QUOTE_NOTNULL)
# QUOTE_STRINGS is expected to give the same None result for these fields.
self._read_test([' , , '],
[[None, None, None]],
skipinitialspace=True, quoting=csv.QUOTE_STRINGS)

def test_read_bigfield(self):
# This exercises the buffer realloc functionality and field size
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix support of :data:`~csv.QUOTE_NOTNULL` and :data:`~csv.QUOTE_STRINGS` in
:func:`csv.reader`.
46 changes: 29 additions & 17 deletions Modules/_csv.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ typedef struct {
Py_UCS4 *field; /* temporary buffer */
Py_ssize_t field_size; /* size of allocated buffer */
Py_ssize_t field_len; /* length of current field */
int numeric_field; /* treat field as numeric */
bool unquoted_field; /* true if no quotes around the current field */
unsigned long line_num; /* Source-file line number */
} ReaderObj;

Expand Down Expand Up @@ -644,22 +644,33 @@ _call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
static int
parse_save_field(ReaderObj *self)
{
int quoting = self->dialect->quoting;
PyObject *field;

field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
(void *) self->field, self->field_len);
if (field == NULL)
return -1;
self->field_len = 0;
if (self->numeric_field) {
PyObject *tmp;

self->numeric_field = 0;
tmp = PyNumber_Float(field);
Py_DECREF(field);
if (tmp == NULL)
if (self->unquoted_field &&
self->field_len == 0 &&
(quoting == QUOTE_NOTNULL || quoting == QUOTE_STRINGS))
{
field = Py_NewRef(Py_None);
}
else {
field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
(void *) self->field, self->field_len);
if (field == NULL) {
return -1;
field = tmp;
}
if (self->unquoted_field &&
self->field_len != 0 &&
(quoting == QUOTE_NONNUMERIC || quoting == QUOTE_STRINGS))
{
PyObject *tmp = PyNumber_Float(field);
Py_DECREF(field);
if (tmp == NULL) {
return -1;
}
field = tmp;
}
self->field_len = 0;
}
if (PyList_Append(self->fields, field) < 0) {
Py_DECREF(field);
Expand Down Expand Up @@ -721,6 +732,7 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
/* fallthru */
case START_FIELD:
/* expecting field */
self->unquoted_field = true;
if (c == '\n' || c == '\r' || c == EOL) {
/* save empty field - return [fields] */
if (parse_save_field(self) < 0)
Expand All @@ -730,10 +742,12 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
else if (c == dialect->quotechar &&
dialect->quoting != QUOTE_NONE) {
/* start quoted field */
self->unquoted_field = false;
self->state = IN_QUOTED_FIELD;
}
else if (c == dialect->escapechar) {
/* possible escaped character */
self->unquoted_field = false;
self->state = ESCAPED_CHAR;
}
else if (c == ' ' && dialect->skipinitialspace)
Expand All @@ -746,8 +760,6 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
}
else {
/* begin new unquoted field */
if (dialect->quoting == QUOTE_NONNUMERIC)
self->numeric_field = 1;
if (parse_add_char(self, module_state, c) < 0)
return -1;
self->state = IN_FIELD;
Expand Down Expand Up @@ -892,7 +904,7 @@ parse_reset(ReaderObj *self)
return -1;
self->field_len = 0;
self->state = START_RECORD;
self->numeric_field = 0;
self->unquoted_field = false;
return 0;
}

Expand Down