Skip to content

bpo-40762: Fix writing bytes in csv #20371

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions Doc/library/csv.rst
Original file line number Diff line number Diff line change
Expand Up @@ -434,9 +434,10 @@ Writer Objects

:class:`Writer` objects (:class:`DictWriter` instances and objects returned by
the :func:`writer` function) have the following public methods. A *row* must be
an iterable of strings or numbers for :class:`Writer` objects and a dictionary
mapping fieldnames to strings or numbers (by passing them through :func:`str`
first) for :class:`DictWriter` objects. Note that complex numbers are written
an iterable of strings, numbers or bytes for :class:`Writer` objects and a dictionary
mapping fieldnames to strings, numbers or bytes (by passing them through :func:`str`
first) for :class:`DictWriter` objects. Note that bytes are decoded using the
encoding of the output file (or the locale's preferred encoding if the file object
has no ``encoding`` attribute) before being written. Also note that complex numbers are written
out surrounded by parens. This may cause some problems for other programs which
read CSV files (assuming they support complex numbers at all).

Expand Down
12 changes: 12 additions & 0 deletions Lib/test/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,18 @@ def test_writerows_with_none(self):
fileobj.seek(0)
self.assertEqual(fileobj.read(), 'a\r\n""\r\n')

def test_writerows_with_bytes(self):
    """Check that bytes fields are written using the output file's encoding.

    Covers two cases: a file encoding that can represent the byte, where
    the decoded character must appear in the output, and a file encoding
    that cannot, where writing the rows must raise UnicodeDecodeError.
    """
    # latin-1 maps every single byte to a code point, so the 0xC2 byte is
    # expected to come back as the one-character string '\xc2'.
    with TemporaryFile("w+", newline='', encoding='latin-1') as fileobj:
        writer = csv.writer(fileobj)
        writer.writerows([['a', b'\xc2'], [b'\xc2', 'c']])
        fileobj.seek(0)
        self.assertEqual(fileobj.read(), 'a,\xc2\r\n\xc2,c\r\n')

    # A lone 0xC2 byte is not valid UTF-8, so the same field is expected
    # to fail with UnicodeDecodeError when the file is opened as utf-8.
    with TemporaryFile("w+", newline='', encoding='utf-8') as fileobj:
        writer = csv.writer(fileobj)
        self.assertRaises(UnicodeDecodeError, writer.writerows,
                          [['a', b'\xc2'], ['a', 'c']])


@support.cpython_only
def test_writerows_legacy_strings(self):
import _testcapi
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
csv.Writer.writerow() now supports writing bytes as is, instead of writing them as b-prefixed strings. It uses the encoding provided by the file object to write the bytes in text mode. In case the file object has no ``encoding`` attribute, it falls back on ``locale.getpreferredencoding`` to decide.
36 changes: 35 additions & 1 deletion Modules/_csv.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ typedef struct {

DialectObj *dialect; /* parsing dialect */

PyObject *encoding; /* use this encoding when writing bytes */

Py_UCS4 *rec; /* buffer for parser.join */
Py_ssize_t rec_size; /* size of allocated record */
Py_ssize_t rec_len; /* length of record */
Expand Down Expand Up @@ -1206,7 +1208,12 @@ csv_writerow(WriterObj *self, PyObject *seq)
else {
PyObject *str;

str = PyObject_Str(field);
if (PyBytes_Check(field)) {
const char * encoding = PyUnicode_AsUTF8(self->encoding);
str = PyUnicode_FromEncodedObject(field, encoding, NULL);
} else {
str = PyObject_Str(field);
}
Py_DECREF(field);
if (str == NULL) {
Py_DECREF(iter);
Expand Down Expand Up @@ -1305,6 +1312,7 @@ Writer_dealloc(WriterObj *self)
Py_XDECREF(self->write);
if (self->rec != NULL)
PyMem_Free(self->rec);
Py_XDECREF(self->encoding);
PyObject_GC_Del(self);
}

Expand All @@ -1313,6 +1321,7 @@ Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
Py_VISIT(self->dialect);
Py_VISIT(self->write);
Py_VISIT(self->encoding);
return 0;
}

Expand All @@ -1321,6 +1330,7 @@ Writer_clear(WriterObj *self)
{
Py_CLEAR(self->dialect);
Py_CLEAR(self->write);
Py_CLEAR(self->encoding);
return 0;
}

Expand Down Expand Up @@ -1372,12 +1382,15 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
PyObject * output_file, * dialect = NULL;
WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
_Py_IDENTIFIER(write);
_Py_IDENTIFIER(encoding);
_Py_IDENTIFIER(getpreferredencoding);

if (!self)
return NULL;

self->dialect = NULL;
self->write = NULL;
self->encoding = NULL;

self->rec = NULL;
self->rec_size = 0;
Expand All @@ -1398,6 +1411,27 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
Py_DECREF(self);
return NULL;
}

int r = _PyObject_LookupAttrId(output_file, &PyId_encoding, &self->encoding);
if (r < 0) {
Py_DECREF(self);
return NULL;
}
else if (r == 0) {
PyObject* locale_module = PyImport_ImportModule("locale");
if (locale_module == NULL) {
Py_DECREF(self);
return NULL;
}
self->encoding = _PyObject_CallMethodIdOneArg(
locale_module, &PyId_getpreferredencoding, Py_False);
Py_DECREF(locale_module);
}
if (self->encoding == NULL) {
Py_DECREF(self);
return NULL;
}

self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
if (self->dialect == NULL) {
Py_DECREF(self);
Expand Down