diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index 61d39828e0194a..be100ac5c5c99f 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -434,9 +434,10 @@ Writer Objects :class:`Writer` objects (:class:`DictWriter` instances and objects returned by the :func:`writer` function) have the following public methods. A *row* must be -an iterable of strings or numbers for :class:`Writer` objects and a dictionary -mapping fieldnames to strings or numbers (by passing them through :func:`str` -first) for :class:`DictWriter` objects. Note that complex numbers are written +an iterable of strings, numbers or bytes for :class:`Writer` objects and a dictionary +mapping fieldnames to strings, numbers or bytes for :class:`DictWriter` objects. Strings and +numbers are passed through :func:`str` first, while bytes are decoded using the encoding of +the output file. Note that complex numbers are written out surrounded by parens. This may cause some problems for other programs which read CSV files (assuming they support complex numbers at all). 
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index a16d14019f341f..ca17d799c85bd6 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -230,6 +230,18 @@ def test_writerows_with_none(self): fileobj.seek(0) self.assertEqual(fileobj.read(), 'a\r\n""\r\n') + def test_writerows_with_bytes(self): + with TemporaryFile("w+", newline='', encoding='latin-1') as fileobj: + writer = csv.writer(fileobj) + writer.writerows([['a', b'\xc2'], [b'\xc2', 'c']]) + fileobj.seek(0); + self.assertEqual(fileobj.read(), 'a,\xc2\r\n\xc2,c\r\n') + + with TemporaryFile("w+", newline='', encoding='utf-8') as fileobj: + writer = csv.writer(fileobj) + self.assertRaises(UnicodeDecodeError, writer.writerows, [['a', b'\xc2'], ['a', 'c']]) + + @support.cpython_only def test_writerows_legacy_strings(self): import _testcapi diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-05-25-07-25-29.bpo-40762.TkMFHk.rst b/Misc/NEWS.d/next/Core and Builtins/2020-05-25-07-25-29.bpo-40762.TkMFHk.rst new file mode 100644 index 00000000000000..1a45a5fcaabaae --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-05-25-07-25-29.bpo-40762.TkMFHk.rst @@ -0,0 +1 @@ +csv.Writer.writerow() now writes bytes as they are instead of writing them as b-prefixed strings. The encoding provided by the file object is used to decode the bytes in text mode. In case the file object has no encoding attribute, it falls back on ``locale.getpreferredencoding`` to choose the encoding. 
\ No newline at end of file diff --git a/Modules/_csv.c b/Modules/_csv.c index 3a52632ccfd456..e9d8d01a97117a 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -121,6 +121,8 @@ typedef struct { DialectObj *dialect; /* parsing dialect */ + PyObject *encoding; /* use this encoding when writing bytes */ + Py_UCS4 *rec; /* buffer for parser.join */ Py_ssize_t rec_size; /* size of allocated record */ Py_ssize_t rec_len; /* length of record */ @@ -1206,7 +1208,12 @@ csv_writerow(WriterObj *self, PyObject *seq) else { PyObject *str; - str = PyObject_Str(field); + if (PyBytes_Check(field)) { + const char * encoding = PyUnicode_AsUTF8(self->encoding); + str = PyUnicode_FromEncodedObject(field, encoding, NULL); + } else { + str = PyObject_Str(field); + } Py_DECREF(field); if (str == NULL) { Py_DECREF(iter); @@ -1305,6 +1312,7 @@ Writer_dealloc(WriterObj *self) Py_XDECREF(self->write); if (self->rec != NULL) PyMem_Free(self->rec); + Py_XDECREF(self->encoding); PyObject_GC_Del(self); } @@ -1313,6 +1321,7 @@ Writer_traverse(WriterObj *self, visitproc visit, void *arg) { Py_VISIT(self->dialect); Py_VISIT(self->write); + Py_VISIT(self->encoding); return 0; } @@ -1321,6 +1330,7 @@ Writer_clear(WriterObj *self) { Py_CLEAR(self->dialect); Py_CLEAR(self->write); + Py_CLEAR(self->encoding); return 0; } @@ -1372,12 +1382,15 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) PyObject * output_file, * dialect = NULL; WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type); _Py_IDENTIFIER(write); + _Py_IDENTIFIER(encoding); + _Py_IDENTIFIER(getpreferredencoding); if (!self) return NULL; self->dialect = NULL; self->write = NULL; + self->encoding = NULL; self->rec = NULL; self->rec_size = 0; @@ -1398,6 +1411,27 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) Py_DECREF(self); return NULL; } + + int r = _PyObject_LookupAttrId(output_file, &PyId_encoding, &self->encoding); + if (r < 0) { + Py_DECREF(self); + return NULL; + } + else if (r == 0) 
{ + PyObject* locale_module = PyImport_ImportModule("locale"); + if (locale_module == NULL) { + Py_DECREF(self); + return NULL; + } + self->encoding = _PyObject_CallMethodIdOneArg( + locale_module, &PyId_getpreferredencoding, Py_False); + Py_DECREF(locale_module); + } + if (self->encoding == NULL) { + Py_DECREF(self); + return NULL; + } + self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); if (self->dialect == NULL) { Py_DECREF(self);