Skip to content

Commit 1ee31b0

Browse files
authored
handle zip files which contain non-UTF-8 encoded files (#75)
1 parent a96bdc3 commit 1ee31b0

File tree

7 files changed

+21
-1
lines changed

7 files changed

+21
-1
lines changed

CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ Change log
44
0.6.0 - tbd
55
--------------------------------------------------------------------------------
66

7+
#. `#74 <https://github.com/pyexcel/pyexcel-io/issues/74>`_: handle zip files which
8+
contain non-UTF-8 encoded files.
9+
10+
711
**removed**
812

913
#. Python versions lower than 3.6 are no longer supported

CONTRIBUTORS.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
In alphabetical order:
66

77
* `Antherkiv <https://api.github.com/users/antherkiv>`_
8+
* `Craig Anderson <https://api.github.com/users/craiga>`_
89
* `John Vandenberg <https://api.github.com/users/jayvdb>`_
910
* `Stephen J. Fuhry <https://api.github.com/users/fuhrysteve>`_
1011
* `Stephen Rauch <https://api.github.com/users/stephenrauch>`_

pyexcel_io/readers/csvz.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
"""
1010
import zipfile
1111

12+
import chardet
13+
1214
from pyexcel_io.sheet import NamedContent
1315
from pyexcel_io._compact import StringIO
1416
from pyexcel_io.readers.csvr import CSVinMemoryReader
@@ -43,7 +45,8 @@ def close(self):
4345
def read_sheet(self, index):
4446
name = self.content_array[index].name
4547
content = self.zipfile.read(self.content_array[index].payload)
46-
sheet = StringIO(content.decode("utf-8"))
48+
encoding_guess = chardet.detect(content)
49+
sheet = StringIO(content.decode(encoding_guess["encoding"]))
4750

4851
return CSVinMemoryReader(NamedContent(name, sheet), **self.keywords)
4952

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
ordereddict;python_version<"2.7"
22
lml>=0.0.4
3+
chardet

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474

7575
INSTALL_REQUIRES = [
7676
"lml>=0.0.4",
77+
"chardet",
7778
]
7879
SETUP_COMMANDS = {}
7980

test.sh

100644 → 100755
File mode changed.

tests/test_new_csvz_book.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,16 @@ def test_reading(self):
5353
self.assertEqual(list(data["pyexcel_sheet1"]), [[u"中", u"文", 1, 2, 3]])
5454
zipreader.close()
5555

56+
def test_reading_utf32(self):
57+
zip = zipfile.ZipFile(self.file, "w")
58+
zip.writestr("something.ext", self.result.encode("utf-32"))
59+
zip.close()
60+
zipreader = self.reader_class()
61+
zipreader.open(self.file)
62+
data = zipreader.read_all()
63+
self.assertEqual(list(data["something"]), [[u"中", u"文", 1, 2, 3]])
64+
zipreader.close()
65+
5666
def tearDown(self):
5767
os.unlink(self.file)
5868

0 commit comments

Comments
 (0)