Skip to content

Commit 79442a5

Browse files
committed
gh-91349: Expose the crc32 function from the lzma library.
It is faster than the crc32 function from the zlib library. Update zipfile to detect and use lzma.crc32, zlib.crc32, or binascii.crc32, in order of preference.
1 parent 8ad4646 commit 79442a5

File tree

6 files changed

+148
-1
lines changed

6 files changed

+148
-1
lines changed

Doc/library/lzma.rst

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,25 @@ Compressing and decompressing data in memory
311311
*preset* and *filters* arguments.
312312

313313

314+
.. function:: crc32(data[, value])
315+
316+
.. index::
317+
single: Cyclic Redundancy Check
318+
single: checksum; Cyclic Redundancy Check
319+
320+
Computes a CRC (Cyclic Redundancy Check) checksum of *data*. The
321+
result is an unsigned 32-bit integer. If *value* is present, it is used
322+
as the starting value of the checksum; otherwise, a default value of 0
323+
is used. Passing in *value* allows computing a running checksum over the
324+
concatenation of several inputs. The algorithm is not cryptographically
325+
strong, and should not be used for authentication or digital signatures. Since
326+
the algorithm is designed for use as a checksum algorithm, it is not suitable
327+
for use as a general hash algorithm.
328+
329+
.. versionadded:: 3.14
330+
The result is always unsigned.
331+
332+
314333
.. function:: decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None)
315334

316335
Decompress *data* (a :class:`bytes` object), returning the uncompressed data

Lib/test/test_lzma.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import _compression
22
import array
3+
import binascii
34
from io import BytesIO, UnsupportedOperation, DEFAULT_BUFFER_SIZE
45
import os
56
import pickle
@@ -17,6 +18,45 @@
1718
lzma = import_module("lzma")
1819
from lzma import LZMACompressor, LZMADecompressor, LZMAError, LZMAFile
1920

21+
class ChecksumTestCase(unittest.TestCase):
22+
# checksum test cases
23+
def test_crc32start(self):
24+
self.assertEqual(lzma.crc32(b""), lzma.crc32(b"", 0))
25+
self.assertTrue(lzma.crc32(b"abc", 0xffffffff))
26+
27+
def test_crc32empty(self):
28+
self.assertEqual(lzma.crc32(b"", 0), 0)
29+
self.assertEqual(lzma.crc32(b"", 1), 1)
30+
self.assertEqual(lzma.crc32(b"", 432), 432)
31+
32+
def test_penguins(self):
33+
self.assertEqual(lzma.crc32(b"penguin", 0), 0x0e5c1a120)
34+
self.assertEqual(lzma.crc32(b"penguin", 1), 0x43b6aa94)
35+
self.assertEqual(lzma.crc32(b"penguin"), lzma.crc32(b"penguin", 0))
36+
37+
def test_crc32_unsigned(self):
38+
foo = b'abcdefghijklmnop'
39+
# explicitly test that the result is unsigned (2486878355 > 2**31 - 1)
40+
self.assertEqual(lzma.crc32(foo), 2486878355)
41+
self.assertEqual(lzma.crc32(b'spam'), 1138425661)
42+
43+
def test_same_as_binascii_crc32(self):
44+
foo = b'abcdefghijklmnop'
45+
crc = 2486878355
46+
self.assertEqual(binascii.crc32(foo), crc)
47+
self.assertEqual(lzma.crc32(foo), crc)
48+
self.assertEqual(binascii.crc32(b'spam'), lzma.crc32(b'spam'))
49+
50+
51+
# Issue #10276 - check that inputs >=4 GiB are handled correctly.
52+
class ChecksumBigBufferTestCase(unittest.TestCase):
53+
54+
@bigmemtest(size=_4G + 4, memuse=1, dry_run=False)
55+
def test_big_buffer(self, size):
56+
data = b"nyan" * (_1G + 1)
57+
self.assertEqual(lzma.crc32(data), 1044521549)
58+
self.assertEqual(zlib.adler32(data), 2256789997)
59+
2060

2161
class CompressorDecompressorTestCase(unittest.TestCase):
2262

Lib/zipfile/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
try:
3030
import lzma # We may need its compression method
31+
crc32 = lzma.crc32
3132
except ImportError:
3233
lzma = None
3334

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Expose the crc32 function from the lzma library. It is faster than the crc32
2+
function from the zlib library. Update zipfile to detect and use lzma.crc32,
3+
zlib.crc32, or binascii.crc32, in order of preference.

Modules/_lzmamodule.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1599,10 +1599,40 @@ lzma_exec(PyObject *module)
15991599
return 0;
16001600
}
16011601

1602+
/*[clinic input]
1603+
_lzma.crc32 -> unsigned_int
1604+
1605+
data: Py_buffer
1606+
value: unsigned_int(bitwise=True) = 0
1607+
Starting value of the checksum.
1608+
/
1609+
1610+
Compute a CRC-32 checksum of data.
1611+
1612+
The returned checksum is an integer.
1613+
[clinic start generated code]*/
1614+
1615+
static unsigned int
1616+
_lzma_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value)
1617+
/*[clinic end generated code: output=fca7916d796faf8b input=bb623a169c14534f]*/
1618+
{
1619+
/* Releasing the GIL for very small buffers is inefficient
1620+
and may lower performance */
1621+
if (data->len > 1024*5) {
1622+
Py_BEGIN_ALLOW_THREADS
1623+
value = lzma_crc32(data->buf, (size_t)data->len, (uint32_t)value);
1624+
Py_END_ALLOW_THREADS
1625+
} else {
1626+
value = lzma_crc32(data->buf, (size_t)data->len, (uint32_t)value);
1627+
}
1628+
return value;
1629+
}
1630+
16021631
static PyMethodDef lzma_methods[] = {
16031632
_LZMA_IS_CHECK_SUPPORTED_METHODDEF
16041633
_LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
16051634
_LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
1635+
_LZMA_CRC32_METHODDEF
16061636
{NULL}
16071637
};
16081638

Modules/clinic/_lzmamodule.c.h

Lines changed: 55 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)