Skip to content

gh-91349: Expose the crc32 function from the lzma library #131721

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions Doc/library/lzma.rst
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,23 @@ Compressing and decompressing data in memory
*preset* and *filters* arguments.


.. function:: crc32(data, value=0)

.. index::
single: Cyclic Redundancy Check
single: checksum; Cyclic Redundancy Check

Computes a CRC (Cyclic Redundancy Check) checksum of *data*. The
result is a non-negative integer less than :math:`2^{32}`. If *value* is present, it is used
as the starting value of the checksum; otherwise, a default value of 0
is used. Passing in *value* allows computing a running checksum over the
concatenation of several inputs. The algorithm is not cryptographically
strong, and should not be used for authentication or digital signatures. Since
the algorithm is designed for use as a checksum algorithm, it is not suitable
for use as a general hash algorithm.

.. versionadded:: next

.. function:: decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None)

Decompress *data* (a :class:`bytes` object), returning the uncompressed data
Expand Down
41 changes: 40 additions & 1 deletion Lib/test/test_lzma.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import _compression
import array
import binascii
from io import BytesIO, UnsupportedOperation, DEFAULT_BUFFER_SIZE
import os
import pickle
Expand All @@ -8,7 +9,7 @@
from test import support
import unittest

from test.support import _4G, bigmemtest
from test.support import _1G, _4G, bigmemtest
from test.support.import_helper import import_module
from test.support.os_helper import (
TESTFN, unlink, FakePath
Expand All @@ -17,6 +18,44 @@
lzma = import_module("lzma")
from lzma import LZMACompressor, LZMADecompressor, LZMAError, LZMAFile

class ChecksumTestCase(unittest.TestCase):
    """Tests for ``lzma.crc32`` on small, in-memory inputs."""

    def test_crc32start(self):
        # Omitting *value* must be the same as starting from 0.
        self.assertEqual(lzma.crc32(b""), lzma.crc32(b"", 0))
        # The largest 32-bit starting value must be accepted.  Checking only
        # for truthiness would accept any non-zero result, so also compare
        # against the reference CRC-32 implementation in binascii.
        from_max_start = lzma.crc32(b"abc", 0xffffffff)
        self.assertTrue(from_max_start)
        self.assertEqual(from_max_start, binascii.crc32(b"abc", 0xffffffff))

    def test_crc32empty(self):
        # With no data to process the starting value is returned unchanged.
        self.assertEqual(lzma.crc32(b"", 0), 0)
        self.assertEqual(lzma.crc32(b"", 1), 1)
        self.assertEqual(lzma.crc32(b"", 432), 432)

    def test_penguins(self):
        self.assertEqual(lzma.crc32(b"penguin", 0), 0xe5c1a120)
        self.assertEqual(lzma.crc32(b"penguin", 1), 0x43b6aa94)
        self.assertEqual(lzma.crc32(b"penguin"), lzma.crc32(b"penguin", 0))

    def test_crc32_running(self):
        # Feeding the previous result back in as *value* must give the
        # checksum of the concatenated inputs.
        running = lzma.crc32(b"pen")
        running = lzma.crc32(b"guin", running)
        self.assertEqual(running, lzma.crc32(b"penguin"))

    def test_crc32_unsigned(self):
        foo = b'abcdefghijklmnop'
        # Explicitly test unsigned behavior: 2486878355 has bit 31 set, so a
        # signed 32-bit result would come back negative.
        self.assertEqual(lzma.crc32(foo), 2486878355)
        self.assertEqual(lzma.crc32(b'spam'), 1138425661)

    def test_same_as_binascii_crc32(self):
        foo = b'abcdefghijklmnop'
        crc = 2486878355
        self.assertEqual(binascii.crc32(foo), crc)
        self.assertEqual(lzma.crc32(foo), crc)
        self.assertEqual(binascii.crc32(b'spam'), lzma.crc32(b'spam'))


# GH-54485 - inputs of 4 GiB or more must be checksummed correctly.
class ChecksumBigBufferTestCase(unittest.TestCase):
    """Checksum a single input of at least 4 GiB with ``lzma.crc32``."""

    @bigmemtest(size=_4G + 4, memuse=1, dry_run=False)
    def test_big_buffer(self, size):
        # The buffer is built from a fixed repeat count rather than from
        # *size*; presumably (_1G + 1) four-byte repeats equal the
        # requested _4G + 4 bytes -- confirm against test.support.
        repetitions = _1G + 1
        payload = b"nyan" * repetitions
        self.assertEqual(lzma.crc32(payload), 1044521549)


class CompressorDecompressorTestCase(unittest.TestCase):

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Expose the crc32 function from the lzma library as :func:`lzma.crc32`.
30 changes: 30 additions & 0 deletions Modules/_lzmamodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1599,10 +1599,40 @@ lzma_exec(PyObject *module)
return 0;
}

/*[clinic input]
_lzma.crc32 -> unsigned_int

data: Py_buffer
value: unsigned_int(bitwise=True) = 0
Starting value of the checksum.
/

Compute a CRC-32 checksum of data.

The returned checksum is an integer.
[clinic start generated code]*/

static unsigned int
_lzma_crc32_impl(PyObject *module, Py_buffer *data, unsigned int value)
/*[clinic end generated code: output=fca7916d796faf8b input=bb623a169c14534f]*/
{
    /* Feed the whole buffer to liblzma's lzma_crc32(), using `value` as the
       starting checksum, and return the updated checksum. */
    const size_t len = (size_t)data->len;

    /* Small buffers are processed with the GIL held: releasing and
       re-acquiring it is inefficient for so little work and may lower
       performance. */
    if (data->len <= 1024*5) {
        return lzma_crc32(data->buf, len, (uint32_t)value);
    }

    /* Larger buffers: drop the GIL for the duration of the computation. */
    Py_BEGIN_ALLOW_THREADS
    value = lzma_crc32(data->buf, len, (uint32_t)value);
    Py_END_ALLOW_THREADS
    return value;
}

/* Method table for the _lzma module.  Each *_METHODDEF macro expands to one
   PyMethodDef entry (presumably defined in the Argument Clinic generated
   header -- confirm); the {NULL} entry is the terminating sentinel. */
static PyMethodDef lzma_methods[] = {
_LZMA_IS_CHECK_SUPPORTED_METHODDEF
_LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
_LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
_LZMA_CRC32_METHODDEF
{NULL}
};

Expand Down
56 changes: 55 additions & 1 deletion Modules/clinic/_lzmamodule.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading