Skip to content

gh-67413: Fix segfaults and multiple leaks in the lzma and bz2 modules #7822

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Lib/test/test_bz2.py
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,10 @@ def test_refleaks_in___init__(self):
bzd.__init__()
self.assertAlmostEqual(gettotalrefcount() - refs_before, 0, delta=10)

def test_uninitialized_BZ2Decompressor_crash(self):
self.assertEqual(BZ2Decompressor.__new__(BZ2Decompressor).
decompress(bytes()), b'')


class CompressDecompressTest(BaseTest):
def testCompress(self):
Expand Down
4 changes: 4 additions & 0 deletions Lib/test/test_lzma.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,10 @@ def test_refleaks_in_decompressor___init__(self):
lzd.__init__()
self.assertAlmostEqual(gettotalrefcount() - refs_before, 0, delta=10)

def test_uninitialized_LZMADecompressor_crash(self):
self.assertEqual(LZMADecompressor.__new__(LZMADecompressor).
decompress(bytes()), b'')


class CompressDecompressFunctionTestCase(unittest.TestCase):

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Fix segfaults when creating :class:`lzma.LZMADecompressor` and
:class:`bz2.BZ2Decompressor` objects without calling ``__init__()``, and fix
leakage of locks and internal buffers when calling the ``__init__()``
methods of :class:`lzma.LZMADecompressor`, :class:`lzma.LZMACompressor`,
:class:`bz2.BZ2Compressor`, and :class:`bz2.BZ2Decompressor` objects
multiple times.
230 changes: 122 additions & 108 deletions Modules/_bz2module.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,29 @@
#error "The maximum block size accepted by libbzip2 is UINT32_MAX."
#endif

typedef struct {
PyTypeObject *bz2_compressor_type;
PyTypeObject *bz2_decompressor_type;
} _bz2_state;

static inline _bz2_state *
get_module_state(PyObject *module)
{
void *state = PyModule_GetState(module);
assert(state != NULL);
return (_bz2_state *)state;
}

static struct PyModuleDef _bz2module;

static inline _bz2_state *
find_module_state_by_def(PyTypeObject *type)
{
PyObject *module = PyType_GetModuleByDef(type, &_bz2module);
assert(module != NULL);
return get_module_state(module);
}

/* On success, return value >= 0
On failure, return -1 */
static inline Py_ssize_t
Expand Down Expand Up @@ -214,12 +237,14 @@ compress(BZ2Compressor *c, char *data, size_t len, int action)

/*[clinic input]
module _bz2
class _bz2.BZ2Compressor "BZ2Compressor *" "&BZ2Compressor_Type"
class _bz2.BZ2Decompressor "BZ2Decompressor *" "&BZ2Decompressor_Type"
class _bz2.BZ2Compressor "BZ2Compressor *" "clinic_state()->bz2_compressor_type"
class _bz2.BZ2Decompressor "BZ2Decompressor *" "clinic_state()->bz2_decompressor_type"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=dc7d7992a79f9cb7]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=92348121632b94c4]*/

#define clinic_state() (find_module_state_by_def(type))
#include "clinic/_bz2module.c.h"
#undef clinic_state

/*[clinic input]
_bz2.BZ2Compressor.compress
Expand Down Expand Up @@ -295,24 +320,43 @@ BZ2_Free(void* ctx, void *ptr)
PyMem_RawFree(ptr);
}

/*[clinic input]
@classmethod
_bz2.BZ2Compressor.__new__

compresslevel: int = 9
Compression level, as a number between 1 and 9.
/

/* Argument Clinic is not used since the Argument Clinic always want to
check the type which would be wrong here */
static int
_bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
Create a compressor object for compressing data incrementally.

For one-shot compression, use the compress() function instead.
[clinic start generated code]*/

static PyObject *
_bz2_BZ2Compressor_impl(PyTypeObject *type, int compresslevel)
/*[clinic end generated code: output=83346c96beaacad7 input=d4500d2a52c8b263]*/
{
int bzerror;
BZ2Compressor *self;

if (!(1 <= compresslevel && compresslevel <= 9)) {
PyErr_SetString(PyExc_ValueError,
"compresslevel must be between 1 and 9");
return -1;
return NULL;
}

assert(type != NULL && type->tp_alloc != NULL);
self = (BZ2Compressor *)type->tp_alloc(type, 0);
if (self == NULL) {
return NULL;
}

self->lock = PyThread_allocate_lock();
if (self->lock == NULL) {
Py_DECREF(self);
PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
return -1;
return NULL;
}

self->bzs.opaque = NULL;
Expand All @@ -322,49 +366,11 @@ _bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
if (catch_bz2_error(bzerror))
goto error;

return 0;
return (PyObject *)self;

error:
PyThread_free_lock(self->lock);
self->lock = NULL;
return -1;
}

PyDoc_STRVAR(_bz2_BZ2Compressor___init____doc__,
"BZ2Compressor(compresslevel=9, /)\n"
"--\n"
"\n"
"Create a compressor object for compressing data incrementally.\n"
"\n"
" compresslevel\n"
" Compression level, as a number between 1 and 9.\n"
"\n"
"For one-shot compression, use the compress() function instead.");

static int
_bz2_BZ2Compressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
{
int return_value = -1;
int compresslevel = 9;

if (!_PyArg_NoKeywords("BZ2Compressor", kwargs)) {
goto exit;
}
if (!_PyArg_CheckPositional("BZ2Compressor", PyTuple_GET_SIZE(args), 0, 1)) {
goto exit;
}
if (PyTuple_GET_SIZE(args) < 1) {
goto skip_optional;
}
compresslevel = _PyLong_AsInt(PyTuple_GET_ITEM(args, 0));
if (compresslevel == -1 && PyErr_Occurred()) {
goto exit;
}
skip_optional:
return_value = _bz2_BZ2Compressor___init___impl((BZ2Compressor *)self, compresslevel);

exit:
return return_value;
Py_DECREF(self);
return NULL;
}

static void
Expand Down Expand Up @@ -395,9 +401,8 @@ static PyMethodDef BZ2Compressor_methods[] = {
static PyType_Slot bz2_compressor_type_slots[] = {
{Py_tp_dealloc, BZ2Compressor_dealloc},
{Py_tp_methods, BZ2Compressor_methods},
{Py_tp_init, _bz2_BZ2Compressor___init__},
{Py_tp_new, PyType_GenericNew},
{Py_tp_doc, (char *)_bz2_BZ2Compressor___init____doc__},
{Py_tp_new, _bz2_BZ2Compressor},
{Py_tp_doc, (char *)_bz2_BZ2Compressor__doc__},
{Py_tp_traverse, BZ2Compressor_traverse},
{0, 0}
};
Expand Down Expand Up @@ -624,69 +629,54 @@ _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
return result;
}

/* Argument Clinic is not used since the Argument Clinic always want to
check the type which would be wrong here */
static int
_bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self)
/*[clinic input]
@classmethod
_bz2.BZ2Decompressor.__new__

Create a decompressor object for decompressing data incrementally.

For one-shot decompression, use the decompress() function instead.
[clinic start generated code]*/

static PyObject *
_bz2_BZ2Decompressor_impl(PyTypeObject *type)
/*[clinic end generated code: output=5150d51ccaab220e input=b87413ce51853528]*/
{
BZ2Decompressor *self;
int bzerror;

PyThread_type_lock lock = PyThread_allocate_lock();
if (lock == NULL) {
PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
return -1;
assert(type != NULL && type->tp_alloc != NULL);
self = (BZ2Decompressor *)type->tp_alloc(type, 0);
if (self == NULL) {
return NULL;
}
if (self->lock != NULL) {
PyThread_free_lock(self->lock);

self->lock = PyThread_allocate_lock();
if (self->lock == NULL) {
Py_DECREF(self);
PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
return NULL;
}
self->lock = lock;

self->needs_input = 1;
self->bzs_avail_in_real = 0;
self->input_buffer = NULL;
self->input_buffer_size = 0;
Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
if (self->unused_data == NULL)
goto error;

bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
if (catch_bz2_error(bzerror))
goto error;

return 0;
return (PyObject *)self;

error:
Py_CLEAR(self->unused_data);
PyThread_free_lock(self->lock);
self->lock = NULL;
return -1;
}

static int
_bz2_BZ2Decompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
{
int return_value = -1;

if (!_PyArg_NoPositional("BZ2Decompressor", args)) {
goto exit;
}
if (!_PyArg_NoKeywords("BZ2Decompressor", kwargs)) {
goto exit;
}
return_value = _bz2_BZ2Decompressor___init___impl((BZ2Decompressor *)self);

exit:
return return_value;
Py_DECREF(self);
return NULL;
}

PyDoc_STRVAR(_bz2_BZ2Decompressor___init____doc__,
"BZ2Decompressor()\n"
"--\n"
"\n"
"Create a decompressor object for decompressing data incrementally.\n"
"\n"
"For one-shot decompression, use the decompress() function instead.");

static void
BZ2Decompressor_dealloc(BZ2Decompressor *self)
{
Expand Down Expand Up @@ -738,10 +728,9 @@ static PyMemberDef BZ2Decompressor_members[] = {
static PyType_Slot bz2_decompressor_type_slots[] = {
{Py_tp_dealloc, BZ2Decompressor_dealloc},
{Py_tp_methods, BZ2Decompressor_methods},
{Py_tp_init, _bz2_BZ2Decompressor___init__},
{Py_tp_doc, (char *)_bz2_BZ2Decompressor___init____doc__},
{Py_tp_doc, (char *)_bz2_BZ2Decompressor__doc__},
{Py_tp_members, BZ2Decompressor_members},
{Py_tp_new, PyType_GenericNew},
{Py_tp_new, _bz2_BZ2Decompressor},
{Py_tp_traverse, BZ2Decompressor_traverse},
{0, 0}
};
Expand All @@ -762,31 +751,52 @@ static PyType_Spec bz2_decompressor_type_spec = {
static int
_bz2_exec(PyObject *module)
{
PyTypeObject *bz2_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
_bz2_state *state = get_module_state(module);
state->bz2_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
&bz2_compressor_type_spec, NULL);
if (bz2_compressor_type == NULL) {
if (state->bz2_compressor_type == NULL) {
return -1;
}
int rc = PyModule_AddType(module, bz2_compressor_type);
Py_DECREF(bz2_compressor_type);
if (rc < 0) {
if (PyModule_AddType(module, state->bz2_compressor_type) < 0) {
return -1;
}

PyTypeObject *bz2_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
state->bz2_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
&bz2_decompressor_type_spec, NULL);
if (bz2_decompressor_type == NULL) {
if (state->bz2_decompressor_type == NULL) {
return -1;
}
rc = PyModule_AddType(module, bz2_decompressor_type);
Py_DECREF(bz2_decompressor_type);
if (rc < 0) {
if (PyModule_AddType(module, state->bz2_decompressor_type) < 0) {
return -1;
}

return 0;
}

static int
_bz2_traverse(PyObject *module, visitproc visit, void *arg)
{
_bz2_state *state = get_module_state(module);
Py_VISIT(state->bz2_compressor_type);
Py_VISIT(state->bz2_decompressor_type);
return 0;
}

static int
_bz2_clear(PyObject *module)
{
_bz2_state *state = get_module_state(module);
Py_CLEAR(state->bz2_compressor_type);
Py_CLEAR(state->bz2_decompressor_type);
return 0;
}

static void
_bz2_free(void *module)
{
(void)_bz2_clear((PyObject *)module);
}

static struct PyModuleDef_Slot _bz2_slots[] = {
{Py_mod_exec, _bz2_exec},
{0, NULL}
Expand All @@ -795,6 +805,10 @@ static struct PyModuleDef_Slot _bz2_slots[] = {
static struct PyModuleDef _bz2module = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "_bz2",
.m_size = sizeof(_bz2_state),
.m_traverse = _bz2_traverse,
.m_clear = _bz2_clear,
.m_free = _bz2_free,
.m_slots = _bz2_slots,
};

Expand Down
Loading