Skip to content

Commit 665730d

Browse files
authored
bpo-23224: Fix segfaults and multiple leaks in the lzma and bz2 modules (GH-7822)
lzma.LZMADecompressor and bz2.BZ2Decompressor objects caused segfaults when their `__init__()` methods were not called. lzma.LZMADecompressor, lzma.LZMACompressor, bz2.BZ2Compressor, and bz2.BZ2Decompressor objects would leak locks and internal buffers when their `__init__()` methods were called multiple times. https://bugs.python.org/issue23224
1 parent 9bba803 commit 665730d

File tree

7 files changed

+288
-175
lines changed

7 files changed

+288
-175
lines changed

Lib/test/test_bz2.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -844,6 +844,10 @@ def test_refleaks_in___init__(self):
844844
bzd.__init__()
845845
self.assertAlmostEqual(gettotalrefcount() - refs_before, 0, delta=10)
846846

847+
def test_uninitialized_BZ2Decompressor_crash(self):
848+
self.assertEqual(BZ2Decompressor.__new__(BZ2Decompressor).
849+
decompress(bytes()), b'')
850+
847851

848852
class CompressDecompressTest(BaseTest):
849853
def testCompress(self):

Lib/test/test_lzma.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,10 @@ def test_refleaks_in_decompressor___init__(self):
380380
lzd.__init__()
381381
self.assertAlmostEqual(gettotalrefcount() - refs_before, 0, delta=10)
382382

383+
def test_uninitialized_LZMADecompressor_crash(self):
384+
self.assertEqual(LZMADecompressor.__new__(LZMADecompressor).
385+
decompress(bytes()), b'')
386+
383387

384388
class CompressDecompressFunctionTestCase(unittest.TestCase):
385389

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Fix segfaults when creating :class:`lzma.LZMADecompressor` and
2+
:class:`bz2.BZ2Decompressor` objects without calling ``__init__()``, and fix
3+
leakage of locks and internal buffers when calling the ``__init__()``
4+
methods of :class:`lzma.LZMADecompressor`, :class:`lzma.LZMACompressor`,
5+
:class:`bz2.BZ2Compressor`, and :class:`bz2.BZ2Decompressor` objects
6+
multiple times.

Modules/_bz2module.c

Lines changed: 122 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,29 @@
1515
#error "The maximum block size accepted by libbzip2 is UINT32_MAX."
1616
#endif
1717

18+
typedef struct {
19+
PyTypeObject *bz2_compressor_type;
20+
PyTypeObject *bz2_decompressor_type;
21+
} _bz2_state;
22+
23+
static inline _bz2_state *
24+
get_module_state(PyObject *module)
25+
{
26+
void *state = PyModule_GetState(module);
27+
assert(state != NULL);
28+
return (_bz2_state *)state;
29+
}
30+
31+
static struct PyModuleDef _bz2module;
32+
33+
static inline _bz2_state *
34+
find_module_state_by_def(PyTypeObject *type)
35+
{
36+
PyObject *module = PyType_GetModuleByDef(type, &_bz2module);
37+
assert(module != NULL);
38+
return get_module_state(module);
39+
}
40+
1841
/* On success, return value >= 0
1942
On failure, return -1 */
2043
static inline Py_ssize_t
@@ -214,12 +237,14 @@ compress(BZ2Compressor *c, char *data, size_t len, int action)
214237

215238
/*[clinic input]
216239
module _bz2
217-
class _bz2.BZ2Compressor "BZ2Compressor *" "&BZ2Compressor_Type"
218-
class _bz2.BZ2Decompressor "BZ2Decompressor *" "&BZ2Decompressor_Type"
240+
class _bz2.BZ2Compressor "BZ2Compressor *" "clinic_state()->bz2_compressor_type"
241+
class _bz2.BZ2Decompressor "BZ2Decompressor *" "clinic_state()->bz2_decompressor_type"
219242
[clinic start generated code]*/
220-
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=dc7d7992a79f9cb7]*/
243+
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=92348121632b94c4]*/
221244

245+
#define clinic_state() (find_module_state_by_def(type))
222246
#include "clinic/_bz2module.c.h"
247+
#undef clinic_state
223248

224249
/*[clinic input]
225250
_bz2.BZ2Compressor.compress
@@ -295,24 +320,43 @@ BZ2_Free(void* ctx, void *ptr)
295320
PyMem_RawFree(ptr);
296321
}
297322

323+
/*[clinic input]
324+
@classmethod
325+
_bz2.BZ2Compressor.__new__
326+
327+
compresslevel: int = 9
328+
Compression level, as a number between 1 and 9.
329+
/
298330
299-
/* Argument Clinic is not used since the Argument Clinic always want to
300-
check the type which would be wrong here */
301-
static int
302-
_bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
331+
Create a compressor object for compressing data incrementally.
332+
333+
For one-shot compression, use the compress() function instead.
334+
[clinic start generated code]*/
335+
336+
static PyObject *
337+
_bz2_BZ2Compressor_impl(PyTypeObject *type, int compresslevel)
338+
/*[clinic end generated code: output=83346c96beaacad7 input=d4500d2a52c8b263]*/
303339
{
304340
int bzerror;
341+
BZ2Compressor *self;
305342

306343
if (!(1 <= compresslevel && compresslevel <= 9)) {
307344
PyErr_SetString(PyExc_ValueError,
308345
"compresslevel must be between 1 and 9");
309-
return -1;
346+
return NULL;
347+
}
348+
349+
assert(type != NULL && type->tp_alloc != NULL);
350+
self = (BZ2Compressor *)type->tp_alloc(type, 0);
351+
if (self == NULL) {
352+
return NULL;
310353
}
311354

312355
self->lock = PyThread_allocate_lock();
313356
if (self->lock == NULL) {
357+
Py_DECREF(self);
314358
PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
315-
return -1;
359+
return NULL;
316360
}
317361

318362
self->bzs.opaque = NULL;
@@ -322,49 +366,11 @@ _bz2_BZ2Compressor___init___impl(BZ2Compressor *self, int compresslevel)
322366
if (catch_bz2_error(bzerror))
323367
goto error;
324368

325-
return 0;
369+
return (PyObject *)self;
326370

327371
error:
328-
PyThread_free_lock(self->lock);
329-
self->lock = NULL;
330-
return -1;
331-
}
332-
333-
PyDoc_STRVAR(_bz2_BZ2Compressor___init____doc__,
334-
"BZ2Compressor(compresslevel=9, /)\n"
335-
"--\n"
336-
"\n"
337-
"Create a compressor object for compressing data incrementally.\n"
338-
"\n"
339-
" compresslevel\n"
340-
" Compression level, as a number between 1 and 9.\n"
341-
"\n"
342-
"For one-shot compression, use the compress() function instead.");
343-
344-
static int
345-
_bz2_BZ2Compressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
346-
{
347-
int return_value = -1;
348-
int compresslevel = 9;
349-
350-
if (!_PyArg_NoKeywords("BZ2Compressor", kwargs)) {
351-
goto exit;
352-
}
353-
if (!_PyArg_CheckPositional("BZ2Compressor", PyTuple_GET_SIZE(args), 0, 1)) {
354-
goto exit;
355-
}
356-
if (PyTuple_GET_SIZE(args) < 1) {
357-
goto skip_optional;
358-
}
359-
compresslevel = _PyLong_AsInt(PyTuple_GET_ITEM(args, 0));
360-
if (compresslevel == -1 && PyErr_Occurred()) {
361-
goto exit;
362-
}
363-
skip_optional:
364-
return_value = _bz2_BZ2Compressor___init___impl((BZ2Compressor *)self, compresslevel);
365-
366-
exit:
367-
return return_value;
372+
Py_DECREF(self);
373+
return NULL;
368374
}
369375

370376
static void
@@ -395,9 +401,8 @@ static PyMethodDef BZ2Compressor_methods[] = {
395401
static PyType_Slot bz2_compressor_type_slots[] = {
396402
{Py_tp_dealloc, BZ2Compressor_dealloc},
397403
{Py_tp_methods, BZ2Compressor_methods},
398-
{Py_tp_init, _bz2_BZ2Compressor___init__},
399-
{Py_tp_new, PyType_GenericNew},
400-
{Py_tp_doc, (char *)_bz2_BZ2Compressor___init____doc__},
404+
{Py_tp_new, _bz2_BZ2Compressor},
405+
{Py_tp_doc, (char *)_bz2_BZ2Compressor__doc__},
401406
{Py_tp_traverse, BZ2Compressor_traverse},
402407
{0, 0}
403408
};
@@ -624,69 +629,54 @@ _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
624629
return result;
625630
}
626631

627-
/* Argument Clinic is not used since the Argument Clinic always want to
628-
check the type which would be wrong here */
629-
static int
630-
_bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self)
632+
/*[clinic input]
633+
@classmethod
634+
_bz2.BZ2Decompressor.__new__
635+
636+
Create a decompressor object for decompressing data incrementally.
637+
638+
For one-shot decompression, use the decompress() function instead.
639+
[clinic start generated code]*/
640+
641+
static PyObject *
642+
_bz2_BZ2Decompressor_impl(PyTypeObject *type)
643+
/*[clinic end generated code: output=5150d51ccaab220e input=b87413ce51853528]*/
631644
{
645+
BZ2Decompressor *self;
632646
int bzerror;
633647

634-
PyThread_type_lock lock = PyThread_allocate_lock();
635-
if (lock == NULL) {
636-
PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
637-
return -1;
648+
assert(type != NULL && type->tp_alloc != NULL);
649+
self = (BZ2Decompressor *)type->tp_alloc(type, 0);
650+
if (self == NULL) {
651+
return NULL;
638652
}
639-
if (self->lock != NULL) {
640-
PyThread_free_lock(self->lock);
653+
654+
self->lock = PyThread_allocate_lock();
655+
if (self->lock == NULL) {
656+
Py_DECREF(self);
657+
PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
658+
return NULL;
641659
}
642-
self->lock = lock;
643660

644661
self->needs_input = 1;
645662
self->bzs_avail_in_real = 0;
646663
self->input_buffer = NULL;
647664
self->input_buffer_size = 0;
648-
Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
665+
self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
649666
if (self->unused_data == NULL)
650667
goto error;
651668

652669
bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
653670
if (catch_bz2_error(bzerror))
654671
goto error;
655672

656-
return 0;
673+
return (PyObject *)self;
657674

658675
error:
659-
Py_CLEAR(self->unused_data);
660-
PyThread_free_lock(self->lock);
661-
self->lock = NULL;
662-
return -1;
663-
}
664-
665-
static int
666-
_bz2_BZ2Decompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
667-
{
668-
int return_value = -1;
669-
670-
if (!_PyArg_NoPositional("BZ2Decompressor", args)) {
671-
goto exit;
672-
}
673-
if (!_PyArg_NoKeywords("BZ2Decompressor", kwargs)) {
674-
goto exit;
675-
}
676-
return_value = _bz2_BZ2Decompressor___init___impl((BZ2Decompressor *)self);
677-
678-
exit:
679-
return return_value;
676+
Py_DECREF(self);
677+
return NULL;
680678
}
681679

682-
PyDoc_STRVAR(_bz2_BZ2Decompressor___init____doc__,
683-
"BZ2Decompressor()\n"
684-
"--\n"
685-
"\n"
686-
"Create a decompressor object for decompressing data incrementally.\n"
687-
"\n"
688-
"For one-shot decompression, use the decompress() function instead.");
689-
690680
static void
691681
BZ2Decompressor_dealloc(BZ2Decompressor *self)
692682
{
@@ -738,10 +728,9 @@ static PyMemberDef BZ2Decompressor_members[] = {
738728
static PyType_Slot bz2_decompressor_type_slots[] = {
739729
{Py_tp_dealloc, BZ2Decompressor_dealloc},
740730
{Py_tp_methods, BZ2Decompressor_methods},
741-
{Py_tp_init, _bz2_BZ2Decompressor___init__},
742-
{Py_tp_doc, (char *)_bz2_BZ2Decompressor___init____doc__},
731+
{Py_tp_doc, (char *)_bz2_BZ2Decompressor__doc__},
743732
{Py_tp_members, BZ2Decompressor_members},
744-
{Py_tp_new, PyType_GenericNew},
733+
{Py_tp_new, _bz2_BZ2Decompressor},
745734
{Py_tp_traverse, BZ2Decompressor_traverse},
746735
{0, 0}
747736
};
@@ -762,31 +751,52 @@ static PyType_Spec bz2_decompressor_type_spec = {
762751
static int
763752
_bz2_exec(PyObject *module)
764753
{
765-
PyTypeObject *bz2_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
754+
_bz2_state *state = get_module_state(module);
755+
state->bz2_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
766756
&bz2_compressor_type_spec, NULL);
767-
if (bz2_compressor_type == NULL) {
757+
if (state->bz2_compressor_type == NULL) {
768758
return -1;
769759
}
770-
int rc = PyModule_AddType(module, bz2_compressor_type);
771-
Py_DECREF(bz2_compressor_type);
772-
if (rc < 0) {
760+
if (PyModule_AddType(module, state->bz2_compressor_type) < 0) {
773761
return -1;
774762
}
775763

776-
PyTypeObject *bz2_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
764+
state->bz2_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
777765
&bz2_decompressor_type_spec, NULL);
778-
if (bz2_decompressor_type == NULL) {
766+
if (state->bz2_decompressor_type == NULL) {
779767
return -1;
780768
}
781-
rc = PyModule_AddType(module, bz2_decompressor_type);
782-
Py_DECREF(bz2_decompressor_type);
783-
if (rc < 0) {
769+
if (PyModule_AddType(module, state->bz2_decompressor_type) < 0) {
784770
return -1;
785771
}
786772

787773
return 0;
788774
}
789775

776+
static int
777+
_bz2_traverse(PyObject *module, visitproc visit, void *arg)
778+
{
779+
_bz2_state *state = get_module_state(module);
780+
Py_VISIT(state->bz2_compressor_type);
781+
Py_VISIT(state->bz2_decompressor_type);
782+
return 0;
783+
}
784+
785+
static int
786+
_bz2_clear(PyObject *module)
787+
{
788+
_bz2_state *state = get_module_state(module);
789+
Py_CLEAR(state->bz2_compressor_type);
790+
Py_CLEAR(state->bz2_decompressor_type);
791+
return 0;
792+
}
793+
794+
static void
795+
_bz2_free(void *module)
796+
{
797+
(void)_bz2_clear((PyObject *)module);
798+
}
799+
790800
static struct PyModuleDef_Slot _bz2_slots[] = {
791801
{Py_mod_exec, _bz2_exec},
792802
{0, NULL}
@@ -795,6 +805,10 @@ static struct PyModuleDef_Slot _bz2_slots[] = {
795805
static struct PyModuleDef _bz2module = {
796806
.m_base = PyModuleDef_HEAD_INIT,
797807
.m_name = "_bz2",
808+
.m_size = sizeof(_bz2_state),
809+
.m_traverse = _bz2_traverse,
810+
.m_clear = _bz2_clear,
811+
.m_free = _bz2_free,
798812
.m_slots = _bz2_slots,
799813
};
800814

0 commit comments

Comments
 (0)