diff --git a/Doc/license.rst b/Doc/license.rst
index 90783e3e31a69d..480414bb84c4f2 100644
--- a/Doc/license.rst
+++ b/Doc/license.rst
@@ -1132,3 +1132,40 @@ The file is distributed under the 2-Clause BSD License::
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+Zstandard bindings
+------------------
+
+Zstandard bindings in :file:`Modules/_zstd` and :file:`Lib/compression/zstd`
+are based on code from the
+`pyzstd library <https://github.com/Rogdham/pyzstd/>`_, copyright Ma Lin and
+contributors. The pyzstd code is distributed under the 3-Clause BSD License::
+
+ Copyright (c) 2020-present, Ma Lin and contributors.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
index 121466dd2ec1ce..9bde4faaf5a040 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -834,6 +834,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(bytes_per_sep));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_call));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_exception));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_parameter_type));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_return));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cached_datetime_module));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cached_statements));
@@ -888,6 +889,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(count));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(covariant));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cwd));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(d_parameter_type));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(data));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(database));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(day));
@@ -902,6 +904,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(deterministic));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(device));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dict));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dict_content));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dictcomp));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(difference_update));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(digest));
@@ -968,6 +971,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(follow_symlinks));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(format));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(format_spec));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(frame_buffer));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(from_param));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fromlist));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fromtimestamp));
@@ -1026,6 +1030,8 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(intersection));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(interval));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(io));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(is_compress));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(is_raw));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(is_running));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(is_struct));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(isatty));
@@ -1149,6 +1155,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(overlapped));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(owner));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pages));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(parameter));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(parent));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(password));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(path));
@@ -1310,6 +1317,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(write_through));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(year));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(zdict));
+ _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(zstd_dict));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[0]);
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[1]);
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[2]);
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index 20e2e6f2a7fc66..3a83fd6b6042e2 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -325,6 +325,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(bytes_per_sep)
STRUCT_FOR_ID(c_call)
STRUCT_FOR_ID(c_exception)
+ STRUCT_FOR_ID(c_parameter_type)
STRUCT_FOR_ID(c_return)
STRUCT_FOR_ID(cached_datetime_module)
STRUCT_FOR_ID(cached_statements)
@@ -379,6 +380,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(count)
STRUCT_FOR_ID(covariant)
STRUCT_FOR_ID(cwd)
+ STRUCT_FOR_ID(d_parameter_type)
STRUCT_FOR_ID(data)
STRUCT_FOR_ID(database)
STRUCT_FOR_ID(day)
@@ -393,6 +395,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(deterministic)
STRUCT_FOR_ID(device)
STRUCT_FOR_ID(dict)
+ STRUCT_FOR_ID(dict_content)
STRUCT_FOR_ID(dictcomp)
STRUCT_FOR_ID(difference_update)
STRUCT_FOR_ID(digest)
@@ -459,6 +462,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(follow_symlinks)
STRUCT_FOR_ID(format)
STRUCT_FOR_ID(format_spec)
+ STRUCT_FOR_ID(frame_buffer)
STRUCT_FOR_ID(from_param)
STRUCT_FOR_ID(fromlist)
STRUCT_FOR_ID(fromtimestamp)
@@ -517,6 +521,8 @@ struct _Py_global_strings {
STRUCT_FOR_ID(intersection)
STRUCT_FOR_ID(interval)
STRUCT_FOR_ID(io)
+ STRUCT_FOR_ID(is_compress)
+ STRUCT_FOR_ID(is_raw)
STRUCT_FOR_ID(is_running)
STRUCT_FOR_ID(is_struct)
STRUCT_FOR_ID(isatty)
@@ -640,6 +646,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(overlapped)
STRUCT_FOR_ID(owner)
STRUCT_FOR_ID(pages)
+ STRUCT_FOR_ID(parameter)
STRUCT_FOR_ID(parent)
STRUCT_FOR_ID(password)
STRUCT_FOR_ID(path)
@@ -801,6 +808,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(write_through)
STRUCT_FOR_ID(year)
STRUCT_FOR_ID(zdict)
+ STRUCT_FOR_ID(zstd_dict)
} identifiers;
struct {
PyASCIIObject _ascii;
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index de1dfd0cce8d7e..4a34ffa559e124 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -832,6 +832,7 @@ extern "C" {
INIT_ID(bytes_per_sep), \
INIT_ID(c_call), \
INIT_ID(c_exception), \
+ INIT_ID(c_parameter_type), \
INIT_ID(c_return), \
INIT_ID(cached_datetime_module), \
INIT_ID(cached_statements), \
@@ -886,6 +887,7 @@ extern "C" {
INIT_ID(count), \
INIT_ID(covariant), \
INIT_ID(cwd), \
+ INIT_ID(d_parameter_type), \
INIT_ID(data), \
INIT_ID(database), \
INIT_ID(day), \
@@ -900,6 +902,7 @@ extern "C" {
INIT_ID(deterministic), \
INIT_ID(device), \
INIT_ID(dict), \
+ INIT_ID(dict_content), \
INIT_ID(dictcomp), \
INIT_ID(difference_update), \
INIT_ID(digest), \
@@ -966,6 +969,7 @@ extern "C" {
INIT_ID(follow_symlinks), \
INIT_ID(format), \
INIT_ID(format_spec), \
+ INIT_ID(frame_buffer), \
INIT_ID(from_param), \
INIT_ID(fromlist), \
INIT_ID(fromtimestamp), \
@@ -1024,6 +1028,8 @@ extern "C" {
INIT_ID(intersection), \
INIT_ID(interval), \
INIT_ID(io), \
+ INIT_ID(is_compress), \
+ INIT_ID(is_raw), \
INIT_ID(is_running), \
INIT_ID(is_struct), \
INIT_ID(isatty), \
@@ -1147,6 +1153,7 @@ extern "C" {
INIT_ID(overlapped), \
INIT_ID(owner), \
INIT_ID(pages), \
+ INIT_ID(parameter), \
INIT_ID(parent), \
INIT_ID(password), \
INIT_ID(path), \
@@ -1308,6 +1315,7 @@ extern "C" {
INIT_ID(write_through), \
INIT_ID(year), \
INIT_ID(zdict), \
+ INIT_ID(zstd_dict), \
}
#define _Py_str_ascii_INIT { \
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index ad78dc8c4d589a..fefacef77c89ee 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -1088,6 +1088,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(c_parameter_type);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(c_return);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
@@ -1304,6 +1308,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(d_parameter_type);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(data);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
@@ -1360,6 +1368,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(dict_content);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(dictcomp);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
@@ -1624,6 +1636,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(frame_buffer);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(from_param);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
@@ -1856,6 +1872,14 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(is_compress);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(is_raw);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(is_running);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
@@ -2348,6 +2372,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(parameter);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(parent);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
@@ -2992,6 +3020,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
+ string = &_Py_ID(zstd_dict);
+ _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1));
+ assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_STR(empty);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 37ce0b55203cf7..9f90df6019dea9 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -3341,6 +3341,7 @@ MODULE__TESTCAPI_DEPS=$(srcdir)/Modules/_testcapi/parts.h $(srcdir)/Modules/_tes
MODULE__TESTLIMITEDCAPI_DEPS=$(srcdir)/Modules/_testlimitedcapi/testcapi_long.h $(srcdir)/Modules/_testlimitedcapi/parts.h $(srcdir)/Modules/_testlimitedcapi/util.h
MODULE__TESTINTERNALCAPI_DEPS=$(srcdir)/Modules/_testinternalcapi/parts.h
MODULE__SQLITE3_DEPS=$(srcdir)/Modules/_sqlite/connection.h $(srcdir)/Modules/_sqlite/cursor.h $(srcdir)/Modules/_sqlite/microprotocols.h $(srcdir)/Modules/_sqlite/module.h $(srcdir)/Modules/_sqlite/prepare_protocol.h $(srcdir)/Modules/_sqlite/row.h $(srcdir)/Modules/_sqlite/util.h
+MODULE__ZSTD_DEPS=$(srcdir)/Modules/_zstd/_zstdmodule.h $(srcdir)/Modules/_zstd/buffer.h
CODECS_COMMON_HEADERS=$(srcdir)/Modules/cjkcodecs/multibytecodec.h $(srcdir)/Modules/cjkcodecs/cjkcodecs.h
MODULE__CODECS_CN_DEPS=$(srcdir)/Modules/cjkcodecs/mappings_cn.h $(CODECS_COMMON_HEADERS)
diff --git a/Modules/Setup b/Modules/Setup
index c3e0d9eb9344a9..f23f082d9eff5f 100644
--- a/Modules/Setup
+++ b/Modules/Setup
@@ -200,6 +200,7 @@ PYTHONPATH=$(COREPYTHONPATH)
#_dbm _dbmmodule.c -lgdbm_compat -DUSE_GDBM_COMPAT
#_gdbm _gdbmmodule.c -lgdbm
#_lzma _lzmamodule.c -llzma
+#_zstd _zstd/_zstdmodule.c _zstd/zdict.c _zstd/compressor.c _zstd/decompressor.c -lzstd -I$(srcdir)/Modules/_zstd
#_uuid _uuidmodule.c -luuid
#zlib zlibmodule.c -lz
diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in
index be4fb513e592e1..1512187bb097bd 100644
--- a/Modules/Setup.stdlib.in
+++ b/Modules/Setup.stdlib.in
@@ -65,10 +65,11 @@
@MODULE__DECIMAL_TRUE@_decimal _decimal/_decimal.c
# compression libs and binascii (optional CRC32 from zlib)
-# bindings need -lbz2, -lz, or -llzma, respectively
+# bindings need -lbz2, -llzma, -lzstd, or -lz, respectively
@MODULE_BINASCII_TRUE@binascii binascii.c
@MODULE__BZ2_TRUE@_bz2 _bz2module.c
@MODULE__LZMA_TRUE@_lzma _lzmamodule.c
+@MODULE__ZSTD_TRUE@_zstd _zstd/_zstdmodule.c _zstd/zdict.c _zstd/compressor.c _zstd/decompressor.c
@MODULE_ZLIB_TRUE@zlib zlibmodule.c
# dbm/gdbm
diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c
new file mode 100644
index 00000000000000..18dc13b3fd16f0
--- /dev/null
+++ b/Modules/_zstd/_zstdmodule.c
@@ -0,0 +1,914 @@
+/*
+Low level interface to Meta's zstd library for use in the compression.zstd
+Python module.
+*/
+
+#ifndef Py_BUILD_CORE_BUILTIN
+# define Py_BUILD_CORE_MODULE 1
+#endif
+
+#include "_zstdmodule.h"
+
+/*[clinic input]
+module _zstd
+
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b5f5587aac15c14]*/
+#include "clinic/_zstdmodule.c.h"
+
+
+/* Raise ZstdError for a failed zstd library call.
+
+   `type` picks the message template naming the operation that failed and
+   `zstd_ret` is the zstd error code; ZSTD_getErrorName(zstd_ret) is
+   substituted for the template's "%s". */
+void
+set_zstd_error(const _zstd_state* const state,
+               error_type type, size_t zstd_ret)
+{
+    /* Only ever points at string literals, hence pointer-to-const. */
+    const char *msg;
+    assert(ZSTD_isError(zstd_ret));
+
+    switch (type) {
+    case ERR_DECOMPRESS:
+        msg = "Unable to decompress zstd data: %s";
+        break;
+    case ERR_COMPRESS:
+        msg = "Unable to compress zstd data: %s";
+        break;
+    case ERR_SET_PLEDGED_INPUT_SIZE:
+        msg = "Unable to set pledged uncompressed content size: %s";
+        break;
+    case ERR_LOAD_D_DICT:
+        msg = "Unable to load zstd dictionary or prefix for decompression: %s";
+        break;
+    case ERR_LOAD_C_DICT:
+        msg = "Unable to load zstd dictionary or prefix for compression: %s";
+        break;
+    case ERR_GET_C_BOUNDS:
+        msg = "Unable to get zstd compression parameter bounds: %s";
+        break;
+    case ERR_GET_D_BOUNDS:
+        msg = "Unable to get zstd decompression parameter bounds: %s";
+        break;
+    case ERR_SET_C_LEVEL:
+        msg = "Unable to set zstd compression level: %s";
+        break;
+    case ERR_TRAIN_DICT:
+        msg = "Unable to train zstd dictionary: %s";
+        break;
+    case ERR_FINALIZE_DICT:
+        msg = "Unable to finalize zstd dictionary: %s";
+        break;
+    default:
+        Py_UNREACHABLE();
+    }
+    PyErr_Format(state->ZstdError, msg, ZSTD_getErrorName(zstd_ret));
+}
+
+/* One entry of the tables below: a zstd parameter id together with the
+   name that set_parameter_error() prints for it. */
+typedef struct {
+    int parameter;
+    char parameter_name[32];
+} ParameterInfo;
+
+static const ParameterInfo cp_list[] = {    /* compression parameters */
+    {ZSTD_c_compressionLevel,           "compressionLevel"},
+    {ZSTD_c_windowLog,                  "windowLog"},
+    {ZSTD_c_hashLog,                    "hashLog"},
+    {ZSTD_c_chainLog,                   "chainLog"},
+    {ZSTD_c_searchLog,                  "searchLog"},
+    {ZSTD_c_minMatch,                   "minMatch"},
+    {ZSTD_c_targetLength,               "targetLength"},
+    {ZSTD_c_strategy,                   "strategy"},
+
+    {ZSTD_c_enableLongDistanceMatching, "enableLongDistanceMatching"},
+    {ZSTD_c_ldmHashLog,                 "ldmHashLog"},
+    {ZSTD_c_ldmMinMatch,                "ldmMinMatch"},
+    {ZSTD_c_ldmBucketSizeLog,           "ldmBucketSizeLog"},
+    {ZSTD_c_ldmHashRateLog,             "ldmHashRateLog"},
+
+    {ZSTD_c_contentSizeFlag,            "contentSizeFlag"},
+    {ZSTD_c_checksumFlag,               "checksumFlag"},
+    {ZSTD_c_dictIDFlag,                 "dictIDFlag"},
+
+    {ZSTD_c_nbWorkers,                  "nbWorkers"},
+    {ZSTD_c_jobSize,                    "jobSize"},
+    {ZSTD_c_overlapLog,                 "overlapLog"}
+};
+
+static const ParameterInfo dp_list[] = {    /* decompression parameters */
+    {ZSTD_d_windowLogMax,               "windowLogMax"}
+};
+
+/* Raise ZstdError explaining that parameter `key_v` can't be set to
+   `value_v`.
+
+   `is_compress` selects the compression (true) or decompression (false)
+   parameter table and bounds query.  Keys missing from the table are named
+   "unknown parameter (key N)".  If zstd reports no bounds for the key the
+   message says the parameter is invalid; otherwise it shows the bounds. */
+void
+set_parameter_error(const _zstd_state* const state, int is_compress,
+                    int key_v, int value_v)
+{
+    ParameterInfo const *list;
+    int list_size;
+    /* Only ever points at string literals, hence pointer-to-const. */
+    const char *type;
+    char pos_msg[128];
+
+    if (is_compress) {
+        list = cp_list;
+        list_size = Py_ARRAY_LENGTH(cp_list);
+        type = "compression";
+    }
+    else {
+        list = dp_list;
+        list_size = Py_ARRAY_LENGTH(dp_list);
+        type = "decompression";
+    }
+
+    /* Look the key up in the selected table to get a readable name. */
+    char const *name = NULL;
+    for (int i = 0; i < list_size; i++) {
+        if (list[i].parameter == key_v) {
+            name = list[i].parameter_name;
+            break;
+        }
+    }
+    if (name == NULL) {
+        /* Not in the table: describe the parameter by its numeric key. */
+        PyOS_snprintf(pos_msg, sizeof(pos_msg),
+                      "unknown parameter (key %d)", key_v);
+        name = pos_msg;
+    }
+
+    /* Ask zstd for the bounds of this parameter. */
+    ZSTD_bounds bounds = is_compress ? ZSTD_cParam_getBounds(key_v)
+                                     : ZSTD_dParam_getBounds(key_v);
+    if (ZSTD_isError(bounds.error)) {
+        PyErr_Format(state->ZstdError,
+                     "Zstd %s parameter \"%s\" is invalid. (zstd v%s)",
+                     type, name, ZSTD_versionString());
+        return;
+    }
+
+    /* Bounds are available: report them next to the provided value. */
+    PyErr_Format(state->ZstdError,
+                 "Error when setting zstd %s parameter \"%s\", it "
+                 "should %d <= value <= %d, provided value is %d. "
+                 "(zstd v%s, %d-bit build)",
+                 type, name,
+                 bounds.lowerBound, bounds.upperBound, value_v,
+                 ZSTD_versionString(), 8*(int)sizeof(Py_ssize_t));
+}
+
+static inline _zstd_state*
+get_zstd_state(PyObject *module)
+{
+    /* Fetch the state stored on `module`; it is asserted to be non-NULL. */
+    _zstd_state *mod_state = (_zstd_state *)PyModule_GetState(module);
+    assert(mod_state != NULL);
+    return mod_state;
+}
+
+/*[clinic input]
+_zstd._train_dict
+
+ samples_bytes: PyBytesObject
+ Concatenation of samples.
+ samples_size_list: object(subclass_of='&PyList_Type')
+ List of samples' sizes.
+ dict_size: Py_ssize_t
+ The size of the dictionary.
+ /
+
+Internal function, train a zstd dictionary on sample data.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes,
+                       PyObject *samples_size_list, Py_ssize_t dict_size)
+/*[clinic end generated code: output=ee53c34c8f77886b input=b21d092c695a3a81]*/
+{
+    // TODO(emmatyping): The preamble and suffix to this function and _finalize_dict
+    // are pretty similar. We should see if we can refactor them to share that code.
+    Py_ssize_t chunks_number;
+    size_t *chunk_sizes = NULL;
+    PyObject *dst_dict_bytes = NULL;
+    size_t zstd_ret;
+    size_t sizes_sum;
+    Py_ssize_t i;
+
+    /* Check arguments */
+    if (dict_size <= 0) {
+        PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number.");
+        return NULL;
+    }
+
+    chunks_number = Py_SIZE(samples_size_list);
+    if ((size_t) chunks_number > UINT32_MAX) {
+        PyErr_Format(PyExc_ValueError,
+                     "The number of samples should be <= %u.", UINT32_MAX);
+        return NULL;
+    }
+
+    /* Prepare chunk_sizes */
+    chunk_sizes = PyMem_New(size_t, chunks_number);
+    if (chunk_sizes == NULL) {
+        PyErr_NoMemory();
+        goto error;
+    }
+
+    sizes_sum = 0;
+    for (i = 0; i < chunks_number; i++) {
+        PyObject *size = PyList_GetItemRef(samples_size_list, i);
+        if (size == NULL) {
+            goto error;  /* index no longer valid: the list shrank */
+        }
+        chunk_sizes[i] = PyLong_AsSize_t(size);
+        Py_DECREF(size);
+        if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) {
+            PyErr_Format(PyExc_ValueError,
+                         "Items in samples_size_list should be an int "
+                         "object, with a value between 0 and %zu.", SIZE_MAX);
+            goto error;
+        }
+        /* Saturate so a wrapped total can never pass the size check below. */
+        sizes_sum = (chunk_sizes[i] > SIZE_MAX - sizes_sum)
+                        ? SIZE_MAX : sizes_sum + chunk_sizes[i];
+    }
+
+    if (sizes_sum != (size_t)Py_SIZE(samples_bytes)) {
+        PyErr_SetString(PyExc_ValueError,
+                        "The samples size list doesn't match the concatenation's size.");
+        goto error;
+    }
+
+    /* Allocate dict buffer */
+    dst_dict_bytes = PyBytes_FromStringAndSize(NULL, dict_size);
+    if (dst_dict_bytes == NULL) {
+        goto error;
+    }
+    /* Train the dictionary */
+    char *dst_dict_buffer = PyBytes_AS_STRING(dst_dict_bytes);
+    char *samples_buffer = PyBytes_AS_STRING(samples_bytes);
+    Py_BEGIN_ALLOW_THREADS
+    zstd_ret = ZDICT_trainFromBuffer(dst_dict_buffer, dict_size,
+                                     samples_buffer,
+                                     chunk_sizes, (uint32_t)chunks_number);
+    Py_END_ALLOW_THREADS
+    /* Check zstd dict error */
+    if (ZDICT_isError(zstd_ret)) {
+        _zstd_state* const mod_state = get_zstd_state(module);
+        set_zstd_error(mod_state, ERR_TRAIN_DICT, zstd_ret);
+        goto error;
+    }
+    /* Resize dict_buffer to the size actually produced by zstd */
+    if (_PyBytes_Resize(&dst_dict_bytes, zstd_ret) < 0) {
+        goto error;
+    }
+    goto success;
+
+error:
+    Py_CLEAR(dst_dict_bytes);
+success:
+    PyMem_Free(chunk_sizes);
+    return dst_dict_bytes;
+}
+
+/*[clinic input]
+_zstd._finalize_dict
+
+ custom_dict_bytes: PyBytesObject
+ Custom dictionary content.
+ samples_bytes: PyBytesObject
+ Concatenation of samples.
+ samples_size_list: object(subclass_of='&PyList_Type')
+ List of samples' sizes.
+ dict_size: Py_ssize_t
+ The size of the dictionary.
+ compression_level: int
+ Optimize for a specific zstd compression level, 0 means default.
+ /
+
+Internal function, finalize a zstd dictionary.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
+                          PyBytesObject *samples_bytes,
+                          PyObject *samples_size_list, Py_ssize_t dict_size,
+                          int compression_level)
+/*[clinic end generated code: output=9c2a7d8c845cee93 input=08531a803d87c56f]*/
+{
+    Py_ssize_t chunks_number;
+    size_t *chunk_sizes = NULL;
+    PyObject *dst_dict_bytes = NULL;
+    size_t zstd_ret;
+    ZDICT_params_t params;
+    size_t sizes_sum;
+    Py_ssize_t i;
+
+    /* Check arguments */
+    if (dict_size <= 0) {
+        PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number.");
+        return NULL;
+    }
+
+    chunks_number = Py_SIZE(samples_size_list);
+    if ((size_t) chunks_number > UINT32_MAX) {
+        PyErr_Format(PyExc_ValueError,
+                     "The number of samples should be <= %u.", UINT32_MAX);
+        return NULL;
+    }
+
+    /* Prepare chunk_sizes */
+    chunk_sizes = PyMem_New(size_t, chunks_number);
+    if (chunk_sizes == NULL) {
+        PyErr_NoMemory();
+        goto error;
+    }
+
+    sizes_sum = 0;
+    for (i = 0; i < chunks_number; i++) {
+        PyObject *size = PyList_GetItemRef(samples_size_list, i);
+        if (size == NULL) {
+            goto error;  /* index no longer valid: the list shrank */
+        }
+        chunk_sizes[i] = PyLong_AsSize_t(size);
+        Py_DECREF(size);
+        if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) {
+            PyErr_Format(PyExc_ValueError,
+                         "Items in samples_size_list should be an int "
+                         "object, with a value between 0 and %zu.", SIZE_MAX);
+            goto error;
+        }
+        /* Saturate so a wrapped total can never pass the size check below. */
+        sizes_sum = (chunk_sizes[i] > SIZE_MAX - sizes_sum)
+                        ? SIZE_MAX : sizes_sum + chunk_sizes[i];
+    }
+
+    if (sizes_sum != (size_t)Py_SIZE(samples_bytes)) {
+        PyErr_SetString(PyExc_ValueError,
+                        "The samples size list doesn't match the concatenation's size.");
+        goto error;
+    }
+
+    /* Allocate dict buffer */
+    dst_dict_bytes = PyBytes_FromStringAndSize(NULL, dict_size);
+    if (dst_dict_bytes == NULL) {
+        goto error;
+    }
+
+    /* Parameters.  Optimize for a specific compression level, 0 = default. */
+    params.compressionLevel = compression_level;
+    /* Write log to stderr, 0 = none. */
+    params.notificationLevel = 0;
+    /* Force dictID value, 0 means auto mode (32-bits random value). */
+    params.dictID = 0;
+
+    /* Finalize the dictionary */
+    Py_BEGIN_ALLOW_THREADS
+    zstd_ret = ZDICT_finalizeDictionary(
+                        PyBytes_AS_STRING(dst_dict_bytes), dict_size,
+                        PyBytes_AS_STRING(custom_dict_bytes), Py_SIZE(custom_dict_bytes),
+                        PyBytes_AS_STRING(samples_bytes), chunk_sizes,
+                        (uint32_t)chunks_number, params);
+    Py_END_ALLOW_THREADS
+    /* Check zstd dict error */
+    if (ZDICT_isError(zstd_ret)) {
+        _zstd_state* const mod_state = get_zstd_state(module);
+        set_zstd_error(mod_state, ERR_FINALIZE_DICT, zstd_ret);
+        goto error;
+    }
+    /* Resize dict_buffer to the size actually produced by zstd */
+    if (_PyBytes_Resize(&dst_dict_bytes, zstd_ret) < 0) {
+        goto error;
+    }
+    goto success;
+
+error:
+    Py_CLEAR(dst_dict_bytes);
+success:
+    PyMem_Free(chunk_sizes);
+    return dst_dict_bytes;
+}
+
+
+/*[clinic input]
+_zstd._get_param_bounds
+
+ is_compress: bool
+ True for CParameter, False for DParameter.
+ parameter: int
+ The parameter to get bounds.
+
+Internal function, get CParameter/DParameter bounds.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd__get_param_bounds_impl(PyObject *module, int is_compress,
+                             int parameter)
+/*[clinic end generated code: output=b751dc710f89ef55 input=fb21ff96aff65df1]*/
+{
+    ZSTD_bounds bound;
+    error_type failure;
+    /* Query the bounds for the requested family and note which error
+       kind to report if that query fails. */
+    if (is_compress) {
+        bound = ZSTD_cParam_getBounds(parameter);
+        failure = ERR_GET_C_BOUNDS;
+    }
+    else {
+        bound = ZSTD_dParam_getBounds(parameter);
+        failure = ERR_GET_D_BOUNDS;
+    }
+
+    if (ZSTD_isError(bound.error)) {
+        set_zstd_error(get_zstd_state(module), failure, bound.error);
+        return NULL;
+    }
+    /* Result is the tuple (lowerBound, upperBound). */
+    return Py_BuildValue("ii", bound.lowerBound, bound.upperBound);
+}
+
+/*[clinic input]
+_zstd.get_frame_size
+
+ frame_buffer: Py_buffer
+ A bytes-like object, it should start from the beginning of a frame,
+ and contains at least one complete frame.
+
+Get the size of a zstd frame, including frame header and 4-byte checksum if it has one.
+
+It will iterate all blocks' headers within a frame, to accumulate the frame size.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer)
+/*[clinic end generated code: output=a7384c2f8780f442 input=7d3ad24311893bf3]*/
+{
+    /* Either the compressed size of the frame or a zstd error code. */
+    const size_t frame_size = ZSTD_findFrameCompressedSize(
+        frame_buffer->buf, frame_buffer->len);
+
+    if (!ZSTD_isError(frame_size)) {
+        return PyLong_FromSize_t(frame_size);
+    }
+
+    PyErr_Format(get_zstd_state(module)->ZstdError,
+                 "Error when finding the compressed size of a zstd frame. "
+                 "Make sure the frame_buffer argument starts from the "
+                 "beginning of a frame, and its length not less than this "
+                 "complete frame. Zstd error message: %s.",
+                 ZSTD_getErrorName(frame_size));
+    return NULL;
+}
+
+/*[clinic input]
+_zstd._get_frame_info
+
+ frame_buffer: Py_buffer
+ A bytes-like object, containing the header of a zstd frame.
+
+Internal function, get zstd frame infomation from a frame header.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer)
+/*[clinic end generated code: output=5462855464ecdf81 input=67f1f8e4b7b89c4d]*/
+{
+    /* Decompressed size reported by zstd for this frame, or one of the
+       two sentinel values:
+         ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
+         ZSTD_CONTENTSIZE_ERROR   (0ULL - 2) */
+    const uint64_t content_size = ZSTD_getFrameContentSize(
+        frame_buffer->buf, frame_buffer->len);
+
+    if (content_size == ZSTD_CONTENTSIZE_ERROR) {
+        _zstd_state* const mod_state = get_zstd_state(module);
+        PyErr_SetString(mod_state->ZstdError,
+                        "Error when getting information from the header of "
+                        "a zstd frame. Make sure the frame_buffer argument "
+                        "starts from the beginning of a frame, and its length "
+                        "not less than the frame header (6~18 bytes).");
+        return NULL;
+    }
+
+    /* Dictionary ID as reported by ZSTD_getDictID_fromFrame(). */
+    const uint32_t dict_id = ZSTD_getDictID_fromFrame(frame_buffer->buf,
+                                                      frame_buffer->len);
+
+    /* Build the (decompressed_size, dict_id) tuple; the size becomes None
+       when zstd reports it as unknown. */
+    if (content_size != ZSTD_CONTENTSIZE_UNKNOWN) {
+        return Py_BuildValue("KI", content_size, dict_id);
+    }
+    return Py_BuildValue("OI", Py_None, dict_id);
+}
+
+/*[clinic input]
+_zstd._set_parameter_types
+
+ c_parameter_type: object(subclass_of='&PyType_Type')
+ CParameter IntEnum type object
+ d_parameter_type: object(subclass_of='&PyType_Type')
+ DParameter IntEnum type object
+
+Internal function, set CParameter/DParameter types for validity check.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd__set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type,
+                                PyObject *d_parameter_type)
+/*[clinic end generated code: output=a13d4890ccbd2873 input=3e7d0d37c3a1045a]*/
+{
+    _zstd_state* const mod_state = get_zstd_state(module);
+
+    if (!PyType_Check(c_parameter_type) || !PyType_Check(d_parameter_type)) {
+        PyErr_SetString(PyExc_ValueError,
+                        "The two arguments should be CParameter and "
+                        "DParameter types.");
+        return NULL;
+    }
+
+    /* Py_XSETREF stores the new pointer before releasing the old one, so
+       the module state never holds a dangling pointer while the previous
+       type is being released (which may run arbitrary code). */
+    Py_INCREF(c_parameter_type);
+    Py_XSETREF(mod_state->CParameter_type, (PyTypeObject*)c_parameter_type);
+    Py_INCREF(d_parameter_type);
+    Py_XSETREF(mod_state->DParameter_type, (PyTypeObject*)d_parameter_type);
+
+    Py_RETURN_NONE;
+}
+
+/* Module-level functions of _zstd, as generated by Argument Clinic. */
+static PyMethodDef _zstd_methods[] = {
+    _ZSTD__TRAIN_DICT_METHODDEF
+    _ZSTD__FINALIZE_DICT_METHODDEF
+    _ZSTD__GET_PARAM_BOUNDS_METHODDEF
+    _ZSTD_GET_FRAME_SIZE_METHODDEF
+    _ZSTD__GET_FRAME_INFO_METHODDEF
+    _ZSTD__SET_PARAMETER_TYPES_METHODDEF
+    {NULL, NULL, 0, NULL}  /* sentinel */
+};
+
+
+/* Table entry: export the zstd identifier `macro` under the name "_<macro>". */
+#define INT_CONSTANT(macro) {"_" #macro, macro}
+
+static int
+add_parameters(PyObject *module)
+{
+    /* When adding new parameters, also add them to cp_list/dp_list above. */
+    static const struct { const char *name; long value; } constants[] = {
+        /* Compression parameters */
+        INT_CONSTANT(ZSTD_c_compressionLevel),
+        INT_CONSTANT(ZSTD_c_windowLog),
+        INT_CONSTANT(ZSTD_c_hashLog),
+        INT_CONSTANT(ZSTD_c_chainLog),
+        INT_CONSTANT(ZSTD_c_searchLog),
+        INT_CONSTANT(ZSTD_c_minMatch),
+        INT_CONSTANT(ZSTD_c_targetLength),
+        INT_CONSTANT(ZSTD_c_strategy),
+        INT_CONSTANT(ZSTD_c_enableLongDistanceMatching),
+        INT_CONSTANT(ZSTD_c_ldmHashLog),
+        INT_CONSTANT(ZSTD_c_ldmMinMatch),
+        INT_CONSTANT(ZSTD_c_ldmBucketSizeLog),
+        INT_CONSTANT(ZSTD_c_ldmHashRateLog),
+        INT_CONSTANT(ZSTD_c_contentSizeFlag),
+        INT_CONSTANT(ZSTD_c_checksumFlag),
+        INT_CONSTANT(ZSTD_c_dictIDFlag),
+        INT_CONSTANT(ZSTD_c_nbWorkers),
+        INT_CONSTANT(ZSTD_c_jobSize),
+        INT_CONSTANT(ZSTD_c_overlapLog),
+        /* Decompression parameters */
+        INT_CONSTANT(ZSTD_d_windowLogMax),
+        /* ZSTD_strategy enum */
+        INT_CONSTANT(ZSTD_fast),
+        INT_CONSTANT(ZSTD_dfast),
+        INT_CONSTANT(ZSTD_greedy),
+        INT_CONSTANT(ZSTD_lazy),
+        INT_CONSTANT(ZSTD_lazy2),
+        INT_CONSTANT(ZSTD_btlazy2),
+        INT_CONSTANT(ZSTD_btopt),
+        INT_CONSTANT(ZSTD_btultra),
+        INT_CONSTANT(ZSTD_btultra2),
+    };
+
+    /* Stop at the first failure, leaving the exception set by the callee. */
+    for (size_t idx = 0; idx < Py_ARRAY_LENGTH(constants); idx++) {
+        if (PyModule_AddIntConstant(module, constants[idx].name,
+                                    constants[idx].value) < 0) {
+            return -1;
+        }
+    }
+    return 0;
+}
+#undef INT_CONSTANT
+
+static inline PyObject *
+get_zstd_version_info(void)
+{
+    /* The packed number is decoded as
+       major * 10000 + minor * 100 + release. */
+    const uint32_t packed = ZSTD_versionNumber();
+    const uint32_t release = packed % 100;
+    const uint32_t minor = (packed / 100) % 100;
+    const uint32_t major = packed / 10000;
+
+    return Py_BuildValue("III", major, minor, release);
+}
+
+static inline int
+add_vars_to_module(PyObject *module)
+{
+    /* Temporary for each value built below.  PyModule_AddObjectRef() takes
+       its own reference, so ours is released after every call. */
+    PyObject *value = NULL;
+    PyObject *use_mremap;
+    int default_level;
+
+    /* zstd_version, a str. */
+    if (PyModule_AddStringConstant(module, "zstd_version",
+                                   ZSTD_versionString()) < 0) {
+        return -1;
+    }
+
+    /* zstd_version_info, a tuple. */
+    value = get_zstd_version_info();
+    if (PyModule_AddObjectRef(module, "zstd_version_info", value) < 0) {
+        goto error;
+    }
+    Py_DECREF(value);
+
+    /* Add zstd parameters.  No temporary is owned at this point, so fail
+       directly instead of going through the error label. */
+    if (add_parameters(module) < 0) {
+        return -1;
+    }
+
+    /* _compressionLevel_values: (default, min, max)
+       ZSTD_defaultCLevel() was added in zstd v1.5.0 */
+#if ZSTD_VERSION_NUMBER < 10500
+    default_level = ZSTD_CLEVEL_DEFAULT;
+#else
+    default_level = ZSTD_defaultCLevel();
+#endif
+    value = Py_BuildValue("iii", default_level,
+                          ZSTD_minCLevel(), ZSTD_maxCLevel());
+    if (PyModule_AddObjectRef(module, "_compressionLevel_values", value) < 0) {
+        goto error;
+    }
+    Py_DECREF(value);
+
+    /* _ZSTD_CStreamSizes: (in size, out size) */
+    value = Py_BuildValue("II",
+                          (uint32_t)ZSTD_CStreamInSize(),
+                          (uint32_t)ZSTD_CStreamOutSize());
+    if (PyModule_AddObjectRef(module, "_ZSTD_CStreamSizes", value) < 0) {
+        goto error;
+    }
+    Py_DECREF(value);
+
+    /* _ZSTD_DStreamSizes: (in size, out size) */
+    value = Py_BuildValue("II",
+                          (uint32_t)ZSTD_DStreamInSize(),
+                          (uint32_t)ZSTD_DStreamOutSize());
+    if (PyModule_AddObjectRef(module, "_ZSTD_DStreamSizes", value) < 0) {
+        goto error;
+    }
+    Py_DECREF(value);
+
+    /* _ZSTD_CONFIG: (bit width of Py_ssize_t, "c", False, True,
+       True if HAVE_MREMAP is defined -- "use mremap output buffer") */
+#if defined(HAVE_MREMAP)
+    use_mremap = Py_True;
+#else
+    use_mremap = Py_False;
+#endif
+    value = Py_BuildValue("isOOO", 8*(int)sizeof(Py_ssize_t), "c",
+                          Py_False, Py_True, use_mremap);
+    if (PyModule_AddObjectRef(module, "_ZSTD_CONFIG", value) < 0) {
+        goto error;
+    }
+    Py_DECREF(value);
+
+    return 0;
+
+error:
+    /* value is NULL if building the object itself failed. */
+    Py_XDECREF(value);
+    return -1;
+}
+
+#define ADD_STR_TO_STATE_MACRO(STR) \
+ do { \
+ mod_state->str_##STR = PyUnicode_FromString(#STR); \
+ if (mod_state->str_##STR == NULL) { \
+ return -1; \
+ } \
+ } while(0)
+
+static inline int
+add_type_to_module(PyObject *module, const char *name,
+                   PyType_Spec *type_spec, PyTypeObject **dest)
+{
+    /* New reference, or NULL with an exception set; in the latter case
+       PyModule_AddObjectRef() fails and we bail out below. */
+    PyObject *new_type = PyType_FromModuleAndSpec(module, type_spec, NULL);
+    if (PyModule_AddObjectRef(module, name, new_type) >= 0) {
+        /* The module holds its own reference; ours is handed to *dest. */
+        *dest = (PyTypeObject *)new_type;
+        return 0;
+    }
+    Py_XDECREF(new_type);
+    return -1;
+}
+
+static inline int
+add_constant_to_type(PyTypeObject *type, const char *name, long value)
+{
+    /* Set attribute `name` on the type to a Python int holding `value`.
+       Returns the PyObject_SetAttrString() result, or -1 if boxing fails. */
+    PyObject *boxed = PyLong_FromLong(value);
+    int status = -1;
+
+    if (boxed != NULL) {
+        status = PyObject_SetAttrString((PyObject *)type, name, boxed);
+        Py_DECREF(boxed);
+    }
+    return status;
+}
+
+/* Py_mod_exec slot: fill in the module state and the module namespace.
+   Return 0 on success, -1 with an exception set on failure.  References
+   stored in the module state are released later by _zstd_clear(). */
+static int
+_zstd_exec(PyObject *module)
+{
+    _zstd_state* const mod_state = get_zstd_state(module);
+
+    /* Reusable objects & variables */
+    mod_state->empty_bytes = PyBytes_FromStringAndSize(NULL, 0);
+    if (mod_state->empty_bytes == NULL) {
+        return -1;
+    }
+
+    mod_state->empty_readonly_memoryview =
+                PyMemoryView_FromMemory((char*)mod_state, 0, PyBUF_READ);
+    if (mod_state->empty_readonly_memoryview == NULL) {
+        return -1;
+    }
+
+    /* Add str to module state */
+    ADD_STR_TO_STATE_MACRO(read);
+    ADD_STR_TO_STATE_MACRO(readinto);
+    ADD_STR_TO_STATE_MACRO(write);
+    ADD_STR_TO_STATE_MACRO(flush);
+
+    /* Filled in later by _set_parameter_types(). */
+    mod_state->CParameter_type = NULL;
+    mod_state->DParameter_type = NULL;
+
+    /* Add variables to module */
+    if (add_vars_to_module(module) < 0) {
+        return -1;
+    }
+
+    /* ZstdError */
+    mod_state->ZstdError = PyErr_NewExceptionWithDoc(
+                                  "_zstd.ZstdError",
+                                  "Call to the underlying zstd library failed.",
+                                  NULL, NULL);
+    if (mod_state->ZstdError == NULL) {
+        return -1;
+    }
+
+    /* Do not Py_DECREF the exception on failure: the pointer stays in the
+       module state, so releasing it here as well as in _zstd_clear() would
+       drop the same reference twice. */
+    if (PyModule_AddObjectRef(module, "ZstdError", mod_state->ZstdError) < 0) {
+        return -1;
+    }
+
+    /* ZstdDict */
+    if (add_type_to_module(module, "ZstdDict", &zstddict_type_spec,
+                           &mod_state->ZstdDict_type) < 0) {
+        return -1;
+    }
+
+    /* ZstdCompressor */
+    if (add_type_to_module(module, "ZstdCompressor", &zstdcompressor_type_spec,
+                           &mod_state->ZstdCompressor_type) < 0) {
+        return -1;
+    }
+
+    /* Add EndDirective enum to ZstdCompressor */
+    if (add_constant_to_type(mod_state->ZstdCompressor_type,
+                             "CONTINUE", ZSTD_e_continue) < 0) {
+        return -1;
+    }
+
+    if (add_constant_to_type(mod_state->ZstdCompressor_type,
+                             "FLUSH_BLOCK", ZSTD_e_flush) < 0) {
+        return -1;
+    }
+
+    if (add_constant_to_type(mod_state->ZstdCompressor_type,
+                             "FLUSH_FRAME", ZSTD_e_end) < 0) {
+        return -1;
+    }
+
+    /* ZstdDecompressor */
+    if (add_type_to_module(module, "ZstdDecompressor",
+                           &ZstdDecompressor_type_spec,
+                           &mod_state->ZstdDecompressor_type) < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/* m_traverse: report every object owned by the module state to the GC. */
+static int
+_zstd_traverse(PyObject *module, visitproc visit, void *arg)
+{
+    _zstd_state* const state = get_zstd_state(module);
+    Py_VISIT(state->empty_bytes);
+    Py_VISIT(state->empty_readonly_memoryview);
+    Py_VISIT(state->str_read);
+    Py_VISIT(state->str_readinto);
+    Py_VISIT(state->str_write);
+    Py_VISIT(state->str_flush);
+
+    /* Types and exception created in _zstd_exec() */
+    Py_VISIT(state->ZstdDict_type);
+    Py_VISIT(state->ZstdCompressor_type);
+    Py_VISIT(state->ZstdDecompressor_type);
+    Py_VISIT(state->ZstdError);
+
+    /* Types registered through _set_parameter_types() */
+    Py_VISIT(state->CParameter_type);
+    Py_VISIT(state->DParameter_type);
+    return 0;
+}
+
+/* m_clear: release every reference held by the module state. */
+static int
+_zstd_clear(PyObject *module)
+{
+    _zstd_state* const state = get_zstd_state(module);
+    Py_CLEAR(state->empty_bytes);
+    Py_CLEAR(state->empty_readonly_memoryview);
+    Py_CLEAR(state->str_read);
+    Py_CLEAR(state->str_readinto);
+    Py_CLEAR(state->str_write);
+    Py_CLEAR(state->str_flush);
+
+    /* Types and exception created in _zstd_exec() */
+    Py_CLEAR(state->ZstdDict_type);
+    Py_CLEAR(state->ZstdCompressor_type);
+    Py_CLEAR(state->ZstdDecompressor_type);
+    Py_CLEAR(state->ZstdError);
+
+    /* Types registered through _set_parameter_types() */
+    Py_CLEAR(state->CParameter_type);
+    Py_CLEAR(state->DParameter_type);
+    return 0;
+}
+
+static void
+_zstd_free(void *module)
+{
+ (void)_zstd_clear((PyObject *)module); /* m_free: reuse m_clear to drop state refs */
+}
+
+static struct PyModuleDef_Slot _zstd_slots[] = {
+    {Py_mod_exec, _zstd_exec},
+    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
+    /* Sentinel */
+    {0, NULL}
+};
+
+struct PyModuleDef _zstdmodule = {   /* declared extern in _zstdmodule.h */
+    .m_base = PyModuleDef_HEAD_INIT,
+    .m_name = "_zstd",
+    .m_size = sizeof(_zstd_state),
+    .m_methods = _zstd_methods,
+    .m_slots = _zstd_slots,
+    .m_traverse = _zstd_traverse,
+    .m_clear = _zstd_clear,
+    .m_free = _zstd_free,
+};
+
+PyMODINIT_FUNC
+PyInit__zstd(void)
+{
+ return PyModuleDef_Init(&_zstdmodule); /* multi-phase init; see _zstd_slots */
+}
diff --git a/Modules/_zstd/_zstdmodule.h b/Modules/_zstd/_zstdmodule.h
new file mode 100644
index 00000000000000..d50f1489e6f574
--- /dev/null
+++ b/Modules/_zstd/_zstdmodule.h
@@ -0,0 +1,204 @@
+#pragma once
+/*
+Low level interface to Meta's zstd library for use in the compression.zstd
+Python module.
+*/
+
+/* Declarations shared between different parts of the _zstd module. */
+
+#include "Python.h"
+
+#include "zstd.h"
+#include "zdict.h"
+
+
+// if you update the minimum version, you should update the compile
+// check in configure.ac
+#define PYTHON_MINIMUM_SUPPORTED_ZSTD_VERSION 10405
+
+#if ZSTD_VERSION_NUMBER < PYTHON_MINIMUM_SUPPORTED_ZSTD_VERSION
+ #error "_zstd module requires zstd v1.4.5+"
+#endif
+
+/* Forward declaration of module state */
+typedef struct _zstd_state _zstd_state;
+
+/* Forward reference of module def */
+extern PyModuleDef _zstdmodule;
+
+/* For clinic type calculations */
+static inline _zstd_state *
+get_zstd_state_from_type(PyTypeObject *type) {
+ PyObject *module = PyType_GetModuleByDef(type, &_zstdmodule);
+ if (module == NULL) {
+ return NULL;
+ }
+ void *state = PyModule_GetState(module);
+ assert(state != NULL);
+ return (_zstd_state *)state;
+}
+
+extern PyType_Spec zstddict_type_spec;
+extern PyType_Spec zstdcompressor_type_spec;
+extern PyType_Spec ZstdDecompressor_type_spec;
+
+struct _zstd_state { /* per-module state, initialized by _zstd_exec() */
+ PyObject *empty_bytes; /* zero-length bytes object */
+ PyObject *empty_readonly_memoryview; /* zero-length read-only memoryview */
+ PyObject *str_read; /* str "read" */
+ PyObject *str_readinto; /* str "readinto" */
+ PyObject *str_write; /* str "write" */
+ PyObject *str_flush; /* str "flush" */
+ /* Types and exception added to the module by _zstd_exec() */
+ PyTypeObject *ZstdDict_type;
+ PyTypeObject *ZstdCompressor_type;
+ PyTypeObject *ZstdDecompressor_type;
+ PyObject *ZstdError;
+ /* NULL until set through _zstd._set_parameter_types() */
+ PyTypeObject *CParameter_type;
+ PyTypeObject *DParameter_type;
+};
+
+typedef struct {
+ PyObject_HEAD
+
+ /* Reusable compression/decompression dictionaries. They are created
+ once and can be shared by multiple threads concurrently, since they
+ are only read.
+ c_dicts is a dict, int(compressionLevel):PyCapsule(ZSTD_CDict*) */
+ ZSTD_DDict *d_dict;
+ PyObject *c_dicts;
+
+ /* Content of the dictionary, a bytes object. */
+ PyObject *dict_content;
+ /* Dictionary ID */
+ uint32_t dict_id;
+
+ /* Whether __init__ has been called, 0 or 1. */
+ int inited;
+} ZstdDict;
+
+typedef struct {
+ PyObject_HEAD
+
+ /* Compression context */
+ ZSTD_CCtx *cctx;
+
+ /* ZstdDict object in use */
+ PyObject *dict;
+
+ /* Last mode, initialized to ZSTD_e_end */
+ int last_mode;
+
+ /* 1 if nbWorkers >= 1, otherwise 0 */
+ int use_multithread;
+
+ /* Compression level */
+ int compression_level;
+
+ /* Whether __init__ has been called, 0 or 1. */
+ int inited;
+} ZstdCompressor;
+
+typedef struct {
+ PyObject_HEAD
+
+ /* Decompression context */
+ ZSTD_DCtx *dctx;
+
+ /* ZstdDict object in use */
+ PyObject *dict;
+
+ /* Unconsumed input data */
+ char *input_buffer;
+ size_t input_buffer_size;
+ size_t in_begin, in_end;
+
+ /* Unused data */
+ PyObject *unused_data;
+
+ /* 0 if the decompressor has (or may have) unconsumed input data; 0 or 1. */
+ char needs_input;
+
+ /* For decompress(), 0 or 1.
+ 1 when both the input and output streams are at a frame edge, meaning
+ a frame has been completely decoded and fully flushed, or the
+ decompressor has just been initialized. */
+ char at_frame_edge;
+
+ /* For ZstdDecompressor, 0 or 1.
+ 1 means the end of the first frame has been reached. */
+ char eof;
+
+ /* Used for fast reset of the three variables above */
+ char _unused_char_for_align;
+
+ /* Whether __init__ has been called, 0 or 1. */
+ int inited;
+} ZstdDecompressor;
+
+typedef enum {
+ TYPE_DECOMPRESSOR, // ZstdDecompressor class
+ TYPE_ENDLESS_DECOMPRESSOR, // decompress() function
+} decompress_type;
+
+typedef enum { /* the `type` argument of set_zstd_error() */
+ ERR_DECOMPRESS,
+ ERR_COMPRESS,
+ ERR_SET_PLEDGED_INPUT_SIZE,
+
+ ERR_LOAD_D_DICT,
+ ERR_LOAD_C_DICT,
+
+ ERR_GET_C_BOUNDS,
+ ERR_GET_D_BOUNDS,
+ ERR_SET_C_LEVEL,
+
+ ERR_TRAIN_DICT,
+ ERR_FINALIZE_DICT
+} error_type;
+
+typedef enum { /* NOTE(review): presumably how a ZstdDict is loaded -- confirm in callers */
+ DICT_TYPE_DIGESTED = 0,
+ DICT_TYPE_UNDIGESTED = 1,
+ DICT_TYPE_PREFIX = 2
+} dictionary_type;
+
+static inline int   /* 1 when all input is consumed and output is not full */
+mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out) {
+    return (in->pos == in->size) && (out->pos != out->size);
+}
+
+/* Format error message and set ZstdError. */
+extern void
+set_zstd_error(const _zstd_state* const state,
+ const error_type type, size_t zstd_ret);
+
+extern void
+set_parameter_error(const _zstd_state* const state, int is_compress,
+ int key_v, int value_v);
+
+static const char init_twice_msg[] = "__init__ method is called twice.";
+
+extern int
+_PyZstd_load_c_dict(ZstdCompressor *self, PyObject *dict);
+
+extern int
+_PyZstd_load_d_dict(ZstdDecompressor *self, PyObject *dict);
+
+extern int
+_PyZstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options,
+ const char *arg_name, const char *arg_type);
+
+extern int
+_PyZstd_set_d_parameters(ZstdDecompressor *self, PyObject *options);
+
+extern PyObject *
+decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
+ Py_ssize_t max_length,
+ Py_ssize_t initial_size,
+ decompress_type type);
+
+extern PyObject *
+compress_impl(ZstdCompressor *self, Py_buffer *data,
+ ZSTD_EndDirective end_directive);
diff --git a/Modules/_zstd/buffer.h b/Modules/_zstd/buffer.h
new file mode 100644
index 00000000000000..319b1214833fcf
--- /dev/null
+++ b/Modules/_zstd/buffer.h
@@ -0,0 +1,105 @@
+#pragma once
+/*
+Low level interface to Meta's zstd library for use in the compression.zstd
+Python module.
+*/
+
+#include "_zstdmodule.h"
+#include "pycore_blocks_output_buffer.h"
+
+/* Blocks output buffer wrapper code */
+
+/* Initialize the buffer and grow it; ob->dst, ob->size and ob->pos are set.
+   Return 0 on success
+   Return -1 on failure */
+static inline int
+_OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob,
+                          Py_ssize_t max_length)
+{
+    /* Ensure .list was set to NULL */
+    assert(buffer->list == NULL);
+    const Py_ssize_t block_len =
+        _BlocksOutputBuffer_InitAndGrow(buffer, max_length, &ob->dst);
+    if (block_len >= 0) {
+        ob->size = (size_t)block_len;
+        ob->pos = 0;
+        return 0;
+    }
+    return -1;
+}
+
+/* Initialize the buffer, with an initial size.
+   init_size: the initial size; max_length is requested instead when
+              max_length is non-negative and smaller than init_size.
+   Return 0 on success
+   Return -1 on failure */
+static inline int
+_OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob,
+                           Py_ssize_t max_length,
+                           Py_ssize_t init_size)
+{
+    /* Ensure .list was set to NULL */
+    assert(buffer->list == NULL);
+
+    /* Get block size */
+    const int capped = (max_length >= 0 && max_length < init_size);
+    const Py_ssize_t first_block = capped ? max_length : init_size;
+
+    const Py_ssize_t block_len =
+        _BlocksOutputBuffer_InitWithSize(buffer, first_block, &ob->dst);
+    if (block_len < 0) {
+        return -1;
+    }
+
+    /* Set max_length, InitWithSize doesn't do this */
+    buffer->max_length = max_length;
+
+    /* Start writing at the beginning of the returned space */
+    ob->size = (size_t)block_len;
+    ob->pos = 0;
+
+    return 0;
+}
+
+/* Grow the buffer; the current output space must be completely used.
+   Return 0 on success
+   Return -1 on failure */
+static inline int
+_OutputBuffer_Grow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob)
+{
+    assert(ob->pos == ob->size);
+    const Py_ssize_t block_len = _BlocksOutputBuffer_Grow(buffer, &ob->dst, 0);
+    if (block_len >= 0) {
+        ob->size = (size_t)block_len;
+        ob->pos = 0;
+        return 0;
+    }
+    return -1;
+}
+
+/* Finish the buffer, passing along how much of ob is still unwritten.
+   Return a bytes object on success, NULL on failure */
+static inline PyObject *
+_OutputBuffer_Finish(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob)
+{
+    const size_t unwritten = ob->size - ob->pos;
+    return _BlocksOutputBuffer_Finish(buffer, unwritten);
+}
+
+/* Clean up the buffer on error; forwards to _BlocksOutputBuffer_OnError(). */
+static inline void
+_OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
+{
+ _BlocksOutputBuffer_OnError(buffer);
+}
+
+/* Whether the output data has reached max_length.
+   avail_out must be 0; check that before calling. */
+static inline int
+_OutputBuffer_ReachedMaxLength(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob)
+{
+    /* Ensure (data size == allocated size) */
+    assert(ob->size == ob->pos);
+
+    return buffer->max_length == buffer->allocated;
+}
diff --git a/Modules/_zstd/clinic/_zstdmodule.c.h b/Modules/_zstd/clinic/_zstdmodule.c.h
new file mode 100644
index 00000000000000..4b78bded67bca7
--- /dev/null
+++ b/Modules/_zstd/clinic/_zstdmodule.c.h
@@ -0,0 +1,432 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+# include "pycore_gc.h" // PyGC_Head
+# include "pycore_runtime.h" // _Py_ID()
+#endif
+#include "pycore_abstract.h" // _PyNumber_Index()
+#include "pycore_modsupport.h" // _PyArg_CheckPositional()
+
+PyDoc_STRVAR(_zstd__train_dict__doc__,
+"_train_dict($module, samples_bytes, samples_size_list, dict_size, /)\n"
+"--\n"
+"\n"
+"Internal function, train a zstd dictionary on sample data.\n"
+"\n"
+" samples_bytes\n"
+" Concatenation of samples.\n"
+" samples_size_list\n"
+" List of samples\' sizes.\n"
+" dict_size\n"
+" The size of the dictionary.");
+
+#define _ZSTD__TRAIN_DICT_METHODDEF \
+ {"_train_dict", _PyCFunction_CAST(_zstd__train_dict), METH_FASTCALL, _zstd__train_dict__doc__},
+
+static PyObject *
+_zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes,
+ PyObject *samples_size_list, Py_ssize_t dict_size);
+
+static PyObject *
+_zstd__train_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
+{
+ PyObject *return_value = NULL;
+ PyBytesObject *samples_bytes;
+ PyObject *samples_size_list;
+ Py_ssize_t dict_size;
+
+ if (!_PyArg_CheckPositional("_train_dict", nargs, 3, 3)) {
+ goto exit;
+ }
+ if (!PyBytes_Check(args[0])) {
+ _PyArg_BadArgument("_train_dict", "argument 1", "bytes", args[0]);
+ goto exit;
+ }
+ samples_bytes = (PyBytesObject *)args[0];
+ if (!PyList_Check(args[1])) {
+ _PyArg_BadArgument("_train_dict", "argument 2", "list", args[1]);
+ goto exit;
+ }
+ samples_size_list = args[1];
+ {
+ Py_ssize_t ival = -1;
+ PyObject *iobj = _PyNumber_Index(args[2]);
+ if (iobj != NULL) {
+ ival = PyLong_AsSsize_t(iobj);
+ Py_DECREF(iobj);
+ }
+ if (ival == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+ dict_size = ival;
+ }
+ return_value = _zstd__train_dict_impl(module, samples_bytes, samples_size_list, dict_size);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd__finalize_dict__doc__,
+"_finalize_dict($module, custom_dict_bytes, samples_bytes,\n"
+" samples_size_list, dict_size, compression_level, /)\n"
+"--\n"
+"\n"
+"Internal function, finalize a zstd dictionary.\n"
+"\n"
+" custom_dict_bytes\n"
+" Custom dictionary content.\n"
+" samples_bytes\n"
+" Concatenation of samples.\n"
+" samples_size_list\n"
+" List of samples\' sizes.\n"
+" dict_size\n"
+" The size of the dictionary.\n"
+" compression_level\n"
+" Optimize for a specific zstd compression level, 0 means default.");
+
+#define _ZSTD__FINALIZE_DICT_METHODDEF \
+ {"_finalize_dict", _PyCFunction_CAST(_zstd__finalize_dict), METH_FASTCALL, _zstd__finalize_dict__doc__},
+
+static PyObject *
+_zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes,
+ PyBytesObject *samples_bytes,
+ PyObject *samples_size_list, Py_ssize_t dict_size,
+ int compression_level);
+
+static PyObject *
+_zstd__finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
+{
+ PyObject *return_value = NULL;
+ PyBytesObject *custom_dict_bytes;
+ PyBytesObject *samples_bytes;
+ PyObject *samples_size_list;
+ Py_ssize_t dict_size;
+ int compression_level;
+
+ if (!_PyArg_CheckPositional("_finalize_dict", nargs, 5, 5)) {
+ goto exit;
+ }
+ if (!PyBytes_Check(args[0])) {
+ _PyArg_BadArgument("_finalize_dict", "argument 1", "bytes", args[0]);
+ goto exit;
+ }
+ custom_dict_bytes = (PyBytesObject *)args[0];
+ if (!PyBytes_Check(args[1])) {
+ _PyArg_BadArgument("_finalize_dict", "argument 2", "bytes", args[1]);
+ goto exit;
+ }
+ samples_bytes = (PyBytesObject *)args[1];
+ if (!PyList_Check(args[2])) {
+ _PyArg_BadArgument("_finalize_dict", "argument 3", "list", args[2]);
+ goto exit;
+ }
+ samples_size_list = args[2];
+ {
+ Py_ssize_t ival = -1;
+ PyObject *iobj = _PyNumber_Index(args[3]);
+ if (iobj != NULL) {
+ ival = PyLong_AsSsize_t(iobj);
+ Py_DECREF(iobj);
+ }
+ if (ival == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+ dict_size = ival;
+ }
+ compression_level = PyLong_AsInt(args[4]);
+ if (compression_level == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+ return_value = _zstd__finalize_dict_impl(module, custom_dict_bytes, samples_bytes, samples_size_list, dict_size, compression_level);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd__get_param_bounds__doc__,
+"_get_param_bounds($module, /, is_compress, parameter)\n"
+"--\n"
+"\n"
+"Internal function, get CParameter/DParameter bounds.\n"
+"\n"
+" is_compress\n"
+" True for CParameter, False for DParameter.\n"
+" parameter\n"
+" The parameter to get bounds.");
+
+#define _ZSTD__GET_PARAM_BOUNDS_METHODDEF \
+ {"_get_param_bounds", _PyCFunction_CAST(_zstd__get_param_bounds), METH_FASTCALL|METH_KEYWORDS, _zstd__get_param_bounds__doc__},
+
+static PyObject *
+_zstd__get_param_bounds_impl(PyObject *module, int is_compress,
+ int parameter);
+
+static PyObject *
+_zstd__get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(is_compress), &_Py_ID(parameter), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"is_compress", "parameter", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "_get_param_bounds",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ int is_compress;
+ int parameter;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 2, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ is_compress = PyObject_IsTrue(args[0]);
+ if (is_compress < 0) {
+ goto exit;
+ }
+ parameter = PyLong_AsInt(args[1]);
+ if (parameter == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+ return_value = _zstd__get_param_bounds_impl(module, is_compress, parameter);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_get_frame_size__doc__,
+"get_frame_size($module, /, frame_buffer)\n"
+"--\n"
+"\n"
+"Get the size of a zstd frame, including frame header and 4-byte checksum if it has one.\n"
+"\n"
+" frame_buffer\n"
+" A bytes-like object, it should start from the beginning of a frame,\n"
+" and contains at least one complete frame.\n"
+"\n"
+"It will iterate all blocks\' headers within a frame, to accumulate the frame size.");
+
+#define _ZSTD_GET_FRAME_SIZE_METHODDEF \
+ {"get_frame_size", _PyCFunction_CAST(_zstd_get_frame_size), METH_FASTCALL|METH_KEYWORDS, _zstd_get_frame_size__doc__},
+
+static PyObject *
+_zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer);
+
+static PyObject *
+_zstd_get_frame_size(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 1
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(frame_buffer), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"frame_buffer", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "get_frame_size",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[1];
+ Py_buffer frame_buffer = {NULL, NULL};
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (PyObject_GetBuffer(args[0], &frame_buffer, PyBUF_SIMPLE) != 0) {
+ goto exit;
+ }
+ return_value = _zstd_get_frame_size_impl(module, &frame_buffer);
+
+exit:
+ /* Cleanup for frame_buffer */
+ if (frame_buffer.obj) {
+ PyBuffer_Release(&frame_buffer);
+ }
+
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd__get_frame_info__doc__,
+"_get_frame_info($module, /, frame_buffer)\n"
+"--\n"
+"\n"
+"Internal function, get zstd frame infomation from a frame header.\n"
+"\n"
+" frame_buffer\n"
+" A bytes-like object, containing the header of a zstd frame.");
+
+#define _ZSTD__GET_FRAME_INFO_METHODDEF \
+ {"_get_frame_info", _PyCFunction_CAST(_zstd__get_frame_info), METH_FASTCALL|METH_KEYWORDS, _zstd__get_frame_info__doc__},
+
+static PyObject *
+_zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer);
+
+static PyObject *
+_zstd__get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 1
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(frame_buffer), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"frame_buffer", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "_get_frame_info",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[1];
+ Py_buffer frame_buffer = {NULL, NULL};
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (PyObject_GetBuffer(args[0], &frame_buffer, PyBUF_SIMPLE) != 0) {
+ goto exit;
+ }
+ return_value = _zstd__get_frame_info_impl(module, &frame_buffer);
+
+exit:
+ /* Cleanup for frame_buffer */
+ if (frame_buffer.obj) {
+ PyBuffer_Release(&frame_buffer);
+ }
+
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd__set_parameter_types__doc__,
+"_set_parameter_types($module, /, c_parameter_type, d_parameter_type)\n"
+"--\n"
+"\n"
+"Internal function, set CParameter/DParameter types for validity check.\n"
+"\n"
+" c_parameter_type\n"
+" CParameter IntEnum type object\n"
+" d_parameter_type\n"
+" DParameter IntEnum type object");
+
+#define _ZSTD__SET_PARAMETER_TYPES_METHODDEF \
+ {"_set_parameter_types", _PyCFunction_CAST(_zstd__set_parameter_types), METH_FASTCALL|METH_KEYWORDS, _zstd__set_parameter_types__doc__},
+
+static PyObject *
+_zstd__set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type,
+ PyObject *d_parameter_type);
+
+static PyObject *
+_zstd__set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(c_parameter_type), &_Py_ID(d_parameter_type), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"c_parameter_type", "d_parameter_type", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "_set_parameter_types",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ PyObject *c_parameter_type;
+ PyObject *d_parameter_type;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 2, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (!PyObject_TypeCheck(args[0], &PyType_Type)) {
+ _PyArg_BadArgument("_set_parameter_types", "argument 'c_parameter_type'", (&PyType_Type)->tp_name, args[0]);
+ goto exit;
+ }
+ c_parameter_type = args[0];
+ if (!PyObject_TypeCheck(args[1], &PyType_Type)) {
+ _PyArg_BadArgument("_set_parameter_types", "argument 'd_parameter_type'", (&PyType_Type)->tp_name, args[1]);
+ goto exit;
+ }
+ d_parameter_type = args[1];
+ return_value = _zstd__set_parameter_types_impl(module, c_parameter_type, d_parameter_type);
+
+exit:
+ return return_value;
+}
+/*[clinic end generated code: output=077c8ea2b11fb188 input=a9049054013a1b77]*/
diff --git a/Modules/_zstd/clinic/compressor.c.h b/Modules/_zstd/clinic/compressor.c.h
new file mode 100644
index 00000000000000..d7909cdf89fcd1
--- /dev/null
+++ b/Modules/_zstd/clinic/compressor.c.h
@@ -0,0 +1,255 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+# include "pycore_gc.h" // PyGC_Head
+# include "pycore_runtime.h" // _Py_ID()
+#endif
+#include "pycore_modsupport.h" // _PyArg_UnpackKeywords()
+
+PyDoc_STRVAR(_zstd_ZstdCompressor___init____doc__,
+"ZstdCompressor(level=None, options=None, zstd_dict=None)\n"
+"--\n"
+"\n"
+"Create a compressor object for compressing data incrementally.\n"
+"\n"
+" level\n"
+" The compression level to use, defaults to ZSTD_CLEVEL_DEFAULT.\n"
+" options\n"
+" A dict object that contains advanced compression parameters.\n"
+" zstd_dict\n"
+" A ZstdDict object, a pre-trained zstd dictionary.\n"
+"\n"
+"Thread-safe at method level. For one-shot compression, use the compress()\n"
+"function instead.");
+
+static int
+_zstd_ZstdCompressor___init___impl(ZstdCompressor *self, PyObject *level,
+ PyObject *options, PyObject *zstd_dict);
+
+/* Argument-parsing wrapper for ZstdCompressor(level=None, options=None,
+ * zstd_dict=None).
+ *
+ * Unpacks up to three optional arguments from the args tuple / kwargs dict,
+ * leaving every argument that was not supplied at Py_None, and forwards them
+ * to _zstd_ZstdCompressor___init___impl(). Returns the impl's result, or -1
+ * when unpacking fails. */
+static int
+_zstd_ZstdCompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+ int return_value = -1;
+ /* Core (non-module) builds hand the parser a statically initialised tuple
+ * of the keyword names; every other build passes NULL instead. */
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 3
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(level), &_Py_ID(options), &_Py_ID(zstd_dict), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"level", "options", "zstd_dict", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "ZstdCompressor",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[3];
+ PyObject * const *fastargs;
+ Py_ssize_t nargs = PyTuple_GET_SIZE(args);
+ /* Total number of arguments supplied (there are no required ones). */
+ Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0;
+ PyObject *level = Py_None;
+ PyObject *options = Py_None;
+ PyObject *zstd_dict = Py_None;
+
+ fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser,
+ /*minpos*/ 0, /*maxpos*/ 3, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!fastargs) {
+ goto exit;
+ }
+ if (!noptargs) {
+ goto skip_optional_pos;
+ }
+ /* Consume the optional arguments in order; noptargs counts how many
+ * supplied arguments are still unaccounted for. */
+ if (fastargs[0]) {
+ level = fastargs[0];
+ if (!--noptargs) {
+ goto skip_optional_pos;
+ }
+ }
+ if (fastargs[1]) {
+ options = fastargs[1];
+ if (!--noptargs) {
+ goto skip_optional_pos;
+ }
+ }
+ /* noptargs is still non-zero here, so the remaining argument is zstd_dict. */
+ zstd_dict = fastargs[2];
+skip_optional_pos:
+ return_value = _zstd_ZstdCompressor___init___impl((ZstdCompressor *)self, level, options, zstd_dict);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_ZstdCompressor_compress__doc__,
+"compress($self, /, data, mode=ZstdCompressor.CONTINUE)\n"
+"--\n"
+"\n"
+"Provide data to the compressor object.\n"
+"\n"
+" mode\n"
+" Can be these 3 values ZstdCompressor.CONTINUE,\n"
+" ZstdCompressor.FLUSH_BLOCK, ZstdCompressor.FLUSH_FRAME\n"
+"\n"
+"Return a chunk of compressed data if possible, or b\'\' otherwise. When you have\n"
+"finished providing data to the compressor, call the flush() method to finish\n"
+"the compression process.");
+
+#define _ZSTD_ZSTDCOMPRESSOR_COMPRESS_METHODDEF \
+ {"compress", _PyCFunction_CAST(_zstd_ZstdCompressor_compress), METH_FASTCALL|METH_KEYWORDS, _zstd_ZstdCompressor_compress__doc__},
+
+static PyObject *
+_zstd_ZstdCompressor_compress_impl(ZstdCompressor *self, Py_buffer *data,
+ int mode);
+
+/* Argument-parsing wrapper for ZstdCompressor.compress(data, mode=...).
+ *
+ * Acquires a simple buffer view of `data`, converts the optional `mode` to a
+ * C int (defaulting to ZSTD_e_continue), and calls
+ * _zstd_ZstdCompressor_compress_impl(). The buffer view is released on every
+ * path before returning. Returns the impl's result, or NULL when unpacking or
+ * a conversion fails. */
+static PyObject *
+_zstd_ZstdCompressor_compress(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ /* Core (non-module) builds hand the parser a statically initialised tuple
+ * of the keyword names; every other build passes NULL instead. */
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(data), &_Py_ID(mode), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"data", "mode", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "compress",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ /* Number of supplied arguments beyond the one required argument (data). */
+ Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
+ /* data.obj starts out NULL so the cleanup code can tell whether a buffer
+ * view was actually acquired. */
+ Py_buffer data = {NULL, NULL};
+ int mode = ZSTD_e_continue;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) {
+ goto exit;
+ }
+ if (!noptargs) {
+ goto skip_optional_pos;
+ }
+ mode = PyLong_AsInt(args[1]);
+ /* -1 is only a failure when an exception is also pending. */
+ if (mode == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+skip_optional_pos:
+ return_value = _zstd_ZstdCompressor_compress_impl((ZstdCompressor *)self, &data, mode);
+
+exit:
+ /* Cleanup for data */
+ if (data.obj) {
+ PyBuffer_Release(&data);
+ }
+
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_ZstdCompressor_flush__doc__,
+"flush($self, /, mode=ZstdCompressor.FLUSH_FRAME)\n"
+"--\n"
+"\n"
+"Finish the compression process.\n"
+"\n"
+" mode\n"
+" Can be these 2 values ZstdCompressor.FLUSH_FRAME,\n"
+" ZstdCompressor.FLUSH_BLOCK\n"
+"\n"
+"Flush any remaining data left in internal buffers. Since zstd data consists\n"
+"of one or more independent frames, the compressor object can still be used\n"
+"after this method is called.");
+
+#define _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF \
+ {"flush", _PyCFunction_CAST(_zstd_ZstdCompressor_flush), METH_FASTCALL|METH_KEYWORDS, _zstd_ZstdCompressor_flush__doc__},
+
+static PyObject *
+_zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode);
+
+/* Argument-parsing wrapper for ZstdCompressor.flush(mode=...).
+ *
+ * Converts the optional `mode` argument to a C int (defaulting to ZSTD_e_end)
+ * and calls _zstd_ZstdCompressor_flush_impl(). Returns the impl's result, or
+ * NULL when unpacking or the int conversion fails. */
+static PyObject *
+_zstd_ZstdCompressor_flush(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ /* Core (non-module) builds hand the parser a statically initialised tuple
+ * of the keyword names; every other build passes NULL instead. */
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 1
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(mode), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"mode", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "flush",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[1];
+ /* Total number of arguments supplied (there are no required ones). */
+ Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0;
+ int mode = ZSTD_e_end;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (!noptargs) {
+ goto skip_optional_pos;
+ }
+ mode = PyLong_AsInt(args[0]);
+ /* -1 is only a failure when an exception is also pending. */
+ if (mode == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+skip_optional_pos:
+ return_value = _zstd_ZstdCompressor_flush_impl((ZstdCompressor *)self, mode);
+
+exit:
+ return return_value;
+}
+/*[clinic end generated code: output=ef69eab155be39f6 input=a9049054013a1b77]*/
diff --git a/Modules/_zstd/clinic/decompressor.c.h b/Modules/_zstd/clinic/decompressor.c.h
new file mode 100644
index 00000000000000..9359c637203f8f
--- /dev/null
+++ b/Modules/_zstd/clinic/decompressor.c.h
@@ -0,0 +1,230 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+# include "pycore_gc.h" // PyGC_Head
+# include "pycore_runtime.h" // _Py_ID()
+#endif
+#include "pycore_abstract.h" // _PyNumber_Index()
+#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION()
+#include "pycore_modsupport.h" // _PyArg_UnpackKeywords()
+
+PyDoc_STRVAR(_zstd_ZstdDecompressor___init____doc__,
+"ZstdDecompressor(zstd_dict=None, options=None)\n"
+"--\n"
+"\n"
+"Create a decompressor object for decompressing data incrementally.\n"
+"\n"
+" zstd_dict\n"
+" A ZstdDict object, a pre-trained zstd dictionary.\n"
+" options\n"
+" A dict object that contains advanced decompression parameters.\n"
+"\n"
+"Thread-safe at method level. For one-shot decompression, use the decompress()\n"
+"function instead.");
+
+static int
+_zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self,
+ PyObject *zstd_dict, PyObject *options);
+
+/* Argument-parsing wrapper for ZstdDecompressor(zstd_dict=None, options=None).
+ *
+ * Unpacks up to two optional arguments from the args tuple / kwargs dict,
+ * leaving every argument that was not supplied at Py_None, and forwards them
+ * to _zstd_ZstdDecompressor___init___impl(). Returns the impl's result, or -1
+ * when unpacking fails. */
+static int
+_zstd_ZstdDecompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+ int return_value = -1;
+ /* Core (non-module) builds hand the parser a statically initialised tuple
+ * of the keyword names; every other build passes NULL instead. */
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(zstd_dict), &_Py_ID(options), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"zstd_dict", "options", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "ZstdDecompressor",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ PyObject * const *fastargs;
+ Py_ssize_t nargs = PyTuple_GET_SIZE(args);
+ /* Total number of arguments supplied (there are no required ones). */
+ Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0;
+ PyObject *zstd_dict = Py_None;
+ PyObject *options = Py_None;
+
+ fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser,
+ /*minpos*/ 0, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!fastargs) {
+ goto exit;
+ }
+ if (!noptargs) {
+ goto skip_optional_pos;
+ }
+ if (fastargs[0]) {
+ zstd_dict = fastargs[0];
+ if (!--noptargs) {
+ goto skip_optional_pos;
+ }
+ }
+ /* noptargs is still non-zero here, so the remaining argument is options. */
+ options = fastargs[1];
+skip_optional_pos:
+ return_value = _zstd_ZstdDecompressor___init___impl((ZstdDecompressor *)self, zstd_dict, options);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_ZstdDecompressor_unused_data__doc__,
+"A bytes object of un-consumed input data.\n"
+"\n"
+"When ZstdDecompressor object stops after a frame is\n"
+"decompressed, unused input data after the frame. Otherwise this will be b\'\'.");
+#if defined(_zstd_ZstdDecompressor_unused_data_DOCSTR)
+# undef _zstd_ZstdDecompressor_unused_data_DOCSTR
+#endif
+#define _zstd_ZstdDecompressor_unused_data_DOCSTR _zstd_ZstdDecompressor_unused_data__doc__
+
+#if !defined(_zstd_ZstdDecompressor_unused_data_DOCSTR)
+# define _zstd_ZstdDecompressor_unused_data_DOCSTR NULL
+#endif
+#if defined(_ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF)
+# undef _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF
+# define _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF {"unused_data", (getter)_zstd_ZstdDecompressor_unused_data_get, (setter)_zstd_ZstdDecompressor_unused_data_set, _zstd_ZstdDecompressor_unused_data_DOCSTR},
+#else
+# define _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF {"unused_data", (getter)_zstd_ZstdDecompressor_unused_data_get, NULL, _zstd_ZstdDecompressor_unused_data_DOCSTR},
+#endif
+
+static PyObject *
+_zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self);
+
+/* Getter wrapper for the `unused_data` attribute: calls
+ * _zstd_ZstdDecompressor_unused_data_get_impl() while holding a critical
+ * section on `self` and returns whatever the impl returned. The `context`
+ * argument is unused. */
+static PyObject *
+_zstd_ZstdDecompressor_unused_data_get(PyObject *self, void *Py_UNUSED(context))
+{
+ PyObject *return_value = NULL;
+
+ Py_BEGIN_CRITICAL_SECTION(self);
+ return_value = _zstd_ZstdDecompressor_unused_data_get_impl((ZstdDecompressor *)self);
+ Py_END_CRITICAL_SECTION();
+
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_ZstdDecompressor_decompress__doc__,
+"decompress($self, /, data, max_length=-1)\n"
+"--\n"
+"\n"
+"Decompress *data*, returning uncompressed bytes if possible, or b\'\' otherwise.\n"
+"\n"
+" data\n"
+" A bytes-like object, zstd data to be decompressed.\n"
+" max_length\n"
+" Maximum size of returned data. When it is negative, the size of\n"
+" output buffer is unlimited. When it is nonnegative, returns at\n"
+" most max_length bytes of decompressed data.\n"
+"\n"
+"If *max_length* is nonnegative, returns at most *max_length* bytes of\n"
+"decompressed data. If this limit is reached and further output can be\n"
+"produced, *self.needs_input* will be set to ``False``. In this case, the next\n"
+"call to *decompress()* may provide *data* as b\'\' to obtain more of the output.\n"
+"\n"
+"If all of the input data was decompressed and returned (either because this\n"
+"was less than *max_length* bytes, or because *max_length* was negative),\n"
+"*self.needs_input* will be set to True.\n"
+"\n"
+"Attempting to decompress data after the end of a frame is reached raises an\n"
+"EOFError. Any data found after the end of the frame is ignored and saved in\n"
+"the self.unused_data attribute.");
+
+#define _ZSTD_ZSTDDECOMPRESSOR_DECOMPRESS_METHODDEF \
+ {"decompress", _PyCFunction_CAST(_zstd_ZstdDecompressor_decompress), METH_FASTCALL|METH_KEYWORDS, _zstd_ZstdDecompressor_decompress__doc__},
+
+static PyObject *
+_zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self,
+ Py_buffer *data,
+ Py_ssize_t max_length);
+
+/* Argument-parsing wrapper for ZstdDecompressor.decompress(data, max_length=-1).
+ *
+ * Acquires a simple buffer view of `data`, converts the optional `max_length`
+ * to a Py_ssize_t (defaulting to -1), and calls
+ * _zstd_ZstdDecompressor_decompress_impl(). The buffer view is released on
+ * every path before returning. Returns the impl's result, or NULL when
+ * unpacking or a conversion fails. */
+static PyObject *
+_zstd_ZstdDecompressor_decompress(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ /* Core (non-module) builds hand the parser a statically initialised tuple
+ * of the keyword names; every other build passes NULL instead. */
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(data), &_Py_ID(max_length), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"data", "max_length", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "decompress",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ /* Number of supplied arguments beyond the one required argument (data). */
+ Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
+ /* data.obj starts out NULL so the cleanup code can tell whether a buffer
+ * view was actually acquired. */
+ Py_buffer data = {NULL, NULL};
+ Py_ssize_t max_length = -1;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+ /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) {
+ goto exit;
+ }
+ if (!noptargs) {
+ goto skip_optional_pos;
+ }
+ /* Convert max_length through _PyNumber_Index() and PyLong_AsSsize_t().
+ * ival stays -1 when the index conversion fails; the PyErr_Occurred()
+ * check tells that apart from a caller passing a genuine -1. */
+ {
+ Py_ssize_t ival = -1;
+ PyObject *iobj = _PyNumber_Index(args[1]);
+ if (iobj != NULL) {
+ ival = PyLong_AsSsize_t(iobj);
+ Py_DECREF(iobj);
+ }
+ if (ival == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+ max_length = ival;
+ }
+skip_optional_pos:
+ return_value = _zstd_ZstdDecompressor_decompress_impl((ZstdDecompressor *)self, &data, max_length);
+
+exit:
+ /* Cleanup for data */
+ if (data.obj) {
+ PyBuffer_Release(&data);
+ }
+
+ return return_value;
+}
+/*[clinic end generated code: output=ae703f0465a2906d input=a9049054013a1b77]*/
diff --git a/Modules/_zstd/clinic/zdict.c.h b/Modules/_zstd/clinic/zdict.c.h
new file mode 100644
index 00000000000000..4e0f7b64172a74
--- /dev/null
+++ b/Modules/_zstd/clinic/zdict.c.h
@@ -0,0 +1,207 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+# include "pycore_gc.h" // PyGC_Head
+# include "pycore_runtime.h" // _Py_ID()
+#endif
+#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION()
+#include "pycore_modsupport.h" // _PyArg_UnpackKeywords()
+
+PyDoc_STRVAR(_zstd_ZstdDict___init____doc__,
+"ZstdDict(dict_content, is_raw=False)\n"
+"--\n"
+"\n"
+"Represents a zstd dictionary, which can be used for compression/decompression.\n"
+"\n"
+" dict_content\n"
+" A bytes-like object, dictionary\'s content.\n"
+" is_raw\n"
+" This parameter is for advanced user. True means dict_content\n"
+" argument is a \"raw content\" dictionary, free of any format\n"
+" restriction. False means dict_content argument is an ordinary\n"
+" zstd dictionary, was created by zstd functions, follow a\n"
+" specified format.\n"
+"\n"
+"It\'s thread-safe, and can be shared by multiple ZstdCompressor /\n"
+"ZstdDecompressor objects.");
+
+static int
+_zstd_ZstdDict___init___impl(ZstdDict *self, PyObject *dict_content,
+ int is_raw);
+
+/* Argument-parsing wrapper for ZstdDict(dict_content, is_raw=False).
+ *
+ * Unpacks the required `dict_content` object and the optional `is_raw` flag
+ * (evaluated for truthiness, defaulting to 0) and forwards them to
+ * _zstd_ZstdDict___init___impl(). Returns the impl's result, or -1 when
+ * unpacking or the truthiness test fails. */
+static int
+_zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+ int return_value = -1;
+ /* Core (non-module) builds hand the parser a statically initialised tuple
+ * of the keyword names; every other build passes NULL instead. */
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 2
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ Py_hash_t ob_hash;
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_hash = -1,
+ .ob_item = { &_Py_ID(dict_content), &_Py_ID(is_raw), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"dict_content", "is_raw", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "ZstdDict",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[2];
+ PyObject * const *fastargs;
+ Py_ssize_t nargs = PyTuple_GET_SIZE(args);
+ /* Number of supplied arguments beyond the one required argument
+ * (dict_content). */
+ Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1;
+ PyObject *dict_content;
+ int is_raw = 0;
+
+ fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser,
+ /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+ if (!fastargs) {
+ goto exit;
+ }
+ dict_content = fastargs[0];
+ if (!noptargs) {
+ goto skip_optional_pos;
+ }
+ /* A negative result from PyObject_IsTrue() signals an error. */
+ is_raw = PyObject_IsTrue(fastargs[1]);
+ if (is_raw < 0) {
+ goto exit;
+ }
+skip_optional_pos:
+ return_value = _zstd_ZstdDict___init___impl((ZstdDict *)self, dict_content, is_raw);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_ZstdDict_as_digested_dict__doc__,
+"Load as a digested dictionary to compressor.\n"
+"\n"
+"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digested_dict)\n"
+"1. Some advanced compression parameters of compressor may be overridden\n"
+" by parameters of digested dictionary.\n"
+"2. ZstdDict has a digested dictionaries cache for each compression level.\n"
+" It\'s faster when loading again a digested dictionary with the same\n"
+" compression level.\n"
+"3. No need to use this for decompression.");
+#if defined(_zstd_ZstdDict_as_digested_dict_DOCSTR)
+# undef _zstd_ZstdDict_as_digested_dict_DOCSTR
+#endif
+#define _zstd_ZstdDict_as_digested_dict_DOCSTR _zstd_ZstdDict_as_digested_dict__doc__
+
+#if !defined(_zstd_ZstdDict_as_digested_dict_DOCSTR)
+# define _zstd_ZstdDict_as_digested_dict_DOCSTR NULL
+#endif
+#if defined(_ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF)
+# undef _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF
+# define _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF {"as_digested_dict", (getter)_zstd_ZstdDict_as_digested_dict_get, (setter)_zstd_ZstdDict_as_digested_dict_set, _zstd_ZstdDict_as_digested_dict_DOCSTR},
+#else
+# define _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF {"as_digested_dict", (getter)_zstd_ZstdDict_as_digested_dict_get, NULL, _zstd_ZstdDict_as_digested_dict_DOCSTR},
+#endif
+
+static PyObject *
+_zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self);
+
+/* Getter wrapper for the `as_digested_dict` attribute: calls
+ * _zstd_ZstdDict_as_digested_dict_get_impl() while holding a critical section
+ * on `self` and returns whatever the impl returned. The `context` argument is
+ * unused. */
+static PyObject *
+_zstd_ZstdDict_as_digested_dict_get(PyObject *self, void *Py_UNUSED(context))
+{
+ PyObject *return_value = NULL;
+
+ Py_BEGIN_CRITICAL_SECTION(self);
+ return_value = _zstd_ZstdDict_as_digested_dict_get_impl((ZstdDict *)self);
+ Py_END_CRITICAL_SECTION();
+
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_ZstdDict_as_undigested_dict__doc__,
+"Load as an undigested dictionary to compressor.\n"
+"\n"
+"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undigested_dict)\n"
+"1. The advanced compression parameters of compressor will not be overridden.\n"
+"2. Loading an undigested dictionary is costly. If load an undigested dictionary\n"
+" multiple times, consider reusing a compressor object.\n"
+"3. No need to use this for decompression.");
+#if defined(_zstd_ZstdDict_as_undigested_dict_DOCSTR)
+# undef _zstd_ZstdDict_as_undigested_dict_DOCSTR
+#endif
+#define _zstd_ZstdDict_as_undigested_dict_DOCSTR _zstd_ZstdDict_as_undigested_dict__doc__
+
+#if !defined(_zstd_ZstdDict_as_undigested_dict_DOCSTR)
+# define _zstd_ZstdDict_as_undigested_dict_DOCSTR NULL
+#endif
+#if defined(_ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF)
+# undef _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF
+# define _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF {"as_undigested_dict", (getter)_zstd_ZstdDict_as_undigested_dict_get, (setter)_zstd_ZstdDict_as_undigested_dict_set, _zstd_ZstdDict_as_undigested_dict_DOCSTR},
+#else
+# define _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF {"as_undigested_dict", (getter)_zstd_ZstdDict_as_undigested_dict_get, NULL, _zstd_ZstdDict_as_undigested_dict_DOCSTR},
+#endif
+
+static PyObject *
+_zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self);
+
+/* Getter wrapper for the `as_undigested_dict` attribute: calls
+ * _zstd_ZstdDict_as_undigested_dict_get_impl() while holding a critical
+ * section on `self` and returns whatever the impl returned. The `context`
+ * argument is unused. */
+static PyObject *
+_zstd_ZstdDict_as_undigested_dict_get(PyObject *self, void *Py_UNUSED(context))
+{
+ PyObject *return_value = NULL;
+
+ Py_BEGIN_CRITICAL_SECTION(self);
+ return_value = _zstd_ZstdDict_as_undigested_dict_get_impl((ZstdDict *)self);
+ Py_END_CRITICAL_SECTION();
+
+ return return_value;
+}
+
+PyDoc_STRVAR(_zstd_ZstdDict_as_prefix__doc__,
+"Load as a prefix to compressor/decompressor.\n"
+"\n"
+"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix)\n"
+"1. Prefix is compatible with long distance matching, while dictionary is not.\n"
+"2. It only works for the first frame, then the compressor/decompressor will\n"
+" return to no prefix state.\n"
+"3. When decompressing, must use the same prefix as when compressing.\"");
+#if defined(_zstd_ZstdDict_as_prefix_DOCSTR)
+# undef _zstd_ZstdDict_as_prefix_DOCSTR
+#endif
+#define _zstd_ZstdDict_as_prefix_DOCSTR _zstd_ZstdDict_as_prefix__doc__
+
+#if !defined(_zstd_ZstdDict_as_prefix_DOCSTR)
+# define _zstd_ZstdDict_as_prefix_DOCSTR NULL
+#endif
+#if defined(_ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF)
+# undef _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF
+# define _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF {"as_prefix", (getter)_zstd_ZstdDict_as_prefix_get, (setter)_zstd_ZstdDict_as_prefix_set, _zstd_ZstdDict_as_prefix_DOCSTR},
+#else
+# define _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF {"as_prefix", (getter)_zstd_ZstdDict_as_prefix_get, NULL, _zstd_ZstdDict_as_prefix_DOCSTR},
+#endif
+
+static PyObject *
+_zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self);
+
+/* Getter wrapper for the `as_prefix` attribute: calls
+ * _zstd_ZstdDict_as_prefix_get_impl() while holding a critical section on
+ * `self` and returns whatever the impl returned. The `context` argument is
+ * unused. */
+static PyObject *
+_zstd_ZstdDict_as_prefix_get(PyObject *self, void *Py_UNUSED(context))
+{
+ PyObject *return_value = NULL;
+
+ Py_BEGIN_CRITICAL_SECTION(self);
+ return_value = _zstd_ZstdDict_as_prefix_get_impl((ZstdDict *)self);
+ Py_END_CRITICAL_SECTION();
+
+ return return_value;
+}
+/*[clinic end generated code: output=59257c053f74eda7 input=a9049054013a1b77]*/
diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c
new file mode 100644
index 00000000000000..d0f677be821572
--- /dev/null
+++ b/Modules/_zstd/compressor.c
@@ -0,0 +1,707 @@
+/*
+Low level interface to Meta's zstd library for use in the compression.zstd
+Python module.
+*/
+
+/* ZstdCompressor class definitions */
+
+/*[clinic input]
+module _zstd
+class _zstd.ZstdCompressor "ZstdCompressor *" "clinic_state()->ZstdCompressor_type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=875bf614798f80cb]*/
+
+
+#ifndef Py_BUILD_CORE_BUILTIN
+# define Py_BUILD_CORE_MODULE 1
+#endif
+
+#include "_zstdmodule.h"
+
+#include "buffer.h"
+
+#include <stddef.h> // offsetof()
+
+
+#define ZstdCompressor_CAST(op) ((ZstdCompressor *)op)
+
+/*
+ * Apply compression settings to self->cctx.
+ *
+ * level_or_options is either
+ *   - an int: used as the compression level, or
+ *   - a dict: every (key, value) pair is converted to C ints and passed to
+ *     ZSTD_CCtx_setParameter().
+ * Anything else raises TypeError, built from arg_name / arg_type.
+ *
+ * Side effects on self: compression_level is recorded whenever a level is
+ * set, and use_multithread is switched on when a non-zero ZSTD_c_nbWorkers
+ * value is seen.
+ *
+ * Returns 0 on success, -1 with an exception set on failure.
+ */
+int
+_PyZstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options,
+                         const char *arg_name, const char* arg_type)
+{
+    _zstd_state* const state = PyType_GetModuleState(Py_TYPE(self));
+    if (state == NULL) {
+        return -1;
+    }
+
+    /* Reject unsupported argument types before doing any work. */
+    const int is_level = PyLong_Check(level_or_options);
+    if (!is_level && !PyDict_Check(level_or_options)) {
+        PyErr_Format(PyExc_TypeError, "Invalid type for %s. Expected %s", arg_name, arg_type);
+        return -1;
+    }
+
+    if (is_level) {
+        /* Integer compression level */
+        const int requested = PyLong_AsInt(level_or_options);
+        if (requested == -1 && PyErr_Occurred()) {
+            PyErr_Format(PyExc_ValueError,
+                         "Compression level should be an int value between %d and %d.",
+                         ZSTD_minCLevel(), ZSTD_maxCLevel());
+            return -1;
+        }
+
+        /* Remembered so a matching ZSTD_CDict can be generated later. */
+        self->compression_level = requested;
+
+        const size_t level_rc = ZSTD_CCtx_setParameter(self->cctx,
+                                                       ZSTD_c_compressionLevel,
+                                                       requested);
+        if (ZSTD_isError(level_rc)) {
+            set_zstd_error(state, ERR_SET_C_LEVEL, level_rc);
+            return -1;
+        }
+        return 0;
+    }
+
+    /* Options dict */
+    PyObject *opt_key;
+    PyObject *opt_value;
+    Py_ssize_t cursor = 0;
+
+    while (PyDict_Next(level_or_options, &cursor, &opt_key, &opt_value)) {
+        /* Decompression parameters are never valid here. */
+        if (Py_TYPE(opt_key) == state->DParameter_type) {
+            PyErr_SetString(PyExc_TypeError,
+                            "Key of compression option dict should "
+                            "NOT be DParameter.");
+            return -1;
+        }
+
+        const int param = PyLong_AsInt(opt_key);
+        if (param == -1 && PyErr_Occurred()) {
+            PyErr_SetString(PyExc_ValueError,
+                            "Key of options dict should be a CParameter attribute.");
+            return -1;
+        }
+
+        // TODO(emmatyping): check bounds when there is a value error here for better
+        // error message?
+        const int setting = PyLong_AsInt(opt_value);
+        if (setting == -1 && PyErr_Occurred()) {
+            PyErr_SetString(PyExc_ValueError,
+                            "Value of option dict should be an int.");
+            return -1;
+        }
+
+        switch (param) {
+        case ZSTD_c_compressionLevel:
+            /* Remembered so a matching ZSTD_CDict can be generated later. */
+            self->compression_level = setting;
+            break;
+        case ZSTD_c_nbWorkers:
+            /* From zstd library doc:
+               1. When nbWorkers >= 1, triggers asynchronous mode when
+                  used with ZSTD_compressStream2().
+               2, Default value is `0`, aka "single-threaded mode" : no
+                  worker is spawned, compression is performed inside
+                  caller's thread, all invocations are blocking. */
+            if (setting != 0) {
+                self->use_multithread = 1;
+            }
+            break;
+        default:
+            break;
+        }
+
+        /* Hand the parameter to the compression context. */
+        if (ZSTD_isError(ZSTD_CCtx_setParameter(self->cctx, param, setting))) {
+            set_parameter_error(state, 1, param, setting);
+            return -1;
+        }
+    }
+    return 0;
+}
+
+/* PyCapsule destructor: frees the ZSTD_CDict stored in the (unnamed) capsule. */
+static void
+capsule_free_cdict(PyObject *capsule)
+{
+    ZSTD_freeCDict(PyCapsule_GetPointer(capsule, NULL));
+}
+
+/*
+ * Return the ZSTD_CDict for `compressionLevel`, creating it on first use.
+ *
+ * Created dictionaries are wrapped in a PyCapsule and stored in
+ * self->c_dicts keyed by the level, so later requests for the same level
+ * reuse the stored instance. The whole lookup/creation runs inside a
+ * critical section on `self`.
+ *
+ * Returns NULL on failure. An exception is set, except when
+ * ZSTD_createCDict() fails and the module state cannot be retrieved, in
+ * which case this function itself sets nothing.
+ */
+ZSTD_CDict *
+_get_CDict(ZstdDict *self, int compressionLevel)
+{
+    ZSTD_CDict *result = NULL;
+    ZSTD_CDict *created = NULL;
+    PyObject *level_key = NULL;
+    PyObject *entry = NULL;
+    char *content = NULL;
+    Py_ssize_t content_len = 0;
+    int stored = 0;
+
+    // TODO(emmatyping): refactor critical section code into a lock_held function
+    Py_BEGIN_CRITICAL_SECTION(self);
+
+    /* Dictionary key: the level as a Python int */
+    level_key = PyLong_FromLong(compressionLevel);
+    if (level_key == NULL) {
+        goto done;
+    }
+
+    /* Cache lookup; `entry` is a borrowed reference on a hit. */
+    entry = PyDict_GetItemWithError(self->c_dicts, level_key);
+    if (entry != NULL) {
+        result = PyCapsule_GetPointer(entry, NULL);
+        goto done;
+    }
+    if (PyErr_Occurred()) {
+        goto done;
+    }
+
+    /* Cache miss: build a new ZSTD_CDict without holding the GIL. */
+    content = PyBytes_AS_STRING(self->dict_content);
+    content_len = Py_SIZE(self->dict_content);
+    Py_BEGIN_ALLOW_THREADS
+    created = ZSTD_createCDict(content, content_len, compressionLevel);
+    Py_END_ALLOW_THREADS
+
+    if (created == NULL) {
+        _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+        if (mod_state != NULL) {
+            PyErr_SetString(mod_state->ZstdError,
+                "Failed to create ZSTD_CDict instance from zstd "
+                "dictionary content. Maybe the content is corrupted.");
+        }
+        goto done;
+    }
+
+    /* The capsule owns the ZSTD_CDict from here on. */
+    entry = PyCapsule_New(created, NULL, capsule_free_cdict);
+    if (entry == NULL) {
+        ZSTD_freeCDict(created);
+        goto done;
+    }
+
+    /* On success the dict keeps its own reference to the capsule; on
+       failure dropping ours destroys the capsule and frees the ZSTD_CDict. */
+    stored = PyDict_SetItem(self->c_dicts, level_key, entry);
+    Py_DECREF(entry);
+    if (stored >= 0) {
+        result = created;
+    }
+
+done:
+    Py_XDECREF(level_key);
+    Py_END_CRITICAL_SECTION();
+    return result;
+}
+
+/*
+ * Attach a dictionary to the compression context of `self`.
+ *
+ * `dict` must be either
+ *   - a ZstdDict instance, which is loaded as an undigested dictionary, or
+ *   - an exact 2-tuple (ZstdDict, type) where type is one of
+ *     DICT_TYPE_DIGESTED, DICT_TYPE_UNDIGESTED or DICT_TYPE_PREFIX.
+ * Any other value raises TypeError.
+ *
+ * Returns 0 on success, -1 with an exception set on failure.
+ */
+int
+_PyZstd_load_c_dict(ZstdCompressor *self, PyObject *dict)
+{
+    _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+    if (mod_state == NULL) {
+        return -1;
+    }
+
+    PyObject *dict_type_obj = (PyObject*)mod_state->ZstdDict_type;
+    ZstdDict *zd = NULL;   /* stays NULL until `dict` has been recognised */
+    int type = DICT_TYPE_UNDIGESTED;
+    size_t zstd_ret;
+
+    int is_zdict = PyObject_IsInstance(dict, dict_type_obj);
+    if (is_zdict < 0) {
+        return -1;
+    }
+
+    if (is_zdict > 0) {
+        /* When compressing, use undigested dictionary by default. */
+        zd = (ZstdDict*)dict;
+        type = DICT_TYPE_UNDIGESTED;
+    }
+    else if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2) {
+        /* (ZstdDict, type) form */
+        PyObject *first = PyTuple_GET_ITEM(dict, 0);
+
+        is_zdict = PyObject_IsInstance(first, dict_type_obj);
+        if (is_zdict < 0) {
+            return -1;
+        }
+        if (is_zdict > 0) {
+            /* requested == -1 may indicate an error; it matches none of the
+               accepted values and ends up in the TypeError below. */
+            const int requested = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1));
+            if (requested == DICT_TYPE_DIGESTED ||
+                requested == DICT_TYPE_UNDIGESTED ||
+                requested == DICT_TYPE_PREFIX)
+            {
+                assert(requested >= 0);
+                type = requested;
+                zd = (ZstdDict*)first;
+            }
+        }
+    }
+
+    if (zd == NULL) {
+        /* Wrong type */
+        PyErr_SetString(PyExc_TypeError,
+                        "zstd_dict argument should be ZstdDict object.");
+        return -1;
+    }
+
+    if (type == DICT_TYPE_DIGESTED) {
+        /* Fetch (or build) the digested dictionary for the current level. */
+        ZSTD_CDict *c_dict = _get_CDict(zd, self->compression_level);
+        if (c_dict == NULL) {
+            return -1;
+        }
+        /* Reference a prepared dictionary.
+           It overrides some compression context's parameters. */
+        Py_BEGIN_CRITICAL_SECTION(self);
+        zstd_ret = ZSTD_CCtx_refCDict(self->cctx, c_dict);
+        Py_END_CRITICAL_SECTION();
+    }
+    else if (type == DICT_TYPE_UNDIGESTED) {
+        /* Load a dictionary.
+           It doesn't override compression context's parameters. */
+        Py_BEGIN_CRITICAL_SECTION2(self, zd);
+        zstd_ret = ZSTD_CCtx_loadDictionary(
+                            self->cctx,
+                            PyBytes_AS_STRING(zd->dict_content),
+                            Py_SIZE(zd->dict_content));
+        Py_END_CRITICAL_SECTION2();
+    }
+    else if (type == DICT_TYPE_PREFIX) {
+        /* Load a prefix */
+        Py_BEGIN_CRITICAL_SECTION2(self, zd);
+        zstd_ret = ZSTD_CCtx_refPrefix(
+                            self->cctx,
+                            PyBytes_AS_STRING(zd->dict_content),
+                            Py_SIZE(zd->dict_content));
+        Py_END_CRITICAL_SECTION2();
+    }
+    else {
+        Py_UNREACHABLE();
+    }
+
+    /* Report a failure from whichever zstd call ran above. */
+    if (ZSTD_isError(zstd_ret)) {
+        set_zstd_error(mod_state, ERR_LOAD_C_DICT, zstd_ret);
+        return -1;
+    }
+    return 0;
+}
+
+#define clinic_state() (get_zstd_state_from_type(type))
+#include "clinic/compressor.c.h"
+#undef clinic_state
+
+/*
+ * Allocate a ZstdCompressor and put it into a well-defined "not yet
+ * initialised" state; argument handling happens later in __init__.
+ *
+ * The object is deliberately NOT GC-tracked here: __init__ starts tracking
+ * once self->dict has been set.
+ *
+ * Returns a new reference, or NULL on failure. An exception is set, unless
+ * creating the context fails and the module state cannot be retrieved.
+ */
+static PyObject *
+_zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwargs))
+{
+    ZstdCompressor *self = PyObject_GC_New(ZstdCompressor, type);
+    if (self == NULL) {
+        return NULL;
+    }
+
+    /* PyObject_GC_New() does not zero the allocation, so every field that is
+       read before being assigned elsewhere must be set explicitly. */
+    self->inited = 0;
+    self->dict = NULL;
+    self->use_multithread = 0;
+    /* Read by _PyZstd_load_c_dict() when a digested dictionary is requested
+       without an explicit level; 0 makes zstd use its default level. */
+    self->compression_level = 0;
+
+    /* Last mode */
+    self->last_mode = ZSTD_e_end;
+
+    /* Compression context */
+    self->cctx = ZSTD_createCCtx();
+    if (self->cctx == NULL) {
+        _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+        if (mod_state != NULL) {
+            PyErr_SetString(mod_state->ZstdError,
+                            "Unable to create ZSTD_CCtx instance.");
+        }
+        /* Mirror ZstdCompressor_dealloc(): free the object, then release the
+           reference PyObject_GC_New() took on the type. */
+        PyObject_GC_Del(self);
+        Py_DECREF(type);
+        return NULL;
+    }
+
+    return (PyObject*)self;
+}
+
+/*
+ * Deallocator for ZstdCompressor: stops GC tracking, frees the zstd
+ * compression context, drops the dictionary reference, frees the object and
+ * finally releases the reference the instance held on its type.
+ */
+static void
+ZstdCompressor_dealloc(PyObject *ob)
+{
+    ZstdCompressor *compressor = ZstdCompressor_CAST(ob);
+    PyTypeObject *type = Py_TYPE(ob);
+
+    PyObject_GC_UnTrack(ob);
+
+    /* The context goes first ... */
+    ZSTD_freeCCtx(compressor->cctx);
+
+    /* ... and only then the dictionary object. */
+    Py_CLEAR(compressor->dict);
+
+    PyObject_GC_Del(ob);
+    Py_DECREF(type);
+}
+
+/*[clinic input]
+_zstd.ZstdCompressor.__init__
+
+ level: object = None
+ The compression level to use, defaults to ZSTD_CLEVEL_DEFAULT.
+ options: object = None
+ A dict object that contains advanced compression parameters.
+ zstd_dict: object = None
+ A ZstdDict object, a pre-trained zstd dictionary.
+
+Create a compressor object for compressing data incrementally.
+
+Thread-safe at method level. For one-shot compression, use the compress()
+function instead.
+[clinic start generated code]*/
+
+static int
+_zstd_ZstdCompressor___init___impl(ZstdCompressor *self, PyObject *level,
+ PyObject *options, PyObject *zstd_dict)
+/*[clinic end generated code: output=215e6c4342732f96 input=9f79b0d8d34c8ef0]*/
+{
+    /* Configure a freshly allocated compressor.
+
+       level / options are mutually exclusive ways of setting compression
+       parameters; zstd_dict optionally attaches a dictionary.  Py_None means
+       "not given" for all three.  Returns 0 on success and -1 on failure.
+       The object is handed to the garbage collector only on success. */
+    const int has_level = (level != Py_None);
+    const int has_options = (options != Py_None);
+
+    /* A second __init__ call on the same object is rejected */
+    if (self->inited) {
+        PyErr_SetString(PyExc_RuntimeError, init_twice_msg);
+        return -1;
+    }
+    self->inited = 1;
+
+    if (has_level && has_options) {
+        PyErr_SetString(PyExc_RuntimeError, "Only one of level or options should be used.");
+        return -1;
+    }
+
+    /* Apply whichever parameter source was supplied */
+    if (has_level
+        && _PyZstd_set_c_parameters(self, level, "level", "int") < 0)
+    {
+        return -1;
+    }
+    if (has_options
+        && _PyZstd_set_c_parameters(self, options, "options", "dict") < 0)
+    {
+        return -1;
+    }
+
+    /* Attach the dictionary and keep it alive for as long as self lives */
+    if (zstd_dict != Py_None) {
+        if (_PyZstd_load_c_dict(self, zstd_dict) < 0) {
+            return -1;
+        }
+        Py_INCREF(zstd_dict);
+        self->dict = zstd_dict;
+    }
+
+    // GC tracking starts only now that self->dict has its final value.
+    PyObject_GC_Track(self);
+    return 0;
+}
+
+/* Run ZSTD_compressStream2() with the given end directive until zstd
+   reports that nothing is left to output, and return the produced data.
+
+   self           Compressor whose cctx is used.
+   data           Input buffer, or NULL to feed no input at all.
+   end_directive  Passed straight through to ZSTD_compressStream2().
+
+   Returns a new bytes object, or NULL on failure.  This function neither
+   locks self nor resets the zstd session on failure; the callers visible in
+   this file do both around the call. */
+PyObject *
+compress_impl(ZstdCompressor *self, Py_buffer *data,
+              ZSTD_EndDirective end_directive)
+{
+    ZSTD_inBuffer in;
+    ZSTD_outBuffer out;
+    _BlocksOutputBuffer buffer = {.list = NULL};
+    size_t zstd_ret;
+    PyObject *ret;
+
+    /* Prepare input & output buffers */
+    if (data != NULL) {
+        in.src = data->buf;
+        in.size = data->len;
+        in.pos = 0;
+    }
+    else {
+        /* No input: point src at some valid, non-NULL address.  size is 0,
+           so nothing is ever read through it. */
+        in.src = &in;
+        in.size = 0;
+        in.pos = 0;
+    }
+
+    /* Calculate output buffer's size */
+    size_t output_buffer_size = ZSTD_compressBound(in.size);
+    if (output_buffer_size > (size_t) PY_SSIZE_T_MAX) {
+        PyErr_NoMemory();
+        goto error;
+    }
+
+    if (_OutputBuffer_InitWithSize(&buffer, &out, -1,
+                                   (Py_ssize_t) output_buffer_size) < 0) {
+        goto error;
+    }
+
+
+    /* zstd stream compress */
+    while (1) {
+        /* The GIL / thread state is released around the zstd call */
+        Py_BEGIN_ALLOW_THREADS
+        zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, end_directive);
+        Py_END_ALLOW_THREADS
+
+        /* Check error */
+        if (ZSTD_isError(zstd_ret)) {
+            _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+            if (mod_state != NULL) {
+                set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret);
+            }
+            goto error;
+        }
+
+        /* Finished */
+        if (zstd_ret == 0) {
+            break;
+        }
+
+        /* Output buffer should be exhausted, grow the buffer. */
+        assert(out.pos == out.size);
+        if (out.pos == out.size) {
+            if (_OutputBuffer_Grow(&buffer, &out) < 0) {
+                goto error;
+            }
+        }
+    }
+
+    /* Return a bytes object */
+    ret = _OutputBuffer_Finish(&buffer, &out);
+    if (ret != NULL) {
+        return ret;
+    }
+
+error:
+    _OutputBuffer_OnError(&buffer);
+    return NULL;
+}
+
+/* ZSTD_e_continue compression used by compress() when
+   self->use_multithread is set.
+
+   ZSTD_compressStream2() is called repeatedly until either the whole input
+   has been consumed or the current output block is full; a full block makes
+   the output buffer grow and the process repeats.
+
+   Returns a new bytes object, or NULL on failure. */
+static PyObject *
+compress_mt_continue_impl(ZstdCompressor *self, Py_buffer *data)
+{
+    _BlocksOutputBuffer blocks = {.list = NULL};
+    ZSTD_outBuffer out;
+    ZSTD_inBuffer in;
+    size_t code;
+    PyObject *result;
+
+    /* Input comes straight from the caller's buffer */
+    in.src = data->buf;
+    in.size = data->len;
+    in.pos = 0;
+
+    if (_OutputBuffer_InitAndGrow(&blocks, &out, -1) < 0) {
+        goto error;
+    }
+
+    for (;;) {
+        /* Call zstd, with the GIL / thread state released, until it fails,
+           fills the output block or runs out of input. */
+        Py_BEGIN_ALLOW_THREADS
+        for (;;) {
+            code = ZSTD_compressStream2(self->cctx, &out, &in, ZSTD_e_continue);
+            if (out.pos == out.size || in.pos == in.size || ZSTD_isError(code)) {
+                break;
+            }
+        }
+        Py_END_ALLOW_THREADS
+
+        if (ZSTD_isError(code)) {
+            _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+            if (mod_state != NULL) {
+                set_zstd_error(mod_state, ERR_COMPRESS, code);
+            }
+            goto error;
+        }
+
+        /* As in compress_impl(), a full output block takes priority over
+           exhausted input. */
+        if (out.pos != out.size) {
+            if (in.pos == in.size) {
+                /* All input consumed */
+                assert(mt_continue_should_break(&in, &out));
+                break;
+            }
+            continue;
+        }
+        if (_OutputBuffer_Grow(&blocks, &out) < 0) {
+            goto error;
+        }
+    }
+
+    /* Assemble the bytes object */
+    result = _OutputBuffer_Finish(&blocks, &out);
+    if (result != NULL) {
+        return result;
+    }
+
+error:
+    _OutputBuffer_OnError(&blocks);
+    return NULL;
+}
+
+/*[clinic input]
+_zstd.ZstdCompressor.compress
+
+ data: Py_buffer
+ mode: int(c_default="ZSTD_e_continue") = ZstdCompressor.CONTINUE
+ Can be these 3 values ZstdCompressor.CONTINUE,
+ ZstdCompressor.FLUSH_BLOCK, ZstdCompressor.FLUSH_FRAME
+
+Provide data to the compressor object.
+
+Return a chunk of compressed data if possible, or b'' otherwise. When you have
+finished providing data to the compressor, call the flush() method to finish
+the compression process.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdCompressor_compress_impl(ZstdCompressor *self, Py_buffer *data,
+ int mode)
+/*[clinic end generated code: output=ed7982d1cf7b4f98 input=ac2c21d180f579ea]*/
+{
+    /* Validate mode, compress data while holding the object's critical
+       section, and record the mode in self->last_mode.  On failure the
+       zstd session is reset and last_mode becomes ZSTD_e_end. */
+    PyObject *result;
+
+    switch (mode) {
+    case ZSTD_e_continue:
+    case ZSTD_e_flush:
+    case ZSTD_e_end:
+        break;
+    default:
+        PyErr_SetString(PyExc_ValueError,
+                        "mode argument wrong value, it should be one of "
+                        "ZstdCompressor.CONTINUE, ZstdCompressor.FLUSH_BLOCK, "
+                        "ZstdCompressor.FLUSH_FRAME.");
+        return NULL;
+    }
+
+    Py_BEGIN_CRITICAL_SECTION(self);
+
+    /* Multi-threaded compressors take a dedicated path for CONTINUE */
+    if (mode == ZSTD_e_continue && self->use_multithread) {
+        result = compress_mt_continue_impl(self, data);
+    }
+    else {
+        result = compress_impl(self, data, mode);
+    }
+
+    if (result == NULL) {
+        self->last_mode = ZSTD_e_end;
+
+        /* Resetting cctx's session never fail */
+        ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
+    }
+    else {
+        self->last_mode = mode;
+    }
+    Py_END_CRITICAL_SECTION();
+
+    return result;
+}
+
+/*[clinic input]
+_zstd.ZstdCompressor.flush
+
+ mode: int(c_default="ZSTD_e_end") = ZstdCompressor.FLUSH_FRAME
+ Can be these 2 values ZstdCompressor.FLUSH_FRAME,
+ ZstdCompressor.FLUSH_BLOCK
+
+Finish the compression process.
+
+Flush any remaining data left in internal buffers. Since zstd data consists
+of one or more independent frames, the compressor object can still be used
+after this method is called.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode)
+/*[clinic end generated code: output=b7cf2c8d64dcf2e3 input=a766870301932b85]*/
+{
+    /* Flush with no new input.  Only ZSTD_e_end and ZSTD_e_flush are
+       accepted.  On failure the zstd session is reset and last_mode
+       becomes ZSTD_e_end. */
+    PyObject *result;
+    const int mode_ok = (mode == ZSTD_e_end || mode == ZSTD_e_flush);
+
+    if (!mode_ok) {
+        PyErr_SetString(PyExc_ValueError,
+                        "mode argument wrong value, it should be "
+                        "ZstdCompressor.FLUSH_FRAME or "
+                        "ZstdCompressor.FLUSH_BLOCK.");
+        return NULL;
+    }
+
+    Py_BEGIN_CRITICAL_SECTION(self);
+    result = compress_impl(self, NULL, mode);
+
+    if (result == NULL) {
+        self->last_mode = ZSTD_e_end;
+
+        /* Resetting cctx's session never fail */
+        ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
+    }
+    else {
+        self->last_mode = mode;
+    }
+    Py_END_CRITICAL_SECTION();
+
+    return result;
+}
+
+/* Method table for ZstdCompressor.  The two entries are METHODDEF macros,
+   presumably provided by the Argument Clinic header clinic/compressor.c.h
+   that this file includes; {0} terminates the table. */
+static PyMethodDef ZstdCompressor_methods[] = {
+ _ZSTD_ZSTDCOMPRESSOR_COMPRESS_METHODDEF
+ _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF
+
+ {0}
+};
+
+/* Docstring attached to the read-only `last_mode` member below. */
+PyDoc_STRVAR(ZstdCompressor_last_mode_doc,
+"The last mode used to this compressor object, its value can be .CONTINUE,\n"
+".FLUSH_BLOCK, .FLUSH_FRAME. Initialized to .FLUSH_FRAME.\n\n"
+"It can be used to get the current state of a compressor, such as, data flushed,\n"
+"a frame ended.");
+
+/* Struct fields exposed as Python attributes; {0} terminates the table. */
+static PyMemberDef ZstdCompressor_members[] = {
+ /* last_mode: read-only int read directly from ZstdCompressor.last_mode */
+ {"last_mode", Py_T_INT, offsetof(ZstdCompressor, last_mode),
+ Py_READONLY, ZstdCompressor_last_mode_doc},
+ {0}
+};
+
+/* tp_traverse: the dictionary object is the only member reported to the
+   garbage collector. */
+static int
+ZstdCompressor_traverse(PyObject *ob, visitproc visit, void *arg)
+{
+    ZstdCompressor *compressor = ZstdCompressor_CAST(ob);
+
+    Py_VISIT(compressor->dict);
+    return 0;
+}
+
+/* tp_clear: drop the reference to the dictionary object. */
+static int
+ZstdCompressor_clear(PyObject *ob)
+{
+    ZstdCompressor *compressor = ZstdCompressor_CAST(ob);
+
+    Py_CLEAR(compressor->dict);
+    return 0;
+}
+
+/* Slot table wiring the functions and tables defined in this file into the
+   ZstdCompressor heap type; {0} terminates the table. */
+static PyType_Slot zstdcompressor_slots[] = {
+ {Py_tp_new, _zstd_ZstdCompressor_new},
+ {Py_tp_dealloc, ZstdCompressor_dealloc},
+ {Py_tp_init, _zstd_ZstdCompressor___init__},
+ {Py_tp_methods, ZstdCompressor_methods},
+ {Py_tp_members, ZstdCompressor_members},
+ /* The type's docstring is the __init__ docstring */
+ {Py_tp_doc, (char*)_zstd_ZstdCompressor___init____doc__},
+ {Py_tp_traverse, ZstdCompressor_traverse},
+ {Py_tp_clear, ZstdCompressor_clear},
+ {0}
+};
+
+/* Type specification for _zstd.ZstdCompressor.  The flags make the type
+   subclassable (Py_TPFLAGS_BASETYPE) and GC-aware (Py_TPFLAGS_HAVE_GC). */
+PyType_Spec zstdcompressor_type_spec = {
+ .name = "_zstd.ZstdCompressor",
+ .basicsize = sizeof(ZstdCompressor),
+ .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
+ .slots = zstdcompressor_slots,
+};
diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c
new file mode 100644
index 00000000000000..4e3a28068be130
--- /dev/null
+++ b/Modules/_zstd/decompressor.c
@@ -0,0 +1,891 @@
+/*
+Low level interface to Meta's zstd library for use in the compression.zstd
+Python module.
+*/
+
+/* ZstdDecompressor class definition */
+
+/*[clinic input]
+module _zstd
+class _zstd.ZstdDecompressor "ZstdDecompressor *" "clinic_state()->ZstdDecompressor_type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4e6eae327c0c0c76]*/
+
+#ifndef Py_BUILD_CORE_BUILTIN
+# define Py_BUILD_CORE_MODULE 1
+#endif
+
+#include "_zstdmodule.h"
+
+#include "buffer.h"
+
+#include <stddef.h> // offsetof()
+
+#define ZstdDecompressor_CAST(op) ((ZstdDecompressor *)op)
+
+/* Return the digested decompression dictionary (ZSTD_DDict) cached on
+   `self`, creating it from self->dict_content on first use.
+
+   Returns a pointer owned by `self`, or NULL on failure (an exception is
+   set when the module state is available).
+
+   The creation call runs with the GIL / thread state released, so another
+   thread may get here and publish its own DDict in the meantime.  The new
+   instance is therefore built into a local variable and only stored if the
+   cache is still empty afterwards; otherwise it is freed, so no instance
+   is leaked and every caller sees the same cached pointer. */
+static inline ZSTD_DDict *
+_get_DDict(ZstdDict *self)
+{
+    ZSTD_DDict *ret;
+
+    /* Already created */
+    if (self->d_dict != NULL) {
+        return self->d_dict;
+    }
+
+    Py_BEGIN_CRITICAL_SECTION(self);
+    if (self->d_dict == NULL) {
+        /* Create ZSTD_DDict instance from dictionary content */
+        char *dict_buffer = PyBytes_AS_STRING(self->dict_content);
+        Py_ssize_t dict_len = Py_SIZE(self->dict_content);
+        ZSTD_DDict *created;
+
+        Py_BEGIN_ALLOW_THREADS
+        created = ZSTD_createDDict(dict_buffer,
+                                   dict_len);
+        Py_END_ALLOW_THREADS
+
+        if (self->d_dict == NULL) {
+            /* Nobody else published one while we were creating ours */
+            self->d_dict = created;
+        }
+        else {
+            /* Lost the race: keep the published instance, discard ours.
+               ZSTD_freeDDict() accepts NULL. */
+            ZSTD_freeDDict(created);
+        }
+
+        if (self->d_dict == NULL) {
+            _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+            if (mod_state != NULL) {
+                PyErr_SetString(mod_state->ZstdError,
+                                "Failed to create ZSTD_DDict instance from zstd "
+                                "dictionary content. Maybe the content is corrupted.");
+            }
+        }
+    }
+
+    /* Don't lose any exception */
+    ret = self->d_dict;
+    Py_END_CRITICAL_SECTION();
+
+    return ret;
+}
+
+/* Apply every (parameter, value) pair of the `options` dict to the
+   decompression context of `self`.
+
+   Returns 0 on success, -1 on failure. */
+int
+_PyZstd_set_d_parameters(ZstdDecompressor *self, PyObject *options)
+{
+    _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+    if (mod_state == NULL) {
+        return -1;
+    }
+
+    if (!PyDict_Check(options)) {
+        PyErr_SetString(PyExc_TypeError,
+                        "options argument should be dict object.");
+        return -1;
+    }
+
+    PyObject *param_obj, *value_obj;
+    for (Py_ssize_t cursor = 0;
+         PyDict_Next(options, &cursor, &param_obj, &value_obj); )
+    {
+        /* Compression parameters make no sense for a decompressor */
+        if (mod_state->CParameter_type == Py_TYPE(param_obj)) {
+            PyErr_SetString(PyExc_TypeError,
+                            "Key of decompression options dict should "
+                            "NOT be CParameter.");
+            return -1;
+        }
+
+        /* Both key & value should be 32-bit signed int */
+        int param = PyLong_AsInt(param_obj);
+        if (param == -1 && PyErr_Occurred()) {
+            PyErr_SetString(PyExc_ValueError,
+                            "Key of options dict should be a DParameter attribute.");
+            return -1;
+        }
+
+        // TODO(emmatyping): check bounds when there is a value error here for better
+        // error message?
+        int setting = PyLong_AsInt(value_obj);
+        if (setting == -1 && PyErr_Occurred()) {
+            PyErr_SetString(PyExc_ValueError,
+                            "Value of options dict should be an int.");
+            return -1;
+        }
+
+        /* Set parameter to decompression context */
+        size_t code;
+        Py_BEGIN_CRITICAL_SECTION(self);
+        code = ZSTD_DCtx_setParameter(self->dctx, param, setting);
+        Py_END_CRITICAL_SECTION();
+
+        /* Check error */
+        if (ZSTD_isError(code)) {
+            set_parameter_error(mod_state, 0, param, setting);
+            return -1;
+        }
+    }
+    return 0;
+}
+
+/* Load dictionary or prefix to decompression context.
+
+ `dict` is either a ZstdDict instance (loaded as a digested dictionary) or
+ an exact 2-tuple (ZstdDict, type) where type is one of DICT_TYPE_DIGESTED,
+ DICT_TYPE_UNDIGESTED or DICT_TYPE_PREFIX.  Anything else raises TypeError.
+
+ Returns 0 on success, -1 on failure. */
+int
+_PyZstd_load_d_dict(ZstdDecompressor *self, PyObject *dict)
+{
+ size_t zstd_ret;
+ _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+ if (mod_state == NULL) {
+ return -1;
+ }
+ ZstdDict *zd;
+ int type, ret;
+
+ /* Check ZstdDict */
+ ret = PyObject_IsInstance(dict, (PyObject*)mod_state->ZstdDict_type);
+ if (ret < 0) {
+ return -1;
+ }
+ else if (ret > 0) {
+ /* When decompressing, use digested dictionary by default. */
+ zd = (ZstdDict*)dict;
+ type = DICT_TYPE_DIGESTED;
+ goto load;
+ }
+
+ /* Check (ZstdDict, type) */
+ if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2) {
+ /* Check ZstdDict */
+ ret = PyObject_IsInstance(PyTuple_GET_ITEM(dict, 0),
+ (PyObject*)mod_state->ZstdDict_type);
+ if (ret < 0) {
+ return -1;
+ }
+ else if (ret > 0) {
+ /* type == -1 may indicate an error.  It matches none of the three
+ accepted values, so control falls through to the TypeError below. */
+ type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1));
+ if (type == DICT_TYPE_DIGESTED ||
+ type == DICT_TYPE_UNDIGESTED ||
+ type == DICT_TYPE_PREFIX)
+ {
+ assert(type >= 0);
+ zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0);
+ goto load;
+ }
+ }
+ }
+
+ /* Wrong type */
+ PyErr_SetString(PyExc_TypeError,
+ "zstd_dict argument should be ZstdDict object.");
+ return -1;
+
+load:
+ /* Here zd and type are both set; dispatch on the loading method. */
+ if (type == DICT_TYPE_DIGESTED) {
+ /* Get ZSTD_DDict */
+ ZSTD_DDict *d_dict = _get_DDict(zd);
+ if (d_dict == NULL) {
+ return -1;
+ }
+ /* Reference a prepared dictionary */
+ Py_BEGIN_CRITICAL_SECTION(self);
+ zstd_ret = ZSTD_DCtx_refDDict(self->dctx, d_dict);
+ Py_END_CRITICAL_SECTION();
+ }
+ else if (type == DICT_TYPE_UNDIGESTED) {
+ /* Load a dictionary; both the decompressor and the ZstdDict are locked
+ while zd->dict_content is read. */
+ Py_BEGIN_CRITICAL_SECTION2(self, zd);
+ zstd_ret = ZSTD_DCtx_loadDictionary(
+ self->dctx,
+ PyBytes_AS_STRING(zd->dict_content),
+ Py_SIZE(zd->dict_content));
+ Py_END_CRITICAL_SECTION2();
+ }
+ else if (type == DICT_TYPE_PREFIX) {
+ /* Load a prefix */
+ Py_BEGIN_CRITICAL_SECTION2(self, zd);
+ zstd_ret = ZSTD_DCtx_refPrefix(
+ self->dctx,
+ PyBytes_AS_STRING(zd->dict_content),
+ Py_SIZE(zd->dict_content));
+ Py_END_CRITICAL_SECTION2();
+ }
+ else {
+ /* Impossible code path */
+ PyErr_SetString(PyExc_SystemError,
+ "load_d_dict() impossible code path");
+ return -1;
+ }
+
+ /* Check error */
+ if (ZSTD_isError(zstd_ret)) {
+ set_zstd_error(mod_state, ERR_LOAD_D_DICT, zstd_ret);
+ return -1;
+ }
+ return 0;
+}
+
+
+
+/*
+    Given the two types of decompressors (defined in _zstdmodule.h):
+
+        typedef enum {
+            TYPE_DECOMPRESSOR,          // [D], ZstdDecompressor class
+            TYPE_ENDLESS_DECOMPRESSOR,  // [E], decompress() function
+        } decompress_type;
+
+    Decompress implementation for [D], [E], pseudo code:
+
+        initialize_output_buffer
+        while True:
+            decompress_data
+            set_object_flag   # .eof for [D], .at_frame_edge for [E].
+
+            if output_buffer_exhausted:
+                if output_buffer_reached_max_length:
+                    finish
+                grow_output_buffer
+            elif input_buffer_exhausted:
+                finish
+
+    ZSTD_decompressStream()'s size_t return value:
+      - 0 when a frame is completely decoded and fully flushed, zstd's internal
+        buffer has no data.
+      - An error code, which can be tested using ZSTD_isError().
+      - Or any other value > 0, which means there is still some decoding or
+        flushing to do to complete current frame.
+
+    Note, decompressing "an empty input" in any case will make it > 0.
+
+    [E] supports multiple frames, has an .at_frame_edge flag, it means both the
+    input and output streams are at a frame edge. The flag can be set by this
+    statement:
+
+        .at_frame_edge = (zstd_ret == 0) ? 1 : 0
+
+    But if decompressing "an empty input" at "a frame edge", zstd_ret will be
+    non-zero, then .at_frame_edge will be wrongly set to false. To solve this
+    problem, two AFE checks are needed to ensure that: when at "a frame edge",
+    empty input will not be decompressed.
+
+        // AFE check
+        if (self->at_frame_edge && in->pos == in->size) {
+            finish
+        }
+
+    In [E], if .at_frame_edge is eventually set to true, but input stream has
+    unconsumed data (in->pos < in->size), then the outer function
+    stream_decompress() will set .at_frame_edge to false. In this case,
+    although the output stream is at a frame edge, for the caller, the input
+    stream is not at a frame edge, see below diagram. This behavior does not
+    affect the next AFE check, since (in->pos < in->size).
+
+        input stream:  --------------|---
+                                     ^
+        output stream: ====================|
+                                           ^
+*/
+/* Decompress from `in` into a new bytes object.
+
+ self          Decompressor whose dctx and state flags are used and updated.
+ in            zstd input descriptor; in->pos is advanced by zstd.
+ max_length    Forwarded to the output buffer helpers as the output limit.
+ initial_size  If >= 0, size of the first output block; otherwise the
+               output buffer picks its own initial size.
+ type          Selects ZstdDecompressor-class behaviour (stop at the end of
+               a frame and set .eof) or "endless" behaviour (keep going
+               across frames and maintain .at_frame_edge).
+
+ Returns a new reference, or NULL on failure. */
+PyObject *
+decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
+ Py_ssize_t max_length,
+ Py_ssize_t initial_size,
+ decompress_type type)
+{
+ size_t zstd_ret;
+ ZSTD_outBuffer out;
+ _BlocksOutputBuffer buffer = {.list = NULL};
+ PyObject *ret;
+
+ /* The first AFE check for setting .at_frame_edge flag:
+ at a frame edge with no input there is nothing to do, return b''. */
+ if (type == TYPE_ENDLESS_DECOMPRESSOR) {
+ if (self->at_frame_edge && in->pos == in->size) {
+ _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+ if (mod_state == NULL) {
+ return NULL;
+ }
+ ret = mod_state->empty_bytes;
+ Py_INCREF(ret);
+ return ret;
+ }
+ }
+
+ /* Initialize the output buffer */
+ if (initial_size >= 0) {
+ if (_OutputBuffer_InitWithSize(&buffer, &out, max_length, initial_size) < 0) {
+ goto error;
+ }
+ }
+ else {
+ if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) {
+ goto error;
+ }
+ }
+ assert(out.pos == 0);
+
+ while (1) {
+ /* Decompress, with the GIL / thread state released */
+ Py_BEGIN_ALLOW_THREADS
+ zstd_ret = ZSTD_decompressStream(self->dctx, &out, in);
+ Py_END_ALLOW_THREADS
+
+ /* Check error */
+ if (ZSTD_isError(zstd_ret)) {
+ _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+ if (mod_state != NULL) {
+ set_zstd_error(mod_state, ERR_DECOMPRESS, zstd_ret);
+ }
+ goto error;
+ }
+
+ /* Set .eof/.at_frame_edge flag */
+ if (type == TYPE_DECOMPRESSOR) {
+ /* ZstdDecompressor class stops when a frame is decompressed */
+ if (zstd_ret == 0) {
+ self->eof = 1;
+ break;
+ }
+ }
+ else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
+ /* decompress() function supports multiple frames */
+ self->at_frame_edge = (zstd_ret == 0) ? 1 : 0;
+
+ /* The second AFE check for setting .at_frame_edge flag */
+ if (self->at_frame_edge && in->pos == in->size) {
+ break;
+ }
+ }
+
+ /* Need to check out before in. Maybe zstd's internal buffer still has
+ a few bytes can be output, grow the buffer and continue. */
+ if (out.pos == out.size) {
+ /* Output buffer exhausted */
+
+ /* Output buffer reached max_length */
+ if (_OutputBuffer_ReachedMaxLength(&buffer, &out)) {
+ break;
+ }
+
+ /* Grow output buffer */
+ if (_OutputBuffer_Grow(&buffer, &out) < 0) {
+ goto error;
+ }
+ assert(out.pos == 0);
+
+ }
+ else if (in->pos == in->size) {
+ /* Finished */
+ break;
+ }
+ }
+
+ /* Return a bytes object */
+ ret = _OutputBuffer_Finish(&buffer, &out);
+ if (ret != NULL) {
+ return ret;
+ }
+
+error:
+ _OutputBuffer_OnError(&buffer);
+ return NULL;
+}
+
+/* Return the decompressor to its start-of-session state: forget buffered
+   input, restore the state flags and reset the zstd session (parameters
+   and dictionary are untouched by ZSTD_reset_session_only). */
+void
+decompressor_reset_session(ZstdDecompressor *self,
+                           decompress_type type)
+{
+    // TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here
+    // and ensure lock is always held
+
+    /* Mark the input buffer as holding no unconsumed bytes */
+    self->in_begin = 0;
+    self->in_end = 0;
+
+    /* Only the TYPE_DECOMPRESSOR path drops the cached unused_data object */
+    if (TYPE_DECOMPRESSOR == type) {
+        Py_CLEAR(self->unused_data);
+    }
+
+    /* State flags */
+    self->eof = 0;
+    self->at_frame_edge = 1;
+    self->needs_input = 1;
+    self->_unused_char_for_align = 0;
+
+    /* Resetting session never fail */
+    ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only);
+}
+
+/* Feed `data` to the decompressor and return the decompressed bytes.
+
+ Input that zstd did not consume on a previous call is kept in
+ self->input_buffer; in_begin/in_end are the offsets delimiting it.  This
+ function combines that leftover with `data`, calls decompress_impl(), then
+ updates needs_input and stores whatever input is still unconsumed.
+
+ Returns a new reference, or NULL on failure.  On failure (other than the
+ EOFError raised when .eof is already set) the decompressor session is
+ reset via decompressor_reset_session(). */
+PyObject *
+stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length,
+ decompress_type type)
+{
+ Py_ssize_t initial_buffer_size = -1;
+ ZSTD_inBuffer in;
+ PyObject *ret = NULL;
+ int use_input_buffer;
+
+ if (type == TYPE_DECOMPRESSOR) {
+ /* Check .eof flag */
+ if (self->eof) {
+ PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame.");
+ assert(ret == NULL);
+ /* Returns NULL without resetting the session */
+ goto success;
+ }
+ }
+ else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
+ /* Fast path for the first frame */
+ if (self->at_frame_edge && self->in_begin == self->in_end) {
+ /* Read decompressed size */
+ uint64_t decompressed_size = ZSTD_getFrameContentSize(data->buf, data->len);
+
+ /* These two zstd constants always > PY_SSIZE_T_MAX:
+ ZSTD_CONTENTSIZE_UNKNOWN is (0ULL - 1)
+ ZSTD_CONTENTSIZE_ERROR is (0ULL - 2)
+
+ Use ZSTD_findFrameCompressedSize() to check complete frame,
+ prevent allocating too much memory for small input chunk. */
+
+ if (decompressed_size <= (uint64_t) PY_SSIZE_T_MAX &&
+ !ZSTD_isError(ZSTD_findFrameCompressedSize(data->buf, data->len)) )
+ {
+ initial_buffer_size = (Py_ssize_t) decompressed_size;
+ }
+ }
+ }
+
+ /* Prepare input buffer w/wo unconsumed data */
+ if (self->in_begin == self->in_end) {
+ /* No unconsumed data: read directly from the caller's buffer */
+ use_input_buffer = 0;
+
+ in.src = data->buf;
+ in.size = data->len;
+ in.pos = 0;
+ }
+ else if (data->len == 0) {
+ /* Has unconsumed data, fast path for b'' */
+ assert(self->in_begin < self->in_end);
+
+ use_input_buffer = 1;
+
+ in.src = self->input_buffer + self->in_begin;
+ in.size = self->in_end - self->in_begin;
+ in.pos = 0;
+ }
+ else {
+ /* Has unconsumed data: append `data` to it inside input_buffer */
+ use_input_buffer = 1;
+
+ /* Unconsumed data size in input_buffer */
+ size_t used_now = self->in_end - self->in_begin;
+ assert(self->in_end > self->in_begin);
+
+ /* Number of bytes we can append to input buffer */
+ size_t avail_now = self->input_buffer_size - self->in_end;
+ assert(self->input_buffer_size >= self->in_end);
+
+ /* Number of bytes we can append if we move existing contents to
+ beginning of buffer */
+ size_t avail_total = self->input_buffer_size - used_now;
+ assert(self->input_buffer_size >= used_now);
+
+ if (avail_total < (size_t) data->len) {
+ /* Even after compacting there is not enough room: reallocate */
+ char *tmp;
+ size_t new_size = used_now + data->len;
+
+ /* Allocate with new size */
+ tmp = PyMem_Malloc(new_size);
+ if (tmp == NULL) {
+ PyErr_NoMemory();
+ goto error;
+ }
+
+ /* Copy unconsumed data to the beginning of new buffer */
+ memcpy(tmp,
+ self->input_buffer + self->in_begin,
+ used_now);
+
+ /* Switch to new buffer */
+ PyMem_Free(self->input_buffer);
+ self->input_buffer = tmp;
+ self->input_buffer_size = new_size;
+
+ /* Set begin & end position */
+ self->in_begin = 0;
+ self->in_end = used_now;
+ }
+ else if (avail_now < (size_t) data->len) {
+ /* Move unconsumed data to the beginning.
+ Overlap is possible, so use memmove(). */
+ memmove(self->input_buffer,
+ self->input_buffer + self->in_begin,
+ used_now);
+
+ /* Set begin & end position */
+ self->in_begin = 0;
+ self->in_end = used_now;
+ }
+
+ /* Copy data to input buffer */
+ memcpy(self->input_buffer + self->in_end, data->buf, data->len);
+ self->in_end += data->len;
+
+ in.src = self->input_buffer + self->in_begin;
+ in.size = used_now + data->len;
+ in.pos = 0;
+ }
+ assert(in.pos == 0);
+
+ /* Decompress */
+ ret = decompress_impl(self, &in,
+ max_length, initial_buffer_size,
+ type);
+ if (ret == NULL) {
+ goto error;
+ }
+
+ /* Unconsumed input data */
+ if (in.pos == in.size) {
+ /* All input was consumed; needs_input depends on whether output may
+ still be pending. */
+ if (type == TYPE_DECOMPRESSOR) {
+ if (Py_SIZE(ret) == max_length || self->eof) {
+ self->needs_input = 0;
+ }
+ else {
+ self->needs_input = 1;
+ }
+ }
+ else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
+ if (Py_SIZE(ret) == max_length && !self->at_frame_edge) {
+ self->needs_input = 0;
+ }
+ else {
+ self->needs_input = 1;
+ }
+ }
+
+ if (use_input_buffer) {
+ /* Clear input_buffer */
+ self->in_begin = 0;
+ self->in_end = 0;
+ }
+ }
+ else {
+ /* Some input is left over: remember it for the next call */
+ size_t data_size = in.size - in.pos;
+
+ self->needs_input = 0;
+
+ if (type == TYPE_ENDLESS_DECOMPRESSOR) {
+ self->at_frame_edge = 0;
+ }
+
+ if (!use_input_buffer) {
+ /* Discard buffer if it's too small
+ (resizing it may needlessly copy the current contents) */
+ if (self->input_buffer != NULL &&
+ self->input_buffer_size < data_size)
+ {
+ PyMem_Free(self->input_buffer);
+ self->input_buffer = NULL;
+ self->input_buffer_size = 0;
+ }
+
+ /* Allocate if necessary */
+ if (self->input_buffer == NULL) {
+ self->input_buffer = PyMem_Malloc(data_size);
+ if (self->input_buffer == NULL) {
+ PyErr_NoMemory();
+ goto error;
+ }
+ self->input_buffer_size = data_size;
+ }
+
+ /* Copy unconsumed data */
+ memcpy(self->input_buffer, (char*)in.src + in.pos, data_size);
+ self->in_begin = 0;
+ self->in_end = data_size;
+ }
+ else {
+ /* Use input buffer: the leftover already lives there, just advance */
+ self->in_begin += in.pos;
+ }
+ }
+
+ goto success;
+
+error:
+ /* Reset decompressor's states/session */
+ decompressor_reset_session(self, type);
+
+ Py_CLEAR(ret);
+success:
+
+ return ret;
+}
+
+
+/* tp_new for ZstdDecompressor.
+
+   Allocates the object, initialises every field that
+   ZstdDecompressor_dealloc() reads, and creates the zstd decompression
+   context.  The positional/keyword arguments are ignored here.  The object
+   is not registered with the garbage collector by this function.
+
+   Returns a new reference, or NULL on failure. */
+static PyObject *
+_zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwds))
+{
+    ZstdDecompressor *self;
+    self = PyObject_GC_New(ZstdDecompressor, type);
+    if (self == NULL) {
+        goto error;
+    }
+
+    self->inited = 0;
+    self->dict = NULL;
+    self->input_buffer = NULL;
+    self->input_buffer_size = 0;
+    /* in_begin == in_end is what stream_decompress() treats as
+       "no unconsumed input" */
+    self->in_begin = -1;
+    self->in_end = -1;
+    self->unused_data = NULL;
+    self->eof = 0;
+
+    /* needs_input flag */
+    self->needs_input = 1;
+
+    /* at_frame_edge flag */
+    self->at_frame_edge = 1;
+
+    /* Decompression context */
+    self->dctx = ZSTD_createDCtx();
+    if (self->dctx == NULL) {
+        _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+        if (mod_state != NULL) {
+            PyErr_SetString(mod_state->ZstdError,
+                            "Unable to create ZSTD_DCtx instance.");
+        }
+        goto error;
+    }
+
+    return (PyObject*)self;
+
+error:
+    if (self != NULL) {
+        PyObject_GC_Del(self);
+        /* PyObject_GC_New() made the instance hold a strong reference to its
+           type.  PyObject_GC_Del() only frees the memory, so that reference
+           has to be released here, exactly as the dealloc function does. */
+        Py_DECREF(type);
+    }
+    return NULL;
+}
+
+/* tp_dealloc for ZstdDecompressor: frees the zstd context, the dictionary
+   reference, the leftover-input buffer and the cached unused_data object,
+   then the object memory and the instance's reference to its type. */
+static void
+ZstdDecompressor_dealloc(PyObject *ob)
+{
+    ZstdDecompressor *decompressor = ZstdDecompressor_CAST(ob);
+    PyTypeObject *heap_type = Py_TYPE(ob);
+
+    PyObject_GC_UnTrack(decompressor);
+
+    /* The decompression context is freed before the dictionary object is
+       released. */
+    ZSTD_freeDCtx(decompressor->dctx);
+    Py_CLEAR(decompressor->dict);
+
+    /* Buffer of input that zstd had not consumed yet */
+    PyMem_Free(decompressor->input_buffer);
+
+    /* Cached bytes object handed out by the unused_data getter */
+    Py_CLEAR(decompressor->unused_data);
+
+    PyObject_GC_Del(ob);
+    Py_DECREF(heap_type);
+}
+
+/*[clinic input]
+_zstd.ZstdDecompressor.__init__
+
+ zstd_dict: object = None
+ A ZstdDict object, a pre-trained zstd dictionary.
+ options: object = None
+ A dict object that contains advanced decompression parameters.
+
+Create a decompressor object for decompressing data incrementally.
+
+Thread-safe at method level. For one-shot decompression, use the decompress()
+function instead.
+[clinic start generated code]*/
+
+static int
+_zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self,
+ PyObject *zstd_dict, PyObject *options)
+/*[clinic end generated code: output=703af2f1ec226642 input=8fd72999acc1a146]*/
+{
+    /* Configure a freshly allocated decompressor.
+
+       zstd_dict optionally attaches a dictionary and options optionally
+       sets decompression parameters; Py_None means "not given".  Returns 0
+       on success and -1 on failure.  The object is handed to the garbage
+       collector only on success. */
+
+    /* A second __init__ call on the same object is rejected */
+    if (self->inited) {
+        PyErr_SetString(PyExc_RuntimeError, init_twice_msg);
+        return -1;
+    }
+    self->inited = 1;
+
+    /* Dictionary first ... */
+    const int has_dict = (zstd_dict != Py_None);
+    if (has_dict && _PyZstd_load_d_dict(self, zstd_dict) < 0) {
+        return -1;
+    }
+    if (has_dict) {
+        /* Keep the dictionary alive for as long as self lives */
+        Py_INCREF(zstd_dict);
+        self->dict = zstd_dict;
+    }
+
+    /* ... then the advanced parameters */
+    if (options != Py_None && _PyZstd_set_d_parameters(self, options) < 0) {
+        return -1;
+    }
+
+    // GC tracking starts only now that self->dict has its final value.
+    PyObject_GC_Track(self);
+    return 0;
+}
+
+/*[clinic input]
+@critical_section
+@getter
+_zstd.ZstdDecompressor.unused_data
+
+A bytes object of un-consumed input data.
+
+When ZstdDecompressor object stops after a frame is
+decompressed, unused input data after the frame. Otherwise this will be b''.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self)
+/*[clinic end generated code: output=f3a20940f11b6b09 input=5233800bef00df04]*/
+{
+    /* Getter for .unused_data.
+
+       Before the end of a frame (.eof is false) this returns the module's
+       shared empty bytes object.  After it, the unconsumed input that is
+       still held in input_buffer is returned; the bytes object is built on
+       first access and cached in self->unused_data.
+
+       Returns a new reference, or NULL on failure.  Every path leaves
+       through Py_END_CRITICAL_SECTION(); there is deliberately no early
+       return inside the critical section. */
+    PyObject *ret = NULL;
+
+    /* Thread-safe code */
+    Py_BEGIN_CRITICAL_SECTION(self);
+
+    if (!self->eof) {
+        _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self));
+        if (mod_state != NULL) {
+            ret = mod_state->empty_bytes;
+            Py_INCREF(ret);
+        }
+        /* else: ret stays NULL and is returned after the section ends */
+    }
+    else {
+        if (self->unused_data == NULL) {
+            /* May fail and leave unused_data NULL; ret is then NULL too */
+            self->unused_data = PyBytes_FromStringAndSize(
+                                    self->input_buffer + self->in_begin,
+                                    self->in_end - self->in_begin);
+        }
+        ret = self->unused_data;
+        Py_XINCREF(ret);
+    }
+
+    Py_END_CRITICAL_SECTION();
+
+    return ret;
+}
+
+/*[clinic input]
+_zstd.ZstdDecompressor.decompress
+
+ data: Py_buffer
+ A bytes-like object, zstd data to be decompressed.
+ max_length: Py_ssize_t = -1
+ Maximum size of returned data. When it is negative, the size of
+ output buffer is unlimited. When it is nonnegative, returns at
+ most max_length bytes of decompressed data.
+
+Decompress *data*, returning uncompressed bytes if possible, or b'' otherwise.
+
+If *max_length* is nonnegative, returns at most *max_length* bytes of
+decompressed data. If this limit is reached and further output can be
+produced, *self.needs_input* will be set to ``False``. In this case, the next
+call to *decompress()* may provide *data* as b'' to obtain more of the output.
+
+If all of the input data was decompressed and returned (either because this
+was less than *max_length* bytes, or because *max_length* was negative),
+*self.needs_input* will be set to True.
+
+Attempting to decompress data after the end of a frame is reached raises an
+EOFError. Any data found after the end of the frame is ignored and saved in
+the self.unused_data attribute.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self,
+ Py_buffer *data,
+ Py_ssize_t max_length)
+/*[clinic end generated code: output=a4302b3c940dbec6 input=830e455bc9a50b6e]*/
+{
+    /* Thin wrapper: run stream_decompress() in ZstdDecompressor-class mode
+       while holding the object's critical section. */
+    PyObject *output;
+
+    Py_BEGIN_CRITICAL_SECTION(self);
+    output = stream_decompress(self, data, max_length, TYPE_DECOMPRESSOR);
+    Py_END_CRITICAL_SECTION();
+
+    return output;
+}
+
+#define clinic_state() (get_zstd_state_from_type(type))
+#include "clinic/decompressor.c.h"
+#undef clinic_state
+
+/* Method table for ZstdDecompressor.  The entry is a METHODDEF macro,
+   presumably provided by the Argument Clinic header
+   clinic/decompressor.c.h that this file includes; {0} terminates the
+   table. */
+static PyMethodDef ZstdDecompressor_methods[] = {
+ _ZSTD_ZSTDDECOMPRESSOR_DECOMPRESS_METHODDEF
+
+ {0}
+};
+
+/* Docstring attached to the read-only `eof` member. */
+PyDoc_STRVAR(ZstdDecompressor_eof_doc,
+"True means the end of the first frame has been reached. If decompress data\n"
+"after that, an EOFError exception will be raised.");
+
+/* Docstring attached to the read-only `needs_input` member. */
+PyDoc_STRVAR(ZstdDecompressor_needs_input_doc,
+"If the max_length output limit in .decompress() method has been reached, and\n"
+"the decompressor has (or may has) unconsumed input data, it will be set to\n"
+"False. In this case, pass b'' to .decompress() method may output further data.");
+
+/* Struct fields exposed as read-only bool attributes; {0} terminates the
+   table. */
+static PyMemberDef ZstdDecompressor_members[] = {
+ /* eof: read from ZstdDecompressor.eof */
+ {"eof", Py_T_BOOL, offsetof(ZstdDecompressor, eof),
+ Py_READONLY, ZstdDecompressor_eof_doc},
+
+ /* needs_input: read from ZstdDecompressor.needs_input */
+ {"needs_input", Py_T_BOOL, offsetof(ZstdDecompressor, needs_input),
+ Py_READONLY, ZstdDecompressor_needs_input_doc},
+
+ {0}
+};
+
+/* Computed attributes.  The entry is a GETSETDEF macro, presumably
+   generated by Argument Clinic for the unused_data getter; {0} terminates
+   the table. */
+static PyGetSetDef ZstdDecompressor_getset[] = {
+ _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF
+
+ {0}
+};
+
+/* tp_traverse: the dictionary object is the only member reported to the
+   garbage collector. */
+static int
+ZstdDecompressor_traverse(PyObject *ob, visitproc visit, void *arg)
+{
+    ZstdDecompressor *decompressor = ZstdDecompressor_CAST(ob);
+
+    Py_VISIT(decompressor->dict);
+    return 0;
+}
+
+/* tp_clear: drop the references to the dictionary and to the cached
+   unused_data object. */
+static int
+ZstdDecompressor_clear(PyObject *ob)
+{
+    ZstdDecompressor *decompressor = ZstdDecompressor_CAST(ob);
+
+    Py_CLEAR(decompressor->dict);
+    Py_CLEAR(decompressor->unused_data);
+    return 0;
+}
+
+/* Slot table wiring the functions and tables defined in this file into the
+   ZstdDecompressor heap type; {0} terminates the table. */
+static PyType_Slot ZstdDecompressor_slots[] = {
+ {Py_tp_new, _zstd_ZstdDecompressor_new},
+ {Py_tp_dealloc, ZstdDecompressor_dealloc},
+ {Py_tp_init, _zstd_ZstdDecompressor___init__},
+ {Py_tp_methods, ZstdDecompressor_methods},
+ {Py_tp_members, ZstdDecompressor_members},
+ {Py_tp_getset, ZstdDecompressor_getset},
+ /* The type's docstring is the __init__ docstring */
+ {Py_tp_doc, (char*)_zstd_ZstdDecompressor___init____doc__},
+ {Py_tp_traverse, ZstdDecompressor_traverse},
+ {Py_tp_clear, ZstdDecompressor_clear},
+ {0}
+};
+
+/* Type specification for _zstd.ZstdDecompressor.  The flags make the type
+   subclassable (Py_TPFLAGS_BASETYPE) and GC-aware (Py_TPFLAGS_HAVE_GC). */
+PyType_Spec ZstdDecompressor_type_spec = {
+ .name = "_zstd.ZstdDecompressor",
+ .basicsize = sizeof(ZstdDecompressor),
+ .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
+ .slots = ZstdDecompressor_slots,
+};
diff --git a/Modules/_zstd/zdict.c b/Modules/_zstd/zdict.c
new file mode 100644
index 00000000000000..28ab964a6caa87
--- /dev/null
+++ b/Modules/_zstd/zdict.c
@@ -0,0 +1,286 @@
+/*
+Low level interface to Meta's zstd library for use in the compression.zstd
+Python module.
+*/
+
+/* ZstdDict class definitions */
+
+/*[clinic input]
+module _zstd
+class _zstd.ZstdDict "ZstdDict *" "clinic_state()->ZstdDict_type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=a5d1254c497e52ba]*/
+
+#ifndef Py_BUILD_CORE_BUILTIN
+# define Py_BUILD_CORE_MODULE 1
+#endif
+
+#include "_zstdmodule.h"
+
+#include <stddef.h>               // offsetof()
+
+#define ZstdDict_CAST(op) ((ZstdDict *)op)
+
+/* tp_new: allocate a ZstdDict and put every field into a defined state.
+
+   The object is not tracked by the garbage collector here; __init__ starts
+   tracking it once dict_content has been stored.
+
+   Returns a new reference, or NULL with an exception set on failure. */
+static PyObject *
+_zstd_ZstdDict_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwargs))
+{
+    ZstdDict *self = PyObject_GC_New(ZstdDict, type);
+    if (self == NULL) {
+        return NULL;
+    }
+
+    /* PyObject_GC_New() leaves the type-specific fields uninitialized, so
+       give each of them a well-defined value before anything can read it. */
+    self->dict_content = NULL;
+    self->d_dict = NULL;
+    self->dict_id = 0;
+    self->inited = 0;
+
+    /* ZSTD_CDict dict */
+    self->c_dicts = PyDict_New();
+    if (self->c_dicts == NULL) {
+        /* Undo the allocation the same way the deallocator does: the
+           instance owns a reference to its type, which PyObject_GC_Del()
+           does not release. */
+        PyObject_GC_Del(self);
+        Py_DECREF(type);
+        return NULL;
+    }
+
+    return (PyObject *)self;
+}
+
+/* tp_dealloc: free the zstd decompression dictionary, drop the owned Python
+   references, then free the object and release its reference to the type. */
+static void
+ZstdDict_dealloc(PyObject *ob)
+{
+    ZstdDict *self = ZstdDict_CAST(ob);
+
+    PyObject_GC_UnTrack(self);
+
+    /* Free ZSTD_DDict instance */
+    ZSTD_freeDDict(self->d_dict);
+
+    /* Release dict_content only after the ZSTD_CDict/ZSTD_DDict instances
+       are gone: the c_dicts cache (presumably the owner of the ZSTD_CDict
+       objects) is therefore dropped first. */
+    Py_CLEAR(self->c_dicts);
+    Py_CLEAR(self->dict_content);
+
+    /* The type must be fetched before the object memory is freed. */
+    PyTypeObject *tp = Py_TYPE(self);
+    PyObject_GC_Del(ob);
+    Py_DECREF(tp);
+}
+
+/*[clinic input]
+_zstd.ZstdDict.__init__
+
+ dict_content: object
+ A bytes-like object, dictionary's content.
+ is_raw: bool = False
+ This parameter is for advanced user. True means dict_content
+ argument is a "raw content" dictionary, free of any format
+ restriction. False means dict_content argument is an ordinary
+ zstd dictionary, was created by zstd functions, follow a
+ specified format.
+
+Represents a zstd dictionary, which can be used for compression/decompression.
+
+It's thread-safe, and can be shared by multiple ZstdCompressor /
+ZstdDecompressor objects.
+[clinic start generated code]*/
+
+static int
+_zstd_ZstdDict___init___impl(ZstdDict *self, PyObject *dict_content,
+                             int is_raw)
+/*[clinic end generated code: output=c5f5a0d8377d037c input=e6750f62a513b3ee]*/
+{
+    /* Initialize `self` from `dict_content`.
+
+       Stores a bytes copy of the content, validates its size, reads the
+       dictionary ID and finally registers the object with the garbage
+       collector. Returns 0 on success, -1 with an exception set on failure
+       (RuntimeError on a second call, TypeError for an unsuitable
+       dict_content, ValueError for too short or invalid content). */
+
+    /* Only called once */
+    if (self->inited) {
+        PyErr_SetString(PyExc_RuntimeError, init_twice_msg);
+        return -1;
+    }
+    self->inited = 1;
+
+    /* Check dict_content's type */
+    self->dict_content = PyBytes_FromObject(dict_content);
+    if (self->dict_content == NULL) {
+        /* Running out of memory is not a type problem: let that error
+           through unchanged instead of reporting it as a TypeError. */
+        if (!PyErr_ExceptionMatches(PyExc_MemoryError)) {
+            PyErr_SetString(PyExc_TypeError,
+                            "dict_content argument should be bytes-like object.");
+        }
+        return -1;
+    }
+
+    /* Both an ordinary dictionary and a "raw content" dictionary must be
+       at least 8 bytes long */
+    if (PyBytes_GET_SIZE(self->dict_content) < 8) {
+        PyErr_SetString(PyExc_ValueError,
+                        "Zstd dictionary content should at least 8 bytes.");
+        return -1;
+    }
+
+    /* Get dict_id, 0 means "raw content" dictionary. */
+    self->dict_id = ZSTD_getDictID_fromDict(PyBytes_AS_STRING(self->dict_content),
+                                            PyBytes_GET_SIZE(self->dict_content));
+
+    /* Check validity for ordinary dictionary */
+    if (!is_raw && self->dict_id == 0) {
+        const char *msg = "The dict_content argument is not a valid zstd "
+                          "dictionary. The first 4 bytes of a valid zstd dictionary "
+                          "should be a magic number: b'\\x37\\xA4\\x30\\xEC'.\n"
+                          "If you are an advanced user, and can be sure that "
+                          "dict_content argument is a \"raw content\" zstd "
+                          "dictionary, set is_raw parameter to True.";
+        PyErr_SetString(PyExc_ValueError, msg);
+        return -1;
+    }
+
+    // Can only track self once self->dict_content is included
+    PyObject_GC_Track(self);
+    return 0;
+}
+
+#define clinic_state() (get_zstd_state(type))
+#include "clinic/zdict.c.h"
+#undef clinic_state
+
+/* Docstring of the dict_id member. */
+PyDoc_STRVAR(ZstdDict_dictid_doc,
+"ID of zstd dictionary, a 32-bit unsigned int value.\n\n"
+"Non-zero means ordinary dictionary, was created by zstd functions, follow\n"
+"a specified format.\n\n"
+"0 means a \"raw content\" dictionary, free of any format restriction, used\n"
+"for advanced user.");
+
+/* Docstring of the dict_content member. */
+PyDoc_STRVAR(ZstdDict_dictcontent_doc,
+"The content of zstd dictionary, a bytes object, it's the same as dict_content\n"
+"argument in ZstdDict.__init__() method. It can be used with other programs.");
+
+/* tp_str: short human-readable summary showing the dictionary ID and the
+   size of the dictionary content in bytes.
+
+   Returns a new str object, or NULL with an exception set. */
+static PyObject *
+ZstdDict_str(PyObject *ob)
+{
+    ZstdDict *dict = ZstdDict_CAST(ob);
+    /* dict_content stays NULL until __init__ has stored it; report a size
+       of 0 in that case instead of dereferencing NULL. */
+    Py_ssize_t size = 0;
+    if (dict->dict_content != NULL) {
+        size = Py_SIZE(dict->dict_content);
+    }
+    return PyUnicode_FromFormat("<ZstdDict dict_id=%u dict_size=%zd>",
+                                dict->dict_id, size);
+}
+
+/* Read-only attributes read straight from fields of the ZstdDict struct.
+   dict_content uses Py_T_OBJECT_EX, so reading it while the field is still
+   NULL raises AttributeError instead of returning None. */
+static PyMemberDef ZstdDict_members[] = {
+ {"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, ZstdDict_dictid_doc},
+ {"dict_content", Py_T_OBJECT_EX, offsetof(ZstdDict, dict_content), Py_READONLY, ZstdDict_dictcontent_doc},
+ {0}
+};
+
+/*[clinic input]
+@critical_section
+@getter
+_zstd.ZstdDict.as_digested_dict
+
+Load as a digested dictionary to compressor.
+
+Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digested_dict)
+1. Some advanced compression parameters of compressor may be overridden
+ by parameters of digested dictionary.
+2. ZstdDict has a digested dictionaries cache for each compression level.
+ It's faster when loading again a digested dictionary with the same
+ compression level.
+3. No need to use this for decompression.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self)
+/*[clinic end generated code: output=09b086e7a7320dbb input=585448c79f31f74a]*/
+{
+ /* Returns the new tuple (self, DICT_TYPE_DIGESTED), or NULL with an
+    exception set; the "O" format takes its own reference to self. */
+ return Py_BuildValue("Oi", self, DICT_TYPE_DIGESTED);
+}
+
+/*[clinic input]
+@critical_section
+@getter
+_zstd.ZstdDict.as_undigested_dict
+
+Load as an undigested dictionary to compressor.
+
+Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undigested_dict)
+1. The advanced compression parameters of compressor will not be overridden.
+2. Loading an undigested dictionary is costly. If load an undigested dictionary
+ multiple times, consider reusing a compressor object.
+3. No need to use this for decompression.
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self)
+/*[clinic end generated code: output=43c7a989e6d4253a input=022b0829ffb1c220]*/
+{
+ /* Returns the new tuple (self, DICT_TYPE_UNDIGESTED), or NULL with an
+    exception set; the "O" format takes its own reference to self. */
+ return Py_BuildValue("Oi", self, DICT_TYPE_UNDIGESTED);
+}
+
+/*[clinic input]
+@critical_section
+@getter
+_zstd.ZstdDict.as_prefix
+
+Load as a prefix to compressor/decompressor.
+
+Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix)
+1. Prefix is compatible with long distance matching, while dictionary is not.
+2. It only works for the first frame, then the compressor/decompressor will
+ return to no prefix state.
+3. When decompressing, must use the same prefix as when compressing."
+[clinic start generated code]*/
+
+static PyObject *
+_zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self)
+/*[clinic end generated code: output=6f7130c356595a16 input=09fb82a6a5407e87]*/
+{
+ /* Returns the new tuple (self, DICT_TYPE_PREFIX), or NULL with an
+    exception set; the "O" format takes its own reference to self. */
+ return Py_BuildValue("Oi", self, DICT_TYPE_PREFIX);
+}
+
+/* Getter-backed attributes. Each entry is a macro that is not defined in this
+   file (presumably generated by Argument Clinic into clinic/zdict.c.h for the
+   three as_* getters above). */
+static PyGetSetDef ZstdDict_getset[] = {
+ _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF
+
+ _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF
+
+ _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF
+
+ {0}
+};
+
+/* sq_length: len(zstd_dict) is the size of the dictionary content in bytes.
+
+   Returns 0 while dict_content has not been stored yet. */
+static Py_ssize_t
+ZstdDict_length(PyObject *ob)
+{
+    ZstdDict *self = ZstdDict_CAST(ob);
+    /* dict_content stays NULL until __init__ has stored it (for instance
+       when a subclass overrides __init__ without calling the base one);
+       do not dereference it in that state. */
+    if (self->dict_content == NULL) {
+        return 0;
+    }
+    assert(PyBytes_Check(self->dict_content));
+    return Py_SIZE(self->dict_content);
+}
+
+/* tp_traverse: report the references owned by a ZstdDict to the cyclic
+   garbage collector.
+
+   Returns 0, or the first non-zero value returned by `visit`. */
+static int
+ZstdDict_traverse(PyObject *ob, visitproc visit, void *arg)
+{
+    ZstdDict *self = ZstdDict_CAST(ob);
+    /* Instances own a reference to their heap type (the deallocator
+       releases it), so the type has to be visited as well. */
+    Py_VISIT(Py_TYPE(self));
+    Py_VISIT(self->c_dicts);
+    Py_VISIT(self->dict_content);
+    return 0;
+}
+
+/* tp_clear: drop the references reported by ZstdDict_traverse (apart from
+   the type, which is released by the deallocator). Always returns 0. */
+static int
+ZstdDict_clear(PyObject *ob)
+{
+    ZstdDict *self = ZstdDict_CAST(ob);
+    /* Same order as the comment in the deallocator asks for: the c_dicts
+       cache goes before the dictionary content. */
+    Py_CLEAR(self->c_dicts);
+    Py_CLEAR(self->dict_content);
+    return 0;
+}
+
+/* Slot table wiring the functions and tables of this file into the type. */
+static PyType_Slot zstddict_slots[] = {
+ {Py_tp_members, ZstdDict_members},
+ {Py_tp_getset, ZstdDict_getset},
+ {Py_tp_new, _zstd_ZstdDict_new},
+ {Py_tp_dealloc, ZstdDict_dealloc},
+ {Py_tp_init, _zstd_ZstdDict___init__},
+ {Py_tp_str, ZstdDict_str},
+ /* The class docstring reuses the docstring generated for __init__. */
+ {Py_tp_doc, (char*)_zstd_ZstdDict___init____doc__},
+ /* len(zstd_dict) */
+ {Py_sq_length, ZstdDict_length},
+ {Py_tp_traverse, ZstdDict_traverse},
+ {Py_tp_clear, ZstdDict_clear},
+ {0}
+};
+
+/* Type specification for _zstd.ZstdDict: subclassable (Py_TPFLAGS_BASETYPE)
+   and garbage-collected (Py_TPFLAGS_HAVE_GC). Not static, so it can be
+   referenced from another translation unit. */
+PyType_Spec zstddict_type_spec = {
+ .name = "_zstd.ZstdDict",
+ .basicsize = sizeof(ZstdDict),
+ .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
+ .slots = zstddict_slots,
+};
diff --git a/Python/stdlib_module_names.h b/Python/stdlib_module_names.h
index fcef7419bd397b..92cc5afedc8429 100644
--- a/Python/stdlib_module_names.h
+++ b/Python/stdlib_module_names.h
@@ -103,6 +103,7 @@ static const char* _Py_stdlib_module_names[] = {
"_winapi",
"_wmi",
"_zoneinfo",
+"_zstd",
"abc",
"annotationlib",
"antigravity",
diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv
index a33619b1b345e2..2be3e1a420b91d 100644
--- a/Tools/c-analyzer/cpython/ignored.tsv
+++ b/Tools/c-analyzer/cpython/ignored.tsv
@@ -748,6 +748,7 @@ Modules/expat/xmlrole.c - error -
## other
Modules/_io/_iomodule.c - _PyIO_Module -
Modules/_sqlite/module.c - _sqlite3module -
+Modules/_zstd/_zstdmodule.c - _zstdmodule -
Modules/clinic/md5module.c.h _md5_md5 _keywords -
Modules/clinic/grpmodule.c.h grp_getgrgid _keywords -
Modules/clinic/grpmodule.c.h grp_getgrnam _keywords -
diff --git a/configure b/configure
index 3b74554d5a2e64..397f867df34dc2 100755
--- a/configure
+++ b/configure
@@ -678,6 +678,8 @@ MODULE__HASHLIB_FALSE
MODULE__HASHLIB_TRUE
MODULE__SSL_FALSE
MODULE__SSL_TRUE
+MODULE__ZSTD_FALSE
+MODULE__ZSTD_TRUE
MODULE__LZMA_FALSE
MODULE__LZMA_TRUE
MODULE__BZ2_FALSE
@@ -852,6 +854,8 @@ HAVE_GETHOSTBYNAME_R_3_ARG
HAVE_GETHOSTBYNAME_R_5_ARG
HAVE_GETHOSTBYNAME_R_6_ARG
LIBOBJS
+LIBZSTD_LIBS
+LIBZSTD_CFLAGS
LIBLZMA_LIBS
LIBLZMA_CFLAGS
BZIP2_LIBS
@@ -1172,6 +1176,8 @@ BZIP2_CFLAGS
BZIP2_LIBS
LIBLZMA_CFLAGS
LIBLZMA_LIBS
+LIBZSTD_CFLAGS
+LIBZSTD_LIBS
LIBREADLINE_CFLAGS
LIBREADLINE_LIBS
LIBEDIT_CFLAGS
@@ -2011,6 +2017,10 @@ Some influential environment variables:
C compiler flags for LIBLZMA, overriding pkg-config
LIBLZMA_LIBS
linker flags for LIBLZMA, overriding pkg-config
+ LIBZSTD_CFLAGS
+ C compiler flags for LIBZSTD, overriding pkg-config
+ LIBZSTD_LIBS
+ linker flags for LIBZSTD, overriding pkg-config
LIBREADLINE_CFLAGS
C compiler flags for LIBREADLINE, overriding pkg-config
LIBREADLINE_LIBS
@@ -22429,6 +22439,383 @@ printf "%s\n" "yes" >&6; }
have_liblzma=yes
fi
+have_libzstd=no
+
+
+
+pkg_failed=no
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for libzstd" >&5
+printf %s "checking for libzstd... " >&6; }
+
+if test -n "$LIBZSTD_CFLAGS"; then
+ pkg_cv_LIBZSTD_CFLAGS="$LIBZSTD_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libzstd\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "libzstd") 2>&5
+ ac_status=$?
+ printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_LIBZSTD_CFLAGS=`$PKG_CONFIG --cflags "libzstd" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+if test -n "$LIBZSTD_LIBS"; then
+ pkg_cv_LIBZSTD_LIBS="$LIBZSTD_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+ if test -n "$PKG_CONFIG" && \
+ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libzstd\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "libzstd") 2>&5
+ ac_status=$?
+ printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ pkg_cv_LIBZSTD_LIBS=`$PKG_CONFIG --libs "libzstd" 2>/dev/null`
+ test "x$?" != "x0" && pkg_failed=yes
+else
+ pkg_failed=yes
+fi
+ else
+ pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+ _pkg_short_errors_supported=yes
+else
+ _pkg_short_errors_supported=no
+fi
+ if test $_pkg_short_errors_supported = yes; then
+ LIBZSTD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libzstd" 2>&1`
+ else
+ LIBZSTD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libzstd" 2>&1`
+ fi
+ # Put the nasty error message in config.log where it belongs
+ echo "$LIBZSTD_PKG_ERRORS" >&5
+
+
+ save_CFLAGS=$CFLAGS
+save_CPPFLAGS=$CPPFLAGS
+save_LDFLAGS=$LDFLAGS
+save_LIBS=$LIBS
+
+
+ CPPFLAGS="$CPPFLAGS $LIBZSTD_CFLAGS"
+ LIBS="$LIBS $LIBZSTD_LIBS"
+ for ac_header in zstd.h zdict.h
+do :
+ as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | sed "$as_sed_sh"`
+ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
+if eval test \"x\$"$as_ac_Header"\" = x"yes"
+then :
+ cat >>confdefs.h <<_ACEOF
+#define `printf "%s\n" "HAVE_$ac_header" | sed "$as_sed_cpp"` 1
+_ACEOF
+
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ZDICT_finalizeDictionary in -lzstd" >&5
+printf %s "checking for ZDICT_finalizeDictionary in -lzstd... " >&6; }
+if test ${ac_cv_lib_zstd_ZDICT_finalizeDictionary+y}
+then :
+ printf %s "(cached) " >&6
+else case e in #(
+ e) ac_check_lib_save_LIBS=$LIBS
+LIBS="-lzstd $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply.
+ The 'extern "C"' is for builds by C++ compilers;
+ although this is not generally supported in C code supporting it here
+ has little cost and some practical benefit (sr 110532). */
+#ifdef __cplusplus
+extern "C"
+#endif
+char ZDICT_finalizeDictionary (void);
+int
+main (void)
+{
+return ZDICT_finalizeDictionary ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+ ac_cv_lib_zstd_ZDICT_finalizeDictionary=yes
+else case e in #(
+ e) ac_cv_lib_zstd_ZDICT_finalizeDictionary=no ;;
+esac
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS ;;
+esac
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_zstd_ZDICT_finalizeDictionary" >&5
+printf "%s\n" "$ac_cv_lib_zstd_ZDICT_finalizeDictionary" >&6; }
+if test "x$ac_cv_lib_zstd_ZDICT_finalizeDictionary" = xyes
+then :
+
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if libzstd is new enough" >&5
+printf %s "checking if libzstd is new enough... " >&6; }
+
+
+ if test "$cross_compiling" = yes
+then :
+ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;}
+as_fn_error $? "cannot run test program while cross compiling
+See 'config.log' for more details" "$LINENO" 5; }
+else case e in #(
+ e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include "zstd.h"
+int
+main (void)
+{
+
+ #if ZSTD_VERSION_NUMBER < 10405
+ exit(1);
+ #endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"
+then :
+ py__zstd_too_old=no
+else case e in #(
+ e) py__zstd_too_old=yes ;;
+esac
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+ conftest.$ac_objext conftest.beam conftest.$ac_ext ;;
+esac
+fi
+
+ have_libzstd=yes
+
+else case e in #(
+ e) have_libzstd=no ;;
+esac
+fi
+
+
+else case e in #(
+ e) have_libzstd=no ;;
+esac
+fi
+
+done
+ if test "x$have_libzstd" = xyes
+then :
+
+ LIBZSTD_CFLAGS=${LIBZSTD_CFLAGS-""}
+ LIBZSTD_LIBS=${LIBZSTD_LIBS-"-lzstd"}
+
+fi
+
+CFLAGS=$save_CFLAGS
+CPPFLAGS=$save_CPPFLAGS
+LDFLAGS=$save_LDFLAGS
+LIBS=$save_LIBS
+
+
+
+elif test $pkg_failed = untried; then
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+
+ save_CFLAGS=$CFLAGS
+save_CPPFLAGS=$CPPFLAGS
+save_LDFLAGS=$LDFLAGS
+save_LIBS=$LIBS
+
+
+ CPPFLAGS="$CPPFLAGS $LIBZSTD_CFLAGS"
+ LIBS="$LIBS $LIBZSTD_LIBS"
+ for ac_header in zstd.h zdict.h
+do :
+ as_ac_Header=`printf "%s\n" "ac_cv_header_$ac_header" | sed "$as_sed_sh"`
+ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
+if eval test \"x\$"$as_ac_Header"\" = x"yes"
+then :
+ cat >>confdefs.h <<_ACEOF
+#define `printf "%s\n" "HAVE_$ac_header" | sed "$as_sed_cpp"` 1
+_ACEOF
+
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ZDICT_finalizeDictionary in -lzstd" >&5
+printf %s "checking for ZDICT_finalizeDictionary in -lzstd... " >&6; }
+if test ${ac_cv_lib_zstd_ZDICT_finalizeDictionary+y}
+then :
+ printf %s "(cached) " >&6
+else case e in #(
+ e) ac_check_lib_save_LIBS=$LIBS
+LIBS="-lzstd $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply.
+ The 'extern "C"' is for builds by C++ compilers;
+ although this is not generally supported in C code supporting it here
+ has little cost and some practical benefit (sr 110532). */
+#ifdef __cplusplus
+extern "C"
+#endif
+char ZDICT_finalizeDictionary (void);
+int
+main (void)
+{
+return ZDICT_finalizeDictionary ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+ ac_cv_lib_zstd_ZDICT_finalizeDictionary=yes
+else case e in #(
+ e) ac_cv_lib_zstd_ZDICT_finalizeDictionary=no ;;
+esac
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS ;;
+esac
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_zstd_ZDICT_finalizeDictionary" >&5
+printf "%s\n" "$ac_cv_lib_zstd_ZDICT_finalizeDictionary" >&6; }
+if test "x$ac_cv_lib_zstd_ZDICT_finalizeDictionary" = xyes
+then :
+
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if libzstd is new enough" >&5
+printf %s "checking if libzstd is new enough... " >&6; }
+
+
+ if test "$cross_compiling" = yes
+then :
+ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;}
+as_fn_error $? "cannot run test program while cross compiling
+See 'config.log' for more details" "$LINENO" 5; }
+else case e in #(
+ e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include "zstd.h"
+int
+main (void)
+{
+
+ #if ZSTD_VERSION_NUMBER < 10405
+ exit(1);
+ #endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"
+then :
+ py__zstd_too_old=no
+else case e in #(
+ e) py__zstd_too_old=yes ;;
+esac
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+ conftest.$ac_objext conftest.beam conftest.$ac_ext ;;
+esac
+fi
+
+ have_libzstd=yes
+
+else case e in #(
+ e) have_libzstd=no ;;
+esac
+fi
+
+
+else case e in #(
+ e) have_libzstd=no ;;
+esac
+fi
+
+done
+ if test "x$have_libzstd" = xyes
+then :
+
+ LIBZSTD_CFLAGS=${LIBZSTD_CFLAGS-""}
+ LIBZSTD_LIBS=${LIBZSTD_LIBS-"-lzstd"}
+
+fi
+
+CFLAGS=$save_CFLAGS
+CPPFLAGS=$save_CPPFLAGS
+LDFLAGS=$save_LDFLAGS
+LIBS=$save_LIBS
+
+
+
+else
+ LIBZSTD_CFLAGS=$pkg_cv_LIBZSTD_CFLAGS
+ LIBZSTD_LIBS=$pkg_cv_LIBZSTD_LIBS
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if libzstd is new enough" >&5
+printf %s "checking if libzstd is new enough... " >&6; }
+
+
+ if test "$cross_compiling" = yes
+then :
+ { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;}
+as_fn_error $? "cannot run test program while cross compiling
+See 'config.log' for more details" "$LINENO" 5; }
+else case e in #(
+ e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include "zstd.h"
+int
+main (void)
+{
+
+ #if ZSTD_VERSION_NUMBER < 10405
+ exit(1);
+ #endif
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"
+then :
+ py__zstd_too_old=no
+else case e in #(
+ e) py__zstd_too_old=yes ;;
+esac
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+ conftest.$ac_objext conftest.beam conftest.$ac_ext ;;
+esac
+fi
+
+ have_libzstd=yes
+
+fi
+
@@ -29431,6 +29818,7 @@ SRCDIRS="\
Modules/_xxtestfuzz \
Modules/cjkcodecs \
Modules/expat \
+ Modules/_zstd \
Objects \
Objects/mimalloc \
Objects/mimalloc/prim \
@@ -33007,6 +33395,46 @@ fi
printf "%s\n" "$py_cv_module__lzma" >&6; }
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for stdlib extension module _zstd" >&5
+printf %s "checking for stdlib extension module _zstd... " >&6; }
+ if test "$py_cv_module__zstd" != "n/a"
+then :
+
+ if test "$py__zstd_too_old" = no
+then :
+ if test "$have_libzstd" = yes
+then :
+ py_cv_module__zstd=yes
+else case e in #(
+ e) py_cv_module__zstd=missing ;;
+esac
+fi
+else case e in #(
+ e) py_cv_module__zstd=disabled ;;
+esac
+fi
+
+fi
+ as_fn_append MODULE_BLOCK "MODULE__ZSTD_STATE=$py_cv_module__zstd$as_nl"
+ if test "x$py_cv_module__zstd" = xyes
+then :
+
+ as_fn_append MODULE_BLOCK "MODULE__ZSTD_CFLAGS=$LIBZSTD_CFLAGS$as_nl"
+ as_fn_append MODULE_BLOCK "MODULE__ZSTD_LDFLAGS=$LIBZSTD_LIBS$as_nl"
+
+fi
+ if test "$py_cv_module__zstd" = yes; then
+ MODULE__ZSTD_TRUE=
+ MODULE__ZSTD_FALSE='#'
+else
+ MODULE__ZSTD_TRUE='#'
+ MODULE__ZSTD_FALSE=
+fi
+
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $py_cv_module__zstd" >&5
+printf "%s\n" "$py_cv_module__zstd" >&6; }
+
+
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for stdlib extension module _ssl" >&5
printf %s "checking for stdlib extension module _ssl... " >&6; }
@@ -34075,6 +34503,10 @@ if test -z "${MODULE__LZMA_TRUE}" && test -z "${MODULE__LZMA_FALSE}"; then
as_fn_error $? "conditional \"MODULE__LZMA\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
+if test -z "${MODULE__ZSTD_TRUE}" && test -z "${MODULE__ZSTD_FALSE}"; then
+ as_fn_error $? "conditional \"MODULE__ZSTD\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
if test -z "${MODULE__SSL_TRUE}" && test -z "${MODULE__SSL_FALSE}"; then
as_fn_error $? "conditional \"MODULE__SSL\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
diff --git a/configure.ac b/configure.ac
index ed5c65ecbcc2be..76042522ae1e54 100644
--- a/configure.ac
+++ b/configure.ac
@@ -5415,6 +5415,40 @@ PKG_CHECK_MODULES([LIBLZMA], [liblzma], [have_liblzma=yes], [
])
])
+have_libzstd=no
+AC_DEFUN([TEST_ZSTD_VERSION],[
+  AC_MSG_CHECKING([if libzstd is new enough])
+
+  dnl TODO(emmatyping): check if we can set a C define for the minimum version
+  dnl to re-use here and in the header compile check
+
+  dnl This is checked when both a package is found and we do fallback searches
+  dnl If you update the minimum version here, you should update the compile
+  dnl check in Modules/_zstd/_zstdmodule.h
+
+  dnl ZSTD_VERSION_NUMBER is a preprocessor constant, so compiling the test
+  dnl program is enough. Nothing has to be run, which keeps the check usable
+  dnl when cross-compiling.
+  dnl LIBZSTD_CFLAGS is added for the duration of the check so that a zstd.h
+  dnl located through pkg-config is the one being tested.
+  py_zstd_save_CPPFLAGS=$CPPFLAGS
+  CPPFLAGS="$CPPFLAGS $LIBZSTD_CFLAGS"
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM([@%:@include "zstd.h"], [
+    #if ZSTD_VERSION_NUMBER < 10405
+    #  error "libzstd is too old"
+    #endif
+  ])], [py__zstd_too_old=no], [py__zstd_too_old=yes])
+  CPPFLAGS=$py_zstd_save_CPPFLAGS
+  dnl Close the message opened by AC_MSG_CHECKING above
+  AS_VAR_IF([py__zstd_too_old], [no],
+            [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no])])
+  dnl Important to record that we have libzstd
+  have_libzstd=yes
+])
+
+dnl Locate libzstd. When pkg-config finds it, only the version is tested.
+dnl Otherwise fall back to probing the headers and the library by hand, and
+dnl default LIBZSTD_LIBS to -lzstd if the probe succeeds and the user did not
+dnl set it.
+PKG_CHECK_MODULES([LIBZSTD], [libzstd], [TEST_ZSTD_VERSION()], [
+ WITH_SAVE_ENV([
+ CPPFLAGS="$CPPFLAGS $LIBZSTD_CFLAGS"
+ LIBS="$LIBS $LIBZSTD_LIBS"
+ AC_CHECK_HEADERS([zstd.h zdict.h], [
+ AC_CHECK_LIB([zstd], [ZDICT_finalizeDictionary],
+ [TEST_ZSTD_VERSION()], [have_libzstd=no])
+ ], [have_libzstd=no])
+ AS_VAR_IF([have_libzstd], [yes], [
+ LIBZSTD_CFLAGS=${LIBZSTD_CFLAGS-""}
+ LIBZSTD_LIBS=${LIBZSTD_LIBS-"-lzstd"}
+ ])
+ ])
+])
+
dnl PY_CHECK_NETDB_FUNC(FUNCTION)
AC_DEFUN([PY_CHECK_NETDB_FUNC], [PY_CHECK_FUNC([$1], [@%:@include <netdb.h>])])
@@ -7116,6 +7150,7 @@ SRCDIRS="\
Modules/_xxtestfuzz \
Modules/cjkcodecs \
Modules/expat \
+ Modules/_zstd \
Objects \
Objects/mimalloc \
Objects/mimalloc/prim \
@@ -8062,6 +8097,8 @@ PY_STDLIB_MOD([_bz2], [], [test "$have_bzip2" = yes],
[$BZIP2_CFLAGS], [$BZIP2_LIBS])
PY_STDLIB_MOD([_lzma], [], [test "$have_liblzma" = yes],
[$LIBLZMA_CFLAGS], [$LIBLZMA_LIBS])
+dnl _zstd is "disabled" when libzstd was not confirmed to be new enough and
+dnl "missing" when no usable libzstd was found.
+PY_STDLIB_MOD([_zstd], [test "$py__zstd_too_old" = no], [test "$have_libzstd" = yes],
+ [$LIBZSTD_CFLAGS], [$LIBZSTD_LIBS])
dnl OpenSSL bindings
PY_STDLIB_MOD([_ssl], [], [test "$ac_cv_working_openssl_ssl" = yes],
diff --git a/pyconfig.h.in b/pyconfig.h.in
index 6c17685e22a078..8c2c6ab5cea4cd 100644
--- a/pyconfig.h.in
+++ b/pyconfig.h.in
@@ -1630,12 +1630,18 @@
/* Define to 1 if you have the 'writev' function. */
#undef HAVE_WRITEV
+/* Define to 1 if you have the <zdict.h> header file. */
+#undef HAVE_ZDICT_H
+
/* Define if the zlib library has inflateCopy */
#undef HAVE_ZLIB_COPY
/* Define to 1 if you have the <zlib.h> header file. */
#undef HAVE_ZLIB_H
+/* Define to 1 if you have the <zstd.h> header file. */
+#undef HAVE_ZSTD_H
+
/* Define to 1 if you have the '_getpty' function. */
#undef HAVE__GETPTY