-
-
Notifications
You must be signed in to change notification settings - Fork 32.1k
gh-76961: Fix the PEP3118 format string for ctypes.Structure #5561
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
ac4e5bb
e6021e1
97e9d95
6bf5a25
3b7f14f
27b1601
e300e16
7e8b4f9
4de7a8a
26311ae
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Inter-field padding is now inserted into the PEP3118 format strings obtained | ||
from ``ctypes.Structure`` objects, reflecting their true representation in | ||
memory. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -340,6 +340,48 @@ MakeAnonFields(PyObject *type) | |
return 0; | ||
} | ||
|
||
/* | ||
Compute ceil(log10(x)), for the purpose of determining string lengths. | ||
*/ | ||
static Py_ssize_t | ||
clog10(Py_ssize_t n) | ||
{ | ||
Py_ssize_t log_n = 0; | ||
while (n > 0) { | ||
log_n++; | ||
n /= 10; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Multiplication is often faster than division. Can this be rewritten by computing powers of 10 until n is exceeded? Better yet, just inline linear search. if (n < 10ULL)
return 1;
if (n < 100ULL)
return 2;
...
if (n < 10_000_000_000_000_000_000ULL)
return 20; There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On the second thought, I would be surprised if this has not been implemented elsewhere in cpython code base. Off the top of my head, I cannot recall where it could be, but I will try to search. If someone beats me to it - please leave a note. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This is risky because the power can overflow There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It took me a little longer than I expected to get back to this, but the code that I was looking for is in Python/dtoa.c. @mdickinson - Is the approximation used in dtoa applicable to the problem at hand? If so, do you think that code can be factored out and called here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @abalkin Looks like the relevant code is no longer part of the PR. But the |
||
} | ||
return log_n; | ||
} | ||
|
||
/* | ||
Append {padding}x to the PEP3118 format string. | ||
eric-wieser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
*/ | ||
char * | ||
_ctypes_alloc_format_padding(const char *prefix, Py_ssize_t padding) | ||
{ | ||
char *result; | ||
char *buf; | ||
|
||
assert(padding > 0); | ||
|
||
if (padding == 1) { | ||
/* Use x instead of 1x, for brevity */ | ||
return _ctypes_alloc_format_string(prefix, "x"); | ||
} | ||
|
||
/* decimal characters + x + null */ | ||
buf = PyMem_Malloc(clog10(padding) + 2); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I left a comment about log10 implementation above, but looking at the actual use, I don't see why it is needed. Can't we just make buf char buf[20]; and not allocate it on the heap? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wanted to avoid risking introducing a buffer overflow by accident by choosing too short a buffer; especially since I don't think we care about performance here. The whole framework for building the format strings here consists of repeated heap allocations, so one more allocation doesn't seem like a big deal. I think 20 isn't actually enough, as an int64 can need up to 19 digits, and then we need the
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've pushed the version with stack allocation as requested |
||
if (buf == NULL) { | ||
PyErr_NoMemory(); | ||
return NULL; | ||
} | ||
sprintf(buf, "%zdx", padding); | ||
eric-wieser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
result = _ctypes_alloc_format_string(prefix, buf); | ||
PyMem_Free(buf); | ||
return result; | ||
} | ||
|
||
/* | ||
Retrieve the (optional) _pack_ attribute from a type, the _fields_ attribute, | ||
and create an StgDictObject. Used for Structure and Union subclasses. | ||
|
@@ -352,11 +394,10 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct | |
_Py_IDENTIFIER(_pack_); | ||
StgDictObject *stgdict, *basedict; | ||
Py_ssize_t len, offset, size, align, i; | ||
Py_ssize_t union_size, total_align; | ||
Py_ssize_t union_size, total_align, aligned_size; | ||
Py_ssize_t field_size = 0; | ||
int bitofs; | ||
PyObject *tmp; | ||
int isPacked; | ||
int pack; | ||
Py_ssize_t ffi_ofs; | ||
int big_endian; | ||
|
@@ -401,7 +442,6 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct | |
return -1; | ||
} | ||
if (tmp) { | ||
isPacked = 1; | ||
pack = _PyLong_AsInt(tmp); | ||
Py_DECREF(tmp); | ||
if (pack < 0) { | ||
|
@@ -416,7 +456,7 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct | |
} | ||
} | ||
else { | ||
isPacked = 0; | ||
/* Setting `_pack_ = 0` amounts to using the default alignment */ | ||
pack = 0; | ||
} | ||
|
||
|
@@ -494,12 +534,10 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct | |
} | ||
|
||
assert(stgdict->format == NULL); | ||
if (isStruct && !isPacked) { | ||
if (isStruct) { | ||
stgdict->format = _ctypes_alloc_format_string(NULL, "T{"); | ||
} else { | ||
/* PEP3118 doesn't support union, or packed structures (well, | ||
only standard packing, but we don't support the pep for | ||
that). Use 'B' for bytes. */ | ||
/* PEP3118 doesn't support union. Use 'B' for bytes. */ | ||
stgdict->format = _ctypes_alloc_format_string(NULL, "B"); | ||
} | ||
if (stgdict->format == NULL) | ||
|
@@ -567,24 +605,53 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct | |
} else | ||
bitsize = 0; | ||
|
||
if (isStruct && !isPacked) { | ||
if (isStruct) { | ||
const char *fieldfmt = dict->format ? dict->format : "B"; | ||
const char *fieldname = PyUnicode_AsUTF8(name); | ||
char *ptr; | ||
Py_ssize_t len; | ||
char *buf; | ||
Py_ssize_t last_size = size; | ||
Py_ssize_t padding; | ||
|
||
if (fieldname == NULL) | ||
{ | ||
Py_DECREF(pair); | ||
return -1; | ||
} | ||
|
||
/* construct the field now, as `prop->offset` is `offset` with | ||
corrected alignment */ | ||
prop = PyCField_FromDesc(desc, i, | ||
&field_size, bitsize, &bitofs, | ||
&size, &offset, &align, | ||
pack, big_endian); | ||
if (prop == NULL) { | ||
Py_DECREF(pair); | ||
return -1; | ||
} | ||
|
||
/* number of bytes between the end of the last field and the start | ||
of this one */ | ||
padding = ((CFieldObject *)prop)->offset - last_size; | ||
|
||
if (padding > 0) { | ||
ptr = stgdict->format; | ||
stgdict->format = _ctypes_alloc_format_padding(ptr, padding); | ||
PyMem_Free(ptr); | ||
if (stgdict->format == NULL) { | ||
Py_DECREF(pair); | ||
Py_DECREF(prop); | ||
return -1; | ||
} | ||
} | ||
|
||
len = strlen(fieldname) + strlen(fieldfmt); | ||
|
||
buf = PyMem_Malloc(len + 2 + 1); | ||
if (buf == NULL) { | ||
Py_DECREF(pair); | ||
Py_DECREF(prop); | ||
PyErr_NoMemory(); | ||
return -1; | ||
} | ||
|
@@ -602,15 +669,9 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct | |
|
||
if (stgdict->format == NULL) { | ||
Py_DECREF(pair); | ||
Py_DECREF(prop); | ||
return -1; | ||
} | ||
} | ||
|
||
if (isStruct) { | ||
prop = PyCField_FromDesc(desc, i, | ||
&field_size, bitsize, &bitofs, | ||
&size, &offset, &align, | ||
pack, big_endian); | ||
} else /* union */ { | ||
size = 0; | ||
offset = 0; | ||
|
@@ -619,14 +680,14 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct | |
&field_size, bitsize, &bitofs, | ||
&size, &offset, &align, | ||
pack, big_endian); | ||
if (prop == NULL) { | ||
Py_DECREF(pair); | ||
return -1; | ||
} | ||
union_size = max(size, union_size); | ||
} | ||
total_align = max(align, total_align); | ||
|
||
if (!prop) { | ||
Py_DECREF(pair); | ||
return -1; | ||
} | ||
if (-1 == PyObject_SetAttr(type, name, prop)) { | ||
Py_DECREF(prop); | ||
Py_DECREF(pair); | ||
|
@@ -636,26 +697,41 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct | |
Py_DECREF(prop); | ||
} | ||
|
||
if (isStruct && !isPacked) { | ||
char *ptr = stgdict->format; | ||
if (!isStruct) { | ||
size = union_size; | ||
} | ||
|
||
/* Adjust the size according to the alignment requirements */ | ||
aligned_size = ((size + total_align - 1) / total_align) * total_align; | ||
|
||
if (isStruct) { | ||
char *ptr; | ||
Py_ssize_t padding; | ||
|
||
/* Pad up to the full size of the struct */ | ||
padding = aligned_size - size; | ||
if (padding > 0) { | ||
ptr = stgdict->format; | ||
stgdict->format = _ctypes_alloc_format_padding(ptr, padding); | ||
PyMem_Free(ptr); | ||
if (stgdict->format == NULL) { | ||
return -1; | ||
} | ||
} | ||
|
||
ptr = stgdict->format; | ||
stgdict->format = _ctypes_alloc_format_string(stgdict->format, "}"); | ||
PyMem_Free(ptr); | ||
if (stgdict->format == NULL) | ||
return -1; | ||
} | ||
|
||
if (!isStruct) | ||
size = union_size; | ||
|
||
/* Adjust the size according to the alignment requirements */ | ||
size = ((size + total_align - 1) / total_align) * total_align; | ||
|
||
stgdict->ffi_type_pointer.alignment = Py_SAFE_DOWNCAST(total_align, | ||
Py_ssize_t, | ||
unsigned short); | ||
stgdict->ffi_type_pointer.size = size; | ||
stgdict->ffi_type_pointer.size = aligned_size; | ||
|
||
stgdict->size = size; | ||
stgdict->size = aligned_size; | ||
stgdict->align = total_align; | ||
stgdict->length = len; /* ADD ffi_ofs? */ | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.