Skip to content

Commit e81d2ea

Browse files
committed
Optimise string routines for architectures with non-natural alignment
C only requires that sizeof(x) is a multiple of alignof(x), not that the two are equal. Thus, anywhere we optimise based on alignment we should be using alignof(x), not sizeof(x). This is more annoying than it would be in C11, where we could just use _Alignof(x) (or alignof(x) in C++11), but since we still require only C99 we must plumb the information all the way from autoconf through the various typedefs and defines.
1 parent f617ce7 commit e81d2ea

File tree

8 files changed

+52
-19
lines changed

8 files changed

+52
-19
lines changed

Objects/bytes_methods.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,15 +115,14 @@ _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
115115
{
116116
const char *p = cptr;
117117
const char *end = p + len;
118-
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
119118

120119
while (p < end) {
121120
/* Fast path, see STRINGLIB(utf8_decode) in stringlib/codecs.h
122121
for an explanation. */
123-
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
122+
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
124123
/* Help register allocation */
125124
const char *_p = p;
126-
while (_p < aligned_end) {
125+
while (_p + SIZEOF_SIZE_T <= end) {
127126
size_t value = *(const size_t *) _p;
128127
if (value & ASCII_CHAR_MASK) {
129128
Py_RETURN_FALSE;

Objects/stringlib/codecs.h

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
2626
{
2727
Py_UCS4 ch;
2828
const char *s = *inptr;
29-
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
3029
STRINGLIB_CHAR *p = dest + *outpos;
3130

3231
while (s < end) {
@@ -40,11 +39,11 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
4039
First, check if we can do an aligned read, as most CPUs have
4140
a penalty for unaligned reads.
4241
*/
43-
if (_Py_IS_ALIGNED(s, SIZEOF_SIZE_T)) {
42+
if (_Py_IS_ALIGNED(s, ALIGNOF_SIZE_T)) {
4443
/* Help register allocation */
4544
const char *_s = s;
4645
STRINGLIB_CHAR *_p = p;
47-
while (_s < aligned_end) {
46+
while (_s + SIZEOF_SIZE_T <= end) {
4847
/* Read a whole size_t at a time (either 4 or 8 bytes),
4948
and do a fast unrolled copy if it only contains ASCII
5049
characters. */
@@ -496,8 +495,6 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
496495
int native_ordering)
497496
{
498497
Py_UCS4 ch;
499-
const unsigned char *aligned_end =
500-
(const unsigned char *) _Py_ALIGN_DOWN(e, SIZEOF_LONG);
501498
const unsigned char *q = *inptr;
502499
STRINGLIB_CHAR *p = dest + *outpos;
503500
/* Offsets from q for retrieving byte pairs in the right order. */
@@ -512,10 +509,10 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
512509
Py_UCS4 ch2;
513510
/* First check for possible aligned read of a C 'long'. Unaligned
514511
reads are more expensive, better to defer to another iteration. */
515-
if (_Py_IS_ALIGNED(q, SIZEOF_LONG)) {
512+
if (_Py_IS_ALIGNED(q, ALIGNOF_LONG)) {
516513
/* Fast path for runs of in-range non-surrogate chars. */
517514
const unsigned char *_q = q;
518-
while (_q < aligned_end) {
515+
while (_q + SIZEOF_LONG <= e) {
519516
unsigned long block = * (const unsigned long *) _q;
520517
if (native_ordering) {
521518
/* Can use buffer directly */

Objects/stringlib/find_max_char.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,12 @@ Py_LOCAL_INLINE(Py_UCS4)
2020
STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
2121
{
2222
const unsigned char *p = (const unsigned char *) begin;
23-
const unsigned char *aligned_end =
24-
(const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
2523

2624
while (p < end) {
27-
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
25+
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
2826
/* Help register allocation */
2927
const unsigned char *_p = p;
30-
while (_p < aligned_end) {
28+
while (_p + SIZEOF_SIZE_T <= end) {
3129
size_t value = *(const size_t *) _p;
3230
if (value & UCS1_ASCII_CHAR_MASK)
3331
return 255;

Objects/unicodeobject.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5069,17 +5069,16 @@ static Py_ssize_t
50695069
ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
50705070
{
50715071
const char *p = start;
5072-
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
50735072

50745073
#if SIZEOF_SIZE_T <= SIZEOF_VOID_P
50755074
assert(_Py_IS_ALIGNED(dest, ALIGNOF_SIZE_T));
5076-
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
5075+
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
50775076
/* Fast path, see STRINGLIB(utf8_decode) for
50785077
an explanation. */
50795078
/* Help register allocation */
50805079
const char *_p = p;
50815080
Py_UCS1 * q = dest;
5082-
while (_p < aligned_end) {
5081+
while (_p + SIZEOF_SIZE_T <= end) {
50835082
size_t value = *(const size_t *) _p;
50845083
if (value & ASCII_CHAR_MASK)
50855084
break;
@@ -5099,10 +5098,10 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
50995098
while (p < end) {
51005099
/* Fast path, see STRINGLIB(utf8_decode) in stringlib/codecs.h
51015100
for an explanation. */
5102-
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
5101+
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
51035102
/* Help register allocation */
51045103
const char *_p = p;
5105-
while (_p < aligned_end) {
5104+
while (_p + SIZEOF_SIZE_T <= end) {
51065105
size_t value = *(const size_t *) _p;
51075106
if (value & ASCII_CHAR_MASK)
51085107
break;

PC/pyconfig.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,7 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */
323323
#define SIZEOF_SHORT 2
324324
#define SIZEOF_INT 4
325325
#define SIZEOF_LONG 4
326+
#define ALIGNOF_LONG 4
326327
#define SIZEOF_LONG_LONG 8
327328
#define SIZEOF_DOUBLE 8
328329
#define SIZEOF_FLOAT 4

configure

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8705,6 +8705,41 @@ cat >>confdefs.h <<_ACEOF
87058705
_ACEOF
87068706

87078707

8708+
# The cast to long int works around a bug in the HP C Compiler,
8709+
# see AC_CHECK_SIZEOF for more information.
8710+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking alignment of long" >&5
8711+
$as_echo_n "checking alignment of long... " >&6; }
8712+
if ${ac_cv_alignof_long+:} false; then :
8713+
$as_echo_n "(cached) " >&6
8714+
else
8715+
if ac_fn_c_compute_int "$LINENO" "(long int) offsetof (ac__type_alignof_, y)" "ac_cv_alignof_long" "$ac_includes_default
8716+
#ifndef offsetof
8717+
# define offsetof(type, member) ((char *) &((type *) 0)->member - (char *) 0)
8718+
#endif
8719+
typedef struct { char x; long y; } ac__type_alignof_;"; then :
8720+
8721+
else
8722+
if test "$ac_cv_type_long" = yes; then
8723+
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
8724+
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
8725+
as_fn_error 77 "cannot compute alignment of long
8726+
See \`config.log' for more details" "$LINENO" 5; }
8727+
else
8728+
ac_cv_alignof_long=0
8729+
fi
8730+
fi
8731+
8732+
fi
8733+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_alignof_long" >&5
8734+
$as_echo "$ac_cv_alignof_long" >&6; }
8735+
8736+
8737+
8738+
cat >>confdefs.h <<_ACEOF
8739+
#define ALIGNOF_LONG $ac_cv_alignof_long
8740+
_ACEOF
8741+
8742+
87088743
# The cast to long int works around a bug in the HP C Compiler
87098744
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
87108745
# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.

configure.ac

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2351,6 +2351,7 @@ AC_CHECK_TYPE(__uint128_t,
23512351
# ANSI C requires sizeof(char) == 1, so no need to check it
23522352
AC_CHECK_SIZEOF(int, 4)
23532353
AC_CHECK_SIZEOF(long, 4)
2354+
AC_CHECK_ALIGNOF(long)
23542355
AC_CHECK_SIZEOF(long long, 8)
23552356
AC_CHECK_SIZEOF(void *, 4)
23562357
AC_CHECK_SIZEOF(short, 2)

pyconfig.h.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
support for AIX C++ shared extension modules. */
1717
#undef AIX_GENUINE_CPLUSPLUS
1818

19+
/* The normal alignment of `long', in bytes. */
20+
#undef ALIGNOF_LONG
21+
1922
/* The normal alignment of `size_t', in bytes. */
2023
#undef ALIGNOF_SIZE_T
2124

0 commit comments

Comments
 (0)