Skip to content

bpo-43179: Generalise alignment for optimised string routines #24624

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 31, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions Objects/bytes_methods.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,14 @@ _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
{
const char *p = cptr;
const char *end = p + len;
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);

while (p < end) {
/* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
for an explanation. */
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
/* Help allocation */
const char *_p = p;
while (_p < aligned_end) {
while (_p + SIZEOF_SIZE_T <= end) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for your contribution. It looks is good change.

But I have questions about one particular change. Are you sure that there is no overflow here? Are you sure that all compilers optimize this code so that the loop does not contain superfluous addition?

Copy link
Contributor Author

@jrtc27 jrtc27 Feb 24, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can rewrite those all as end - _p >= SIZEOF_SIZE_T if you'd rather, as yes technically this can generate a pointer slightly out of bounds (and in theory overflow if you have a pointer to the last page of the address space, though in practice I hope that page is never given out as most software is unlikely to work if it's used...), though this is a common enough pattern for software that I think compilers end up having to support it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But also, it's 2021, not the late 90s, any compiler worth your time is already rewriting this entire thing behind your back (e.g. the "help allocation" is completely meaningless for any compiler you'll have encountered in the past 10+ years, that's such a basic optimisation for it to do).

size_t value = *(const size_t *) _p;
if (value & ASCII_CHAR_MASK) {
Py_RETURN_FALSE;
Expand Down
11 changes: 4 additions & 7 deletions Objects/stringlib/codecs.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
{
Py_UCS4 ch;
const char *s = *inptr;
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
STRINGLIB_CHAR *p = dest + *outpos;

while (s < end) {
Expand All @@ -40,11 +39,11 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
First, check if we can do an aligned read, as most CPUs have
a penalty for unaligned reads.
*/
if (_Py_IS_ALIGNED(s, SIZEOF_SIZE_T)) {
if (_Py_IS_ALIGNED(s, ALIGNOF_SIZE_T)) {
/* Help register allocation */
const char *_s = s;
STRINGLIB_CHAR *_p = p;
while (_s < aligned_end) {
while (_s + SIZEOF_SIZE_T <= end) {
/* Read a whole size_t at a time (either 4 or 8 bytes),
and do a fast unrolled copy if it only contains ASCII
characters. */
Expand Down Expand Up @@ -496,8 +495,6 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
int native_ordering)
{
Py_UCS4 ch;
const unsigned char *aligned_end =
(const unsigned char *) _Py_ALIGN_DOWN(e, SIZEOF_LONG);
const unsigned char *q = *inptr;
STRINGLIB_CHAR *p = dest + *outpos;
/* Offsets from q for retrieving byte pairs in the right order. */
Expand All @@ -512,10 +509,10 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
Py_UCS4 ch2;
/* First check for possible aligned read of a C 'long'. Unaligned
reads are more expensive, better to defer to another iteration. */
if (_Py_IS_ALIGNED(q, SIZEOF_LONG)) {
if (_Py_IS_ALIGNED(q, ALIGNOF_LONG)) {
/* Fast path for runs of in-range non-surrogate chars. */
const unsigned char *_q = q;
while (_q < aligned_end) {
while (_q + SIZEOF_LONG <= e) {
unsigned long block = * (const unsigned long *) _q;
if (native_ordering) {
/* Can use buffer directly */
Expand Down
6 changes: 2 additions & 4 deletions Objects/stringlib/find_max_char.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,12 @@ Py_LOCAL_INLINE(Py_UCS4)
STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
{
const unsigned char *p = (const unsigned char *) begin;
const unsigned char *aligned_end =
(const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);

while (p < end) {
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
/* Help register allocation */
const unsigned char *_p = p;
while (_p < aligned_end) {
while (_p + SIZEOF_SIZE_T <= end) {
size_t value = *(const size_t *) _p;
if (value & UCS1_ASCII_CHAR_MASK)
return 255;
Expand Down
22 changes: 6 additions & 16 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -5069,25 +5069,16 @@ static Py_ssize_t
ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
{
const char *p = start;
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);

/*
* Issue #17237: m68k is a bit different from most architectures in
* that objects do not use "natural alignment" - for example, int and
* long are only aligned at 2-byte boundaries. Therefore the assert()
* won't work; also, tests have shown that skipping the "optimised
* version" will even speed up m68k.
*/
#if !defined(__m68k__)

#if SIZEOF_SIZE_T <= SIZEOF_VOID_P
assert(_Py_IS_ALIGNED(dest, SIZEOF_SIZE_T));
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
assert(_Py_IS_ALIGNED(dest, ALIGNOF_SIZE_T));
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
/* Fast path, see in STRINGLIB(utf8_decode) for
an explanation. */
/* Help allocation */
const char *_p = p;
Py_UCS1 * q = dest;
while (_p < aligned_end) {
while (_p + SIZEOF_SIZE_T <= end) {
size_t value = *(const size_t *) _p;
if (value & ASCII_CHAR_MASK)
break;
Expand All @@ -5103,15 +5094,14 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
}
return p - start;
}
#endif
#endif
while (p < end) {
/* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
for an explanation. */
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
/* Help allocation */
const char *_p = p;
while (_p < aligned_end) {
while (_p + SIZEOF_SIZE_T <= end) {
size_t value = *(const size_t *) _p;
if (value & ASCII_CHAR_MASK)
break;
Expand Down
3 changes: 3 additions & 0 deletions PC/pyconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */
# define SIZEOF_FPOS_T 8
# define SIZEOF_HKEY 8
# define SIZEOF_SIZE_T 8
# define ALIGNOF_SIZE_T 8
/* configure.ac defines HAVE_LARGEFILE_SUPPORT iff
sizeof(off_t) > sizeof(long), and sizeof(long long) >= sizeof(off_t).
On Win64 the second condition is not true, but if fpos_t replaces off_t
Expand All @@ -303,6 +304,7 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */
# define SIZEOF_FPOS_T 8
# define SIZEOF_HKEY 4
# define SIZEOF_SIZE_T 4
# define ALIGNOF_SIZE_T 4
/* MS VS2005 changes time_t to a 64-bit type on all platforms */
# if defined(_MSC_VER) && _MSC_VER >= 1400
# define SIZEOF_TIME_T 8
Expand All @@ -321,6 +323,7 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */
#define SIZEOF_SHORT 2
#define SIZEOF_INT 4
#define SIZEOF_LONG 4
#define ALIGNOF_LONG 4
#define SIZEOF_LONG_LONG 8
#define SIZEOF_DOUBLE 8
#define SIZEOF_FLOAT 4
Expand Down
72 changes: 71 additions & 1 deletion configure
Original file line number Diff line number Diff line change
Expand Up @@ -8637,7 +8637,7 @@ $as_echo "#define HAVE_GCC_UINT128_T 1" >>confdefs.h
fi


# Sizes of various common basic types
# Sizes and alignments of various common basic types
# ANSI C requires sizeof(char) == 1, so no need to check it
# The cast to long int works around a bug in the HP C Compiler
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
Expand Down Expand Up @@ -8705,6 +8705,41 @@ cat >>confdefs.h <<_ACEOF
_ACEOF


# The cast to long int works around a bug in the HP C Compiler,
# see AC_CHECK_SIZEOF for more information.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking alignment of long" >&5
$as_echo_n "checking alignment of long... " >&6; }
if ${ac_cv_alignof_long+:} false; then :
$as_echo_n "(cached) " >&6
else
if ac_fn_c_compute_int "$LINENO" "(long int) offsetof (ac__type_alignof_, y)" "ac_cv_alignof_long" "$ac_includes_default
#ifndef offsetof
# define offsetof(type, member) ((char *) &((type *) 0)->member - (char *) 0)
#endif
typedef struct { char x; long y; } ac__type_alignof_;"; then :

else
if test "$ac_cv_type_long" = yes; then
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
as_fn_error 77 "cannot compute alignment of long
See \`config.log' for more details" "$LINENO" 5; }
else
ac_cv_alignof_long=0
fi
fi

fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_alignof_long" >&5
$as_echo "$ac_cv_alignof_long" >&6; }



cat >>confdefs.h <<_ACEOF
#define ALIGNOF_LONG $ac_cv_alignof_long
_ACEOF


# The cast to long int works around a bug in the HP C Compiler
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
Expand Down Expand Up @@ -8936,6 +8971,41 @@ cat >>confdefs.h <<_ACEOF
_ACEOF


# The cast to long int works around a bug in the HP C Compiler,
# see AC_CHECK_SIZEOF for more information.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking alignment of size_t" >&5
$as_echo_n "checking alignment of size_t... " >&6; }
if ${ac_cv_alignof_size_t+:} false; then :
$as_echo_n "(cached) " >&6
else
if ac_fn_c_compute_int "$LINENO" "(long int) offsetof (ac__type_alignof_, y)" "ac_cv_alignof_size_t" "$ac_includes_default
#ifndef offsetof
# define offsetof(type, member) ((char *) &((type *) 0)->member - (char *) 0)
#endif
typedef struct { char x; size_t y; } ac__type_alignof_;"; then :

else
if test "$ac_cv_type_size_t" = yes; then
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
as_fn_error 77 "cannot compute alignment of size_t
See \`config.log' for more details" "$LINENO" 5; }
else
ac_cv_alignof_size_t=0
fi
fi

fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_alignof_size_t" >&5
$as_echo "$ac_cv_alignof_size_t" >&6; }



cat >>confdefs.h <<_ACEOF
#define ALIGNOF_SIZE_T $ac_cv_alignof_size_t
_ACEOF


# The cast to long int works around a bug in the HP C Compiler
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
Expand Down
4 changes: 3 additions & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2347,17 +2347,19 @@ AC_CHECK_TYPE(ssize_t,
AC_CHECK_TYPE(__uint128_t,
AC_DEFINE(HAVE_GCC_UINT128_T, 1, [Define if your compiler provides __uint128_t]),,)

# Sizes of various common basic types
# Sizes and alignments of various common basic types
# ANSI C requires sizeof(char) == 1, so no need to check it
AC_CHECK_SIZEOF(int, 4)
AC_CHECK_SIZEOF(long, 4)
AC_CHECK_ALIGNOF(long)
AC_CHECK_SIZEOF(long long, 8)
AC_CHECK_SIZEOF(void *, 4)
AC_CHECK_SIZEOF(short, 2)
AC_CHECK_SIZEOF(float, 4)
AC_CHECK_SIZEOF(double, 8)
AC_CHECK_SIZEOF(fpos_t, 4)
AC_CHECK_SIZEOF(size_t, 4)
AC_CHECK_ALIGNOF(size_t)
AC_CHECK_SIZEOF(pid_t, 4)
AC_CHECK_SIZEOF(uintptr_t)

Expand Down
6 changes: 6 additions & 0 deletions pyconfig.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@
support for AIX C++ shared extension modules. */
#undef AIX_GENUINE_CPLUSPLUS

/* The normal alignment of `long', in bytes. */
#undef ALIGNOF_LONG

/* The normal alignment of `size_t', in bytes. */
#undef ALIGNOF_SIZE_T

/* Alternative SOABI used in debug build to load C extensions built in release
mode */
#undef ALT_SOABI
Expand Down