Skip to content

Surface module compiling in SDL3 #3435

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions src_c/_surface.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,28 @@
#include "_pygame.h"
#include "surface.h"

/* SIMD compatibility shims for SDL3 builds (kept in this header for now).
 * Everything in this section is compiled only when PG_SDL3 is true. */
#if PG_SDL3
// SDL3 no longer pulls in the intrinsics headers by default, so include
// them explicitly.
#include <SDL3/SDL_intrin.h>

/* When SDL3 defines SDL_AVX2_INTRINSICS, define the macros that our code
 * checks for build-time AVX2 support (HAVE_IMMINTRIN_H and __AVX2__),
 * unless they are already defined.
 * NOTE(review): __AVX2__ is a double-underscore (implementation-reserved)
 * name that compilers normally predefine themselves -- confirm that
 * defining it here is intended. */
#ifdef SDL_AVX2_INTRINSICS
#ifndef HAVE_IMMINTRIN_H
#define HAVE_IMMINTRIN_H 1
#endif /* HAVE_IMMINTRIN_H */
#ifndef __AVX2__
#define __AVX2__
#endif /* __AVX2__ */
#endif /* SDL_AVX2_INTRINSICS */

/* When SDL3 defines SDL_SSE2_INTRINSICS, define __SSE2__ (unless it is
 * already defined), which our code checks for build-time SSE2 support.
 * TODO(review): the original note here read "reenable this to test best";
 * presumably this block is meant to be toggled while testing -- confirm the
 * intent. The same reserved-name caveat as for __AVX2__ applies. */
#ifdef SDL_SSE2_INTRINSICS
#ifndef __SSE2__
#define __SSE2__
#endif /* __SSE2__ */
#endif /* SDL_SSE2_INTRINSICS */
#endif /* PG_SDL3 */

#endif
6 changes: 3 additions & 3 deletions src_c/alphablit.c
Original file line number Diff line number Diff line change
Expand Up @@ -2974,18 +2974,18 @@ premul_surf_color_by_alpha(SDL_Surface *src, SDL_Surface *dst)
#if !defined(__EMSCRIPTEN__)
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
if ((PG_SURF_BytesPerPixel(src) == 4) && pg_has_avx2()) {
premul_surf_color_by_alpha_avx2(src, dst);
premul_surf_color_by_alpha_avx2(src, src_format, dst);
return 0;
}
#if defined(__SSE2__)
if ((PG_SURF_BytesPerPixel(src) == 4) && SDL_HasSSE2()) {
premul_surf_color_by_alpha_sse2(src, dst);
premul_surf_color_by_alpha_sse2(src, src_format, dst);
return 0;
}
#endif /* __SSE2__*/
#if PG_ENABLE_ARM_NEON
if ((PG_SURF_BytesPerPixel(src) == 4) && SDL_HasNEON()) {
premul_surf_color_by_alpha_sse2(src, dst);
premul_surf_color_by_alpha_sse2(src, src_format, dst);
return 0;
}
#endif /* PG_ENABLE_ARM_NEON */
Expand Down
3 changes: 0 additions & 3 deletions src_c/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@ rwobject = py.extension_module(
subdir: pg,
)

# TODO: support SDL3
if sdl_api != 3
simd_blitters_avx2 = static_library(
'simd_blitters_avx2',
'simd_blitters_avx2.c',
Expand Down Expand Up @@ -132,7 +130,6 @@ surface = py.extension_module(
install: true,
subdir: pg,
)
endif

surflock = py.extension_module(
'surflock',
Expand Down
6 changes: 4 additions & 2 deletions src_c/simd_blitters.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ premul_surf_color_by_alpha_non_simd(SDL_Surface *src,
PG_PixelFormat *dst_format,
SDL_Palette *dst_palette);
void
premul_surf_color_by_alpha_sse2(SDL_Surface *src, SDL_Surface *dst);
premul_surf_color_by_alpha_sse2(SDL_Surface *src, PG_PixelFormat *srcfmt,
SDL_Surface *dst);

void
alphablit_alpha_avx2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info);
Expand Down Expand Up @@ -86,4 +87,5 @@ blit_blend_rgb_min_avx2(SDL_BlitInfo *info);
void
blit_blend_premultiplied_avx2(SDL_BlitInfo *info);
void
premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst);
premul_surf_color_by_alpha_avx2(SDL_Surface *src, PG_PixelFormat *src_format,
SDL_Surface *dst);
16 changes: 9 additions & 7 deletions src_c/simd_blitters_avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -1561,7 +1561,8 @@ blit_blend_premultiplied_avx2(SDL_BlitInfo *info)
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
!defined(SDL_DISABLE_IMMINTRIN_H)
void
premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst)
premul_surf_color_by_alpha_avx2(SDL_Surface *src, PG_PixelFormat *src_format,
SDL_Surface *dst)
{
int i, height = src->h;
const int width = src->w;
Expand All @@ -1578,7 +1579,7 @@ premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst)
__m256i mm_src, mm_dst, alphaA, alphaB, mm_alpha_in;
__m256i mm_srcA, mm_srcB;

const __m256i mm256_amask = _mm256_set1_epi32(src->format->Amask);
const __m256i mm256_amask = _mm256_set1_epi32(src_format->Amask);
const __m256i mm_zero = _mm256_setzero_si256();
const __m256i partial_mask =
_mm256_set_epi32(0, pxl_excess > 6 ? -1 : 0, pxl_excess > 5 ? -1 : 0,
Expand All @@ -1587,10 +1588,10 @@ premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst)
pxl_excess > 0 ? -1 : 0);
const __m256i mm256_ones = _mm256_set1_epi16(0x0001);

char _a_off = ((src->format->Amask >> 8) == 0) ? 0
: ((src->format->Amask >> 16) == 0) ? 1
: ((src->format->Amask >> 24) == 0) ? 2
: 3;
char _a_off = ((src_format->Amask >> 8) == 0) ? 0
: ((src_format->Amask >> 16) == 0) ? 1
: ((src_format->Amask >> 24) == 0) ? 2
: 3;

/* masks for shuffling the alpha to the RGB channels for multiplication */
const __m256i shuffle_maskA = _mm256_set_epi8(
Expand Down Expand Up @@ -1637,7 +1638,8 @@ premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst)
}
#else
void
premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst)
premul_surf_color_by_alpha_avx2(SDL_Surface *src, PG_PixelFormat *src_format,
SDL_Surface *dst)
{
BAD_AVX2_FUNCTION_CALL;
}
Expand Down
15 changes: 7 additions & 8 deletions src_c/simd_blitters_sse2.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,8 @@ alphablit_alpha_sse2_argb_surf_alpha(SDL_BlitInfo *info)
Uint32 *dstp = (Uint32 *)info->d_pixels;
int dstskip = info->d_skip >> 2;

SDL_PixelFormat *srcfmt = info->src;
SDL_PixelFormat *dstfmt = info->dst;
PG_PixelFormat *srcfmt = info->src;
PG_PixelFormat *dstfmt = info->dst;

// int srcbpp = PG_FORMAT_BytesPerPixel(srcfmt);
// int dstbpp = PG_FORMAT_BytesPerPixel(dstfmt);
Expand Down Expand Up @@ -293,8 +293,8 @@ alphablit_alpha_sse2_argb_no_surf_alpha(SDL_BlitInfo *info)
int height = info->height;
int srcskip = info->s_skip >> 2;
int dstskip = info->d_skip >> 2;
SDL_PixelFormat *srcfmt = info->src;
SDL_PixelFormat *dstfmt = info->dst;
PG_PixelFormat *srcfmt = info->src;
PG_PixelFormat *dstfmt = info->dst;

/* Original 'Straight Alpha' blending equation:
--------------------------------------------
Expand Down Expand Up @@ -719,7 +719,7 @@ blit_blend_premultiplied_sse2(SDL_BlitInfo *info)
int srcskip = info->s_skip >> 2;
Uint32 *dstp = (Uint32 *)info->d_pixels;
int dstskip = info->d_skip >> 2;
SDL_PixelFormat *srcfmt = info->src;
PG_PixelFormat *srcfmt = info->src;
Uint32 amask = srcfmt->Amask;
// Uint64 multmask;
Uint64 ones;
Expand Down Expand Up @@ -787,15 +787,14 @@ blit_blend_premultiplied_sse2(SDL_BlitInfo *info)
}

void
premul_surf_color_by_alpha_sse2(SDL_Surface *src, SDL_Surface *dst)
premul_surf_color_by_alpha_sse2(SDL_Surface *src, PG_PixelFormat *srcfmt,
SDL_Surface *dst)
{
int n;
int width = src->w;
int height = src->h;
Uint32 *srcp = (Uint32 *)src->pixels;
Uint32 *dstp = (Uint32 *)dst->pixels;

SDL_PixelFormat *srcfmt = src->format;
Uint32 amask = srcfmt->Amask;
Uint64 ones;

Expand Down
80 changes: 50 additions & 30 deletions src_c/simd_fill.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,64 +40,84 @@ _pg_HasSSE_NEON();

// AVX2 functions
int
surface_fill_blend_add_avx2(SDL_Surface *surface, SDL_Rect *rect,
surface_fill_blend_add_avx2(SDL_Surface *surface,
PG_PixelFormat *surface_format, SDL_Rect *rect,
Uint32 color);
int
surface_fill_blend_rgba_add_avx2(SDL_Surface *surface, SDL_Rect *rect,
Uint32 color);
surface_fill_blend_rgba_add_avx2(SDL_Surface *surface,
PG_PixelFormat *surface_format,
SDL_Rect *rect, Uint32 color);

int
surface_fill_blend_sub_avx2(SDL_Surface *surface, SDL_Rect *rect,
surface_fill_blend_sub_avx2(SDL_Surface *surface,
PG_PixelFormat *surface_format, SDL_Rect *rect,
Uint32 color);
int
surface_fill_blend_rgba_sub_avx2(SDL_Surface *surface, SDL_Rect *rect,
Uint32 color);
surface_fill_blend_rgba_sub_avx2(SDL_Surface *surface,
PG_PixelFormat *surface_format,
SDL_Rect *rect, Uint32 color);
int
surface_fill_blend_mult_avx2(SDL_Surface *surface, SDL_Rect *rect,
surface_fill_blend_mult_avx2(SDL_Surface *surface,
PG_PixelFormat *surface_format, SDL_Rect *rect,
Uint32 color);
int
surface_fill_blend_rgba_mult_avx2(SDL_Surface *surface, SDL_Rect *rect,
Uint32 color);
surface_fill_blend_rgba_mult_avx2(SDL_Surface *surface,
PG_PixelFormat *surface_format,
SDL_Rect *rect, Uint32 color);
int
surface_fill_blend_min_avx2(SDL_Surface *surface, SDL_Rect *rect,
surface_fill_blend_min_avx2(SDL_Surface *surface,
PG_PixelFormat *surface_format, SDL_Rect *rect,
Uint32 color);
int
surface_fill_blend_rgba_min_avx2(SDL_Surface *surface, SDL_Rect *rect,
Uint32 color);
surface_fill_blend_rgba_min_avx2(SDL_Surface *surface,
PG_PixelFormat *surface_format,
SDL_Rect *rect, Uint32 color);
int
surface_fill_blend_max_avx2(SDL_Surface *surface, SDL_Rect *rect,
surface_fill_blend_max_avx2(SDL_Surface *surface,
PG_PixelFormat *surface_format, SDL_Rect *rect,
Uint32 color);
int
surface_fill_blend_rgba_max_avx2(SDL_Surface *surface, SDL_Rect *rect,
Uint32 color);
surface_fill_blend_rgba_max_avx2(SDL_Surface *surface,
PG_PixelFormat *surface_format,
SDL_Rect *rect, Uint32 color);
// SSE2 functions
int
surface_fill_blend_add_sse2(SDL_Surface *surface, SDL_Rect *rect,
surface_fill_blend_add_sse2(SDL_Surface *surface,
PG_PixelFormat *surface_format, SDL_Rect *rect,
Uint32 color);
int
surface_fill_blend_rgba_add_sse2(SDL_Surface *surface, SDL_Rect *rect,
Uint32 color);
surface_fill_blend_rgba_add_sse2(SDL_Surface *surface,
PG_PixelFormat *surface_format,
SDL_Rect *rect, Uint32 color);
int
surface_fill_blend_sub_sse2(SDL_Surface *surface, SDL_Rect *rect,
surface_fill_blend_sub_sse2(SDL_Surface *surface,
PG_PixelFormat *surface_format, SDL_Rect *rect,
Uint32 color);
int
surface_fill_blend_rgba_sub_sse2(SDL_Surface *surface, SDL_Rect *rect,
Uint32 color);
surface_fill_blend_rgba_sub_sse2(SDL_Surface *surface,
PG_PixelFormat *surface_format,
SDL_Rect *rect, Uint32 color);
int
surface_fill_blend_mult_sse2(SDL_Surface *surface, SDL_Rect *rect,
surface_fill_blend_mult_sse2(SDL_Surface *surface,
PG_PixelFormat *surface_format, SDL_Rect *rect,
Uint32 color);
int
surface_fill_blend_rgba_mult_sse2(SDL_Surface *surface, SDL_Rect *rect,
Uint32 color);
surface_fill_blend_rgba_mult_sse2(SDL_Surface *surface,
PG_PixelFormat *surface_format,
SDL_Rect *rect, Uint32 color);
int
surface_fill_blend_min_sse2(SDL_Surface *surface, SDL_Rect *rect,
surface_fill_blend_min_sse2(SDL_Surface *surface,
PG_PixelFormat *surface_format, SDL_Rect *rect,
Uint32 color);
int
surface_fill_blend_rgba_min_sse2(SDL_Surface *surface, SDL_Rect *rect,
Uint32 color);
surface_fill_blend_rgba_min_sse2(SDL_Surface *surface,
PG_PixelFormat *surface_format,
SDL_Rect *rect, Uint32 color);
int
surface_fill_blend_max_sse2(SDL_Surface *surface, SDL_Rect *rect,
surface_fill_blend_max_sse2(SDL_Surface *surface,
PG_PixelFormat *surface_format, SDL_Rect *rect,
Uint32 color);
int
surface_fill_blend_rgba_max_sse2(SDL_Surface *surface, SDL_Rect *rect,
Uint32 color);
surface_fill_blend_rgba_max_sse2(SDL_Surface *surface,
PG_PixelFormat *surface_format,
SDL_Rect *rect, Uint32 color);
Loading
Loading