Skip to content

Commit 0acc932

Browse files
authored
Merge pull request #3351 from pygame-community/ankith26-transform-sdl3
Get transform compiling on SDL3
2 parents 8277eba + 2b13c1f commit 0acc932

File tree

6 files changed

+125
-64
lines changed

6 files changed

+125
-64
lines changed

src_c/_pygame.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@
8484
#define PG_SurfaceHasRLE SDL_SurfaceHasRLE
8585

8686
#define PG_SoftStretchNearest(src, srcrect, dst, dstrect) \
87-
SDL_SoftStretch(src, srcrect, dst, dstrect, SDL_SCALEMODE_NEAREST)
87+
SDL_StretchSurface(src, srcrect, dst, dstrect, SDL_SCALEMODE_NEAREST)
8888

8989
/* Emulating SDL2 SDL_LockMutex API. In SDL3, it returns void. */
9090
static inline int

src_c/meson.build

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,8 +179,6 @@ image = py.extension_module(
179179
subdir: pg,
180180
)
181181

182-
# TODO: support SDL3
183-
if sdl_api != 3
184182
simd_transform_avx2 = static_library(
185183
'simd_transform_avx2',
186184
'simd_transform_avx2.c',
@@ -221,7 +219,6 @@ transform = py.extension_module(
221219
install: true,
222220
subdir: pg,
223221
)
224-
endif
225222

226223
mask = py.extension_module(
227224
'mask',

src_c/simd_transform.h

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,24 @@
11
#define NO_PYGAME_C_API
22
#include "_surface.h"
33

4+
/* TODO: This compat code should probably go in some place like simd_shared.h
5+
* That header file, however, is inconsistently used at the moment and is not
6+
* included wherever it should be.
7+
* This block will be needed by simd_blitters and simd_fill. */
8+
9+
#if PG_SDL3
10+
// SDL3 no longer includes intrinsics by default; we need to do it explicitly
11+
#include <SDL3/SDL_intrin.h>
12+
13+
/* If SDL_AVX2_INTRINSICS is defined by SDL3, we need to set macros that our
14+
* code checks for AVX2 build-time support. */
15+
#ifdef SDL_AVX2_INTRINSICS
16+
#ifndef HAVE_IMMINTRIN_H
17+
#define HAVE_IMMINTRIN_H 1
18+
#endif /* HAVE_IMMINTRIN_H*/
19+
#endif /* SDL_AVX2_INTRINSICS*/
20+
#endif /* PG_SDL3 */
21+
422
/**
523
* MACRO borrowed from SSE2NEON - useful for making the shuffling family of
624
* intrinsics easier to understand by indicating clearly what will go where.
@@ -26,7 +44,8 @@
2644
#if defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)
2745

2846
void
29-
grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf);
47+
grayscale_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt,
48+
SDL_Surface *newsurf);
3049
// smoothscale filters
3150
void
3251
filter_shrink_X_SSE2(Uint8 *srcpix, Uint8 *dstpix, int height, int srcpitch,
@@ -41,12 +60,13 @@ void
4160
filter_expand_Y_SSE2(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch,
4261
int dstpitch, int srcheight, int dstheight);
4362
void
44-
invert_sse2(SDL_Surface *src, SDL_Surface *newsurf);
63+
invert_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf);
4564

4665
#endif /* (defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)) */
4766

4867
// AVX2 functions
4968
void
50-
grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf);
69+
grayscale_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt,
70+
SDL_Surface *newsurf);
5171
void
52-
invert_avx2(SDL_Surface *src, SDL_Surface *newsurf);
72+
invert_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf);

src_c/simd_transform_avx2.c

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ pg_avx2_at_runtime_but_uncompiled()
4646
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
4747
!defined(SDL_DISABLE_IMMINTRIN_H)
4848
void
49-
grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
49+
grayscale_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
5050
{
5151
/* See the SSE2 code for a simpler overview of this algorithm
5252
* Current AVX2 process
@@ -85,12 +85,11 @@ grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
8585
Uint32 *srcp = (Uint32 *)src->pixels;
8686
Uint32 *dstp = (Uint32 *)newsurf->pixels;
8787

88-
Uint32 amask = src->format->Amask;
88+
Uint32 amask = src_fmt->Amask;
8989
Uint32 rgbmask = ~amask;
9090

91-
int rgb_weights =
92-
((0x4C << src->format->Rshift) | (0x96 << src->format->Gshift) |
93-
(0x1D << src->format->Bshift));
91+
int rgb_weights = ((0x4C << src_fmt->Rshift) | (0x96 << src_fmt->Gshift) |
92+
(0x1D << src_fmt->Bshift));
9493

9594
__m256i *srcp256 = (__m256i *)src->pixels;
9695
__m256i *dstp256 = (__m256i *)newsurf->pixels;
@@ -216,7 +215,7 @@ grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
216215
}
217216

218217
void
219-
invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
218+
invert_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
220219
{
221220
int s_row_skip = (src->pitch - src->w * 4) / 4;
222221

@@ -237,7 +236,7 @@ invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
237236
Uint32 *srcp = (Uint32 *)src->pixels;
238237
Uint32 *dstp = (Uint32 *)newsurf->pixels;
239238

240-
Uint32 amask = src->format->Amask;
239+
Uint32 amask = src_fmt->Amask;
241240
Uint32 rgbmask = ~amask;
242241

243242
__m256i *srcp256 = (__m256i *)src->pixels;
@@ -300,12 +299,12 @@ invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
300299
}
301300
#else
302301
void
303-
grayscale_avx2(SDL_Surface *src, SDL_Surface *newsurf)
302+
grayscale_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
304303
{
305304
BAD_AVX2_FUNCTION_CALL;
306305
}
307306
void
308-
invert_avx2(SDL_Surface *src, SDL_Surface *newsurf)
307+
invert_avx2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
309308
{
310309
BAD_AVX2_FUNCTION_CALL;
311310
}

src_c/simd_transform_sse2.c

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ filter_expand_Y_SSE2(Uint8 *srcpix, Uint8 *dstpix, int width, int srcpitch,
424424
}
425425

426426
void
427-
grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf)
427+
grayscale_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
428428
{
429429
/* For the SSE2 SIMD version of grayscale we do one pixel at a time
430430
* Thus we can calculate the number of loops (and pixels) by multiplying
@@ -460,16 +460,15 @@ grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf)
460460
Uint32 *srcp = (Uint32 *)src->pixels;
461461
Uint32 *dstp = (Uint32 *)newsurf->pixels;
462462

463-
Uint64 amask64 = ((Uint64)src->format->Amask) | src->format->Amask;
463+
Uint64 amask64 = ((Uint64)src_fmt->Amask) | src_fmt->Amask;
464464
Uint64 rgbmask64 = ~amask64;
465465

466466
Uint64 rgb_weights =
467-
((Uint64)((0x4C << src->format->Rshift) |
468-
(0x96 << src->format->Gshift) |
469-
(0x1D << src->format->Bshift))
467+
((Uint64)((0x4C << src_fmt->Rshift) | (0x96 << src_fmt->Gshift) |
468+
(0x1D << src_fmt->Bshift))
470469
<< 32) |
471-
((0x4C << src->format->Rshift) | (0x96 << src->format->Gshift) |
472-
(0x1D << src->format->Bshift));
470+
((0x4C << src_fmt->Rshift) | (0x96 << src_fmt->Gshift) |
471+
(0x1D << src_fmt->Bshift));
473472

474473
Uint64 *srcp64 = (Uint64 *)src->pixels;
475474
Uint64 *dstp64 = (Uint64 *)newsurf->pixels;
@@ -613,7 +612,7 @@ grayscale_sse2(SDL_Surface *src, SDL_Surface *newsurf)
613612
}
614613

615614
void
616-
invert_sse2(SDL_Surface *src, SDL_Surface *newsurf)
615+
invert_sse2(SDL_Surface *src, PG_PixelFormat *src_fmt, SDL_Surface *newsurf)
617616
{
618617
int s_row_skip = (src->pitch - src->w * 4) / 4;
619618

@@ -638,8 +637,8 @@ invert_sse2(SDL_Surface *src, SDL_Surface *newsurf)
638637
__m128i *srcp128 = (__m128i *)src->pixels;
639638
__m128i *dstp128 = (__m128i *)newsurf->pixels;
640639

641-
mm_rgb_invert_mask = _mm_set1_epi32(~src->format->Amask);
642-
mm_alpha_mask = _mm_set1_epi32(src->format->Amask);
640+
mm_rgb_invert_mask = _mm_set1_epi32(~src_fmt->Amask);
641+
mm_alpha_mask = _mm_set1_epi32(src_fmt->Amask);
643642

644643
while (num_batches--) {
645644
perfect_4_pixels_batch_counter = perfect_4_pixels;

0 commit comments

Comments
 (0)