Skip to content

Commit 8ee4f9b

Browse files
committed
Move core alpha_opaque_dst op to macro
1 parent c43140e commit 8ee4f9b

File tree

1 file changed

+27
-58
lines changed

1 file changed

+27
-58
lines changed

src_c/simd_blitters_sse2.c

Lines changed: 27 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,27 @@ alphablit_alpha_sse2_argb_no_surf_alpha(SDL_BlitInfo *info)
470470
}
471471
}
472472

473+
/* Defines the blit procedure at the core of
474+
* alphablit_alpha_sse2_argb_no_surf_alpha_opaque_dst
475+
*
476+
* Input variables: src1, dst1, unpacked_alpha
477+
* containing unpacked 16 bit lanes of src, dst, and src alpha
478+
* Output variables: sub_dst (dst1 is also overwritten: it is left shifted by 8)
479+
* */
480+
#define ARGB_NO_SURF_ALPHA_OPAQUE_DST_PROCEDURE \
481+
/* (srcRGB - dstRGB) */ \
482+
sub_dst = _mm_sub_epi16(src1, dst1); \
483+
/* (srcRGB - dstRGB) * srcA */ \
484+
sub_dst = _mm_mullo_epi16(sub_dst, unpacked_alpha); \
485+
/* (srcRGB - dstRGB) * srcA + srcRGB */ \
486+
sub_dst = _mm_add_epi16(sub_dst, src1); \
487+
/* (dstRGB << 8) */ \
488+
dst1 = _mm_slli_epi16(dst1, 8); \
489+
/* ((dstRGB << 8) + (srcRGB - dstRGB) * srcA + srcRGB) */ \
490+
sub_dst = _mm_add_epi16(sub_dst, dst1); \
491+
/* (((dstRGB << 8) + (srcRGB - dstRGB) * srcA + srcRGB) >> 8) */ \
492+
sub_dst = _mm_srli_epi16(sub_dst, 8);
493+
473494
void
474495
alphablit_alpha_sse2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
475496
{
@@ -542,24 +563,9 @@ alphablit_alpha_sse2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
542563
/* 0A0R0G0B0A0R0G0B -> dst1 */
543564
dst1 = _mm_unpacklo_epi8(pixels_dst, mm_zero);
544565

545-
/* (srcRGB - dstRGB) */
546-
sub_dst = _mm_sub_epi16(src1, dst1);
547-
548-
/* (srcRGB - dstRGB) * srcA */
549-
sub_dst = _mm_mullo_epi16(sub_dst, unpacked_alpha);
550-
551-
/* (srcRGB - dstRGB) * srcA + srcRGB */
552-
sub_dst = _mm_add_epi16(sub_dst, src1);
553-
554-
/* (dstRGB << 8) */
555-
dst1 = _mm_slli_epi16(dst1, 8);
556-
557-
/* ((dstRGB << 8) + (srcRGB - dstRGB) * srcA + srcRGB) */
558-
sub_dst = _mm_add_epi16(sub_dst, dst1);
566+
ARGB_NO_SURF_ALPHA_OPAQUE_DST_PROCEDURE
559567

560-
/* (((dstRGB << 8) + (srcRGB - dstRGB) * srcA + srcRGB) >>
561-
* 8)*/
562-
batch_a_dst = _mm_srli_epi16(sub_dst, 8);
568+
batch_a_dst = sub_dst;
563569

564570
/*
565571
* BATCH B (the 2 high pixels)
@@ -576,34 +582,14 @@ alphablit_alpha_sse2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
576582
/* 0A0R0G0B0A0R0G0B -> dst1 */
577583
dst1 = _mm_unpackhi_epi8(pixels_dst, mm_zero);
578584

579-
/* (srcRGB - dstRGB) */
580-
sub_dst = _mm_sub_epi16(src1, dst1);
581-
582-
/* (srcRGB - dstRGB) * srcA */
583-
sub_dst = _mm_mullo_epi16(sub_dst, unpacked_alpha);
584-
585-
/* (srcRGB - dstRGB) * srcA + srcRGB */
586-
sub_dst = _mm_add_epi16(sub_dst, src1);
587-
588-
/* (dstRGB << 8) */
589-
dst1 = _mm_slli_epi16(dst1, 8);
590-
591-
/* ((dstRGB << 8) + (srcRGB - dstRGB) * srcA + srcRGB) */
592-
sub_dst = _mm_add_epi16(sub_dst, dst1);
593-
594-
/* (((dstRGB << 8) + (srcRGB - dstRGB) * srcA + srcRGB) >>
595-
* 8)*/
596-
sub_dst = _mm_srli_epi16(sub_dst, 8);
585+
ARGB_NO_SURF_ALPHA_OPAQUE_DST_PROCEDURE
597586

598587
/*
599588
* Combine the batches and store
600-
*/
601-
602-
/* pack everything back into a pixel with zeroed out alpha
589+
* pack everything back into a pixel with zeroed out alpha
603590
*/
604591
sub_dst = _mm_packus_epi16(batch_a_dst, sub_dst);
605592
sub_dst = _mm_and_si128(sub_dst, mm_rgb_mask);
606-
607593
_mm_storeu_si128(dstp128, sub_dst);
608594

609595
srcp128++;
@@ -625,7 +611,7 @@ alphablit_alpha_sse2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
625611
/* 0000000000000A0A -> rgb_src_alpha */
626612
mm_src_alpha = _mm_unpacklo_epi16(mm_src_alpha, mm_src_alpha);
627613
/* 000000000A0A0A0A -> rgb_src_alpha */
628-
mm_src_alpha = _mm_unpacklo_epi32(mm_src_alpha, mm_src_alpha);
614+
unpacked_alpha = _mm_unpacklo_epi32(mm_src_alpha, mm_src_alpha);
629615

630616
/* 000000000A0R0G0B -> src1 */
631617
src1 = _mm_unpacklo_epi8(src1, mm_zero);
@@ -635,24 +621,7 @@ alphablit_alpha_sse2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
635621
/* 000000000A0R0G0B -> dst1 */
636622
dst1 = _mm_unpacklo_epi8(dst1, mm_zero);
637623

638-
/* (srcRGB - dstRGB) */
639-
sub_dst = _mm_sub_epi16(src1, dst1);
640-
641-
/* (srcRGB - dstRGB) * srcA */
642-
sub_dst = _mm_mullo_epi16(sub_dst, mm_src_alpha);
643-
644-
/* (srcRGB - dstRGB) * srcA + srcRGB */
645-
sub_dst = _mm_add_epi16(sub_dst, src1);
646-
647-
/* (dstRGB << 8) */
648-
dst1 = _mm_slli_epi16(dst1, 8);
649-
650-
/* ((dstRGB << 8) + (srcRGB - dstRGB) * srcA + srcRGB) */
651-
sub_dst = _mm_add_epi16(sub_dst, dst1);
652-
653-
/* (((dstRGB << 8) + (srcRGB - dstRGB) * srcA + srcRGB) >>
654-
* 8)*/
655-
sub_dst = _mm_srli_epi16(sub_dst, 8);
624+
ARGB_NO_SURF_ALPHA_OPAQUE_DST_PROCEDURE
656625

657626
/* pack everything back into a pixel */
658627
sub_dst = _mm_packus_epi16(sub_dst, mm_zero);

0 commit comments

Comments
 (0)