@@ -470,6 +470,27 @@ alphablit_alpha_sse2_argb_no_surf_alpha(SDL_BlitInfo *info)
470470 }
471471}
472472
473+ /* Core blend arithmetic of alphablit_alpha_sse2_argb_no_surf_alpha_opaque_dst,
474+ * kept in one macro so every blend site in that function expands the same code.
475+ * Per 16-bit lane: sub_dst = ((dst1 << 8) + (src1 - dst1) * unpacked_alpha + src1) >> 8
476+ * Input variables: src1, dst1, unpacked_alpha (must be in scope at the call site),
477+ * containing unpacked 16 bit lanes of src, dst, and src alpha
478+ * Output variables: sub_dst. Side effect: dst1 is overwritten with (dst1 << 8).
479+ * The expansion already ends in ';', so call sites need no trailing semicolon. */
480+ #define ARGB_NO_SURF_ALPHA_OPAQUE_DST_PROCEDURE \
481+ /* (srcRGB - dstRGB) */ \
482+ sub_dst = _mm_sub_epi16(src1, dst1); \
483+ /* (srcRGB - dstRGB) * srcA, keeping the low 16 bits of each product */ \
484+ sub_dst = _mm_mullo_epi16 (sub_dst , unpacked_alpha ); \
485+ /* (srcRGB - dstRGB) * srcA + srcRGB */ \
486+ sub_dst = _mm_add_epi16 (sub_dst , src1 ); \
487+ /* (dstRGB << 8); from here on dst1 no longer holds the unpacked dst pixels */ \
488+ dst1 = _mm_slli_epi16 (dst1 , 8 ); \
489+ /* ((dstRGB << 8) + (srcRGB - dstRGB) * srcA + srcRGB) */ \
490+ sub_dst = _mm_add_epi16 (sub_dst , dst1 ); \
491+ /* (((dstRGB << 8) + (srcRGB - dstRGB) * srcA + srcRGB) >> 8), the blended result */ \
492+ sub_dst = _mm_srli_epi16 (sub_dst , 8 );
493+
473494void
474495alphablit_alpha_sse2_argb_no_surf_alpha_opaque_dst (SDL_BlitInfo * info )
475496{
@@ -542,24 +563,9 @@ alphablit_alpha_sse2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
542563 /* 0A0R0G0B0A0R0G0B -> dst1 */
543564 dst1 = _mm_unpacklo_epi8 (pixels_dst , mm_zero );
544565
545- /* (srcRGB - dstRGB) */
546- sub_dst = _mm_sub_epi16 (src1 , dst1 );
547-
548- /* (srcRGB - dstRGB) * srcA */
549- sub_dst = _mm_mullo_epi16 (sub_dst , unpacked_alpha );
550-
551- /* (srcRGB - dstRGB) * srcA + srcRGB */
552- sub_dst = _mm_add_epi16 (sub_dst , src1 );
553-
554- /* (dstRGB << 8) */
555- dst1 = _mm_slli_epi16 (dst1 , 8 );
556-
557- /* ((dstRGB << 8) + (srcRGB - dstRGB) * srcA + srcRGB) */
558- sub_dst = _mm_add_epi16 (sub_dst , dst1 );
566+ ARGB_NO_SURF_ALPHA_OPAQUE_DST_PROCEDURE
559567
560- /* (((dstRGB << 8) + (srcRGB - dstRGB) * srcA + srcRGB) >>
561- * 8)*/
562- batch_a_dst = _mm_srli_epi16 (sub_dst , 8 );
568+ batch_a_dst = sub_dst ;
563569
564570 /*
565571 * BATCH B (the 2 high pixels)
@@ -576,34 +582,14 @@ alphablit_alpha_sse2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
576582 /* 0A0R0G0B0A0R0G0B -> dst1 */
577583 dst1 = _mm_unpackhi_epi8 (pixels_dst , mm_zero );
578584
579- /* (srcRGB - dstRGB) */
580- sub_dst = _mm_sub_epi16 (src1 , dst1 );
581-
582- /* (srcRGB - dstRGB) * srcA */
583- sub_dst = _mm_mullo_epi16 (sub_dst , unpacked_alpha );
584-
585- /* (srcRGB - dstRGB) * srcA + srcRGB */
586- sub_dst = _mm_add_epi16 (sub_dst , src1 );
587-
588- /* (dstRGB << 8) */
589- dst1 = _mm_slli_epi16 (dst1 , 8 );
590-
591- /* ((dstRGB << 8) + (srcRGB - dstRGB) * srcA + srcRGB) */
592- sub_dst = _mm_add_epi16 (sub_dst , dst1 );
593-
594- /* (((dstRGB << 8) + (srcRGB - dstRGB) * srcA + srcRGB) >>
595- * 8)*/
596- sub_dst = _mm_srli_epi16 (sub_dst , 8 );
585+ ARGB_NO_SURF_ALPHA_OPAQUE_DST_PROCEDURE
597586
598587 /*
599588 * Combine the batches and store
600- */
601-
602- /* pack everything back into a pixel with zeroed out alpha
589+ * pack everything back into a pixel with zeroed out alpha
603590 */
604591 sub_dst = _mm_packus_epi16 (batch_a_dst , sub_dst );
605592 sub_dst = _mm_and_si128 (sub_dst , mm_rgb_mask );
606-
607593 _mm_storeu_si128 (dstp128 , sub_dst );
608594
609595 srcp128 ++ ;
@@ -625,7 +611,7 @@ alphablit_alpha_sse2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
625611 /* 0000000000000A0A -> rgb_src_alpha */
626612 mm_src_alpha = _mm_unpacklo_epi16 (mm_src_alpha , mm_src_alpha );
627613 /* 000000000A0A0A0A -> rgb_src_alpha */
628- mm_src_alpha = _mm_unpacklo_epi32 (mm_src_alpha , mm_src_alpha );
614+ unpacked_alpha = _mm_unpacklo_epi32 (mm_src_alpha , mm_src_alpha );
629615
630616 /* 000000000A0R0G0B -> src1 */
631617 src1 = _mm_unpacklo_epi8 (src1 , mm_zero );
@@ -635,24 +621,7 @@ alphablit_alpha_sse2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
635621 /* 000000000A0R0G0B -> dst1 */
636622 dst1 = _mm_unpacklo_epi8 (dst1 , mm_zero );
637623
638- /* (srcRGB - dstRGB) */
639- sub_dst = _mm_sub_epi16 (src1 , dst1 );
640-
641- /* (srcRGB - dstRGB) * srcA */
642- sub_dst = _mm_mullo_epi16 (sub_dst , mm_src_alpha );
643-
644- /* (srcRGB - dstRGB) * srcA + srcRGB */
645- sub_dst = _mm_add_epi16 (sub_dst , src1 );
646-
647- /* (dstRGB << 8) */
648- dst1 = _mm_slli_epi16 (dst1 , 8 );
649-
650- /* ((dstRGB << 8) + (srcRGB - dstRGB) * srcA + srcRGB) */
651- sub_dst = _mm_add_epi16 (sub_dst , dst1 );
652-
653- /* (((dstRGB << 8) + (srcRGB - dstRGB) * srcA + srcRGB) >>
654- * 8)*/
655- sub_dst = _mm_srli_epi16 (sub_dst , 8 );
624+ ARGB_NO_SURF_ALPHA_OPAQUE_DST_PROCEDURE
656625
657626 /* pack everything back into a pixel */
658627 sub_dst = _mm_packus_epi16 (sub_dst , mm_zero );
0 commit comments