 #include "ompi/mca/op/base/base.h"
 #include "ompi/mca/op/aarch64/op_aarch64.h"

-#if defined(GENERATE_SVE_CODE)
+/**
+ * Ensure exactly one of GENERATE_SVE_CODE or GENERATE_NEON_CODE is enabled.
+ * Enabling both is invalid as each builds a separate library. Disabling both
+ * would leave no implementation to compile.
+ */
+#if GENERATE_SVE_CODE && GENERATE_NEON_CODE
+#error "Never build NEON and SVE within the same library"
+#elif GENERATE_SVE_CODE
 # include <arm_sve.h>
 #define OMPI_OP_TYPE_PREPEND sv
 #define OMPI_OP_OP_PREPEND sv
 #define APPEND _sve
-#elif defined(GENERATE_NEON_CODE)
+#elif GENERATE_NEON_CODE
 # include <arm_neon.h>
 #define OMPI_OP_TYPE_PREPEND
 #define OMPI_OP_OP_PREPEND v
 #define APPEND _neon
 #else
-#error we should not reach this
+#error "Neither NEON nor SVE code generated. This should never happen"
 #endif /* OMPI_MCA_OP_HAVE_SVE */

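For illustration only (this snippet is not part of the patch): the move from `#if defined(X)` to `#if X` matters if the build defines both generation macros with 0/1 values, which is an assumption here rather than something shown in the diff. With value tests, a macro set to 0 no longer selects its branch, and the `&&` guard above can reject the case where both are non-zero:

#define GENERATE_NEON_CODE 0  /* hypothetical value, chosen for this illustration */

#if defined(GENERATE_NEON_CODE)
/* old style: this branch is taken, because the macro is defined even though it is 0 */
#endif

#if GENERATE_NEON_CODE
/* new style: this branch is skipped, because the macro evaluates to 0 */
#endif
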
 /*
@@ -51,7 +58,7 @@
  */
 #define OP_CONCAT(A, B) OP_CONCAT_NX(A, B)

-#if defined(GENERATE_SVE_CODE)
+#if GENERATE_SVE_CODE
 # define svcnt(X) \
     _Generic((X), \
         int8_t: svcntb, \
@@ -101,7 +108,7 @@ _Generic((*(out)), \
         uint64_t: __extension__({ switch ((how_much)) { DUMP2(out, in1, in2) }}), \
         float32_t: __extension__({ switch ((how_much)) { DUMP4(out, in1, in2) }}), \
         float64_t: __extension__({ switch ((how_much)) { DUMP2(out, in1, in2) }}))
-#endif /* defined(GENERATE_SVE_CODE) */
+#endif /* GENERATE_SVE_CODE */

 /*
  * Since all the functions in this file are essentially identical, we
@@ -111,7 +118,7 @@ _Generic((*(out)), \
  * This macro is for (out op in).
  *
  */
-#if defined(GENERATE_NEON_CODE)
+#if GENERATE_NEON_CODE
 #define OP_AARCH64_FUNC(name, type_name, type_size, type_cnt, type, op) \
     static void OP_CONCAT(ompi_op_aarch64_2buff_##name##_##type##type_size##_t, \
                           APPEND)(const void *_in, void *_out, int *count, \
@@ -135,7 +142,7 @@ _Generic((*(out)), \
             neon_loop(left_over, out, out, in); \
         } \
     }
-#elif defined(GENERATE_SVE_CODE)
+#elif GENERATE_SVE_CODE
 #define OP_AARCH64_FUNC(name, type_name, type_size, type_cnt, type, op) \
     OMPI_SVE_ATTR \
     static void OP_CONCAT(ompi_op_aarch64_2buff_##name##_##type##type_size##_t, APPEND) \
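To make the naming scheme above concrete, here is a small self-contained editorial sketch, assuming OP_CONCAT_NX is the usual `A ## B` paster (its definition is not shown in this diff). OP_CONCAT goes through OP_CONCAT_NX so that APPEND is expanded to its value before pasting, which is how the same macro text produces a `_neon`-suffixed function in one library and an `_sve`-suffixed one in the other:

#include <stdio.h>

#define OP_CONCAT_NX(A, B) A ## B           /* assumed definition, not shown in the diff */
#define OP_CONCAT(A, B) OP_CONCAT_NX(A, B)  /* extra level so A and B expand before pasting */
#define APPEND _neon                        /* the NEON build; the SVE build uses _sve */

/* Mimics OP_AARCH64_FUNC's name construction for (max, s, 16, ..., int, ...):
 * the resulting identifier is ompi_op_aarch64_2buff_max_int16_t_neon. */
static void OP_CONCAT(ompi_op_aarch64_2buff_max_int16_t, APPEND)(void)
{
    puts("generated: ompi_op_aarch64_2buff_max_int16_t_neon");
}

int main(void)
{
    ompi_op_aarch64_2buff_max_int16_t_neon();  /* the pasted name is an ordinary identifier */
    return 0;
}
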
@@ -169,10 +176,10 @@ _Generic((*(out)), \
     OP_AARCH64_FUNC(max, u, 16, 8, uint, max)
     OP_AARCH64_FUNC(max, s, 32, 4, int, max)
     OP_AARCH64_FUNC(max, u, 32, 4, uint, max)
-#if defined(GENERATE_SVE_CODE)
+#if GENERATE_SVE_CODE
     OP_AARCH64_FUNC(max, s, 64, 2, int, max)
     OP_AARCH64_FUNC(max, u, 64, 2, uint, max)
-#endif /* defined(GENERATE_SVE_CODE) */
+#endif /* GENERATE_SVE_CODE */

     OP_AARCH64_FUNC(max, f, 32, 4, float, max)
     OP_AARCH64_FUNC(max, f, 64, 2, float, max)
@@ -188,10 +195,10 @@ _Generic((*(out)), \
     OP_AARCH64_FUNC(min, u, 16, 8, uint, min)
     OP_AARCH64_FUNC(min, s, 32, 4, int, min)
     OP_AARCH64_FUNC(min, u, 32, 4, uint, min)
-#if defined(GENERATE_SVE_CODE)
+#if GENERATE_SVE_CODE
     OP_AARCH64_FUNC(min, s, 64, 2, int, min)
     OP_AARCH64_FUNC(min, u, 64, 2, uint, min)
-#endif /* defined(GENERATE_SVE_CODE) */
+#endif /* GENERATE_SVE_CODE */

     OP_AARCH64_FUNC(min, f, 32, 4, float, min)
     OP_AARCH64_FUNC(min, f, 64, 2, float, min)
@@ -223,10 +230,10 @@ _Generic((*(out)), \
     OP_AARCH64_FUNC(prod, u, 16, 8, uint, mul)
     OP_AARCH64_FUNC(prod, s, 32, 4, int, mul)
     OP_AARCH64_FUNC(prod, u, 32, 4, uint, mul)
-#if defined(GENERATE_SVE_CODE)
+#if GENERATE_SVE_CODE
     OP_AARCH64_FUNC(prod, s, 64, 2, int, mul)
     OP_AARCH64_FUNC(prod, u, 64, 2, uint, mul)
-#endif /* defined(GENERATE_SVE_CODE) */
+#endif /* GENERATE_SVE_CODE */

     OP_AARCH64_FUNC(prod, f, 32, 4, float, mul)
     OP_AARCH64_FUNC(prod, f, 64, 2, float, mul)
@@ -277,7 +284,7 @@ _Generic((*(out)), \
  * This is a three buffer (2 input and 1 output) version of the reduction
  * routines, needed for some optimizations.
  */
-#if defined(GENERATE_NEON_CODE)
+#if GENERATE_NEON_CODE
 #define OP_AARCH64_FUNC_3BUFF(name, type_name, type_size, type_cnt, type, op) \
 static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \
     (const void *_in1, const void *_in2, void *_out, int *count, \
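For orientation (an editorial sketch, not part of the patch), the two macro families differ only in where the reduction reads its operands: the 2buff form accumulates into the output in place, while the 3buff form reads two inputs and writes a separate output. Scalar equivalents of the intended element-wise behavior, with hypothetical function names:

#include <stddef.h>

/* Hypothetical scalar counterpart of a generated 2buff routine: out[i] = op(out[i], in[i]). */
void reduce_max_2buff(const int *in, int *out, size_t count)
{
    for (size_t i = 0; i < count; i++) {
        out[i] = (out[i] > in[i]) ? out[i] : in[i];
    }
}

/* Hypothetical scalar counterpart of a generated 3buff routine: out[i] = op(in1[i], in2[i]). */
void reduce_max_3buff(const int *in1, const int *in2, int *out, size_t count)
{
    for (size_t i = 0; i < count; i++) {
        out[i] = (in1[i] > in2[i]) ? in1[i] : in2[i];
    }
}
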
@@ -302,7 +309,7 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
         neon_loop(left_over, out, in1, in2); \
     } \
 }
-#elif defined(GENERATE_SVE_CODE)
+#elif GENERATE_SVE_CODE
 #define OP_AARCH64_FUNC_3BUFF(name, type_name, type_size, type_cnt, type, op) \
 OMPI_SVE_ATTR \
 static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \
@@ -324,7 +331,7 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
         OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
     } \
 }
-#endif /* defined(GENERATE_SVE_CODE) */
+#endif /* GENERATE_SVE_CODE */

 /*************************************************************************
  * Max
@@ -337,10 +344,10 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
     OP_AARCH64_FUNC_3BUFF(max, u, 16, 8, uint, max)
     OP_AARCH64_FUNC_3BUFF(max, s, 32, 4, int, max)
     OP_AARCH64_FUNC_3BUFF(max, u, 32, 4, uint, max)
-#if defined(GENERATE_SVE_CODE)
+#if GENERATE_SVE_CODE
     OP_AARCH64_FUNC_3BUFF(max, s, 64, 2, int, max)
     OP_AARCH64_FUNC_3BUFF(max, u, 64, 2, uint, max)
-#endif /* defined(GENERATE_SVE_CODE) */
+#endif /* GENERATE_SVE_CODE */

     OP_AARCH64_FUNC_3BUFF(max, f, 32, 4, float, max)
     OP_AARCH64_FUNC_3BUFF(max, f, 64, 2, float, max)
@@ -356,10 +363,10 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
     OP_AARCH64_FUNC_3BUFF(min, u, 16, 8, uint, min)
     OP_AARCH64_FUNC_3BUFF(min, s, 32, 4, int, min)
     OP_AARCH64_FUNC_3BUFF(min, u, 32, 4, uint, min)
-#if defined(GENERATE_SVE_CODE)
+#if GENERATE_SVE_CODE
     OP_AARCH64_FUNC_3BUFF(min, s, 64, 2, int, min)
     OP_AARCH64_FUNC_3BUFF(min, u, 64, 2, uint, min)
-#endif /* defined(GENERATE_SVE_CODE) */
+#endif /* GENERATE_SVE_CODE */

     OP_AARCH64_FUNC_3BUFF(min, f, 32, 4, float, min)
     OP_AARCH64_FUNC_3BUFF(min, f, 64, 2, float, min)
@@ -392,10 +399,10 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
     OP_AARCH64_FUNC_3BUFF(prod, u, 16, 8, uint, mul)
     OP_AARCH64_FUNC_3BUFF(prod, s, 32, 4, int, mul)
     OP_AARCH64_FUNC_3BUFF(prod, u, 32, 4, uint, mul)
-#if defined(GENERATE_SVE_CODE)
+#if GENERATE_SVE_CODE
     OP_AARCH64_FUNC_3BUFF(prod, s, 64, 2, int, mul)
     OP_AARCH64_FUNC_3BUFF(prod, u, 64, 2, uint, mul)
-#endif /* defined(GENERATE_SVE_CODE) */
+#endif /* GENERATE_SVE_CODE */

     OP_AARCH64_FUNC_3BUFF(prod, f, 32, 4, float, mul)
     OP_AARCH64_FUNC_3BUFF(prod, f, 64, 2, float, mul)
@@ -482,17 +489,17 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
     /* Corresponds to MPI_MAX */
     [OMPI_OP_BASE_FORTRAN_MAX] = {
         C_INTEGER_BASE(max, 2buff),
-#if defined(GENERATE_SVE_CODE)
+#if GENERATE_SVE_CODE
         C_INTEGER_EX(max, 2buff),
-#endif /* defined(GENERATE_SVE_CODE) */
+#endif /* GENERATE_SVE_CODE */
         FLOATING_POINT(max, 2buff),
     },
     /* Corresponds to MPI_MIN */
     [OMPI_OP_BASE_FORTRAN_MIN] = {
         C_INTEGER_BASE(min, 2buff),
-#if defined(GENERATE_SVE_CODE)
+#if GENERATE_SVE_CODE
         C_INTEGER_EX(min, 2buff),
-#endif /* defined(GENERATE_SVE_CODE) */
+#endif /* GENERATE_SVE_CODE */
         FLOATING_POINT(min, 2buff),
     },
     /* Corresponds to MPI_SUM */
@@ -504,9 +511,9 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
     /* Corresponds to MPI_PROD */
     [OMPI_OP_BASE_FORTRAN_PROD] = {
         C_INTEGER_BASE(prod, 2buff),
-#if defined(GENERATE_SVE_CODE)
+#if GENERATE_SVE_CODE
         C_INTEGER_EX(prod, 2buff),
-#endif /* defined(GENERATE_SVE_CODE) */
+#endif /* GENERATE_SVE_CODE */
         FLOATING_POINT(prod, 2buff),
     },
     /* Corresponds to MPI_LAND */
@@ -558,17 +565,17 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
     /* Corresponds to MPI_MAX */
     [OMPI_OP_BASE_FORTRAN_MAX] = {
         C_INTEGER_BASE(max, 3buff),
-#if defined(GENERATE_SVE_CODE)
+#if GENERATE_SVE_CODE
         C_INTEGER_EX(max, 3buff),
-#endif /* defined(GENERATE_SVE_CODE) */
+#endif /* GENERATE_SVE_CODE */
         FLOATING_POINT(max, 3buff),
     },
     /* Corresponds to MPI_MIN */
     [OMPI_OP_BASE_FORTRAN_MIN] = {
         C_INTEGER_BASE(min, 3buff),
-#if defined(GENERATE_SVE_CODE)
+#if GENERATE_SVE_CODE
         C_INTEGER_EX(min, 3buff),
-#endif /* defined(GENERATE_SVE_CODE) */
+#endif /* GENERATE_SVE_CODE */
         FLOATING_POINT(min, 3buff),
     },
     /* Corresponds to MPI_SUM */
@@ -580,9 +587,9 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
     /* Corresponds to MPI_PROD */
     [OMPI_OP_BASE_FORTRAN_PROD] = {
         C_INTEGER_BASE(prod, 3buff),
-#if defined(GENERATE_SVE_CODE)
+#if GENERATE_SVE_CODE
         C_INTEGER_EX(prod, 3buff),
-#endif /* defined(GENERATE_SVE_CODE) */
+#endif /* GENERATE_SVE_CODE */
         FLOATING_POINT(prod, 3buff),
     },
     /* Corresponds to MPI_LAND */