|
9 | 9 | #define mult_alpha(acc,alpha,...) "vfmadd213ps ("#__VA_ARGS__"),"#alpha","#acc";" |
10 | 10 | #endif |
11 | 11 |
|
12 | | -#if defined TRMMKERNEL && !defined LEFT |
| 12 | +#if defined(TRMMKERNEL) && !defined(LEFT) |
13 | 13 | #ifdef TRANSA |
14 | 14 | #define HEAD_SET_OFFSET(ndim) {} |
15 | 15 | #define TAIL_SET_OFFSET(ndim) {off+=ndim;} |
|
22 | 22 | #define TAIL_SET_OFFSET(ndim) {} |
23 | 23 | #endif |
24 | 24 |
|
25 | | -#if defined TRMMKERNEL && defined LEFT |
| 25 | +#if defined(TRMMKERNEL) && defined(LEFT) |
26 | 26 | #ifdef TRANSA |
27 | 27 | #define init_update_kskip(val) "subq $"#val",%%r13;" |
28 | 28 | #define save_update_kskip(val) "" |
|
37 | 37 |
|
38 | 38 | #ifdef TRMMKERNEL |
39 | 39 | #define init_set_k "movq %%r12,%4; subq %%r13,%4;" |
40 | | - #if (defined LEFT && !defined TRANSA) || (!defined LEFT && defined TRANSA) |
| 40 | + #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) |
41 | 41 | #define INIT_SET_KSKIP "movq %9,%%r13; salq $2,%%r13;" |
42 | 42 | #define init_set_pointers(a_copy,b_copy) "leaq (%0,%%r13,"#a_copy"),%0; leaq (%1,%%r13,"#b_copy"),%1;" |
43 | 43 | #define save_set_pointers(a_copy,b_copy) "" |
|
63 | 63 | #define save_set_pa_pb_n2(mdim) save_set_pointers(mdim,2) |
64 | 64 | #define save_set_pa_pb_n1(mdim) save_set_pointers(mdim,1) |
65 | 65 |
|
66 | | -#if defined TRMMKERNEL && !defined LEFT && defined TRANSA |
| 66 | +#if defined(TRMMKERNEL) && !defined(LEFT) && defined(TRANSA) |
67 | 67 | #define kernel_kstart_n8(mdim) \ |
68 | 68 | KERNEL_k1m##mdim##n4 KERNEL_k1m##mdim##n4 KERNEL_k1m##mdim##n4 KERNEL_k1m##mdim##n4 "subq $16,%4;" |
69 | 69 | #define kernel_kstart_n12(mdim) \ |
|
109 | 109 | unit_kernel_k1m8n4(%%ymm4,%%ymm5,%%ymm6,%%ymm7,16,24,%1)\ |
110 | 110 | unit_kernel_k1m8n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,16,24,%1,%%r12,4)\ |
111 | 111 | unit_kernel_k1m8n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15,16,24,%1,%%r12,8) "addq $32,%1;" |
112 | | -#if defined TRMMKERNEL && !defined LEFT && !defined TRANSA |
| 112 | +#if defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA) |
113 | 113 | #define unit_kernel_endn4_k1m8n8(offa1,offb1,offb2) \ |
114 | 114 | "vmovsldup "#offa1"(%0),%%ymm1; vmovshdup "#offa1"(%0),%%ymm2;"\ |
115 | 115 | unit_kernel_k1m8n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,offb1,offb2,%1,%%r12,4) |
|
192 | 192 | #define KERNEL_k1m4n8 KERNEL_h_k1m4n8 "addq $16,%1;" |
193 | 193 | #define KERNEL_h_k1m4n12 KERNEL_h_k1m4n8 unit_kernel_k1m4n4(%%xmm12,%%xmm13,%%xmm14,%%xmm15,0,8,%1,%%r12,8) |
194 | 194 | #define KERNEL_k1m4n12 KERNEL_h_k1m4n12 "addq $16,%1;" |
195 | | -#if defined TRMMKERNEL && !defined LEFT && !defined TRANSA |
| 195 | +#if defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA) |
196 | 196 | #define unit_kernel_endn4_k1m4n8(offa1,offb1,offb2) \ |
197 | 197 | "vmovsldup "#offa1"(%0),%%xmm1; vmovshdup "#offa1"(%0),%%xmm2;"\ |
198 | 198 | unit_kernel_k1m4n4(%%xmm8,%%xmm9,%%xmm10,%%xmm11,offb1,offb2,%1,%%r12,4) |
|
285 | 285 | "vbroadcastss (%0),%%xmm10; vfmadd231ps %%xmm3,%%xmm10,%%xmm4; vfmadd231ps %%xmm2,%%xmm10,%%xmm6; vfmadd231ps %%xmm1,%%xmm10,%%xmm8;"\ |
286 | 286 | "vbroadcastss 4(%0),%%xmm10; vfmadd231ps %%xmm3,%%xmm10,%%xmm5; vfmadd231ps %%xmm2,%%xmm10,%%xmm7; vfmadd231ps %%xmm1,%%xmm10,%%xmm9;"\ |
287 | 287 | "addq $8,%0;" |
288 | | -#if defined TRMMKERNEL && !defined LEFT && !defined TRANSA |
| 288 | +#if defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA) |
289 | 289 | #define unit_kernel_endn4_k1m2n8(aoff1,aoff2,boff) \ |
290 | 290 | "vmovups "#boff"(%1,%%r12,4),%%xmm3;"\ |
291 | 291 | "vbroadcastss "#aoff1"(%0),%%xmm1; vfmadd231ps %%xmm3,%%xmm1,%%xmm6;"\ |
|
379 | 379 | "vmovups (%1),%%xmm3; vmovups (%1,%%r12,4),%%xmm2; vmovups (%1,%%r12,8),%%xmm1; addq $16,%1;"\ |
380 | 380 | "vbroadcastss (%0),%%xmm10; vfmadd231ps %%xmm3,%%xmm10,%%xmm4; vfmadd231ps %%xmm2,%%xmm10,%%xmm5; vfmadd231ps %%xmm1,%%xmm10,%%xmm6;"\ |
381 | 381 | "addq $4,%0;" |
382 | | -#if defined TRMMKERNEL && !defined LEFT && !defined TRANSA |
| 382 | +#if defined(TRMMKERNEL) && !defined(LEFT) && !defined(TRANSA) |
383 | 383 | #define unit_kernel_endn4_k1m1n8(aoff,boff) \ |
384 | 384 | "vmovups "#boff"(%1,%%r12,4),%%xmm3;"\ |
385 | 385 | "vbroadcastss "#aoff"(%0),%%xmm1; vfmadd231ps %%xmm3,%%xmm1,%%xmm5;" |
|
0 commit comments