// Generated by the Tensor Algebra Compiler (tensor-compiler.org)
2- /* init_alloc_size should be initialized to a power of two */
3- int32_t init_alloc_size = 1048576 ;
4- allocate A2_pos [init_alloc_size ]
5- allocate A2_idx [init_alloc_size ]
6- A2_pos [0 ] = 0 ;
7-
8- int32_t pA1 = 0 ;
9- int32_t pA2 = A2_pos [pA1 ];
10- for (int32_t iB = 0 ; iB < B1_size ; iB ++ ) {
11- int32_t pB2 = B2_pos [iB ];
12- int32_t pC2 = C2_pos [iB ];
13- while ((pB2 < B2_pos [iB + 1 ]) && (pC2 < C2_pos [iB + 1 ])) {
14- int32_t jB = B2_idx [pB2 ];
15- int32_t jC = C2_idx [pC2 ];
16- int32_t j = min (jB , jC );
17- if ((jB == j ) && (jC == j )) {
18- A2_idx [pA2 ] = j ;
19- pA2 ++ ;
20- if ((0 == ((pA2 + 1 ) & pA2 )) && (init_alloc_size <= (pA2 + 1 )))
21- reallocate A2_idx [(2 * (pA2 + 1 ))]
// taco "A(i,j)=B(i,j)+C(i,j)" -f=A:ds:0,1 -f=B:ds:0,1 -f=C:ds:0,1 -write-source=taco_kernel.c -write-compute=taco_compute.c -write-assembly=taco_assembly.c
3+
4+ int assemble (taco_tensor_t * A , taco_tensor_t * B , taco_tensor_t * C ) {
5+ int A1_dimension = (int )(A -> dimensions [0 ]);
6+ int * restrict A2_pos = (int * )(A -> indices [1 ][0 ]);
7+ int * restrict A2_crd = (int * )(A -> indices [1 ][1 ]);
8+ double * restrict A_vals = (double * )(A -> vals );
9+ int B1_dimension = (int )(B -> dimensions [0 ]);
10+ int * restrict B2_pos = (int * )(B -> indices [1 ][0 ]);
11+ int * restrict B2_crd = (int * )(B -> indices [1 ][1 ]);
12+ int C1_dimension = (int )(C -> dimensions [0 ]);
13+ int * restrict C2_pos = (int * )(C -> indices [1 ][0 ]);
14+ int * restrict C2_crd = (int * )(C -> indices [1 ][1 ]);
15+
16+ A2_pos = (int32_t * )malloc (sizeof (int32_t ) * (A1_dimension + 1 ));
17+ A2_pos [0 ] = 0 ;
18+ for (int32_t pA2 = 1 ; pA2 < (A1_dimension + 1 ); pA2 ++ ) {
19+ A2_pos [pA2 ] = 0 ;
20+ }
21+ int32_t A2_crd_size = 1048576 ;
22+ A2_crd = (int32_t * )malloc (sizeof (int32_t ) * A2_crd_size );
23+ int32_t jA = 0 ;
24+
25+ for (int32_t i = 0 ; i < C1_dimension ; i ++ ) {
26+ int32_t pA2_begin = jA ;
27+
28+ int32_t jB = B2_pos [i ];
29+ int32_t pB2_end = B2_pos [(i + 1 )];
30+ int32_t jC = C2_pos [i ];
31+ int32_t pC2_end = C2_pos [(i + 1 )];
32+
33+ while (jB < pB2_end && jC < pC2_end ) {
34+ int32_t jB0 = B2_crd [jB ];
35+ int32_t jC0 = C2_crd [jC ];
36+ int32_t j = TACO_MIN (jB0 ,jC0 );
37+ if (jB0 == j && jC0 == j ) {
38+ if (A2_crd_size <= jA ) {
39+ A2_crd = (int32_t * )realloc (A2_crd , sizeof (int32_t ) * (A2_crd_size * 2 ));
40+ A2_crd_size *= 2 ;
41+ }
42+ A2_crd [jA ] = j ;
43+ jA ++ ;
44+ }
45+ else if (jB0 == j ) {
46+ if (A2_crd_size <= jA ) {
47+ A2_crd = (int32_t * )realloc (A2_crd , sizeof (int32_t ) * (A2_crd_size * 2 ));
48+ A2_crd_size *= 2 ;
49+ }
50+ A2_crd [jA ] = j ;
51+ jA ++ ;
52+ }
53+ else {
54+ if (A2_crd_size <= jA ) {
55+ A2_crd = (int32_t * )realloc (A2_crd , sizeof (int32_t ) * (A2_crd_size * 2 ));
56+ A2_crd_size *= 2 ;
57+ }
58+ A2_crd [jA ] = j ;
59+ jA ++ ;
60+ }
61+ jB += (int32_t )(jB0 == j );
62+ jC += (int32_t )(jC0 == j );
2263 }
23- else if (jB == j ) {
24- A2_idx [pA2 ] = j ;
25- pA2 ++ ;
26- if ((0 == ((pA2 + 1 ) & pA2 )) && (init_alloc_size <= (pA2 + 1 )))
27- reallocate A2_idx [(2 * (pA2 + 1 ))]
64+ while (jB < pB2_end ) {
65+ int32_t j = B2_crd [jB ];
66+ if (A2_crd_size <= jA ) {
67+ A2_crd = (int32_t * )realloc (A2_crd , sizeof (int32_t ) * (A2_crd_size * 2 ));
68+ A2_crd_size *= 2 ;
69+ }
70+ A2_crd [jA ] = j ;
71+ jA ++ ;
72+ jB ++ ;
2873 }
29- else {
30- A2_idx [pA2 ] = j ;
31- pA2 ++ ;
32- if ((0 == ((pA2 + 1 ) & pA2 )) && (init_alloc_size <= (pA2 + 1 )))
33- reallocate A2_idx [(2 * (pA2 + 1 ))]
74+ while (jC < pC2_end ) {
75+ int32_t j = C2_crd [jC ];
76+ if (A2_crd_size <= jA ) {
77+ A2_crd = (int32_t * )realloc (A2_crd , sizeof (int32_t ) * (A2_crd_size * 2 ));
78+ A2_crd_size *= 2 ;
79+ }
80+ A2_crd [jA ] = j ;
81+ jA ++ ;
82+ jC ++ ;
3483 }
35- if (jB == j ) pB2 ++ ;
36- if (jC == j ) pC2 ++ ;
37- }
38- while (pB2 < B2_pos [iB + 1 ]) {
39- int32_t jB0 = B2_idx [pB2 ];
40- A2_idx [pA2 ] = jB0 ;
41- pA2 ++ ;
42- if ((0 == ((pA2 + 1 ) & pA2 )) && (init_alloc_size <= (pA2 + 1 )))
43- reallocate A2_idx [(2 * (pA2 + 1 ))]
44- pB2 ++ ;
84+
85+ A2_pos [i + 1 ] = jA - pA2_begin ;
4586 }
46- while (pC2 < C2_pos [iB + 1 ]) {
47- int32_t jC0 = C2_idx [pC2 ];
48- A2_idx [pA2 ] = jC0 ;
49- pA2 ++ ;
50- if ((0 == ((pA2 + 1 ) & pA2 )) && (init_alloc_size <= (pA2 + 1 )))
51- reallocate A2_idx [(2 * (pA2 + 1 ))]
52- pC2 ++ ;
87+
88+ int32_t csA2 = 0 ;
89+ for (int32_t pA20 = 1 ; pA20 < (A1_dimension + 1 ); pA20 ++ ) {
90+ csA2 += A2_pos [pA20 ];
91+ A2_pos [pA20 ] = csA2 ;
5392 }
54- A2_pos [(iB + 1 )] = pA2 ;
55- }
5693
57- allocate A_vals [pA2 ]
94+ A_vals = (double * )malloc (sizeof (double ) * jA );
95+
96+ A -> indices [1 ][0 ] = (uint8_t * )(A2_pos );
97+ A -> indices [1 ][1 ] = (uint8_t * )(A2_crd );
98+ A -> vals = (uint8_t * )A_vals ;
99+ return 0 ;
100+ }
0 commit comments