11
2- var NNZ_PER_THREAD = 8 ;
3- var WARP_SIZE = 32 ;
4- var BLOCK_SIZE = 256 ;
5-
62var default_CPU_schedules = {
73 spmv : [
84 {
@@ -11,7 +7,6 @@ var default_CPU_schedules = {
117 } ,
128 {
139 command : "reorder" ,
14- numReordered : 3 ,
1510 parameters : [ "i0" , "i1" , "j" ]
1611 } ,
1712 {
@@ -35,7 +30,6 @@ var default_CPU_schedules = {
3530 } ,
3631 {
3732 command : "reorder" ,
38- numReordered : 3 ,
3933 parameters : [ "chunk" , "fpos2" , "k" ]
4034 } ,
4135 {
@@ -46,12 +40,11 @@ var default_CPU_schedules = {
4640 mttkrp : [
4741 {
4842 command : "reorder" ,
49- numReordered : 4 ,
5043 parameters : [ "i" , "k" , "l" , "j" ]
5144 } ,
5245 {
5346 command : "precompute" ,
54- parameters : [ "j" , "j" , " B(i,k,l) * D(l,j)"]
47+ parameters : [ "B(i,k,l) * D(l,j)" , "j" , "j" ]
5548 } ,
5649 {
5750 command : "split" ,
@@ -64,6 +57,7 @@ var default_CPU_schedules = {
6457 ]
6558}
6659
60+
6761var default_GPU_schedules = {
6862 spmv : [
6963 {
@@ -76,28 +70,78 @@ var default_GPU_schedules = {
7670 } ,
7771 {
7872 command : "split" ,
79- parameters : [ "fpos" , "block" , "fpos1" , NNZ_PER_THREAD * BLOCK_SIZE ]
73+ parameters : [ "fpos" , "block" , "fpos1" , 2048 ]
8074 } ,
8175 {
8276 command : "split" ,
83- parameters : [ "fpos1" , "warp" , "fpos2" , NNZ_PER_THREAD * WARP_SIZE ]
77+ parameters : [ "fpos1" , "warp" , "fpos2" , 256 ]
8478 } ,
8579 {
8680 command : "split" ,
87- parameters : [ "fpos2" , "thread" , "thr_nz" , NNZ_PER_THREAD ]
81+ parameters : [ "fpos2" , "thread" , "thr_nz" , 8 ]
8882 } ,
8983 {
9084 command : "reorder" ,
91- numReordered : 4 ,
9285 parameters : [ "block" , "warp" , "thread" , "thr_nz" ]
9386 } ,
9487 {
9588 command : "precompute" ,
96- parameters : [ "thr_nz" , "thr_nz_pre" , "A(i, j) * x(j)" ]
89+ parameters : [ "A(i, j) * x(j)" , "thr_nz" , "thr_nz_pre" ]
90+ } ,
91+ {
92+ command : "unroll" ,
93+ parameters : [ "thr_nz_pre" , 8 ]
94+ } ,
95+ {
96+ command : "parallelize" ,
97+ parameters : [ "block" , "GPU Block" , "Ignore Races" ]
98+ } ,
99+ {
100+ command : "parallelize" ,
101+ parameters : [ "warp" , "GPU Warp" , "Ignore Races" ]
102+ } ,
103+ {
104+ command : "parallelize" ,
105+ parameters : [ "thread" , "GPU Thread" , "Atomics" ]
106+ }
107+ ] ,
108+ add : [ ] ,
109+ ttv : [
110+ {
111+ command : "fuse" ,
112+ parameters : [ "j" , "k" , "jk" ]
113+ } ,
114+ {
115+ command : "fuse" ,
116+ parameters : [ "i" , "jk" , "f" ]
117+ } ,
118+ {
119+ command : "pos" ,
120+ parameters : [ "f" , "fpos" , "B" ]
121+ } ,
122+ {
123+ command : "split" ,
124+ parameters : [ "fpos" , "block" , "fpos1" , 2048 ]
125+ } ,
126+ {
127+ command : "split" ,
128+ parameters : [ "fpos1" , "warp" , "fpos2" , 256 ]
129+ } ,
130+ {
131+ command : "split" ,
132+ parameters : [ "fpos2" , "thread" , "thr_nz" , 8 ]
133+ } ,
134+ {
135+ command : "reorder" ,
136+ parameters : [ "block" , "warp" , "thread" , "thr_nz" ]
137+ } ,
138+ {
139+ command : "precompute" ,
140+ parameters : [ "B(i, j, k) * c(k)" , "thr_nz" , "thr_nz_pre" ]
97141 } ,
98142 {
99143 command : "unroll" ,
100- parameters : [ "thr_nz_pre" , NNZ_PER_THREAD ]
144+ parameters : [ "thr_nz_pre" , 8 ]
101145 } ,
102146 {
103147 command : "parallelize" ,
@@ -111,5 +155,55 @@ var default_GPU_schedules = {
111155 command : "parallelize" ,
112156 parameters : [ "thread" , "GPU Thread" , "Atomics" ]
113157 }
114- ]
158+ ] ,
159+ mttkrp : [
160+ {
161+ command : "reorder" ,
162+ parameters : [ "i" , "k" , "l" , "j" ]
163+ } ,
164+ {
165+ command : "fuse" ,
166+ parameters : [ "k" , "l" , "kl" ]
167+ } ,
168+ {
169+ command : "fuse" ,
170+ parameters : [ "i" , "kl" , "f" ]
171+ } ,
172+ {
173+ command : "pos" ,
174+ parameters : [ "f" , "fpos" , "B" ]
175+ } ,
176+ {
177+ command : "split" ,
178+ parameters : [ "fpos" , "block" , "fpos1" , 128 ]
179+ } ,
180+ {
181+ command : "split" ,
182+ parameters : [ "fpos1" , "warp" , "nnz" , 16 ]
183+ } ,
184+ {
185+ command : "split" ,
186+ parameters : [ "j" , "dense_un" , "thread" , 32 ]
187+ } ,
188+ {
189+ command : "bound" ,
190+ parameters : [ "dense_un" , "dense_val" , 1 , "Max Exact" ]
191+ } ,
192+ {
193+ command : "reorder" ,
194+ parameters : [ "block" , "warp" , "dense_val" , "thread" , "nnz" ]
195+ } ,
196+ {
197+ command : "parallelize" ,
198+ parameters : [ "block" , "GPU Block" , "Ignore Races" ]
199+ } ,
200+ {
201+ command : "parallelize" ,
202+ parameters : [ "warp" , "GPU Warp" , "Ignore Races" ]
203+ } ,
204+ {
205+ command : "parallelize" ,
206+ parameters : [ "thread" , "GPU Thread" , "Atomics" ]
207+ }
208+ ] ,
115209}
0 commit comments