@@ -70,15 +70,15 @@ var default_GPU_schedules = {
7070 } ,
7171 {
7272 command : "split" ,
73- parameters : [ "fpos" , "block" , "fpos1" , 2048 ]
73+ parameters : [ "fpos" , "block" , "fpos1" , 3584 ]
7474 } ,
7575 {
7676 command : "split" ,
77- parameters : [ "fpos1" , "warp" , "fpos2" , 256 ]
77+ parameters : [ "fpos1" , "warp" , "fpos2" , 224 ]
7878 } ,
7979 {
8080 command : "split" ,
81- parameters : [ "fpos2" , "thread" , "thr_nz" , 8 ]
81+ parameters : [ "fpos2" , "thread" , "thr_nz" , 7 ]
8282 } ,
8383 {
8484 command : "reorder" ,
@@ -90,7 +90,7 @@ var default_GPU_schedules = {
9090 } ,
9191 {
9292 command : "unroll" ,
93- parameters : [ "thr_nz_pre" , 8 ]
93+ parameters : [ "thr_nz_pre" , 7 ]
9494 } ,
9595 {
9696 command : "parallelize" ,
@@ -121,15 +121,15 @@ var default_GPU_schedules = {
121121 } ,
122122 {
123123 command : "split" ,
124- parameters : [ "fpos" , "block" , "fpos1" , 2048 ]
124+ parameters : [ "fpos" , "block" , "fpos1" , 256 ]
125125 } ,
126126 {
127127 command : "split" ,
128- parameters : [ "fpos1" , "warp" , "fpos2" , 256 ]
128+ parameters : [ "fpos1" , "warp" , "fpos2" , 16 ]
129129 } ,
130130 {
131131 command : "split" ,
132- parameters : [ "fpos2" , "thread" , "thr_nz" , 8 ]
132+ parameters : [ "fpos2" , "thread" , "thr_nz" , 1 ]
133133 } ,
134134 {
135135 command : "reorder" ,
@@ -141,7 +141,7 @@ var default_GPU_schedules = {
141141 } ,
142142 {
143143 command : "unroll" ,
144- parameters : [ "thr_nz_pre" , 8 ]
144+ parameters : [ "thr_nz_pre" , 1 ]
145145 } ,
146146 {
147147 command : "parallelize" ,
@@ -175,11 +175,11 @@ var default_GPU_schedules = {
175175 } ,
176176 {
177177 command : "split" ,
178- parameters : [ "fpos" , "block" , "fpos1" , 128 ]
178+ parameters : [ "fpos" , "block" , "fpos1" , 64 ]
179179 } ,
180180 {
181181 command : "split" ,
182- parameters : [ "fpos1" , "warp" , "nnz" , 16 ]
182+ parameters : [ "fpos1" , "warp" , "nnz" , 4 ]
183183 } ,
184184 {
185185 command : "split" ,
0 commit comments