@@ -21,14 +21,14 @@ def make_declare(loop_orders, dtypes, sub, compute_stride_jump=True):
2121 # the number of elements in that dimension,
2222 # the stride in that dimension,
2323 # and the jump from an iteration to the next
24- decl += f"npy_intp { var } _n{ value } ;\n ssize_t { var } _stride{ value } ;\n "
24+ decl += f"npy_intp { var } _n{ value } ;\n npy_intp { var } _stride{ value } ;\n "
2525 if compute_stride_jump :
26- decl += f"int { var } _jump{ value } _{ j } ;\n "
26+ decl += f"npy_intp { var } _jump{ value } _{ j } ;\n "
2727
2828 elif compute_stride_jump :
2929 # if the dimension is broadcasted, we only need
3030 # the jump (arbitrary length and stride = 0)
31- decl += f"int { var } _jump{ value } _{ j } ;\n "
31+ decl += f"npy_intp { var } _jump{ value } _{ j } ;\n "
3232
3333 return decl
3434
@@ -257,7 +257,7 @@ def loop_over(preloop, code, indices, i):
257257 forloop = f"""#pragma omp parallel for if( { suitable_n } >={ openmp_elemwise_minsize } )\n """
258258 else :
259259 forloop = ""
260- forloop += f"""for (int { iterv } = 0; { iterv } <{ suitable_n } ; { iterv } ++)"""
260+ forloop += f"""for (npy_intp { iterv } = 0; { iterv } <{ suitable_n } ; { iterv } ++)"""
261261 return f"""
262262 { preloop }
263263 { forloop } {{
@@ -317,8 +317,8 @@ def make_reordered_loop(
317317 # The first element of each pair is the absolute value of the stride
318318 # The second element corresponds to the index in the initial loop order
319319 order_loops = f"""
320- std::vector< std::pair<int, int > > { ovar } _loops({ nnested } );
321- std::vector< std::pair<int, int > >::iterator { ovar } _loops_it = { ovar } _loops.begin();
320+ std::vector< std::pair<int, npy_intp > > { ovar } _loops({ nnested } );
321+ std::vector< std::pair<int, npy_intp > >::iterator { ovar } _loops_it = { ovar } _loops.begin();
322322 """
323323
324324 # Fill the loop vector with the appropriate <stride, index> pairs
@@ -347,7 +347,7 @@ def make_reordered_loop(
347347 """
348348
349349 # Get the (sorted) total number of iterations of each loop
350- declare_totals = f"int init_totals[{ nnested } ];\n "
350+ declare_totals = f"npy_intp init_totals[{ nnested } ];\n "
351351 declare_totals += compute_output_dims_lengths ("init_totals" , init_loop_orders , sub )
352352
353353 # Sort totals to match the new order that was computed by sorting
@@ -358,7 +358,7 @@ def make_reordered_loop(
358358
359359 for i in range (nnested ):
360360 declare_totals += f"""
361- int TOTAL_{ i } = init_totals[{ ovar } _loops_it->second];
361+ npy_intp TOTAL_{ i } = init_totals[{ ovar } _loops_it->second];
362362 ++{ ovar } _loops_it;
363363 """
364364
@@ -389,14 +389,14 @@ def get_loop_strides(loop_order, i):
389389 )
390390
391391 declare_strides = f"""
392- int init_strides[{ nvars } ][{ nnested } ] = {{
392+ npy_intp init_strides[{ nvars } ][{ nnested } ] = {{
393393 { strides }
394394 }};"""
395395
396396 # Declare the (sorted) strides for each variable;
397397 # we iterate from the innermost loop to the outermost loop
398398 declare_strides += f"""
399- std::vector< std::pair<int, int > >::reverse_iterator { ovar } _loops_rit;
399+ std::vector< std::pair<int, npy_intp > >::reverse_iterator { ovar } _loops_rit;
400400 """
401401
402402 for i in range (nvars ):
@@ -405,7 +405,7 @@ def get_loop_strides(loop_order, i):
405405 { ovar } _loops_rit = { ovar } _loops.rbegin();"""
406406 for j in reversed (range (nnested )):
407407 declare_strides += f"""
408- int { var } _stride_l{ j } = init_strides[{ i } ][{ ovar } _loops_rit->second];
408+ npy_intp { var } _stride_l{ j } = init_strides[{ i } ][{ ovar } _loops_rit->second];
409409 ++{ ovar } _loops_rit;
410410 """
411411
@@ -436,7 +436,7 @@ def get_loop_strides(loop_order, i):
436436 if openmp :
437437 openmp_elemwise_minsize = config .openmp_elemwise_minsize
438438 forloop += f"""#pragma omp parallel for if( { total } >={ openmp_elemwise_minsize } )\n """
439- forloop += f"for(int { iterv } = 0; { iterv } <{ total } ; { iterv } ++)"
439+ forloop += f"for(npy_intp { iterv } = 0; { iterv } <{ total } ; { iterv } ++)"
440440
441441 loop = f"""
442442 { forloop }
@@ -596,14 +596,14 @@ def make_reordered_loop_careduce(
596596 if (PyArray_SIZE(inp) == 0) {
597597 acc_iter = (npy_float64*)(PyArray_DATA(acc));
598598 npy_intp n = PyArray_SIZE(acc);
599- for(int i = 0; i < n; i++)
599+ for(npy_intp i = 0; i < n; i++)
600600 {
601601 npy_float64 &acc_i = acc_iter[i];
602602 acc_i = 0;
603603 }
604604 } else {
605- std::vector< std::pair<int, int > > loops(2);
606- std::vector< std::pair<int, int > >::iterator loops_it = loops.begin();
605+ std::vector< std::pair<int, npy_intp > > loops(2);
606+ std::vector< std::pair<int, npy_intp > >::iterator loops_it = loops.begin();
607607
608608 loops_it->first = abs(PyArray_STRIDES(inp)[0]);
609609 loops_it->second = 0;
@@ -613,28 +613,28 @@ def make_reordered_loop_careduce(
613613 ++loops_it;
614614 std::sort(loops.rbegin(), loops.rend());
615615
616- int dim_lengths[2] = {inp_n0, inp_n1};
617- int inp_strides[2] = {inp_stride0, inp_stride1};
618- int acc_strides[2] = {acc_stride0, 0};
616+ npy_intp dim_lengths[2] = {inp_n0, inp_n1};
617+ npy_intp inp_strides[2] = {inp_stride0, inp_stride1};
618+ npy_intp acc_strides[2] = {acc_stride0, 0};
619619 bool reduction_axes[2] = {0, 1};
620620
621621 loops_it = loops.begin();
622- int dim_length_0 = dim_lengths[loops_it->second];
623- int is_reduction_axis_0 = reduction_axes[loops_it->second];
624- int inp_stride_0 = inp_strides[loops_it->second];
625- int acc_stride_0 = acc_strides[loops_it->second];
622+ npy_intp dim_length_0 = dim_lengths[loops_it->second];
623+ bool is_reduction_axis_0 = reduction_axes[loops_it->second];
624+ npy_intp inp_stride_0 = inp_strides[loops_it->second];
625+ npy_intp acc_stride_0 = acc_strides[loops_it->second];
626626 ++loops_it;
627- int dim_length_1 = dim_lengths[loops_it->second];
628- int is_reduction_axis_1 = reduction_axes[loops_it->second];
629- int inp_stride_1 = inp_strides[loops_it->second];
630- int acc_stride_1 = acc_strides[loops_it->second];
627+ npy_intp dim_length_1 = dim_lengths[loops_it->second];
628+ bool is_reduction_axis_1 = reduction_axes[loops_it->second];
629+ npy_intp inp_stride_1 = inp_strides[loops_it->second];
630+ npy_intp acc_stride_1 = acc_strides[loops_it->second];
631631 ++loops_it;
632632
633633 inp_iter = (npy_float64*)(PyArray_DATA(inp));
634634 acc_iter = (npy_float64*)(PyArray_DATA(acc));
635635
636- for(int iter_0 = 0; iter_0<dim_length_0; iter_0++){
637- for(int iter_1 = 0; iter_1<dim_length_1; iter_1++){
636+ for(npy_intp iter_0 = 0; iter_0<dim_length_0; iter_0++){
637+ for(npy_intp iter_1 = 0; iter_1<dim_length_1; iter_1++){
638638 npy_float64 &inp_i = *(inp_iter + inp_stride_1*iter_1 + inp_stride_0*iter_0);
639639 npy_float64 &acc_i = *(acc_iter + acc_stride_1*iter_1 + acc_stride_0*iter_0);
640640
@@ -654,8 +654,8 @@ def make_reordered_loop_careduce(
654654 // Special case for empty inputs
655655 if (PyArray_SIZE({ inp_var } ) == 0) {{
656656 { acc_var } _iter = ({ acc_dtype } *)(PyArray_DATA({ acc_var } ));
657- int n = PyArray_SIZE({ acc_var } );
658- for(int i = 0; i < n; i++)
657+ npy_intp n = PyArray_SIZE({ acc_var } );
658+ for(npy_intp i = 0; i < n; i++)
659659 {{
660660 { acc_dtype } &{ acc_var } _i = { acc_var } _iter[i];
661661 { initial_value }
@@ -669,8 +669,8 @@ def make_reordered_loop_careduce(
669669 # The second element corresponds to the index in the initial loop order
670670 order_loops = dedent (
671671 f"""
672- std::vector< std::pair<int, int > > loops({ inp_ndim } );
673- std::vector< std::pair<int, int > >::iterator loops_it = loops.begin();
672+ std::vector< std::pair<int, npy_intp > > loops({ inp_ndim } );
673+ std::vector< std::pair<int, npy_intp > >::iterator loops_it = loops.begin();
674674 """
675675 )
676676
@@ -691,9 +691,9 @@ def make_reordered_loop_careduce(
691691 counter = iter (range (inp_ndim ))
692692 unsorted_vars = dedent (
693693 f"""
694- int dim_lengths[{ inp_ndim } ] = {{{ ',' .join (f'{ inp_var } _n{ i } ' for i in range (inp_ndim ))} }};
695- int inp_strides[{ inp_ndim } ] = {{{ ',' .join (f'{ inp_var } _stride{ i } ' for i in range (inp_ndim ))} }};
696- int acc_strides[{ inp_ndim } ] = {{{ ',' .join ("0" if i in reduction_axes else f'{ acc_var } _stride{ next (counter )} ' for i in range (inp_ndim ))} }};
694+ npy_intp dim_lengths[{ inp_ndim } ] = {{{ ',' .join (f'{ inp_var } _n{ i } ' for i in range (inp_ndim ))} }};
695+ npy_intp inp_strides[{ inp_ndim } ] = {{{ ',' .join (f'{ inp_var } _stride{ i } ' for i in range (inp_ndim ))} }};
696+ npy_intp acc_strides[{ inp_ndim } ] = {{{ ',' .join ("0" if i in reduction_axes else f'{ acc_var } _stride{ next (counter )} ' for i in range (inp_ndim ))} }};
697697 bool reduction_axes[{ inp_ndim } ] = {{{ ', ' .join ("1" if i in reduction_axes else "0" for i in range (inp_ndim ))} }};\n
698698 """
699699 )
@@ -702,10 +702,10 @@ def make_reordered_loop_careduce(
702702 for i in range (inp_ndim ):
703703 sorted_vars += dedent (
704704 f"""
705- int dim_length_{ i } = dim_lengths[loops_it->second];
706- int is_reduction_axis_{ i } = reduction_axes[loops_it->second];
707- int { inp_var } _stride_{ i } = inp_strides[loops_it->second];
708- int { acc_var } _stride_{ i } = acc_strides[loops_it->second];
705+ npy_intp dim_length_{ i } = dim_lengths[loops_it->second];
706+ bool is_reduction_axis_{ i } = reduction_axes[loops_it->second];
707+ npy_intp { inp_var } _stride_{ i } = inp_strides[loops_it->second];
708+ npy_intp { acc_var } _stride_{ i } = acc_strides[loops_it->second];
709709 ++loops_it;
710710 """
711711 )
@@ -748,7 +748,7 @@ def make_reordered_loop_careduce(
748748 dim_length = f"dim_length_{ i } "
749749 loop = dedent (
750750 f"""
751- for(int { iter_var } = 0; { iter_var } <{ dim_length } ; { iter_var } ++){{
751+ for(npy_intp { iter_var } = 0; { iter_var } <{ dim_length } ; { iter_var } ++){{
752752 { loop }
753753 }}
754754 """
0 commit comments