@@ -134,7 +134,7 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w
134134
135135 /* Cutoff at about twice the size of P[0]. */
136136 /* TODO: Check if it makes sense to make it tunable. */
137- if (ilog2a < (2 * k )) {
137+ if (ilog2a < (2 * k * MP_RADIX_BARRETT_START_MULTIPLICATOR )) {
138138 if ((err = s_mp_slower_to_radix (a , sptr , & part_maxlen , & part_written , radix , false)) != MP_OKAY ) goto LTM_ERR ;
139139 /* part_written does not count EOS */
140140 * written = part_written + 1 ;
@@ -165,8 +165,18 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w
165165 with R_0 = (2^(2*k))/b^y and k = ceil(log_2(n)) as computed above.
166166 */
167167
168+ /* Scale k to get the tree a bit flatter. Alternative: do it iteratively instead of recursively. */
169+ k = k * MP_RADIX_BARRETT_START_MULTIPLICATOR ;
170+
171+
168172 /* Compute initial reciprocal R[0] and expand it (R[0]^(2^k)) */
169173 if ((err = mp_init_i32 (& P [0 ], n )) != MP_OKAY ) goto LTM_ERR ;
174+ /* TODO: chunksize does not seem to matter much above the initial b^y. Do not forget to remove this line if
175+ MP_RADIX_BARRETT_START_MULTIPLICATOR is removed, but keep in mind the possibility that
176+ the OS does not like too many recursions. This routine uses a lot of stack,
177+ and it calls other D&C algorithms (fast multiplication, fast division) that need a little
178+ slice of the stack, too (see: ulimit -s) */
179+ if ((err = mp_expt_n (& P [0 ], MP_RADIX_BARRETT_START_MULTIPLICATOR , & P [0 ])) != MP_OKAY ) goto LTM_ERR ;
170180 if ((err = mp_init (& R [0 ])) != MP_OKAY ) goto LTM_ERR ;
171181 if ((err = mp_2expt (& R [0 ], 2 * k )) != MP_OKAY ) goto LTM_ERR ;
172182 if ((err = mp_div (& R [0 ], & P [0 ], & R [0 ], NULL )) != MP_OKAY ) goto LTM_ERR ;
0 commit comments