Skip to content

Commit 0c49bb3

Browse files
committed
Tuning for radix conversion enabled
1 parent afe4d04 commit 0c49bb3

File tree

11 files changed

+260
-58
lines changed

11 files changed

+260
-58
lines changed

etc/tune.c

Lines changed: 197 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,159 @@ static uint64_t s_time_sqr(int size)
148148
return t1;
149149
}
150150

151+
/* Set cutoff for radix conversion (base 10 only for now but should be good enough) */
152+
#include <stdlib.h>
153+
static mp_err random_number(char **string, size_t length)
154+
{
155+
char alphabet[] = "0123456789", *str_cpy;
156+
157+
*string = malloc(length + 1);
158+
if (*string == NULL) {
159+
return MP_MEM;
160+
}
161+
str_cpy = *string;
162+
/* No leading zeros */
163+
do {
164+
*str_cpy = alphabet[rand() % 10];
165+
} while (*str_cpy == '0');
166+
length--;
167+
str_cpy++;
168+
169+
do {
170+
*str_cpy = alphabet[rand() % 10];
171+
str_cpy++;
172+
} while (--length > 0);
173+
174+
*str_cpy = '\0';
175+
176+
return MP_OKAY;
177+
}
178+
179+
#include <string.h>
180+
static uint64_t s_time_radix_conversion_read(int size)
181+
{
182+
int x;
183+
size_t length;
184+
size_t written;
185+
mp_err err;
186+
mp_int a;
187+
char *str_a, *str_b;
188+
uint64_t t1;
189+
190+
/* "size" is given as "number of limbs" and starts at 8 */
191+
length = ((size_t)size - 7u) * MP_DIGIT_BIT;
192+
193+
/* Over-estimate number of base 10 digits */
194+
/* TODO: can overflow with small INT_MAX */
195+
length = (length * 28u) / 93u + 2u;
196+
197+
if ((err = random_number(&str_a, length)) != MP_OKAY) {
198+
t1 = UINT64_MAX;
199+
goto LBL_ERR_1;
200+
}
201+
202+
if ((err = mp_init(&a)) != MP_OKAY) {
203+
t1 = UINT64_MAX;
204+
goto LBL_ERR_2;
205+
}
206+
s_timer_start();
207+
for (x = 0; x < s_number_of_test_loops; x++) {
208+
if ((err = mp_read_radix(&a, str_a, 10)) != MP_OKAY) {
209+
t1 = UINT64_MAX;
210+
goto LBL_ERR_3;
211+
}
212+
}
213+
t1 = s_timer_stop();
214+
215+
if ((err = mp_radix_size(&a, 10, &length)) != MP_OKAY) {
216+
t1 = UINT64_MAX;
217+
goto LBL_ERR_3;
218+
}
219+
220+
str_b = malloc(length + 1);
221+
if (str_b == NULL) {
222+
t1 = UINT64_MAX;
223+
goto LBL_ERR_3;
224+
}
225+
if ((err = mp_to_radix(&a, str_b, length, &written, 10)) != MP_OKAY) {
226+
t1 = UINT64_MAX;
227+
goto LBL_ERR;
228+
}
229+
230+
if (strcmp(str_a, str_b) != 0) {
231+
t1 = 0u;
232+
goto LBL_ERR;
233+
}
234+
235+
LBL_ERR:
236+
free(str_b);
237+
LBL_ERR_3:
238+
mp_clear(&a);
239+
LBL_ERR_2:
240+
free(str_a);
241+
LBL_ERR_1:
242+
return t1;
243+
}
244+
245+
static uint64_t s_time_radix_conversion_write(int size)
246+
{
247+
int x;
248+
size_t written, length;
249+
mp_err err;
250+
mp_int a, b;
251+
char *str_a;
252+
uint64_t t1;
253+
254+
255+
if ((err = mp_init_multi(&a, &b, NULL)) != MP_OKAY) {
256+
t1 = UINT64_MAX;
257+
goto LBL_ERR_1;
258+
}
259+
if ((err = mp_rand(&a, size)) != MP_OKAY) {
260+
t1 = UINT64_MAX;
261+
goto LBL_ERR_2;
262+
}
263+
264+
if ((err = mp_radix_size(&a, 10, &length)) != MP_OKAY) {
265+
t1 = UINT64_MAX;
266+
goto LBL_ERR_2;
267+
}
268+
269+
str_a = malloc(length + 1);
270+
if (str_a == NULL) {
271+
t1 = UINT64_MAX;
272+
goto LBL_ERR_2;
273+
}
274+
275+
s_timer_start();
276+
for (x = 0; x < s_number_of_test_loops; x++) {
277+
if ((err = mp_to_radix(&a, str_a, length, &written, 10)) != MP_OKAY) {
278+
t1 = UINT64_MAX;
279+
goto LBL_ERR_2;
280+
}
281+
}
282+
t1 = s_timer_stop();
283+
284+
if ((err = mp_read_radix(&b, str_a, 10)) != MP_OKAY) {
285+
t1 = UINT64_MAX;
286+
goto LBL_ERR;
287+
}
288+
289+
if (mp_cmp(&a, &b) != MP_EQ) {
290+
t1 = 0u;
291+
goto LBL_ERR;
292+
}
293+
294+
295+
LBL_ERR:
296+
free(str_a);
297+
LBL_ERR_2:
298+
mp_clear_multi(&a, &b, NULL);
299+
LBL_ERR_1:
300+
return t1;
301+
}
302+
303+
151304
struct tune_args {
152305
int testmode;
153306
int verbose;
@@ -238,11 +391,13 @@ static void s_usage(char *s)
238391
fprintf(stderr," (Not for computing the cut-offs!)\n");
239392
fprintf(stderr," -s 'preset' use values in 'preset' for printing.\n");
240393
fprintf(stderr," 'preset' is a comma separated string with cut-offs for\n");
241-
fprintf(stderr," ksm, kss, tc3m, tc3s in that order\n");
394+
fprintf(stderr," ksm, kss, tc3m, tc3s, rcr, rcw in that order\n");
242395
fprintf(stderr," ksm = karatsuba multiplication\n");
243396
fprintf(stderr," kss = karatsuba squaring\n");
244397
fprintf(stderr," tc3m = Toom-Cook 3-way multiplication\n");
245398
fprintf(stderr," tc3s = Toom-Cook 3-way squaring\n");
399+
fprintf(stderr," rcr = Fast radix conversion, reading\n");
400+
fprintf(stderr," rcw = Fast radix conversion, writing\n");
246401
fprintf(stderr," Implies '-p'\n");
247402
fprintf(stderr," -h this message\n");
248403
exit(s_exit_code);
@@ -251,17 +406,20 @@ static void s_usage(char *s)
251406
/*
 * One cutoff value per tunable algorithm.
 * The member order is significant: max_cutoffs is initialized positionally.
 */
struct cutoffs {
   int MUL_KARATSUBA;
   int SQR_KARATSUBA;
   int MUL_TOOM;
   int SQR_TOOM;
   int RADIX_READ;
   int RADIX_WRITE;
};
255411

256412
/* Every cutoff set to INT_MAX */
const struct cutoffs max_cutoffs = {
   INT_MAX, INT_MAX,   /* MUL_KARATSUBA, SQR_KARATSUBA */
   INT_MAX, INT_MAX,   /* MUL_TOOM,      SQR_TOOM      */
   INT_MAX, INT_MAX    /* RADIX_READ,    RADIX_WRITE   */
};
258414

259415
static void set_cutoffs(const struct cutoffs *c)
260416
{
261417
MP_MUL_KARATSUBA_CUTOFF = c->MUL_KARATSUBA;
262418
MP_SQR_KARATSUBA_CUTOFF = c->SQR_KARATSUBA;
263419
MP_MUL_TOOM_CUTOFF = c->MUL_TOOM;
264420
MP_SQR_TOOM_CUTOFF = c->SQR_TOOM;
421+
MP_RADIX_READ_CUTOFF = c->RADIX_READ;
422+
MP_RADIX_WRITE_CUTOFF = c->RADIX_WRITE;
265423
}
266424

267425
static void get_cutoffs(struct cutoffs *c)
@@ -270,7 +428,8 @@ static void get_cutoffs(struct cutoffs *c)
270428
c->SQR_KARATSUBA = MP_SQR_KARATSUBA_CUTOFF;
271429
c->MUL_TOOM = MP_MUL_TOOM_CUTOFF;
272430
c->SQR_TOOM = MP_SQR_TOOM_CUTOFF;
273-
431+
c->RADIX_READ = MP_RADIX_READ_CUTOFF;
432+
c->RADIX_WRITE = MP_RADIX_WRITE_CUTOFF;
274433
}
275434

276435
int main(int argc, char **argv)
@@ -416,13 +575,17 @@ int main(int argc, char **argv)
416575
s_usage(argv[0]);
417576
}
418577
str = argv[opt];
419-
MP_MUL_KARATSUBA_CUTOFF = (int)s_strtol(str, &endptr, "[1/4] No value for MP_MUL_KARATSUBA_CUTOFF given");
578+
MP_MUL_KARATSUBA_CUTOFF = (int)s_strtol(str, &endptr, "[1/6] No value for MP_MUL_KARATSUBA_CUTOFF given");
579+
str = endptr + 1;
580+
MP_SQR_KARATSUBA_CUTOFF = (int)s_strtol(str, &endptr, "[2/6] No value for MP_SQR_KARATSUBA_CUTOFF given");
420581
str = endptr + 1;
421-
MP_SQR_KARATSUBA_CUTOFF = (int)s_strtol(str, &endptr, "[2/4] No value for MP_SQR_KARATSUBA_CUTOFF given");
582+
MP_MUL_TOOM_CUTOFF = (int)s_strtol(str, &endptr, "[3/6] No value for MP_MUL_TOOM_CUTOFF given");
422583
str = endptr + 1;
423-
MP_MUL_TOOM_CUTOFF = (int)s_strtol(str, &endptr, "[3/4] No value for MP_MUL_TOOM_CUTOFF given");
584+
MP_SQR_TOOM_CUTOFF = (int)s_strtol(str, &endptr, "[4/6] No value for MP_SQR_TOOM_CUTOFF given");
424585
str = endptr + 1;
425-
MP_SQR_TOOM_CUTOFF = (int)s_strtol(str, &endptr, "[4/4] No value for MP_SQR_TOOM_CUTOFF given");
586+
MP_RADIX_READ_CUTOFF = (int)s_strtol(str, &endptr, "[5/6] No value for MP_RADIX_READ_CUTOFF given");
587+
str = endptr + 1;
588+
MP_RADIX_WRITE_CUTOFF = (int)s_strtol(str, &endptr, "[6/6] No value for MP_RADIX_WRITE_CUTOFF given");
426589
break;
427590
case 'h':
428591
s_exit_code = EXIT_SUCCESS;
@@ -461,31 +624,54 @@ int main(int argc, char **argv)
461624
T_MUL_SQR("Karatsuba squaring", SQR_KARATSUBA, s_time_sqr),
462625
T_MUL_SQR("Toom-Cook 3-way multiplying", MUL_TOOM, s_time_mul),
463626
T_MUL_SQR("Toom-Cook 3-way squaring", SQR_TOOM, s_time_sqr),
627+
/* TODO: adapt macro above (or the names of the cutoffs and/or functions) */
628+
{
629+
"\"Faster radix conversion (reading)\"", &MP_RADIX_READ_CUTOFF,
630+
&(updated.RADIX_READ),MP_HAS(S_MP_FASTER_READ_RADIX) ? s_time_radix_conversion_read : NULL
631+
},
632+
{
633+
"\"Faster radix conversion (writing)\"", &MP_RADIX_WRITE_CUTOFF,
634+
&(updated.RADIX_WRITE),MP_HAS(S_MP_FASTER_TO_RADIX) ? s_time_radix_conversion_write : NULL
635+
}
464636
#undef T_MUL_SQR
465637
};
466638
/* Turn all limits from bncore.c to the max */
467639
set_cutoffs(&max_cutoffs);
468-
for (n = 0; n < sizeof(test)/sizeof(test[0]); ++n) {
640+
for (n = 0; n < (sizeof(test)/sizeof(test[0]) - 2); ++n) {
469641
if (test[n].fn != NULL) {
470642
s_run(test[n].name, test[n].fn, test[n].cutoff);
471643
*test[n].update = *test[n].cutoff;
472644
*test[n].cutoff = INT_MAX;
645+
};
646+
}
647+
/* Cutoffs for radix conversions are in bits to make handling of 62 different radices
648+
more feasible. */
649+
for (; n < sizeof(test)/sizeof(test[0]); ++n) {
650+
if (test[n].fn != NULL) {
651+
s_run(test[n].name, test[n].fn, test[n].cutoff);
652+
/* TODO: can overflow for small INT_MAX */
653+
*test[n].update = ((*test[n].cutoff) * MP_DIGIT_BIT * 93)/28;
654+
*test[n].cutoff = INT_MAX;
473655
}
474656
}
475657
}
476658
if (args.terse == 1) {
477-
printf("%d %d %d %d\n",
659+
printf("%d %d %d %d %d %d\n",
478660
updated.MUL_KARATSUBA,
479661
updated.SQR_KARATSUBA,
480662
updated.MUL_TOOM,
481-
updated.SQR_TOOM);
663+
updated.SQR_TOOM,
664+
updated.RADIX_READ,
665+
updated.RADIX_WRITE);
482666
} else {
483667
printf("MUL_KARATSUBA_CUTOFF = %d\n", updated.MUL_KARATSUBA);
484668
printf("SQR_KARATSUBA_CUTOFF = %d\n", updated.SQR_KARATSUBA);
485669
printf("MUL_TOOM_CUTOFF = %d\n", updated.MUL_TOOM);
486670
printf("SQR_TOOM_CUTOFF = %d\n", updated.SQR_TOOM);
671+
printf("RADIX_READ_CUTOFF = %d\n", updated.RADIX_READ);
672+
printf("RADIX_WRITE_CUTOFF = %d\n", updated.RADIX_WRITE);
487673
}
488-
674+
/* TODO: add graphs for radix conversion, too? */
489675
if (args.print == 1) {
490676
printf("Printing data for graphing to \"%s\" and \"%s\"\n",mullog, sqrlog);
491677

etc/tune_it.sh

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ KEEP_TEMP=1
5656
echo "You might like to watch the numbers go up to $LIMIT but it will take a long time!"
5757

5858
# Might not have sufficient rights or disc full.
59-
echo "km ks tc3m tc3s" > $FILE_NAME || die "Writing header to $FILE_NAME" $?
59+
echo "km ks tc3m tc3s rcr rcw" > $FILE_NAME || die "Writing header to $FILE_NAME" $?
6060
i=1
6161
while [ $i -le $LIMIT ]; do
6262
RNUM=$(LCG)
@@ -104,3 +104,23 @@ echo "#define MP_DEFAULT_MUL_TOOM_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die
104104
TMP=$(median $FILE_NAME 4 $i)
105105
echo "#define MP_DEFAULT_SQR_TOOM_CUTOFF $TMP"
106106
echo "#define MP_DEFAULT_SQR_TOOM_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(tc3s) Appending to $TOMMATH_CUTOFFS_H" $?
107+
108+
TMP=$(median $FILE_NAME 5 $i)
109+
echo "#define MP_DEFAULT_RADIX_READ_CUTOFF $TMP"
110+
echo "#define MP_DEFAULT_RADIX_READ_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(rcr) Appending to $TOMMATH_CUTOFFS_H" $?
111+
TMP=$(median $FILE_NAME 6 $i)
112+
echo "#define MP_DEFAULT_RADIX_WRITE_CUTOFF $TMP"
113+
echo "#define MP_DEFAULT_RADIX_WRITE_CUTOFF $TMP" >> $TOMMATH_CUTOFFS_H || die "(rcw) Appending to $TOMMATH_CUTOFFS_H" $?
114+
115+
116+
117+
118+
119+
120+
121+
122+
123+
124+
125+
126+

helper.pl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,8 @@ sub generate_def {
476476
MP_SQR_KARATSUBA_CUTOFF
477477
MP_MUL_TOOM_CUTOFF
478478
MP_SQR_TOOM_CUTOFF
479-
MP_RADIX_CUTOFF
479+
MP_RADIX_READ_CUTOFF,
480+
MP_RADIX_WRITE_CUTOFF
480481
";
481482
return 0;
482483
}

mp_cutoffs.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ int MP_MUL_KARATSUBA_CUTOFF = MP_DEFAULT_MUL_KARATSUBA_CUTOFF,
99
MP_SQR_KARATSUBA_CUTOFF = MP_DEFAULT_SQR_KARATSUBA_CUTOFF,
1010
MP_MUL_TOOM_CUTOFF = MP_DEFAULT_MUL_TOOM_CUTOFF,
1111
MP_SQR_TOOM_CUTOFF = MP_DEFAULT_SQR_TOOM_CUTOFF,
12-
MP_RADIX_CUTOFF = MP_DEFAULT_RADIX_CUTOFF;
12+
MP_RADIX_READ_CUTOFF = MP_DEFAULT_RADIX_READ_CUTOFF,
13+
MP_RADIX_WRITE_CUTOFF = MP_DEFAULT_RADIX_WRITE_CUTOFF;
1314
#endif
1415

1516
#endif

mp_read_radix.c

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
#ifdef MP_USE_MEMOPS
77
# include <string.h>
8-
98
# define MP_STRLEN(s) strlen(s)
109
#else
1110
static size_t s_mp_strlen(const char *s)
@@ -26,7 +25,7 @@ mp_err mp_read_radix(mp_int *a, const char *str, int radix)
2625

2726
mp_err err = MP_OKAY;
2827
mp_sign sign = MP_ZPOS;
29-
size_t slen;
28+
size_t slen, slen_2;
3029

3130
/* make sure the radix is ok */
3231
if ((radix < 2) || (radix > 64)) {
@@ -68,29 +67,28 @@ mp_err mp_read_radix(mp_int *a, const char *str, int radix)
6867
if (MP_IS_2EXPT((unsigned int)radix) &&
6968
((slen * (size_t) s_mp_log2_radix[radix]) > ((MP_MAX_DIGIT_COUNT - 2) * MP_DIGIT_BIT))) {
7069
return MP_OVF;
71-
}
72-
73-
if ((radix == 10) && (slen >
70+
} else if ((radix == 10) && (slen >
7471
#if (MP_DIGIT_BIT == 15)
75-
2183
72+
2183
7673
#elif (MP_DIGIT_BIT == 28)
77-
76695844
74+
76695844
7875
#elif (MP_DIGIT_BIT == 31)
79-
69273664
76+
69273664
8077
#elif (MP_DIGIT_BIT == 60)
81-
35791392
78+
35791392
8279
#endif
83-
)) {
80+
)) {
8481
return MP_OVF;
8582
}
8683

87-
mp_zero(a);
84+
/* Roughly (over)estimate bit-size for cutoff by assuming slen to be ceil(log_{radix}(input))
85+
so bits(slen) ~ slen_{radix} * ceil(log_2(radix)) */
86+
slen_2 = slen * (size_t)(s_mp_log2_radix[radix] + 1);
8887

89-
/* TODO: Reading is quite quick, especially for bases of the form 2^n. Faster even for small input.
90-
Include branches for them and/or (tunable?) cutoff? */
88+
mp_zero(a);
9189

9290
/* Try faster version first */
93-
if (MP_HAS(S_MP_FASTER_READ_RADIX)) {
91+
if (MP_HAS(S_MP_FASTER_READ_RADIX) && (slen_2 < (size_t)MP_RADIX_READ_CUTOFF)) {
9492
if ((err = s_mp_faster_read_radix(a, str, 0, slen, radix)) != MP_OKAY) goto LTM_ERR;
9593
} else if (MP_HAS(S_MP_SLOWER_READ_RADIX)) {
9694
if ((err = s_mp_slower_read_radix(a, str, 0, slen, radix)) != MP_OKAY) goto LTM_ERR;

0 commit comments

Comments
 (0)