Skip to content

Commit b1ad931

Browse files
committed
Addition of faster to_radix function
1 parent c6a00c2 commit b1ad931

File tree

6 files changed

+392
-49
lines changed

6 files changed

+392
-49
lines changed

demo/test.c

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,9 +1155,10 @@ static int test_mp_read_radix(void)
11551155
{
11561156
char buf[4096];
11571157
size_t written;
1158+
int bignum, i;
11581159

1159-
mp_int a;
1160-
DOR(mp_init_multi(&a, NULL));
1160+
mp_int a, b;
1161+
DOR(mp_init_multi(&a, &b, NULL));
11611162

11621163
DO(mp_read_radix(&a, "123456", 10));
11631164

@@ -1183,6 +1184,30 @@ static int test_mp_read_radix(void)
11831184
DO(mp_to_radix(&a, buf, sizeof(buf), &written, 10));
11841185
printf("\r '0' a == %s, length = %zu", buf, written);
11851186

1187+
/* Test the fast method with a slightly larger number */
1188+
1189+
/* Must be bigger than the cut-off value, of course */
1190+
bignum = 2* (2 * s_mp_radix_exponent_y[2] * MP_RADIX_BARRETT_START_MULTIPLICATOR);
1191+
printf("Size of bignum_size = %d\n", bignum);
1192+
/* Check if "bignum" is small enough for the result to fit into "buf"
1193+
otherwise lead tester to this function */
1194+
if (bignum >= 4096) {
1195+
fprintf(stderr, "Buffer too small, please check function \"test_mp_read_radix\" in \"test.c\"");
1196+
goto LBL_ERR;
1197+
}
1198+
/* Produce a random number */
1199+
bignum /= MP_DIGIT_BIT;
1200+
DO(mp_rand(&b, bignum));
1201+
/* Check if it makes the round */
1202+
printf("Number of limbs in &b = %d, bit_count of &b = %d\n", bignum, mp_count_bits(&b));
1203+
for (i = 2; i < 65; i++) {
1204+
DO(mp_to_radix(&b, buf, sizeof(buf), &written, i));
1205+
DO(mp_read_radix(&a, buf, i));
1206+
EXPECT(mp_cmp(&a, &b) == MP_EQ);
1207+
/* fprintf(stderr,"radix = %d\n",i); */
1208+
}
1209+
1210+
11861211
while (0) {
11871212
char *s = fgets(buf, sizeof(buf), stdin);
11881213
if (s != buf) break;
@@ -1192,10 +1217,10 @@ static int test_mp_read_radix(void)
11921217
printf("%s, %lu\n", buf, (unsigned long)a.dp[0] & 3uL);
11931218
}
11941219

1195-
mp_clear(&a);
1220+
mp_clear_multi(&a, &b, NULL);
11961221
return EXIT_SUCCESS;
11971222
LBL_ERR:
1198-
mp_clear(&a);
1223+
mp_clear_multi(&a, &b, NULL);
11991224
return EXIT_FAILURE;
12001225
}
12011226

mp_to_radix.c

Lines changed: 18 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,16 @@
33
/* LibTomMath, multiple-precision integer library -- Tom St Denis */
44
/* SPDX-License-Identifier: Unlicense */
55

6-
/* reverse an array, used for radix code */
7-
static void s_reverse(char *s, size_t len)
8-
{
9-
size_t ix = 0, iy = len - 1u;
10-
while (ix < iy) {
11-
MP_EXCH(char, s[ix], s[iy]);
12-
++ix;
13-
--iy;
14-
}
15-
}
16-
176
/* stores a bignum as a ASCII string in a given radix (2..64)
187
*
198
* Stores upto "size - 1" chars and always a NULL byte, puts the number of characters
209
* written, including the '\0', in "written".
2110
*/
2211
mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix)
2312
{
24-
size_t digs;
25-
mp_err err;
26-
mp_int t;
27-
mp_digit d;
28-
char *_s = str;
13+
mp_err err;
14+
mp_int a_bar = *a;
15+
size_t part_written = 0;
2916

3017
/* check range of radix and size*/
3118
if (maxlen < 2u) {
@@ -45,50 +32,36 @@ mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, i
4532
return MP_OKAY;
4633
}
4734

48-
if ((err = mp_init_copy(&t, a)) != MP_OKAY) {
49-
return err;
50-
}
51-
5235
/* if it is negative output a - */
53-
if (mp_isneg(&t)) {
54-
/* we have to reverse our digits later... but not the - sign!! */
55-
++_s;
56-
36+
if (mp_isneg(a)) {
5737
/* store the flag and mark the number as positive */
5838
*str++ = '-';
59-
t.sign = MP_ZPOS;
39+
a_bar.sign = MP_ZPOS;
6040

6141
/* subtract a char */
6242
--maxlen;
6343
}
64-
digs = 0u;
65-
while (!mp_iszero(&t)) {
66-
if (--maxlen < 1u) {
67-
/* no more room */
68-
err = MP_BUF;
69-
goto LBL_ERR;
70-
}
71-
if ((err = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) {
72-
goto LBL_ERR;
44+
45+
/* TODO: check if it can be done better */
46+
if (MP_HAS(S_MP_FASTER_TO_RADIX)) {
47+
if ((err = s_mp_faster_to_radix(&a_bar, str, maxlen, &part_written, radix)) != MP_OKAY) goto LBL_ERR;
48+
} else {
49+
if (MP_HAS(S_MP_SLOWER_TO_RADIX)) {
50+
if ((err = s_mp_slower_to_radix(&a_bar, &str, &maxlen, &part_written, radix, false)) != MP_OKAY) goto LBL_ERR;
51+
/* part_written does not count EOS */
52+
part_written++;
7353
}
74-
*str++ = s_mp_radix_map[d];
75-
++digs;
7654
}
77-
/* reverse the digits of the string. In this case _s points
78-
* to the first digit [excluding the sign] of the number
79-
*/
80-
s_reverse(_s, digs);
8155

82-
/* append a NULL so the string is properly terminated */
83-
*str = '\0';
84-
digs++;
56+
/* TODO: Think about adding a function for base-2 radices only although
57+
s_faster_to_radix is rather quick with such radices. */
8558

8659
if (written != NULL) {
87-
*written = mp_isneg(a) ? (digs + 1u): digs;
60+
part_written += mp_isneg(a) ? 1: 0;
61+
*written = part_written;
8862
}
8963

9064
LBL_ERR:
91-
mp_clear(&t);
9265
return err;
9366
}
9467

s_mp_faster_to_radix.c

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
#include "tommath_private.h"
2+
#ifdef S_MP_FASTER_TO_RADIX_C
3+
/* LibTomMath, multiple-precision integer library -- Tom St Denis */
4+
/* SPDX-License-Identifier: Unlicense */
5+
6+
/* Portable integer log of two with small footprint */
7+
static int32_t s_floor_ilog2(int32_t value)
8+
{
9+
int r = 0;
10+
while ((value /= 2) != 0) {
11+
r++;
12+
}
13+
return r;
14+
}
15+
16+
/* Exponentiation with small footprint */
17+
static int32_t s_pow(int32_t base, int32_t exponent)
18+
{
19+
int32_t result = 1;
20+
while (exponent != 0) {
21+
if ((exponent % 2) == 1) {
22+
result *= base;
23+
}
24+
exponent /= 2;
25+
base *= base;
26+
}
27+
return result;
28+
}
29+
30+
static mp_err s_mp_to_radix_recursive(const mp_int *a, char **str, size_t *part_maxlen, size_t *part_written,
31+
int radix, int32_t k, int32_t t, bool pad, mp_int *P, mp_int *R)
32+
{
33+
34+
mp_int r, q, a1;
35+
mp_err err;
36+
int Beta;
37+
38+
if (t < 0) {
39+
/* Print the string from the number given*/
40+
if ((err = s_mp_slower_to_radix(a, str, part_maxlen, part_written, radix, pad)) != MP_OKAY) goto LTM_ERR;
41+
42+
} else {
43+
if ((err = mp_init_multi(&q, &r, &a1, NULL)) != MP_OKAY) goto LTM_ERR;
44+
/*
45+
Barrett reduction. A step by step proof can be found at
46+
https://www.nayuki.io/page/barrett-reduction-algorithm
47+
48+
See also: Modern Computer Arithmetic, version 0.5.9, page 59
49+
*/
50+
51+
/* If this cast-feast looks familiar: it is the numerator from computing the reciprocal*/
52+
Beta = (int)((int32_t)((uint32_t)1 << (t+1)) * k);
53+
54+
/* Q = floor(A1 * I / 2^Beta) */
55+
/* I = floor( (2^(2*Beta)) / B) Here we have R[t] = I, P[t] = B */
56+
if ((err = mp_mul(a, &R[t], &q)) != MP_OKAY) goto LTM_ERR;
57+
if ((err = mp_div_2d(&q, Beta, &q, NULL)) != MP_OKAY) goto LTM_ERR;
58+
59+
/* R = A - Q*B */
60+
if ((err = mp_mul(&q, &P[t], &r)) != MP_OKAY) goto LTM_ERR;
61+
if ((err = mp_sub(a, &r, &r)) != MP_OKAY) goto LTM_ERR;
62+
63+
/* We can use this simple correction because of the way we computed the reciprocal */
64+
if (r.sign == MP_NEG) {
65+
if ((err = mp_decr(&q)) != MP_OKAY) goto LTM_ERR;
66+
if ((err = mp_add(&r, &P[t], &r)) != MP_OKAY) goto LTM_ERR;
67+
}
68+
69+
/* Go down the lists while climbing up the tree. */
70+
t--;
71+
72+
/* Follow branches */
73+
if (mp_iszero(&q) && (!pad)) {
74+
if ((err = s_mp_to_radix_recursive(&r, str, part_maxlen, part_written, radix,
75+
k, t, false, P, R)) != MP_OKAY) goto LTM_ERR;
76+
} else {
77+
if ((err = s_mp_to_radix_recursive(&q, str, part_maxlen, part_written, radix,
78+
k, t, pad, P, R)) != MP_OKAY) goto LTM_ERR;
79+
if ((err = s_mp_to_radix_recursive(&r, str, part_maxlen, part_written, radix,
80+
k, t, true, P, R)) != MP_OKAY) goto LTM_ERR;
81+
}
82+
mp_clear_multi(&q, &r, &a1, NULL);
83+
}
84+
85+
err = MP_OKAY;
86+
LTM_ERR:
87+
return err;
88+
}
89+
90+
91+
mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix)
92+
{
93+
mp_err err;
94+
int32_t n = 0, k, t = 0, steps;
95+
int ilog2a;
96+
97+
/* Use given buffer directly, no temporary buffers for the individual chunks */
98+
char **sptr = &str;
99+
/* Size of the chunk */
100+
size_t part_written = 0;
101+
size_t part_maxlen = maxlen;
102+
103+
/* List of reciprocals */
104+
mp_int *R = NULL;
105+
/* List of moduli */
106+
mp_int *P = NULL;
107+
108+
/* Denominator for the reciprocal: b^y */
109+
n = s_pow((int32_t)radix, (int32_t)s_mp_radix_exponent_y[radix]);
110+
111+
/* Numerator of the reciprocal: ceil(log_2(n)) */
112+
k = s_floor_ilog2(n) + 1;
113+
114+
/* steps = floor(log_2(floor(log_2(a))))*/
115+
ilog2a = mp_count_bits(a) - 1;
116+
117+
/* Cutoff at about twice the size of P[0]. Interestingly far below Karatsuba cut-off. */
118+
if (ilog2a < (2 * k * MP_RADIX_BARRETT_START_MULTIPLICATOR)) {
119+
if ((err = s_mp_slower_to_radix(a, sptr, &part_maxlen, &part_written, radix, false)) != MP_OKAY) goto LTM_ERR;
120+
/* part_written does not count EOS */
121+
*written = part_written + 1;
122+
return err;
123+
}
124+
/*
125+
floor(log_2(floor(log_2(a)))) is not enough but we check for
126+
the end inside the loop and the list is just a list of pointers,
127+
not much memory wasted here.
128+
*/
129+
steps = s_floor_ilog2((int32_t)ilog2a) + 2;
130+
131+
/* Allocate memory for list of reciprocals */
132+
R = (mp_int *) MP_MALLOC((size_t) steps * sizeof(mp_int));
133+
if (R == NULL) {
134+
return MP_MEM;
135+
}
136+
/* Allocate memory for list of moduli */
137+
P = (mp_int *) MP_MALLOC((size_t) steps * sizeof(mp_int));
138+
if (P == NULL) {
139+
MP_FREE_BUF(R, (size_t) steps * sizeof(mp_int));
140+
return MP_MEM;
141+
}
142+
143+
/*
144+
The approximation to the reciprocal used in Barrett's method is
145+
R_t = ceil(2^((2^t)*k)/n^(2^t))
146+
with R_0 = (2^(2*k))/b^y and k = ceil(log_2(n)) as computed above.
147+
*/
148+
149+
/* To get the tree a bit flatter. Alternative: do it iteratively instead of recursively */
150+
k = k * MP_RADIX_BARRETT_START_MULTIPLICATOR;
151+
152+
/* Compute initial reciprocal R[0] and expand it (R[0]^(2^k) */
153+
if ((err = mp_init_i32(&P[0], n)) != MP_OKAY) goto LTM_ERR;
154+
if ((err = mp_expt_n(&P[0], MP_RADIX_BARRETT_START_MULTIPLICATOR, &P[0])) != MP_OKAY) goto LTM_ERR;
155+
156+
if ((err = mp_init(&R[0])) != MP_OKAY) goto LTM_ERR;
157+
if ((err = mp_2expt(&R[0], 2*k)) != MP_OKAY) goto LTM_ERR;
158+
159+
if ((err = mp_div(&R[0], &P[0], &R[0], NULL)) != MP_OKAY) goto LTM_ERR;
160+
if ((err = mp_incr(&R[0])) != MP_OKAY) goto LTM_ERR;
161+
162+
/* Compute the rest of the reciprocals if as needed */
163+
for (t = 1; t < steps; t++) {
164+
/* P_t = (b^y)^(2^t) = n^(2^t) */
165+
/*
166+
We cannot just square because it can
167+
a) overflow MP_MAX_DIGIT_COUNT
168+
b) it can get bigger than "a" which it shouldn't
169+
which also means that
170+
c) if it gets bigger than "a" we have all necessary
171+
reciprocals and can break out of the loop
172+
*/
173+
/* Check for overflow of 2^((2^t)*k) i.e. bigger than 2^MP_MAX_DIGIT_COUNT */
174+
if (((int)(1u << t)*k) > MP_MAX_DIGIT_COUNT) {
175+
/* TODO: This can only happen near MP_MAX_DIGIT_COUNT and we can use
176+
the reciprocal R[t-1] to do the division but R[t] != R[t-1]^2
177+
so we cannot just divide by R[t-1] twice.
178+
*/
179+
err = MP_OVF;
180+
goto LTM_ERR;
181+
}
182+
183+
/* P[t-1]^2 > a at most likely more than just a bit or too, so check if we
184+
can bail out early without actually computing the square. The
185+
constant "10" is comprised of unity plus some angst-allowance */
186+
if ((2 * mp_count_bits(&P[t-1]) - 10) > ilog2a) {
187+
/* Correct index */
188+
t--;
189+
break;
190+
}
191+
192+
/* Compute denominator */
193+
if ((err = mp_init(&P[t])) != MP_OKAY) goto LTM_ERR;
194+
/* P[t] = P[t-1]^2 */
195+
if ((err = mp_sqr(&P[t-1], &P[t])) != MP_OKAY) goto LTM_ERR;
196+
/* Check if P[t]^2 > a */
197+
if (mp_cmp(&P[t],a) == MP_GT) {
198+
/* We don't need P[t] anymore */
199+
mp_clear(&P[t]);
200+
/* Correct index */
201+
t--;
202+
break;
203+
}
204+
/* Compute numerator */
205+
if ((err = mp_init(&R[t])) != MP_OKAY) goto LTM_ERR;
206+
207+
/* R[t] = R[t] << (2^t * k) The factor cannot overflow, we checked that above */
208+
/* TODO: these are more castings than in the ER in Mayrhofen at New Year's Eve! */
209+
if ((err = mp_2expt(&(R[t]), (int)((int32_t)((uint32_t)1 << (t+1)) * k))) != MP_OKAY) goto LTM_ERR;
210+
211+
/* Compute reciprocal */
212+
/* R[t] = floor(2^(2^t * k) / P[t] */
213+
if ((err = mp_div(&R[t], &P[t], &R[t], NULL)) != MP_OKAY) goto LTM_ERR;
214+
/* Ceiling if P[t] is not a power of two but it is not a problem if P[t] is a power of two. */
215+
if ((err = mp_incr(&R[t])) != MP_OKAY) goto LTM_ERR;
216+
}
217+
218+
/* And finally: start the recursion. */
219+
if ((err = s_mp_to_radix_recursive(a, sptr, &part_maxlen, &part_written, radix,
220+
k, t, false, P, R)) != MP_OKAY) goto LTM_ERR;
221+
/* part_written does not account for EOS */
222+
*written = part_written + 1;
223+
224+
err = MP_OKAY;
225+
LTM_ERR:
226+
do {
227+
mp_clear(&P[t]);
228+
mp_clear(&R[t]);
229+
} while (t--);
230+
MP_FREE_BUF(P, (size_t) steps * sizeof(mp_int));
231+
MP_FREE_BUF(R, (size_t) steps * sizeof(mp_int));
232+
return err;
233+
}
234+
235+
#endif

0 commit comments

Comments
 (0)