libtom
diff --git a/‎demo/test.c‎
Lines changed: 29 additions & 4 deletions b/‎demo/test.c‎
Lines changed: 29 additions & 4 deletions
diff --git a/‎mp_to_radix.c‎
Lines changed: 18 additions & 45 deletions b/‎mp_to_radix.c‎
Lines changed: 18 additions & 45 deletions
diff --git a/‎s_mp_faster_to_radix.c‎
Lines changed: 235 additions & 0 deletions b/‎s_mp_faster_to_radix.c‎
Lines changed: 235 additions & 0 deletions
@@ -1155,9 +1155,10 @@ static int test_mp_read_radix(void)
 {
    char buf[4096];
    size_t written;
+   int bignum, i;
 
-   mp_int a;
-   DOR(mp_init_multi(&a, NULL));
+   mp_int a, b;
+   DOR(mp_init_multi(&a, &b, NULL));
 
    DO(mp_read_radix(&a, "123456", 10));
 
@@ -1183,6 +1184,30 @@ static int test_mp_read_radix(void)
    DO(mp_to_radix(&a, buf, sizeof(buf), &written, 10));
    printf("\r '0' a == %s, length = %zu", buf, written);
 
+   /* Test the fast method with a slightly larger number */
+
+   /* Must be bigger than the cut-off value, of course */
+   bignum = 2* (2 * s_mp_radix_exponent_y[2] * MP_RADIX_BARRETT_START_MULTIPLICATOR);
+   printf("Size of bignum_size = %d\n", bignum);
+   /* Check if "bignum" is small enough for the result to fit into "buf"
+      otherwise lead tester to this function */
+   if (bignum >= 4096) {
+      fprintf(stderr, "Buffer too small, please check function \"test_mp_read_radix\" in \"test.c\"");
+      goto LBL_ERR;
+   }
+   /* Produce a random number */
+   bignum /= MP_DIGIT_BIT;
+   DO(mp_rand(&b, bignum));
+   /* Check if it makes the round */
+   printf("Number of limbs in &b = %d, bit_count of &b = %d\n", bignum, mp_count_bits(&b));
+   for (i = 2; i < 65; i++) {
+      DO(mp_to_radix(&b, buf, sizeof(buf), &written, i));
+      DO(mp_read_radix(&a, buf, i));
+      EXPECT(mp_cmp(&a, &b) == MP_EQ);
+      /* fprintf(stderr,"radix = %d\n",i); */
+   }
+
+
    while (0) {
       char *s = fgets(buf, sizeof(buf), stdin);
       if (s != buf) break;
@@ -1192,10 +1217,10 @@ static int test_mp_read_radix(void)
       printf("%s, %lu\n", buf, (unsigned long)a.dp[0] & 3uL);
    }
 
-   mp_clear(&a);
+   mp_clear_multi(&a, &b, NULL);
    return EXIT_SUCCESS;
 LBL_ERR:
-   mp_clear(&a);
+   mp_clear_multi(&a, &b, NULL);
    return EXIT_FAILURE;
 }
 
 
@@ -3,29 +3,16 @@
 /* LibTomMath, multiple-precision integer library -- Tom St Denis */
 /* SPDX-License-Identifier: Unlicense */
 
-/* reverse an array, used for radix code */
-static void s_reverse(char *s, size_t len)
-{
-   size_t ix = 0, iy = len - 1u;
-   while (ix < iy) {
-      MP_EXCH(char, s[ix], s[iy]);
-      ++ix;
-      --iy;
-   }
-}
-
 /* stores a bignum as a ASCII string in a given radix (2..64)
  *
  * Stores upto "size - 1" chars and always a NULL byte, puts the number of characters
  * written, including the '\0', in "written".
  */
 mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix)
 {
-   size_t  digs;
-   mp_err  err;
-   mp_int  t;
-   mp_digit d;
-   char   *_s = str;
+   mp_err err;
+   mp_int a_bar = *a;
+   size_t part_written = 0;
 
    /* check range of radix and size*/
    if (maxlen < 2u) {
@@ -45,50 +32,36 @@ mp_err mp_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, i
       return MP_OKAY;
    }
 
-   if ((err = mp_init_copy(&t, a)) != MP_OKAY) {
-      return err;
-   }
-
    /* if it is negative output a - */
-   if (mp_isneg(&t)) {
-      /* we have to reverse our digits later... but not the - sign!! */
-      ++_s;
-
+   if (mp_isneg(a)) {
       /* store the flag and mark the number as positive */
       *str++ = '-';
-      t.sign = MP_ZPOS;
+      a_bar.sign = MP_ZPOS;
 
       /* subtract a char */
       --maxlen;
    }
-   digs = 0u;
-   while (!mp_iszero(&t)) {
-      if (--maxlen < 1u) {
-         /* no more room */
-         err = MP_BUF;
-         goto LBL_ERR;
-      }
-      if ((err = mp_div_d(&t, (mp_digit)radix, &t, &d)) != MP_OKAY) {
-         goto LBL_ERR;
+
+   /* TODO: check if it can be done better */
+   if (MP_HAS(S_MP_FASTER_TO_RADIX)) {
+      if ((err = s_mp_faster_to_radix(&a_bar, str, maxlen, &part_written, radix)) != MP_OKAY)            goto LBL_ERR;
+   } else {
+      if (MP_HAS(S_MP_SLOWER_TO_RADIX)) {
+         if ((err = s_mp_slower_to_radix(&a_bar, &str, &maxlen, &part_written, radix, false)) != MP_OKAY) goto LBL_ERR;
+         /* part_written does not count EOS */
+         part_written++;
       }
-      *str++ = s_mp_radix_map[d];
-      ++digs;
    }
-   /* reverse the digits of the string.  In this case _s points
-    * to the first digit [excluding the sign] of the number
-    */
-   s_reverse(_s, digs);
 
-   /* append a NULL so the string is properly terminated */
-   *str = '\0';
-   digs++;
+   /* TODO: Think about adding a function for base-2 radices only although
+            s_faster_to_radix is rather quick with such radices. */
 
    if (written != NULL) {
-      *written = mp_isneg(a) ? (digs + 1u): digs;
+      part_written += mp_isneg(a) ? 1: 0;
+      *written = part_written;
    }
 
 LBL_ERR:
-   mp_clear(&t);
    return err;
 }
 
 
@@ -0,0 +1,235 @@
+#include "tommath_private.h"
+#ifdef S_MP_FASTER_TO_RADIX_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis */
+/* SPDX-License-Identifier: Unlicense */
+
+/* Small-footprint portable floor(log_2(value)): counts the halvings that leave a non-zero result */
+static int32_t s_floor_ilog2(int32_t value)
+{
+   int halvings = 0;
+   for (value /= 2; value != 0; value /= 2) {
+      ++halvings;
+   }
+   return halvings;
+}
+
+/* Exponentiation by squaring with small footprint; returns base^exponent (1 for exponent <= 0) */
+static int32_t s_pow(int32_t base, int32_t exponent)
+{
+   int32_t result = 1;
+   while (exponent != 0) {
+      if ((exponent % 2) == 1) {
+         result *= base;
+      }
+      exponent /= 2;
+      if (exponent != 0) base *= base; /* skip the unused last square: it may overflow (UB) */
+   }
+   return result;
+}
+
+/* Recursive splitting step: computes a = q * P[t] + r by Barrett division with the reciprocal R[t],
+   then converts q and r with t - 1; a chunk reaching t < 0 is printed by s_mp_slower_to_radix.
+   "pad" is handed on to the printer (presumably zero-padding of inner chunks, confirm there).
+   Returns MP_OKAY or the first error met; the temporaries q and r are released on every path. */
+static mp_err s_mp_to_radix_recursive(const mp_int *a, char **str, size_t *part_maxlen, size_t *part_written,
+                                      int radix, int32_t k, int32_t t, bool pad, mp_int *P, mp_int *R)
+{
+   mp_int r, q;
+   mp_err err;
+   int Beta;
+
+   if (t < 0) {
+      /* Print the string from the number given */
+      return s_mp_slower_to_radix(a, str, part_maxlen, part_written, radix, pad);
+   }
+   /* Nothing of ours to release yet, so return directly instead of jumping to the cleanup */
+   if ((err = mp_init_multi(&q, &r, NULL)) != MP_OKAY) {
+      return err;
+   }
+   /*
+      Barrett reduction. A step by step proof can be found at
+      https://www.nayuki.io/page/barrett-reduction-algorithm
+
+      See also: Modern Computer Arithmetic, version 0.5.9, page 59
+    */
+
+   /* If this cast-feast looks familiar: it is the exponent of the numerator used for the reciprocal */
+   Beta = (int)((int32_t)((uint32_t)1 << (t+1)) * k);
+
+   /* Q = floor(A * I / 2^Beta) with I = R[t], the reciprocal of B = P[t] */
+   if ((err = mp_mul(a, &R[t], &q)) != MP_OKAY)                                                       goto LTM_ERR;
+   if ((err = mp_div_2d(&q, Beta, &q, NULL)) != MP_OKAY)                                              goto LTM_ERR;
+
+   /* R = A - Q*B */
+   if ((err = mp_mul(&q, &P[t], &r)) != MP_OKAY)                                                      goto LTM_ERR;
+   if ((err = mp_sub(a, &r, &r)) != MP_OKAY)                                                          goto LTM_ERR;
+
+   /* We can use this simple correction because of the way we computed the reciprocal */
+   if (r.sign == MP_NEG) {
+      if ((err = mp_decr(&q)) != MP_OKAY)                                                             goto LTM_ERR;
+      if ((err = mp_add(&r, &P[t], &r)) != MP_OKAY)                                                   goto LTM_ERR;
+   }
+
+   /* Go down the lists while climbing up the tree. */
+   t--;
+
+   /* Follow branches */
+   if (mp_iszero(&q) && (!pad)) {
+      err = s_mp_to_radix_recursive(&r, str, part_maxlen, part_written, radix, k, t, false, P, R);
+   } else {
+      if ((err = s_mp_to_radix_recursive(&q, str, part_maxlen, part_written, radix,
+                                         k, t,  pad, P, R)) != MP_OKAY)                               goto LTM_ERR;
+      err = s_mp_to_radix_recursive(&r, str, part_maxlen, part_written, radix, k, t, true, P, R);
+   }
+LTM_ERR:
+   /* Reached on success and on failure alike */
+   mp_clear_multi(&q, &r, NULL);
+   return err;
+}
+
+
+/* Writes "a" in base "radix" to "str" and stores the number of characters written, plus one for
+   the EOS, in "written" (must not be NULL). "maxlen" is handed on to s_mp_slower_to_radix. Inputs
+   below the cut-off are printed by s_mp_slower_to_radix alone, larger ones are split recursively
+   with precomputed moduli P[t] and reciprocals R[t]. Returns MP_OKAY, MP_MEM, MP_OVF or the
+   error of a callee; the lists are released on every path. */
+mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *written, int radix)
+{
+   mp_err err;
+   int32_t n = 0, k, t = 0, steps;
+   /* Number of leading entries of P and R that are initialized and need a mp_clear */
+   int32_t p_used = 0, r_used = 0;
+   int ilog2a;
+
+   /* Use given buffer directly, no temporary buffers for the individual chunks */
+   char **sptr = &str;
+   /* Size of the chunk */
+   size_t part_written = 0;
+   size_t part_maxlen = maxlen;
+
+   /* List of reciprocals */
+   mp_int *R = NULL;
+   /* List of moduli */
+   mp_int *P = NULL;
+
+   /* Denominator for the reciprocal: b^y */
+   n = s_pow((int32_t)radix, (int32_t)s_mp_radix_exponent_y[radix]);
+
+   /* Exponent for the numerator of the reciprocal: floor(log_2(n)) + 1 */
+   k = s_floor_ilog2(n) + 1;
+
+   /* floor(log_2(a)), decides about the cut-off and the number of steps */
+   ilog2a = mp_count_bits(a) - 1;
+
+   /* Cutoff at about twice the size of P[0]. Interestingly far below Karatsuba cut-off. */
+   if (ilog2a < (2 * k * MP_RADIX_BARRETT_START_MULTIPLICATOR)) {
+      /* No lists allocated yet, hence a plain return: the cleanup at LTM_ERR must not run */
+      if ((err = s_mp_slower_to_radix(a, sptr, &part_maxlen, &part_written, radix, false)) != MP_OKAY)   return err;
+      /* part_written does not count EOS */
+      *written = part_written + 1;
+      return err;
+   }
+   /*
+      floor(log_2(floor(log_2(a)))) is not enough but we check for
+      the end inside the loop and the list is just a list of pointers,
+      not much memory wasted here.
+    */
+   steps  = s_floor_ilog2((int32_t)ilog2a) + 2;
+
+   /* Allocate memory for list of reciprocals */
+   R = (mp_int *) MP_MALLOC((size_t) steps * sizeof(mp_int));
+   if (R == NULL) {
+      return MP_MEM;
+   }
+   /* Allocate memory for list of moduli */
+   P = (mp_int *) MP_MALLOC((size_t) steps * sizeof(mp_int));
+   if (P == NULL) {
+      MP_FREE_BUF(R, (size_t) steps * sizeof(mp_int));
+      return MP_MEM;
+   }
+
+   /*
+      The approximation to the reciprocal used in Barrett's method is
+          R_t = floor(2^((2^(t+1))*k) / P_t) + 1 with P_t = P_0^(2^t),
+      P_0 = (b^y)^M, M = MP_RADIX_BARRETT_START_MULTIPLICATOR and k scaled by M below.
+    */
+   /* To get the tree a bit flatter. Alternative: do it iteratively instead of recursively */
+   k = k * MP_RADIX_BARRETT_START_MULTIPLICATOR;
+
+   /* Compute the initial modulus P[0] = n^M and its reciprocal R[0] */
+   if ((err = mp_init_i32(&P[0], n)) != MP_OKAY)                                                         goto LTM_ERR;
+   p_used = 1;
+   if ((err = mp_expt_n(&P[0], MP_RADIX_BARRETT_START_MULTIPLICATOR, &P[0])) != MP_OKAY)                 goto LTM_ERR;
+   if ((err = mp_init(&R[0])) != MP_OKAY)                                                                goto LTM_ERR;
+   r_used = 1;
+   if ((err = mp_2expt(&R[0], 2*k)) != MP_OKAY)                                                          goto LTM_ERR;
+   if ((err = mp_div(&R[0], &P[0], &R[0], NULL)) != MP_OKAY)                                             goto LTM_ERR;
+   if ((err = mp_incr(&R[0])) != MP_OKAY)                                                                goto LTM_ERR;
+
+   /* Compute the rest of the reciprocals as needed */
+   for (t = 1; t < steps; t++) {
+      /* P_t = P_(t-1)^2 = n^(M * 2^t), M being the start multiplicator */
+      /*
+         We cannot just square because it can
+            a) overflow MP_MAX_DIGIT_COUNT
+            b) it can get bigger than "a" which it shouldn't
+               which also means that
+            c) if it gets bigger than "a" we have all necessary
+               reciprocals and can break out of the loop
+      */
+      /* Check for overflow of 2^((2^t)*k) i.e. bigger than 2^MP_MAX_DIGIT_COUNT */
+      if (((int)(1u << t)*k) > MP_MAX_DIGIT_COUNT) {
+         /* TODO: This can only happen near MP_MAX_DIGIT_COUNT and we can use
+                  the reciprocal R[t-1] to do the division but R[t] != R[t-1]^2
+                  so we cannot just divide by R[t-1] twice.
+          */
+         err = MP_OVF;
+         goto LTM_ERR;
+      }
+
+      /* P[t-1]^2 > a is most likely true by more than just a bit or two, so check if we
+         can bail out early without actually computing the square. The
+         constant "10" is comprised of unity plus some angst-allowance */
+      if ((2 * mp_count_bits(&P[t-1]) - 10) > ilog2a) {
+         break;
+      }
+
+      /* Compute denominator */
+      if ((err = mp_init(&P[t])) != MP_OKAY)                                                             goto LTM_ERR;
+      p_used++;
+      /* P[t] = P[t-1]^2 */
+      if ((err = mp_sqr(&P[t-1], &P[t])) != MP_OKAY)                                                     goto LTM_ERR;
+      /* Check if P[t] > a: then P[t] is not needed and is released by the cleanup at LTM_ERR */
+      if (mp_cmp(&P[t],a) == MP_GT) {
+         break;
+      }
+      /* Compute numerator */
+      if ((err = mp_init(&R[t])) != MP_OKAY)                                                             goto LTM_ERR;
+      r_used++;
+      /* R[t] = 2^(2^(t+1) * k) The factor cannot overflow, we checked that above */
+      if ((err = mp_2expt(&(R[t]), (int)((int32_t)((uint32_t)1 << (t+1)) * k))) != MP_OKAY)              goto LTM_ERR;
+      /* Compute reciprocal */
+      /* R[t] = floor(2^(2^(t+1) * k) / P[t]) */
+      if ((err = mp_div(&R[t], &P[t], &R[t], NULL)) != MP_OKAY)                                          goto LTM_ERR;
+      /* Ceiling if P[t] is not a power of two but it is not a problem if P[t] is a power of two. */
+      if ((err = mp_incr(&R[t])) != MP_OKAY)                                                             goto LTM_ERR;
+   }
+   /* Index of the last complete (P, R) pair, whichever way the loop ended; always below "steps" */
+   t = r_used - 1;
+
+   /* And finally: start the recursion. */
+   if ((err = s_mp_to_radix_recursive(a, sptr, &part_maxlen, &part_written, radix,
+                                      k, t, false, P, R)) != MP_OKAY)                                    goto LTM_ERR;
+   /* part_written does not account for EOS */
+   *written = part_written + 1;
+
+LTM_ERR:
+   /* Clear only what was initialized, the remaining entries are raw memory from MP_MALLOC */
+   while (p_used > 0) mp_clear(&P[--p_used]);
+   while (r_used > 0) mp_clear(&R[--r_used]);
+   MP_FREE_BUF(P, (size_t) steps * sizeof(mp_int));
+   MP_FREE_BUF(R, (size_t) steps * sizeof(mp_int));
+   return err;
+}
+
+#endif