Removed support for the 64-bit large sieve. It was just too big.

czurnieden · czurnieden · commit 50de80014207 · 2023-03-13T22:09:52.000+01:00
diff --git a/demo/test.c b/demo/test.c
@@ -811,7 +811,7 @@ static int test_mp_prime_rand(void)
 
    /* test for size */
    for (ix = 10; ix < 128; ix++) {
-      printf("Testing (not safe-prime): %9d bits    \n", ix);
+      printf("\rTesting (not safe-prime): %9d bits    ", ix);
       fflush(stdout);
       DO(mp_prime_rand(&a, 8, ix, (rand_int() & 1) ? 0 : MP_PRIME_2MSB_ON));
       EXPECT(mp_count_bits(&a) == ix);
@@ -1623,27 +1623,12 @@ static int test_mp_next_small_prime(void)
       1019879761, 72282701, 2048787577, 2058368113
    };
 
-/* TODO: No, remove 64-bit version, way too big and a bit unwieldy, too! */
-#if ( (defined MP_64BIT) && (defined MP_SIEVE_USE_LARGE_SIEVE) )
-   /* primesum up to 2^64
-      $ time /home/czurnieden/GITHUB/primesum/primesum 2^64
-       3879578600671960388666457126750869198
-
-       real 37m6,448s
-       user 107m17,056s
-       sys  0m12,152s
-      I think we should use something smaller.
-   */
-   /* const char *primesum_64 = "3879578600671960388666457126750869198"; */
-
-   const char *primesum_64 = "208139436659661";
-#else
 #ifdef BENCHMARK_SIEVE
    const char *primesum_32 = "425649736193687430";
 #else
    const char *primesum_32 = "202259606268580";
 #endif
-#endif
+
 
    mp_sieve_init(&sieve);
 
@@ -1666,7 +1651,7 @@ static int test_mp_next_small_prime(void)
       }
 #ifdef BENCHMARK_SIEVE
       stopgt = gettime();
-      printf("Single random access for prime: %*u: ", 10, ret);
+      printf("Single random access for prime: %*"MP_SIEVE_PR_UINT": ", 10, ret);
       print_timer("",startgt, stopgt);
 #endif
    }
@@ -1675,14 +1660,7 @@ static int test_mp_next_small_prime(void)
 #ifdef BENCHMARK_SIEVE
    startgt = gettime();
 #endif
-#if ( (defined MP_64BIT) && (defined MP_SIEVE_USE_LARGE_SIEVE) )
-   for (p = 4293918720lu; ret < (mp_sieve_prime)4294997297lu;) {
-      DO(mp_next_small_prime(p, &ret, &sieve));
-      p = ret + 1;
-      mp_set_u64(&t, ret);
-      DO(mp_add(&primesum, &t, &primesum));
-   }
-#else
+
 #ifdef BENCHMARK_SIEVE
    for (p = 0ul; ret < (mp_sieve_prime)MP_SIEVE_BIGGEST_PRIME;) {
 #else
@@ -1693,19 +1671,15 @@ static int test_mp_next_small_prime(void)
       mp_set_u32(&t, ret);
       DO(mp_add(&primesum, &t, &primesum));
    }
-#endif
+
 #ifdef BENCHMARK_SIEVE
    stopgt = gettime();
-   printf("Sum of all primes between 0 and %u = ", MP_SIEVE_BIGGEST_PRIME);
+   printf("Sum of all primes between 0 and %"MP_SIEVE_PR_UINT" = ", MP_SIEVE_BIGGEST_PRIME);
    DO(mp_fwrite(&primesum, 10, stdout));
-   print_timer(", computed in ",startgt, stopgt);
+   print_timer(", computed in \n",startgt, stopgt);
 #endif
 
-#if ( (defined MP_64BIT) && (defined MP_SIEVE_USE_LARGE_SIEVE) )
-   DO(mp_read_radix(&t, primesum_64, 10));
-#else
    DO(mp_read_radix(&t, primesum_32, 10));
-#endif
    EXPECT(mp_cmp(&primesum, &t) == MP_EQ);
 
    mp_sieve_clear(&sieve);
diff --git a/doc/bn.tex b/doc/bn.tex
@@ -2349,9 +2349,35 @@ \section{Random Primes}
 
 
 \section{Prime Sieve}
-The prime sieve is implemented as a simple segmented Sieve of Eratosthenes. It is only moderately optimized for
-space and runtime but should be small enough and also fast enough for almost all use-cases; quite quick for
-sequential access but relatively slow for random access.
+The prime sieve is implemented as a simple segmented Sieve of Eratosthenes.
+
+This library needs a short run of sequential primes, starting at two, for divisibility tests and
+some random small primes for the Miller--Rabin test. That means we need a small base sieve
+for quick results with a cold start and small segments to get random small primes fast.
+
+The base sieve holds odd values only and even with a size of \texttt{4096} bytes it is
+small enough to get built quickly, in about 50 $\mu$sec on the author's old machine.
+
+The choice of the size for the individual segments has a larger effect on the results. See table
+ \ref{table:sievebenchmarks} for some data.
+
+\begin{table}[h]
+  \begin{center}
+    \begin{tabular}{r c l}
+      \textbf{Segment Size (bits)} & \textbf{Random Access in $\mu$sec} & \textbf{Full Primesum} \\
+           \texttt{ 0x7F}          &    70                                     &  90(!) min      \\
+          \texttt{0x1000}          &    100                                    &  2 min 33 sec   \\
+          \texttt{0x4000}          &    150                                    &  1 min 18 sec   \\
+         \texttt{ 0xFFFF}          &    350                                    &  0 min 57 sec   \\
+         \texttt{0x20000}          &    600                                    &  0 min 55 sec   \\
+     \texttt{ 0xFFFFFFFF}          &    300                                    &  0 min 35 sec   
+    \end{tabular}
+    \caption{Average access times (warm) with the default base sieve (\texttt{MP\_64BIT})} \label{table:sievebenchmarks}
+  \end{center}
+\end{table}
+
+The default sizes are in \texttt{tommath\_private.h}: \texttt{MP\_SIEVE\_PRIME\_MAX\_SQRT} is
+the size of the base sieve and \texttt{MP\_SIEVE\_RANGE\_A\_B} is the size of the segment.
 
 \subsection{Initialization and Clearing}
 Initializing. It cannot fail because it only sets some default values. Memory is allocated later according to needs.
@@ -2402,20 +2428,12 @@ \subsection{Find Adjacent Primes}
 
 \subsection{Useful Constants}
 \begin{description}
-\item[\texttt{MP\_SIEVE\_BIGGEST\_PRIME}] \texttt{read-only} The biggest prime the sieve can offer. It is
- $4\,294\,967\,291$ for \texttt{MP\_16BIT}, \texttt{MP\_32BIT} and \texttt{MP\_64BIT}; and
- $18\,446\,744\,073\,709\,551\,557$ for \texttt{MP\_64BIT} if the macro\\
- \texttt{LTM\_SIEVE\_USE\_LARGE\_SIEVE} is defined.
+\item[\texttt{MP\_SIEVE\_BIGGEST\_PRIME}] \texttt{read-only} The biggest prime the sieve can offer.
+It is $4\,294\,967\,291$ for all \texttt{MP\_xBIT}.
 
 \item[\texttt{mp\_sieve\_prime}] \texttt{read-only}  The basic type for the numbers in the sieve. It
- is \texttt{uint32\_t} for \texttt{MP\_16BIT}, \texttt{MP\_32BIT} and \texttt{MP\_64BIT}; and
- \texttt{uint64\_t} for \texttt{MP\_64BIT} if the macro \texttt{LTM\_SIEVE\_USE\_LARGE\_SIEVE} is defined.
-
-\item[\texttt{LTM\_SIEVE\_USE\_LARGE\_SIEVE}] \texttt{read-only} A compile time flag to make a large sieve.
- No advantage has been seen in using 64-bit integers if available except the ability to get a sieve up
-to $2^64$. But in this case the base sieve gets 0.25 Gibibytes large and the segments 0.5 Gibibytes
-(although you can change \texttt{LTM\_SIEVE\_RANGE\_A\_B} in \texttt{bn\_mp\_sieve.c} to get smaller segments)
-and it needs a long time to fill.
+ is \texttt{uint32\_t} for \texttt{MP\_16BIT} and \texttt{MP\_32BIT}; and
+ \texttt{uint64\_t} for \texttt{MP\_64BIT}.
 
 \item[\texttt{MP\_SIEVE\_PR\_UINT}] Chooses the correct macro from \texttt{inttypes.h} to print a\\
  \texttt{mp\_sieve\_prime}. The header \texttt{inttypes.h} must be included before\\
@@ -2453,148 +2471,36 @@ \subsubsection{Primality Test}
 int main(int argc, char **argv)
 {
    mp_sieve_prime number;
-   mp_sieve *base = NULL;
-   mp_sieve *segment = NULL;
-   mp_sieve_prime single_segment_a = 0;
+   mp_sieve sieve;
    int e;
-
    /* variable holding the result of mp_is_small_prime */
    mp_sieve_prime result;
 
    if (argc != 2) {
       fprintf(stderr,"Usage %s number\textbackslash{}n", argv[0]);
       exit(EXIT_FAILURE);
    }
-
    number = (mp_sieve_prime) strtoul(argv[1],NULL, 10);
    if (errno == ERANGE) {
       fprintf(stderr,"strtoul(l) failed: input out of range\textbackslash{}n");
       goto LTM_ERR;
    }
-
    mp_sieve_init(&sieve);
-
    if ((e = mp_is_small_prime(number, &result, &sieve)) != MP_OKAY) {
       fprintf(stderr,"mp_is_small_prime failed: \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
               mp_error_to_string(e));
       goto LTM_ERR;
    }
-
    printf("The number %" LTM_SIEVE_PR_UINT " is %s prime\textbackslash{}n",
            number,(result)?"":"not");
 
-
-   mp_sieve_clear(&sieve);
-   exit(EXIT_SUCCESS);
-LTM_ERR:
-   mp_sieve_clear(&sieve);
-   exit(EXIT_FAILURE);
-}
-\end{alltt}
-\subsubsection{Find Adjacent Primes}
-To sum up all primes up to and including \texttt{MP\_SIEVE\_BIGGEST\_PRIME} you might do something like:
-\begin{alltt}
-#include <stdlib.h>
-#include <stdio.h>
-#include <errno.h>
-#include <tommath.h>
-int main(int argc, char **argv)
-{
-   mp_sieve_prime number;
-   mp_sieve sieve;
-   mp_sieve_prime k, ret;
-   mp_int total, t;
-   int e;
-
-   if (argc != 2) {
-      fprintf(stderr,"Usage %s integer\textbackslash{}n", argv[0]);
-      exit(EXIT_FAILURE);
-   }
-
-   if ((e = mp_init_multi(&total, &t, NULL)) != MP_OKAY) {
-      fprintf(stderr,"mp_init_multi(segment): \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
-              mp_error_to_string(e));
-      goto LTM_ERR_1;
-   }
-   errno = 0;
-#if ( (defined MP_64BIT) && (defined LTM_SIEVE_USE_LARGE_SIEVE) )
-   number = (mp_sieve_prime) strtoull(argv[1],NULL, 10);
-#else
-   number = (mp_sieve_prime) strtoul(argv[1],NULL, 10);
-#endif
-   if (errno == ERANGE) {
-      fprintf(stderr,"strtoul(l) failed: input out of range\textbackslash{}n");
-      return EXIT_FAILURE
-   }
-
-   mp_sieve_init(&sieve);
-
-   for (k = 0, ret = 0; ret < number; k = ret) {
-      if ((e = mp_next_small_prime(k + 1, &ret, &sieve)) != MP_OKAY) {
-         if (e == LTM_SIEVE_MAX_REACHED) {
-#ifdef MP_64BIT
-            if ((e = mp_add_d(&total, (mp_digit) k, &total)) != MP_OKAY) {
-               fprintf(stderr,"mp_add_d (1) failed: \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
-                       mp_error_to_string(e));
-               goto LTM_ERR;
-            }
-#else
-            if ((e = mp_set_long(&t, k)) != MP_OKAY) {
-               fprintf(stderr,"mp_set_long (1) failed: \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
-                       mp_error_to_string(e));
-               goto LTM_ERR;
-            }
-            if ((e = mp_add(&total, &t, &total)) != MP_OKAY) {
-               fprintf(stderr,"mp_add (1) failed: \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
-                       mp_error_to_string(e));
-               goto LTM_ERR;
-            }
-#endif
-            break;
-         }
-         fprintf(stderr,"mp_next_small_prime failed: \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
-                 mp_error_to_string(e));
-         goto LTM_ERR;
-      }
-      /* The check if the prime is below the given limit
-       * cannot be done in the for-loop conditions because if
-       * it could we wouldn't need the sieve in the first place.
-       */
-      if (ret <= number) {
-#ifdef MP_64BIT
-         if ((e = mp_add_d(&total, (mp_digit) k, &total)) != MP_OKAY) {
-            fprintf(stderr,"mp_add_d failed: \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
-                    mp_error_to_string(e));
-            goto LTM_ERR;
-         }
-#else
-         if ((e = mp_set_long(&t, k)) != MP_OKAY) {
-            fprintf(stderr,"mp_set_long failed: \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
-                    mp_error_to_string(e));
-            goto LTM_ERR;
-         }
-         if ((e = mp_add(&total, &t, &total)) != MP_OKAY) {
-            fprintf(stderr,"mp_add failed: \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
-            mp_error_to_string(e));
-            goto LTM_ERR;
-         }
-#endif
-      }
-   }
-   printf("total: ");
-   mp_fwrite(&total,10,stdout);
-   putchar('\textbackslash{}n');
-
-   mp_clear_multi(&total, &t, NULL);
    mp_sieve_clear(&sieve);
    exit(EXIT_SUCCESS);
 LTM_ERR:
-   mp_clear_multi(&total, &t, NULL);
    mp_sieve_clear(&sieve);
    exit(EXIT_FAILURE);
 }
 \end{alltt}
-It took about a minute on the authors machine from 2015 to get the expected $425\,649\,736\,193\,687\,430$ for the sum of all primes up to $2^{32}$, about the same runtime as Pari/GP version 2.9.4 (with a GMP-5.1.3 kernel).
 
 \chapter{Random Number Generation}
 \section{PRNG}
diff --git a/mp_is_small_prime.c b/mp_is_small_prime.c
@@ -36,7 +36,7 @@ static void s_mp_sieve_setall(mp_single_sieve *bst)
    mp_sieve_prime i, bs_size;
    bs_size = bst->alloc / sizeof(mp_sieve_prime);
    for (i = 0; i < bs_size; i++) {
-      (bst)->content[i] = MP_SIEVE_PRIME_MAX;
+      (bst)->content[i] = MP_SIEVE_FILL;
    }
 }
 
diff --git a/tommath.h b/tommath.h
@@ -566,16 +566,16 @@ mp_err mp_prime_rand(mp_int *a, int t, int size, int flags) MP_WUR;
 
 /* ---> full prime sieve <--- */
 
-#if ( (defined MP_64BIT) && (defined MP_SIEVE_USE_LARGE_SIEVE) )
+#ifdef MP_64BIT
 typedef  uint64_t mp_sieve_prime;
-#   define MP_SIEVE_BIGGEST_PRIME      18446744073709551557lu
 #   define MP_SIEVE_PR_UINT            PRIu64
 #else
 typedef  uint32_t mp_sieve_prime;
-#   define MP_SIEVE_BIGGEST_PRIME      4294967291u
 #   define MP_SIEVE_PR_UINT            PRIu32
 #endif
 
+#define MP_SIEVE_BIGGEST_PRIME      4294967291lu
+
 typedef struct mp_single_sieve_t {
    mp_sieve_prime *content;   /* bitset holding the sieve */
    mp_sieve_prime size;       /* number of entries (which is a slightly misleading description) */
diff --git a/tommath_private.h b/tommath_private.h
@@ -105,32 +105,47 @@ extern void MP_FREE(void *mem, size_t size);
 #endif
 
 /* Size of the base sieve of mp_sieve*/
-#if ( (defined MP_64BIT) && (defined MP_SIEVE_USE_LARGE_SIEVE) )
-#   define MP_SIEVE_PRIME_MAX          0xFFFFFFFFFFFFFFFFllu
+
+#define MP_SIEVE_PRIME_MAX          0xFFFFFFFFlu
 #ifndef MP_SIEVE_PRIME_MAX_SQRT
-#   define MP_SIEVE_PRIME_MAX_SQRT     0xFFFFFFFFllu
+#define MP_SIEVE_PRIME_MAX_SQRT     0xFFFFlu
 #endif
+#ifdef MP_64BIT
+#define MP_SIEVE_FILL               0xFFFFFFFFFFFFFFFFllu
 #else
-#   define MP_SIEVE_PRIME_MAX          0xFFFFFFFFlu
-#   define MP_SIEVE_PRIME_MAX_SQRT     0xFFFFlu
+#define MP_SIEVE_FILL               0xFFFFFFFFlu
 #endif
 
+
 /*
- * Set range_a_b to sqrt(MP_SIEVE_PRIME_MAX)
- * TODO: Make it const or put it in bncore.c because it is said to be faster
- * if the size of range_a_b fits into the L2-cache.
- * Not much difference on the author's machine for 32 bit but quite
- * a large one for 64 bit and large limits. YMMV, as always.
- * Please be aware that range_a_b is in bits, not bytes and memory
- * allocation rounds up and adds CHAR_BIT*sizeof(mp_sieve_prime) bits.
+   It is faster for random access to set it to a very small number but that will
+   cost quite significantly in sequential access and vice versa.
+   (NB: L1d cache on the author's machine is 16k bytes (0x20000 bits) large)
+
+   Building a base sieve with size 0xFFFF needs about 60 usec, random access in a
+   segment about 100 ns.
+
+      MP_SIEVE_RANGE_A_B              Random Access          Full Primesum
+             0x7f                         70 usec                 90 min(!)
+           0x1000                        100 usec                  2 min 33 sec
+           0x4000                        150 usec                  1 min 18 sec
+           0xFFFF                        350 usec                  0 min 57 sec
+          0x20000                        600 usec                  0 min 55 sec
+       0xFFFFFFFF (base sieve only)      300 usec                  0 min 35 sec
+
+
+   The default is optimized for random access but adding all primes would also work
+   in a reasonable time.
  */
+/* Size is in bits! */
 #ifndef MP_SIEVE_RANGE_A_B
-#if ( (defined MP_64BIT) && (defined MP_SIEVE_USE_LARGE_SIEVE) )
-#define MP_SIEVE_RANGE_A_B  0x400000uL
+#ifdef MP_64BIT
+#define MP_SIEVE_RANGE_A_B 0x1000
 #else
-#define MP_SIEVE_RANGE_A_B ((mp_sieve_prime) MP_SIEVE_PRIME_MAX_SQRT)
+#define MP_SIEVE_RANGE_A_B 0x1000
 #endif
 #endif
+
 #define MP_SIEVE_BASE_SIEVE_SIZE  ((mp_sieve_prime)MP_SIEVE_PRIME_MAX_SQRT)
 
 

Original file line number	Diff line number	Diff line change
`@@ -36,7 +36,7 @@ static void s_mp_sieve_setall(mp_single_sieve *bst)`
`36`	`36`	`mp_sieve_prime i, bs_size;`
`37`	`37`	`bs_size = bst->alloc / sizeof(mp_sieve_prime);`
`38`	`38`	`for (i = 0; i < bs_size; i++) {`
`39`		`- (bst)->content[i] = MP_SIEVE_PRIME_MAX;`
	`39`	`+ (bst)->content[i] = MP_SIEVE_FILL;`
`40`	`40`	`}`
`41`	`41`	`}`
`42`	`42`