Skip to content

Commit 50de800

Browse files
committed
Removed support or 64-bit large sieve. Was just too big.
1 parent f7cef19 commit 50de800

File tree

5 files changed

+75
-180
lines changed

5 files changed

+75
-180
lines changed

demo/test.c

Lines changed: 7 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -811,7 +811,7 @@ static int test_mp_prime_rand(void)
811811

812812
/* test for size */
813813
for (ix = 10; ix < 128; ix++) {
814-
printf("Testing (not safe-prime): %9d bits \n", ix);
814+
printf("\rTesting (not safe-prime): %9d bits ", ix);
815815
fflush(stdout);
816816
DO(mp_prime_rand(&a, 8, ix, (rand_int() & 1) ? 0 : MP_PRIME_2MSB_ON));
817817
EXPECT(mp_count_bits(&a) == ix);
@@ -1623,27 +1623,12 @@ static int test_mp_next_small_prime(void)
16231623
1019879761, 72282701, 2048787577, 2058368113
16241624
};
16251625

1626-
/* TODO: No, remove 64-bit version, way too big and a bit unwieldy, too! */
1627-
#if ( (defined MP_64BIT) && (defined MP_SIEVE_USE_LARGE_SIEVE) )
1628-
/* primesum up to 2^64
1629-
$ time /home/czurnieden/GITHUB/primesum/primesum 2^64
1630-
3879578600671960388666457126750869198
1631-
1632-
real 37m6,448s
1633-
user 107m17,056s
1634-
sys 0m12,152s
1635-
I think we should use something smaller.
1636-
*/
1637-
/* const char *primesum_64 = "3879578600671960388666457126750869198"; */
1638-
1639-
const char *primesum_64 = "208139436659661";
1640-
#else
16411626
#ifdef BENCHMARK_SIEVE
16421627
const char *primesum_32 = "425649736193687430";
16431628
#else
16441629
const char *primesum_32 = "202259606268580";
16451630
#endif
1646-
#endif
1631+
16471632

16481633
mp_sieve_init(&sieve);
16491634

@@ -1666,7 +1651,7 @@ static int test_mp_next_small_prime(void)
16661651
}
16671652
#ifdef BENCHMARK_SIEVE
16681653
stopgt = gettime();
1669-
printf("Single random access for prime: %*u: ", 10, ret);
1654+
printf("Single random access for prime: %*"MP_SIEVE_PR_UINT": ", 10, ret);
16701655
print_timer("",startgt, stopgt);
16711656
#endif
16721657
}
@@ -1675,14 +1660,7 @@ static int test_mp_next_small_prime(void)
16751660
#ifdef BENCHMARK_SIEVE
16761661
startgt = gettime();
16771662
#endif
1678-
#if ( (defined MP_64BIT) && (defined MP_SIEVE_USE_LARGE_SIEVE) )
1679-
for (p = 4293918720lu; ret < (mp_sieve_prime)4294997297lu;) {
1680-
DO(mp_next_small_prime(p, &ret, &sieve));
1681-
p = ret + 1;
1682-
mp_set_u64(&t, ret);
1683-
DO(mp_add(&primesum, &t, &primesum));
1684-
}
1685-
#else
1663+
16861664
#ifdef BENCHMARK_SIEVE
16871665
for (p = 0ul; ret < (mp_sieve_prime)MP_SIEVE_BIGGEST_PRIME;) {
16881666
#else
@@ -1693,19 +1671,15 @@ static int test_mp_next_small_prime(void)
16931671
mp_set_u32(&t, ret);
16941672
DO(mp_add(&primesum, &t, &primesum));
16951673
}
1696-
#endif
1674+
16971675
#ifdef BENCHMARK_SIEVE
16981676
stopgt = gettime();
1699-
printf("Sum of all primes between 0 and %u = ", MP_SIEVE_BIGGEST_PRIME);
1677+
printf("Sum of all primes between 0 and %"MP_SIEVE_PR_UINT" = ", MP_SIEVE_BIGGEST_PRIME);
17001678
DO(mp_fwrite(&primesum, 10, stdout));
1701-
print_timer(", computed in ",startgt, stopgt);
1679+
print_timer(", computed in \n",startgt, stopgt);
17021680
#endif
17031681

1704-
#if ( (defined MP_64BIT) && (defined MP_SIEVE_USE_LARGE_SIEVE) )
1705-
DO(mp_read_radix(&t, primesum_64, 10));
1706-
#else
17071682
DO(mp_read_radix(&t, primesum_32, 10));
1708-
#endif
17091683
EXPECT(mp_cmp(&primesum, &t) == MP_EQ);
17101684

17111685
mp_sieve_clear(&sieve);

doc/bn.tex

Lines changed: 34 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -2349,9 +2349,35 @@ \section{Random Primes}
23492349

23502350

23512351
\section{Prime Sieve}
2352-
The prime sieve is implemented as a simple segmented Sieve of Eratosthenes. It is only moderately optimized for
2353-
space and runtime but should be small enough and also fast enough for almost all use-cases; quite quick for
2354-
sequential access but relatively slow for random access.
2352+
The prime sieve is implemented as a simple segmented Sieve of Eratosthenes.
2353+
2354+
This library needs some small sequential amounts for divisibility tests starting at two and
2355+
some random small primes for the Miller--Rabin test. That means we need a small base sieve
2356+
for quick results with a cold start and small segments to get random small primes fast.
2357+
2358+
The base sieve holds odd values only and even with a size of \texttt{4096} bytes it is
2359+
small enough to get build quickly, in about 50 $\mu$sec on the author's old machine.
2360+
2361+
The choice of the size for the individual segments varies more in the results. See table
2362+
\ref{table:sievebenchmarks} for some data.
2363+
2364+
\begin{table}[h]
2365+
\begin{center}
2366+
\begin{tabular}{r c l}
2367+
\textbf{Segment Size (bits)} & \textbf{Random Access in $\mu$sec} & \textbf{Full Primesum} \\
2368+
\texttt{ 0x7F} & 70 & 90(!) min \\
2369+
\texttt{0x1000} & 100 & 2 min 33 sec \\
2370+
\texttt{0x4000} & 150 & 1 min 18 sec \\
2371+
\texttt{ 0xFFFF} & 350 & 0 min 57 sec \\
2372+
\texttt{0x20000} & 600 & 0 min 55 sec \\
2373+
\texttt{ 0xFFFFFFFF} & 300 & 0 min 35 sec
2374+
\end{tabular}
2375+
\caption{Average access times (warm) with the default base sieve (\texttt{MP-64BIT})} \label{table:sievebenchmarks}
2376+
\end{center}
2377+
\end{table}
2378+
2379+
The default sizes are in \texttt{tommath\_private.h}: \texttt{MP\_SIEVE\_PRIME\_MAX\_SQRT} is
2380+
the size of the base sieve and \texttt{MP\_SIEVE\_RANGE\_A\_B} is the size of the segment.
23552381

23562382
\subsection{Initialization and Clearing}
23572383
Initializing. It cannot fail because it only sets some default values. Memory is allocated later according to needs.
@@ -2402,20 +2428,12 @@ \subsection{Find Adjacent Primes}
24022428

24032429
\subsection{Useful Constants}
24042430
\begin{description}
2405-
\item[\texttt{MP\_SIEVE\_BIGGEST\_PRIME}] \texttt{read-only} The biggest prime the sieve can offer. It is
2406-
$4\,294\,967\,291$ for \texttt{MP\_16BIT}, \texttt{MP\_32BIT} and \texttt{MP\_64BIT}; and
2407-
$18\,446\,744\,073\,709\,551\,557$ for \texttt{MP\_64BIT} if the macro\\
2408-
\texttt{LTM\_SIEVE\_USE\_LARGE\_SIEVE} is defined.
2431+
\item[\texttt{MP\_SIEVE\_BIGGEST\_PRIME}] \texttt{read-only} The biggest prime the sieve can offer.
2432+
It is $4\,294\,967\,291$ for all \texttt{MP\_xBIT}.
24092433

24102434
\item[\texttt{mp\_sieve\_prime}] \texttt{read-only} The basic type for the numbers in the sieve. It
2411-
is \texttt{uint32\_t} for \texttt{MP\_16BIT}, \texttt{MP\_32BIT} and \texttt{MP\_64BIT}; and
2412-
\texttt{uint64\_t} for \texttt{MP\_64BIT} if the macro \texttt{LTM\_SIEVE\_USE\_LARGE\_SIEVE} is defined.
2413-
2414-
\item[\texttt{LTM\_SIEVE\_USE\_LARGE\_SIEVE}] \texttt{read-only} A compile time flag to make a large sieve.
2415-
No advantage has been seen in using 64-bit integers if available except the ability to get a sieve up
2416-
to $2^64$. But in this case the base sieve gets 0.25 Gibibytes large and the segments 0.5 Gibibytes
2417-
(although you can change \texttt{LTM\_SIEVE\_RANGE\_A\_B} in \texttt{bn\_mp\_sieve.c} to get smaller segments)
2418-
and it needs a long time to fill.
2435+
is \texttt{uint32\_t} for \texttt{MP\_16BIT}, \texttt{MP\_32BIT}; and
2436+
\texttt{uint64\_t} for \texttt{MP\_64BIT}..
24192437

24202438
\item[\texttt{MP\_SIEVE\_PR\_UINT}] Choses the correct macro from \texttt{inttypes.h} to print a\\
24212439
\texttt{mp\_sieve\_prime}. The header \texttt{inttypes.h} must be included before\\
@@ -2453,148 +2471,36 @@ \subsubsection{Primality Test}
24532471
int main(int argc, char **argv)
24542472
{
24552473
mp_sieve_prime number;
2456-
mp_sieve *base = NULL;
2457-
mp_sieve *segment = NULL;
2458-
mp_sieve_prime single_segment_a = 0;
2474+
mp_sieve sieve;
24592475
int e;
2460-
24612476
/* variable holding the result of mp_is_small_prime */
24622477
mp_sieve_prime result;
24632478
24642479
if (argc != 2) {
24652480
fprintf(stderr,"Usage %s number\textbackslash{}n", argv[0]);
24662481
exit(EXIT_FAILURE);
24672482
}
2468-
24692483
number = (mp_sieve_prime) strtoul(argv[1],NULL, 10);
24702484
if (errno == ERANGE) {
24712485
fprintf(stderr,"strtoul(l) failed: input out of range\textbackslash{}n");
24722486
goto LTM_ERR;
24732487
}
2474-
24752488
mp_sieve_init(&sieve);
2476-
24772489
if ((e = mp_is_small_prime(number, &result, &sieve)) != MP_OKAY) {
24782490
fprintf(stderr,"mp_is_small_prime failed: \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
24792491
mp_error_to_string(e));
24802492
goto LTM_ERR;
24812493
}
2482-
24832494
printf("The number %" LTM_SIEVE_PR_UINT " is %s prime\textbackslash{}n",
24842495
number,(result)?"":"not");
24852496
2486-
2487-
mp_sieve_clear(&sieve);
2488-
exit(EXIT_SUCCESS);
2489-
LTM_ERR:
2490-
mp_sieve_clear(&sieve);
2491-
exit(EXIT_FAILURE);
2492-
}
2493-
\end{alltt}
2494-
\subsubsection{Find Adjacent Primes}
2495-
To sum up all primes up to and including \texttt{MP\_SIEVE\_BIGGEST\_PRIME} you might do something like:
2496-
\begin{alltt}
2497-
#include <stdlib.h>
2498-
#include <stdio.h>
2499-
#include <errno.h>
2500-
#include <tommath.h>
2501-
int main(int argc, char **argv)
2502-
{
2503-
mp_sieve_prime number;
2504-
mp_sieve sieve;
2505-
mp_sieve_prime k, ret;
2506-
mp_int total, t;
2507-
int e;
2508-
2509-
if (argc != 2) {
2510-
fprintf(stderr,"Usage %s integer\textbackslash{}n", argv[0]);
2511-
exit(EXIT_FAILURE);
2512-
}
2513-
2514-
if ((e = mp_init_multi(&total, &t, NULL)) != MP_OKAY) {
2515-
fprintf(stderr,"mp_init_multi(segment): \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
2516-
mp_error_to_string(e));
2517-
goto LTM_ERR_1;
2518-
}
2519-
errno = 0;
2520-
#if ( (defined MP_64BIT) && (defined LTM_SIEVE_USE_LARGE_SIEVE) )
2521-
number = (mp_sieve_prime) strtoull(argv[1],NULL, 10);
2522-
#else
2523-
number = (mp_sieve_prime) strtoul(argv[1],NULL, 10);
2524-
#endif
2525-
if (errno == ERANGE) {
2526-
fprintf(stderr,"strtoul(l) failed: input out of range\textbackslash{}n");
2527-
return EXIT_FAILURE
2528-
}
2529-
2530-
mp_sieve_init(&sieve);
2531-
2532-
for (k = 0, ret = 0; ret < number; k = ret) {
2533-
if ((e = mp_next_small_prime(k + 1, &ret, &sieve)) != MP_OKAY) {
2534-
if (e == LTM_SIEVE_MAX_REACHED) {
2535-
#ifdef MP_64BIT
2536-
if ((e = mp_add_d(&total, (mp_digit) k, &total)) != MP_OKAY) {
2537-
fprintf(stderr,"mp_add_d (1) failed: \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
2538-
mp_error_to_string(e));
2539-
goto LTM_ERR;
2540-
}
2541-
#else
2542-
if ((e = mp_set_long(&t, k)) != MP_OKAY) {
2543-
fprintf(stderr,"mp_set_long (1) failed: \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
2544-
mp_error_to_string(e));
2545-
goto LTM_ERR;
2546-
}
2547-
if ((e = mp_add(&total, &t, &total)) != MP_OKAY) {
2548-
fprintf(stderr,"mp_add (1) failed: \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
2549-
mp_error_to_string(e));
2550-
goto LTM_ERR;
2551-
}
2552-
#endif
2553-
break;
2554-
}
2555-
fprintf(stderr,"mp_next_small_prime failed: \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
2556-
mp_error_to_string(e));
2557-
goto LTM_ERR;
2558-
}
2559-
/* The check if the prime is below the given limit
2560-
* cannot be done in the for-loop conditions because if
2561-
* it could we wouldn't need the sieve in the first place.
2562-
*/
2563-
if (ret <= number) {
2564-
#ifdef MP_64BIT
2565-
if ((e = mp_add_d(&total, (mp_digit) k, &total)) != MP_OKAY) {
2566-
fprintf(stderr,"mp_add_d failed: \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
2567-
mp_error_to_string(e));
2568-
goto LTM_ERR;
2569-
}
2570-
#else
2571-
if ((e = mp_set_long(&t, k)) != MP_OKAY) {
2572-
fprintf(stderr,"mp_set_long failed: \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
2573-
mp_error_to_string(e));
2574-
goto LTM_ERR;
2575-
}
2576-
if ((e = mp_add(&total, &t, &total)) != MP_OKAY) {
2577-
fprintf(stderr,"mp_add failed: \textbackslash{}"%s\textbackslash{}"\textbackslash{}n",
2578-
mp_error_to_string(e));
2579-
goto LTM_ERR;
2580-
}
2581-
#endif
2582-
}
2583-
}
2584-
printf("total: ");
2585-
mp_fwrite(&total,10,stdout);
2586-
putchar('\textbackslash{}n');
2587-
2588-
mp_clear_multi(&total, &t, NULL);
25892497
mp_sieve_clear(&sieve);
25902498
exit(EXIT_SUCCESS);
25912499
LTM_ERR:
2592-
mp_clear_multi(&total, &t, NULL);
25932500
mp_sieve_clear(&sieve);
25942501
exit(EXIT_FAILURE);
25952502
}
25962503
\end{alltt}
2597-
It took about a minute on the authors machine from 2015 to get the expected $425\,649\,736\,193\,687\,430$ for the sum of all primes up to $2^{32}$, about the same runtime as Pari/GP version 2.9.4 (with a GMP-5.1.3 kernel).
25982504

25992505
\chapter{Random Number Generation}
26002506
\section{PRNG}

mp_is_small_prime.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ static void s_mp_sieve_setall(mp_single_sieve *bst)
3636
mp_sieve_prime i, bs_size;
3737
bs_size = bst->alloc / sizeof(mp_sieve_prime);
3838
for (i = 0; i < bs_size; i++) {
39-
(bst)->content[i] = MP_SIEVE_PRIME_MAX;
39+
(bst)->content[i] = MP_SIEVE_FILL;
4040
}
4141
}
4242

tommath.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -566,16 +566,16 @@ mp_err mp_prime_rand(mp_int *a, int t, int size, int flags) MP_WUR;
566566

567567
/* ---> full prime sieve <--- */
568568

569-
#if ( (defined MP_64BIT) && (defined MP_SIEVE_USE_LARGE_SIEVE) )
569+
#ifdef MP_64BIT
570570
typedef uint64_t mp_sieve_prime;
571-
# define MP_SIEVE_BIGGEST_PRIME 18446744073709551557lu
572571
# define MP_SIEVE_PR_UINT PRIu64
573572
#else
574573
typedef uint32_t mp_sieve_prime;
575-
# define MP_SIEVE_BIGGEST_PRIME 4294967291u
576574
# define MP_SIEVE_PR_UINT PRIu32
577575
#endif
578576

577+
#define MP_SIEVE_BIGGEST_PRIME 4294967291lu
578+
579579
typedef struct mp_single_sieve_t {
580580
mp_sieve_prime *content; /* bitset holding the sieve */
581581
mp_sieve_prime size; /* number of entries (which is a slightly misleading description) */

tommath_private.h

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -105,32 +105,47 @@ extern void MP_FREE(void *mem, size_t size);
105105
#endif
106106

107107
/* Size of the base sieve of mp_sieve*/
108-
#if ( (defined MP_64BIT) && (defined MP_SIEVE_USE_LARGE_SIEVE) )
109-
# define MP_SIEVE_PRIME_MAX 0xFFFFFFFFFFFFFFFFllu
108+
109+
#define MP_SIEVE_PRIME_MAX 0xFFFFFFFFlu
110110
#ifndef MP_SIEVE_PRIME_MAX_SQRT
111-
# define MP_SIEVE_PRIME_MAX_SQRT 0xFFFFFFFFllu
111+
#define MP_SIEVE_PRIME_MAX_SQRT 0xFFFFlu
112112
#endif
113+
#ifdef MP_64BIT
114+
#define MP_SIEVE_FILL 0xFFFFFFFFFFFFFFFFllu
113115
#else
114-
# define MP_SIEVE_PRIME_MAX 0xFFFFFFFFlu
115-
# define MP_SIEVE_PRIME_MAX_SQRT 0xFFFFlu
116+
#define MP_SIEVE_FILL 0xFFFFFFFFlu
116117
#endif
117118

119+
118120
/*
119-
* Set range_a_b to sqrt(MP_SIEVE_PRIME_MAX)
120-
* TODO: Make it const or put it in bncore.c because it is said to be faster
121-
* if the size of range_a_b fits into the L2-cache.
122-
* Not much difference on the author's machine for 32 bit but quite
123-
* a large one for 64 bit and large limits. YMMV, as always.
124-
* Please be aware that range_a_b is in bits, not bytes and memory
125-
* allocation rounds up and adds CHAR_BIT*sizeof(mp_sieve_prime) bits.
121+
It is faster for random access to set it to a very small number but that qill
122+
cost quite significantly in sequential access and vice versa.
123+
(NB: L1d cache on the author's machine is 16k bytes (0x20000 bits) large)
124+
125+
Building a base sieve with size 0xFFFF needs about 60 usec, random access in a
126+
segment about 100 ns.
127+
128+
MP_SIEVE_RANGE_A_B Random Access Full Primesum
129+
0x7f 70 usec 90 min(!)
130+
0x1000 100 usec 2 min 33 sec
131+
0x4000 150 usec 1 min 18 sec
132+
0xFFFF 350 usec 0 min 57 sec
133+
0x20000 600 usec 0 min 55 sec
134+
0xFFFFFFFF (base sieve only) 300 usec 0 min 35 sec
135+
136+
137+
The default is optimized for random access but adding all primes would also work
138+
in a reasonable time.
126139
*/
140+
/* Size is in bits! */
127141
#ifndef MP_SIEVE_RANGE_A_B
128-
#if ( (defined MP_64BIT) && (defined MP_SIEVE_USE_LARGE_SIEVE) )
129-
#define MP_SIEVE_RANGE_A_B 0x400000uL
142+
#ifdef MP_64BIT
143+
#define MP_SIEVE_RANGE_A_B 0x1000
130144
#else
131-
#define MP_SIEVE_RANGE_A_B ((mp_sieve_prime) MP_SIEVE_PRIME_MAX_SQRT)
145+
#define MP_SIEVE_RANGE_A_B 0x1000
132146
#endif
133147
#endif
148+
134149
#define MP_SIEVE_BASE_SIEVE_SIZE ((mp_sieve_prime)MP_SIEVE_PRIME_MAX_SQRT)
135150

136151

0 commit comments

Comments
 (0)