11/*
22 Copyright (C) 2022 Daniel Schultz
3+ Copyright (C) 2024 Fredrik Johansson
34
45 This file is part of FLINT.
56
1718#include "crt_helpers.h"
1819#include "fft_small.h"
1920
21+ /*
22+ The following profiles are hardcoded.
23+
24+ np bits n m
25+ 4 84 4 3
26+ 4 88 4 3
27+ 4 92 4 3
28+ 5 112 4 4
29+ 5 116 4 4
30+ 5 120 4 4
31+ 6 136 5 4
32+ 6 140 5 4
33+ 6 144 5 4
34+ 7 160 6 5
35+ 7 164 6 5
36+ 7 168 6 5
37+ 8 184 7 6
38+ 8 188 7 6
39+ 8 192 7 6
40+
41+ Here np is the number of primes {p[0], p[1], ...p[np-1]}. By default the
42+ following 50-bit primes are used:
43+
44+ p[0] = 1108307720798209
45+ p[1] = 659706976665601
46+ p[2] = 1086317488242689
47+ p[3] = 910395627798529
48+ p[4] = 699289395265537
49+ p[5] = 1022545813831681
50+ p[6] = 1013749720809473
51+ p[7] = 868614185943041
52+
53+ We suppose that the inputs to multiply are
54+
55+ {a[0], ..., a[an-1]} and {b[0], ..., b[bn-1]}
56+
57+ in base 2^64. They will be converted to
58+
59+ {a'[0], ..., a'[an'-1]} and {b'[0], ..., a'[bn'-1]}.
60+
61+ in base 2^bits, where an' = ceil(64*an/bits) and
62+ bn' = ceil(64*bn/bits). The code for performing the conversion
63+ actually processes the inputs as base 2^32 arrays of twice the lengths.
64+
65+ Each dot product in the convolution of a' and b' is bounded by
66+
67+ bn' * (2^bits-1)^2
68+
69+ and we need this to be smaller than prod_primes = p[0], ..., p[np-1]
70+ for the CRT reconstruction. A slight relaxation of this inequality is
71+
72+ ceil(64*bn/bits) * 2^(2*bits) <= prod_primes.
73+
74+ The function crt_data_find_bn_bound determines an upper bound for
75+ permissible bn such that this holds. Numerical values of the bn bounds
76+ are as follows:
77+
78+ np = 4, bits = 84, bn <= 2536637511
79+ np = 4, bits = 88, bn <= 10380583
80+ np = 4, bits = 92, bn <= 42390
81+ np = 5, bits = 112, bn <= 32822700
82+ np = 5, bits = 116, bn <= 132790
83+ np = 5, bits = 120, bn <= 534
84+ np = 6, bits = 136, bn <= 144789875
85+ np = 6, bits = 140, bn <= 582218
86+ np = 6, bits = 144, bn <= 2337
87+ np = 7, bits = 160, bn <= 613493872
88+ np = 7, bits = 164, bn <= 2456369
89+ np = 7, bits = 168, bn <= 9826
90+ np = 8, bits = 184, bn <= 2177184315
91+ np = 8, bits = 188, bn <= 8689506
92+ np = 8, bits = 192, bn <= 34662
93+
94+ Note that we do not require an >= bn for correctness, but for highly
95+ unbalanced multiplications calling mpn_ctx_mpn_mul with operands
96+ in this order gives a better bound.
97+
98+ The profile parameters n and m describe the size of CRT operands.
99+ Let P = p[0] * ... * p[n-1]. One CRT sum has the form
100+
101+ s = f[0]*x[0] + ... + f[np-1]*x[np-1]
102+
103+ where x[i] is a single limb, f[i] has m limbs, and the limb count n
104+ is large enough to hold s <= P * np.
105+ */
106+
20107void crt_data_init (crt_data_t C , ulong prime , ulong coeff_len , ulong nprimes )
21108{
22109 C -> prime = prime ;
@@ -30,8 +117,6 @@ void crt_data_clear(crt_data_t C)
30117 flint_free (C -> data );
31118}
32119
33-
34-
35120/*
36121 need ceil(64*bn/bits) <= prod_primes/2^(2*bits)
37122 i.e. (64*bn+bits-1)/bits <= prod_primes/2^(2*bits)
@@ -987,7 +1072,7 @@ void mpn_ctx_init(mpn_ctx_t R, ulong p)
9871072 R->profiles[i].bits = bits_; \
9881073 R->profiles[i].bn_bound = crt_data_find_bn_bound(R->crts + np_ - 1, bits_); \
9891074 R->profiles[i].to_ffts = CAT3(mpn_to_ffts, np_, bits_); \
990- /*flint_printf("profile np = %wu, bits = %3wu, bn <= 0x%16wx \n", R->profiles[i].np, R->profiles[i].bits, R->profiles[i].bn_bound);*/ \
1075+ /*flint_printf("profile np = %wu, bits = %3wu, bn <= %wu \n", R->profiles[i].np, R->profiles[i].bits, R->profiles[i].bn_bound); */ \
9911076 R -> profiles_size = i + 1 ;
9921077
9931078 PUSH_PROFILE (4 , 84 , 4 ,3 );
0 commit comments