33 * Date: 2019-01-09
44 * License: CC0
55 * Source: http://neerc.ifmo.ru/trains/toulouse/2017/fft2.pdf (do read, it's excellent)
6- Papers about accuracy: http://www.daemonology.net/papers/fft.pdf, http://www.cs.berkeley.edu/~fateman/papers/fftvsothers.pdf
7- For integers rounding works if $(|a| + |b|)\max(a, b) < \mathtt{\sim} 10^9$, or in theory maybe $10^6$.
8- * Description: fft(a, ...) computes $\hat f(k) = \sum_x a[x] \exp(2\pi i \cdot k x / N)$ for all $k$. Useful for convolution:
6+ Accuracy bound from http://www.daemonology.net/papers/fft.pdf
7+ * Description: fft(a) computes $\hat f(k) = \sum_x a[x] \exp(2\pi i \cdot k x / N)$ for all $k$. Useful for convolution:
98 \texttt{conv(a, b) = c}, where $c[x] = \sum a[i]b[x-i]$.
109 For convolution of complex numbers or more than two vectors: FFT, multiply
1110 pointwise, divide by n, reverse(start+1, end), FFT back.
12- For integers, consider using a number-theoretic transform instead, to avoid rounding issues.
13- * Time: O(N \log N) with $N = |A|+|B|-1$ ($\tilde 1s$ for $N=2^{22}$)
11+ Rounding is safe if $(\sum a_i^2 + \sum b_i^2)\log_2{N} < 9\cdot10^{14}$
12+ (in practice $10^{16}$; higher for random inputs).
13+ Otherwise, use long doubles/NTT/FFTMod.
14+ * Time: O(N \log N) with $N = |A|+|B|$ ($\tilde 1s$ for $N=2^{22}$)
1415 * Status: somewhat tested
1516 */
1617#pragma once
1718
1819typedef complex < double > C ;
1920typedef vector < double > vd ;
20-
21- void fft (vector < C > & a , vector < C > & rt , vi & rev , int n ) {
21+ void fft (vector < C > & a ) {
22+ int n = sz (a ), L = 31 - __builtin_clz (n );
23+ static vector < complex < long double >> R (2 , 1 );
24+ static vector < C > rt (2 , 1 ); // (^ 10% faster if double)
25+ for (static int k = 2 ; k < n ; k *= 2 ) {
26+ R .resize (n ); rt .resize (n );
27+ auto x = polar (1.0L , M_PIl / k ); // M_PI, lower-case L
28+ rep (i ,k ,2 * k ) rt [i ] = R [i ] = i & 1 ? R [i /2 ] * x : R [i /2 ];
29+ }
30+ vi rev (n );
31+ rep (i ,0 ,n ) rev [i ] = (rev [i / 2 ] | (i & 1 ) << L ) / 2 ;
2232 rep (i ,0 ,n ) if (i < rev [i ]) swap (a [i ], a [rev [i ]]);
2333 for (int k = 1 ; k < n ; k *= 2 )
2434 for (int i = 0 ; i < n ; i += 2 * k ) rep (j ,0 ,k ) {
@@ -27,25 +37,19 @@ void fft(vector<C> &a, vector<C> &rt, vi& rev, int n) {
2737 C z (x [0 ]* y [0 ] - x [1 ]* y [1 ], x [0 ]* y [1 ] + x [1 ]* y [0 ]); /// exclude-line
2838 a [i + j + k ] = a [i + j ] - z ;
2939 a [i + j ] += z ;
30- }
40+ }
3141}
32-
3342vd conv (const vd & a , const vd & b ) {
3443 if (a .empty () || b .empty ()) return {};
3544 vd res (sz (a ) + sz (b ) - 1 );
3645 int L = 32 - __builtin_clz (sz (res )), n = 1 << L ;
37- vector < C > in (n ), out (n ), rt (n , 1 ); vi rev (n );
38- rep (i ,0 ,n ) rev [i ] = (rev [i /2 ] | (i & 1 ) << L ) / 2 ;
39- for (int k = 2 ; k < n ; k *= 2 ) {
40- C z [] = {1 , polar (1.0 , M_PI / k )};
41- rep (i ,k ,2 * k ) rt [i ] = rt [i /2 ] * z [i & 1 ];
42- }
46+ vector < C > in (n ), out (n );
4347 copy (all (a ), begin (in ));
4448 rep (i ,0 ,sz (b )) in [i ].imag (b [i ]);
45- fft (in , rt , rev , n );
49+ fft (in );
4650 trav (x , in ) x *= x ;
4751 rep (i ,0 ,n ) out [i ] = in [- i & (n - 1 )] - conj (in [i ]);
48- fft (out , rt , rev , n );
49- rep (i ,0 ,sz (res )) res [i ] = imag (out [i ]) / (4 * n );
52+ fft (out );
53+ rep (i ,0 ,sz (res )) res [i ] = imag (out [i ]) / (4 * n );
5054 return res ;
5155}
0 commit comments