A parallel polynomial Schoenhage--Strassen FFT algorithm for very large univariate polynomial multiplication has been developed for V2.26. This applies to univariate polynomials with coefficients in the integer ring, an integer residue class ring, or a prime finite field. Currently non-trivial speedups are achieved when the polynomials have large degree and large coefficients.
Set whether the parallel polynomial Schoenhage--Strassen FFT algorithm should be used for univariate polynomial multiplication when a non-trivial number of threads is selected (by SetNthreads).
> SetParallelFFT(true); > Z := IntegerRing(); > P<z> := PolynomialRing(Z); > range := [12 .. 18]; SCALE := 8; Time := Realtime; > for b in range do > n := 2^b; > nb := Round(n / SCALE); R := IntegerRing(2^nb); > F := P![Z!Random(R): i in [1 .. n]]; > G := P![Z!Random(R): i in [1 .. n]]; > SetNthreads(1); T1 := Time(); p1 := F*G; T1 := Time(T1); > SetNthreads(16); T16 := Time(); p2 := F*G; T16 := Time(T16); > assert p1 eq p2; > printf > "n: %o; #bits: %o, 1T time: %.2o, 16T time: %.1o, speedup: %.1o n", > n, nb, T1, T16, T1/T16; > end for; Deg: 4096; #bits: 512, 1T time: 0.06, 16T time: 0.01, speedup: 6.0 Deg: 8192; #bits: 1024, 1T time: 0.30, 16T time: 0.06, speedup: 9.0 Deg: 16384; #bits: 2048, 1T time: 1.10, 16T time: 0.10, speedup: 8.8 Deg: 32768; #bits: 4096, 1T time: 5.20, 16T time: 0.60, speedup: 8.9 Deg: 65536; #bits: 8192, 1T time: 25.40, 16T time: 2.70, speedup: 9.3 Deg: 131072; #bits: 16384, 1T time: 127.30, 16T time: 12.90, speedup: 9.9 Deg: 262144; #bits: 32768, 1T time: 622.90, 16T time: 60.50, speedup: 10.3