/* Program to check status of the line conjecture for cubic extensions. Written by Geoff Bailey, Magma group, Feb 2017. Sample compile line (Linux): gcc -O4 -o L3check L3check.c -lpthread For more explanation, see http://magma.maths.usyd.edu.au/~geoff/L3/ */ #include #include #include #include #include #include #define USE_ASSERTIONS 1 #define USE_MOD_BY_MULT 1 #define QLIMIT 5792 typedef uint32_t t_uint32; typedef uint64_t t_uint64; typedef int t_boolean; #define FALSE 0 #define TRUE 1 /* global options */ t_boolean opt_verbose; t_boolean opt_antisort; t_uint32 opt_nthreads; t_boolean opt_find_all; /*************** Miscellaneous utility stuff ***************/ #if USE_ASSERTIONS #define ASSERT(expr) \ do { \ if (!(expr)) \ { \ fprintf(stderr, "Assertion failed: %s\n", #expr); \ exit(1); \ } \ } while (0) #else #define ASSERT(expr) do { /* nothing */ } while (0) #endif #define IF_VERBOSE(code) \ if (opt_verbose) { \ code; \ fflush(stdout); \ } void *checked_calloc(size_t nitems, size_t itemsize) { void *mem; mem = calloc(nitems, itemsize); if (mem == NULL) { fprintf(stderr, "Calloc(%zu, %zu) failed\n", nitems, itemsize); exit(1); } return mem; } #define ARRAY_ALLOC(n, t) checked_calloc((n), sizeof(t)) #define ARRAY_FREE(a) free(a) #define ALLOC(t) ARRAY_ALLOC(1, t) #define ONES32(n) (~((~(t_uint32)0) << (n))) #define ONES64(n) (~((~(t_uint64)0) << (n))) t_uint32 highbitpos32(t_uint32 x) /* Returns the index of the highest set bit in x. x must not be zero. If k is the returned index, then 2^k <= x < 2^(k + 1). */ { t_uint32 k; for (k = 31; k > 0; k--) if ((x >> k) != 0) break; return k; } double time_current() { struct timeval tv; gettimeofday(&tv, NULL); return (double)tv.tv_sec + ((double)tv.tv_usec) / 1000000.0; } double time_since(double start) { return time_current() - start; } #define START_TIMER() double tm = time_current() #define RESTART_TIMER() tm = time_current() #define STOP_TIMER(desc) \ IF_VERBOSE( printf("Time for %s: %.2f s\n", desc, time_since(tm)) ) /* As should be well-known, division and modulus are slow operations. Provided certain conditions are met, we can replace division by a multiply and shift. Suppose that x < 2^A and m < 2^B, and set M = Ceiling(2^(A + B)/m). Then Floor(x / m) = Floor((M*x) / 2^(A + B)). This is the promised multiply and shift, and is fine provided that M*x does not overflow. Although better versions are possible, for our needs it is enough to take the simple restriction 2*A + B <= 64 which will guarantee that. Of course, once we have cheap division, then we can get modulus by a further multiply and subtract. */ #if MOD_BY_MULT #define MOD_INIT(modulus, shiftvalue) \ t_uint32 mm_modulus; \ int mm_shift; \ t_uint64 mm_mult; \ \ mm_modulus = modulus; \ mm_shift = shiftvalue; \ mm_mult = 1 + (ONES64(Mshift) / (t_uint64)mm_modulus) #define MOD_REDUCE(x) \ x -= mm_modulus*(t_uint32)((mm_mult*(t_uint64)x) >> mm_shift) #else #define MOD_INIT(modulus, shiftvalue) t_uint32 mm_modulus = modulus #define MOD_REDUCE(x) x %= mm_modulus; #endif /*************** Multi-iterator ***************/ typedef struct { t_uint64 lbit, hbit, multicounter, moduli; t_uint32 shift, largeprime, largecounter; t_boolean uselarge; } str_multi_iterator; void compute_multi_iterator(str_multi_iterator *mi, t_uint32 nprimes, t_uint32 *primes) { t_uint64 lbit, hbit, multicounter, moduli; t_uint32 shift, largeprime, largecounter; t_boolean uselarge; t_uint32 np, W, i; t_uint32 *pptr; /* skip the prime two; it will be handled differently by other code */ pptr = primes; np = nprimes; if (pptr[0] == 2) { pptr++; np--; } largeprime = pptr[np - 1]; W = 2 + highbitpos32(largeprime); uselarge = (np*W > 64); if (uselarge) /* largest prime does not fit into the multicounter */ { np--; W = 2 + highbitpos32(pptr[np - 1]); /* The remaining primes must fit, since the first case where they would not is q = 12391, which is much large than we allow. */ ASSERT(np*W <= 64); } lbit = 0; moduli = 0; for (i = 0; i < np; i++) { lbit |= ((t_uint64)1) << (W*i); moduli |= ((t_uint64)pptr[i]) << (W*i); } shift = W - 1; hbit = lbit << shift; multicounter = hbit - moduli; largecounter = 0; IF_VERBOSE( printf("Multicounter:\n"); printf(" %u small primes (%u", np, pptr[0]); for (i = 1; i < np; i++) printf(" %u", pptr[i]); printf("), %u bits per prime\n", W); if (uselarge) printf(" large prime: %u\n", largeprime); ) mi->lbit = lbit; mi->hbit = hbit; mi->multicounter = multicounter; mi->moduli = moduli; mi->shift = shift; mi->largeprime = largeprime; mi->largecounter = largecounter; mi->uselarge = uselarge; } #define INIT_MULTI_ITERATOR(mi) \ t_uint64 lbit, hbit, multicounter, moduli; \ t_uint32 shift, largeprime, largecounter; \ t_boolean uselarge; \ t_uint64 H, haszero; \ \ lbit = mi.lbit; \ hbit = mi.hbit; \ multicounter = mi.multicounter; \ moduli = mi.moduli; \ shift = mi.shift; \ largeprime = mi.largeprime; \ largecounter = mi.largecounter; \ uselarge = mi.uselarge; \ haszero = TRUE #define ADVANCE_MULTI_ITERATOR() \ do { \ multicounter += lbit; \ H = multicounter & hbit; \ multicounter -= (moduli & (H - (H >> shift))); \ haszero = H; \ if (uselarge) \ { \ largecounter++; \ if (largecounter == largeprime) \ { \ largecounter = 0; \ haszero = 1; \ } \ } \ } while (0) #define ADVANCE_MULTI_ITERATOR_SMALL() \ do { \ multicounter += lbit; \ H = multicounter & hbit; \ multicounter -= (moduli & (H - (H >> shift))); \ haszero = H; \ } while (0) #define ADVANCE_MULTI_ITERATOR_LARGE() \ do { \ multicounter += lbit; \ H = multicounter & hbit; \ multicounter -= (moduli & (H - (H >> shift))); \ haszero = H; \ largecounter++; \ if (largecounter == largeprime) \ { \ largecounter = 0; \ haszero = 1; \ } \ } while (0) /*************** Finite field poly rep (over F_p) ***************/ /* This is for field elements represented as polynomials over GF(p). Essentially the only arithmetic operation we need is to multiply by the primitive element. */ typedef struct { t_uint32 p, n, q; /* q = p^n */ t_uint32 *f; /* primitive elt z has z^n = Sum(f[i]*z^i) */ } str_ffpoly; typedef str_ffpoly *t_ffpoly; typedef t_uint32 *t_ffpoly_elt; t_ffpoly ffpoly_create(t_uint32 p, t_uint32 n, t_uint32 *poly) /* Returns the ffpoly structure F for the given arguments. IMPORTANT: Must have p^n < 2^32. */ { t_ffpoly F; t_uint32 q, i, x; t_uint32 *f; q = 1; for (i = 0; i < n; i++) q *= p; f = ARRAY_ALLOC(n, t_uint32); for (i = 0; i < n; i++) { x = p - poly[i]; if (x == p) x = 0; f[i] = x; } F = ALLOC(str_ffpoly); F->p = p; F->n = n; F->q = q; F->f = f; return F; } void ffpoly_free(t_ffpoly F) { ARRAY_FREE(F->f); free(F); } t_ffpoly_elt ffpoly_zero(t_ffpoly F) /* Returns an element of F equal to 0. */ { return ARRAY_ALLOC(F->n, t_uint32); } t_ffpoly_elt ffpoly_one(t_ffpoly F) /* Returns an element of F equal to 1. */ { t_ffpoly_elt one; one = ffpoly_zero(F); one[0] = 1; return one; } void ffpoly_elt_free(t_ffpoly_elt a) { ARRAY_FREE(a); } void ffpoly_elt_mult_prim_into(t_ffpoly F, t_ffpoly_elt a, t_ffpoly_elt b) /* Sets b to be a*z, where z is the primitive element of F. */ { t_uint32 p, n, i, x; t_uint32 *f; p = F->p; n = F->n; f = F->f; x = a[n - 1]; for (i = n - 1; i > 0; i--) b[i] = (a[i - 1] + x*f[i]) % p; b[0] = (x*f[0]) % p; } t_uint32 ffpoly_elt_index(t_ffpoly F, t_ffpoly_elt a) /* Returns a unique integer in [0, p^n) corresponding to a, by treating the coefficients in a as a base-p representation of an integer. */ { t_uint32 p, n, index, i; p = F->p; n = F->n; index = a[n - 1]; for (i = n - 1; i > 0; i--) index = index*p + a[i - 1]; return index; } /*************** Finite field zech rep ***************/ /* This is for field elements represented by their logs with respect to a primitive element. All arithmetic operations are carried out by use of the Zech table. */ typedef struct { t_ffpoly F; t_uint32 q, /* size of field */ zero, /* value representing zero (in practice: q - 1) */ logm1; /* log of minus one (0 or (q - 1)/2) */ t_uint32 *zech; /* zech table mapping log(x) to log(x + 1) */ } str_ffzech; typedef str_ffzech *t_ffzech; typedef t_uint32 t_ffzech_elt; t_ffzech ffzech_create(t_ffpoly F) /* Creates the Zech representation for the ffpoly F. */ { t_ffzech Z; t_uint32 p, n, q, logm1, log, index, k; t_uint32 *itol, *zech; t_ffpoly_elt zk; START_TIMER(); Z = ALLOC(str_ffzech); p = F->p; n = F->n; q = F->q; itol = ARRAY_ALLOC(q, t_uint32); zech = ARRAY_ALLOC(q, t_uint32); zk = ffpoly_one(F); for (log = 0; log < q - 1; log++) { index = ffpoly_elt_index(F, zk); itol[index] = log; ffpoly_elt_mult_prim_into(F, zk, zk); } ffpoly_elt_free(zk); /* We use q - 1 for the log of zero, completing the permutations */ log = q - 1; index = 0; itol[index] = log; for (index = 0; index < q; index += p) { for (k = 0; k < p - 1; k++) zech[itol[index + k]] = itol[index + k + 1]; zech[itol[index + k]] = itol[index]; } ARRAY_FREE(itol); if (p == 2) logm1 = 0; else logm1 = (q - 1) / 2; ASSERT(zech[logm1] == q - 1); Z->F = F; Z->q = q; Z->zero = q - 1; Z->logm1 = logm1; Z->zech = zech; STOP_TIMER("zech creation"); return Z; } #define ZECH_ORDER(Z) ((Z)->q - 1) #define ZECH_ZERO(Z) ((Z)->q - 1) #define ZECH_ONE 0 #define ZECH_PRIM 1 void ffzech_free(t_ffzech Z) { ARRAY_FREE(Z->zech); ffpoly_free(Z->F); free(Z); } t_ffzech_elt ffzech_elt_add(t_ffzech Z, t_ffzech_elt a, t_ffzech_elt b) /* Returns a + b. */ { t_uint32 zero, order, c; t_uint32 *zech; zero = ZECH_ZERO(Z); if (a == zero) return b; if (b == zero) return a; order = ZECH_ORDER(Z); zech = Z->zech; if (a <= b) { c = zech[b - a]; if (c == zero) return zero; c += a; if (c >= order) c -= order; return c; } else { c = zech[a - b]; if (c == zero) return zero; c += b; if (c >= order) c -= order; return c; } } t_ffzech_elt ffzech_elt_mult(t_ffzech Z, t_ffzech_elt a, t_ffzech_elt b) /* Returns a*b. */ { t_uint32 zero, order, c; zero = ZECH_ZERO(Z); if (a == zero) return zero; if (b == zero) return zero; order = ZECH_ORDER(Z); c = a + b; if (c >= order) c -= order; return c; } t_ffzech_elt ffzech_elt_add_mult(t_ffzech Z, t_ffzech_elt a, t_ffzech_elt b, t_ffzech_elt c) /* Returns a + b*c. */ { t_ffzech_elt d; d = ffzech_elt_mult(Z, b, c); return ffzech_elt_add(Z, a, d); } t_ffzech_elt ffzech_elt_negate(t_ffzech Z, t_ffzech_elt a) /* Returns -a. */ { return ffzech_elt_mult(Z, a, Z->logm1); } t_uint32 prime_zech_to_integer(t_ffzech Z, t_ffzech_elt a) /* When Z is a prime field, return the integer corresponding to a. */ { t_ffpoly F; t_uint32 p, z, m, zpow, bitpos; if (a == Z->zero) return 0; if (a == ZECH_ONE) return 1; F = Z->F; ASSERT(F->n == 1); p = F->p; z = F->f[0]; m = (t_uint32)a; zpow = z; bitpos = highbitpos32(m); while (bitpos > 0) { zpow = (zpow*zpow) % p; bitpos--; if ((m >> bitpos) & 1) zpow = (zpow*z) % p; } return zpow; } /*************** Finite field poly-over-zech rep ***************/ /* This is for field elements represented as polynomials over a zech-rep field. Essentially the only arithmetic operation we need is to multiply by the primitive element. We store a few extra items in the field that don't strictly belong there but are convenient to pass around in this way. */ typedef enum { FFPZ_CHAR_2, FFPZ_P3_F20, FFPZ_CHAR_ODD } t_ffpz_type; typedef struct { t_ffzech Z; t_uint32 deg; /* deg = deg(f), f is over Z */ t_uint64 q; /* (Z->q)^deg */ t_ffzech_elt *f; /* primitive elt w has w^n = Sum(f[i]*w^i) */ t_ffpz_type type; t_uint32 nprimes; t_uint32 *primes; t_uint64 rad; str_multi_iterator mi; } str_ffpz; typedef str_ffpz *t_ffpz; typedef t_ffzech_elt *t_ffpz_elt; t_ffpz ffpz_create(t_ffzech Z, t_uint32 deg, t_ffzech_elt *poly) /* Returns the ffpz structure for the given arguments. */ { t_ffpz K; t_ffzech_elt *f; t_uint32 i; t_uint64 Zq, q; t_ffpz_type type; f = ARRAY_ALLOC(deg, t_ffzech_elt); for (i = 0; i < deg; i++) f[i] = ffzech_elt_negate(Z, poly[i]); Zq = Z->q; q = 1; for (i = 0; i < deg; i++) q *= Zq; if ((Zq & 1) == 0) type = FFPZ_CHAR_2; else if (Z->F->n == 1 && f[2] == Z->zero) type = FFPZ_P3_F20; else type = FFPZ_CHAR_ODD; K = ALLOC(str_ffpz); K->Z = Z; K->deg = deg; K->q = q; K->f = f; K->type = type; return K; } void ffpz_store_primes(t_ffpz K, t_uint32 nprimes, t_uint32 *primes) { t_uint64 rad; t_uint32 i; rad = 1; for (i = 0; i < nprimes; i++) rad *= (t_uint64)primes[i]; compute_multi_iterator(&(K->mi), nprimes, primes); K->nprimes = nprimes; K->primes = primes; K->rad = rad; } void ffpz_free(t_ffpz K) { if (K->nprimes > 0) ARRAY_FREE(K->primes); ARRAY_FREE(K->f); ffzech_free(K->Z); free(K); } t_ffpz_elt ffpz_zero(t_ffpz K) { t_uint32 deg, i; t_ffzech_elt zero; t_ffpz_elt A; deg = K->deg; zero = ZECH_ZERO(K->Z); A = ARRAY_ALLOC(deg, t_ffzech_elt); for (i = 0; i < deg; i++) A[i] = zero; return A; } t_ffpz_elt ffpz_one(t_ffpz K) { t_ffpz_elt one; one = ffpz_zero(K); one[0] = ZECH_ONE; return one; } void ffpz_elt_free(t_ffpz_elt a) { ARRAY_FREE(a); } void ffpz_elt_mult_prim_into(t_ffpz K, t_ffpz_elt a, t_ffpz_elt b) /* Sets b to be a*w, where w is the primitive element of K. */ { t_ffzech Z; t_uint32 deg, i; t_ffzech_elt *f; t_ffzech_elt x; Z = K->Z; deg = K->deg; f = K->f; x = a[deg - 1]; for (i = deg - 1; i > 0; i--) b[i] = ffzech_elt_add_mult(Z, a[i - 1], x, f[i]); b[0] = ffzech_elt_mult(Z, x, f[0]); } /*************** Gamma-alpha table ***************/ t_uint64 *ga_table; t_uint32 *ga_table_large; t_uint32 *ga_counts; void build_gamma_alpha_table_char2(t_ffpz K) { t_ffzech Z; t_uint32 q, q2, index; t_uint64 Korder, k; t_ffzech_elt zero; t_ffpz_elt wk; t_ffzech_elt a, g; t_uint32 largeadj; t_uint64 adjustment; START_TIMER(); IF_VERBOSE( printf("Gamma-alpha table: Characteristic two (general case)\n"); ) Z = K->Z; q = Z->q; q2 = q*q; Korder = K->q - 1; zero = ZECH_ZERO(Z); ga_table = ARRAY_ALLOC(q2 + q, t_uint64); ga_table_large = ARRAY_ALLOC(q2 + q, t_uint32); INIT_MULTI_ITERATOR(K->mi); /* Note that our multi-iterator lags one step behind k in the loop below. This is intentional! It produces the right values to make the complement adjustment simpler. */ /* adjustment hack to shift iterator to easily-complemented form */ adjustment = hbit - moduli; largeadj = largeprime - 1; wk = ffpz_one(K); for (k = 1; k < Korder; k++) { ffpz_elt_mult_prim_into(K, wk, wk); if ( (wk[2] == ZECH_ONE) || (wk[2] == zero && wk[1] == ZECH_ONE) ) { if (wk[2] == ZECH_ONE) g = wk[1]; else g = q; a = wk[0]; index = q*g + a; ga_table[index] = multicounter - adjustment; if (uselarge) ga_table_large[index] = largeadj - largecounter; } ADVANCE_MULTI_ITERATOR(); } ffpz_elt_free(wk); STOP_TIMER("gamma-alpha table"); } void build_gamma_alpha_table_oddchar(t_ffpz K) { t_ffzech Z; t_uint32 q, q2, base, index; t_uint64 Korder, k; t_ffzech_elt zero; t_ffzech_elt a, g; t_uint32 largeadj; t_uint64 adjustment; t_ffzech_elt wk0, wk1, wk2, f0, f1, f2, u; START_TIMER(); IF_VERBOSE( printf("Gamma-alpha table: Odd characteristic (general case)\n"); ) Z = K->Z; q = Z->q; q2 = q*q; Korder = K->q - 1; zero = ZECH_ZERO(Z); ga_table = ARRAY_ALLOC(q2 + q, t_uint64); ga_table_large = ARRAY_ALLOC(q2 + q, t_uint32); ga_counts = ARRAY_ALLOC(2*(q + 1), t_uint32); INIT_MULTI_ITERATOR(K->mi); /* Note that our multi-iterator lags one step behind k in the loop below. This is intentional! It produces the right values to make the complement adjustment simpler. */ /* adjustment hack to shift iterator to easily-complemented form */ adjustment = hbit - moduli; largeadj = largeprime - 1; /* We loop through w^k, but we explicitly represent it by its coefficients (w^k = wk2*w^2 + wk1*w + wk0) instead of using the ffpz_elt. This means we also have to inline the multiplication by w. The formulae for this are u = wk2 wk2 = u*f2 + wk1 wk1 = u*f1 + wk0 wk0 = u*f0 This turns out to save considerable time, although arguably it's not worth it compared to the time taken by the beta loop later. */ wk2 = zero; wk1 = zero; wk0 = ZECH_ONE; f2 = K->f[2]; f1 = K->f[1]; f0 = K->f[0]; for (k = 1; k < Korder; k++) { u = wk2; wk2 = ffzech_elt_add_mult(Z, wk1, u, f2); wk1 = ffzech_elt_add_mult(Z, wk0, u, f1); wk0 = ffzech_elt_mult(Z, u, f0); if (wk2 == ZECH_ONE || (wk2 == zero && wk1 == ZECH_ONE)) { if (wk2 == ZECH_ONE) g = wk1; else g = q; a = wk0; base = q*g; if (k & 1) { index = base + ga_counts[2*g]; ga_counts[2*g]++; } else { ga_counts[2*g + 1]++; index = base + q - ga_counts[2*g + 1]; } ga_table[index] = multicounter - adjustment; if (uselarge) ga_table_large[index] = largeadj - largecounter; } ADVANCE_MULTI_ITERATOR(); } STOP_TIMER("gamma-alpha table"); } void build_gamma_alpha_table_p3_f20(t_ffpz K) { t_ffzech Z; t_uint32 q, q2, base, index; t_uint64 Korder, k; t_ffzech_elt zero; t_ffzech_elt a, g; t_uint32 largeadj; t_uint64 adjustment; t_uint32 wk0, wk1, wk2, f0, f1, f2, u; t_uint64 M; t_uint32 Mshift; START_TIMER(); IF_VERBOSE( printf("Gamma-alpha table: Odd characteristic (prime case, f2 = 0)\n"); ) Z = K->Z; q = Z->q; q2 = q*q; Korder = K->q - 1; zero = ZECH_ZERO(Z); ga_table = ARRAY_ALLOC(q2 + q, t_uint64); ga_table_large = ARRAY_ALLOC(q2 + q, t_uint32); ga_counts = ARRAY_ALLOC(2*(q + 1), t_uint32); INIT_MULTI_ITERATOR(K->mi); /* Note that our multi-iterator lags one step behind k in the loop below. This is intentional! It produces the right values to make the complement adjustment simpler. */ /* adjustment hack to shift iterator to easily-complemented form */ adjustment = hbit - moduli; largeadj = largeprime - 1; /* We loop through w^k, but we explicitly represent it by its coefficients (w^k = wk2*w^2 + wk1*w + wk0) instead of using the ffpz_elt. This means we also have to inline the multiplication by w. The formulae for this are u = wk2 wk2 = u*f2 + wk1 wk1 = u*f1 + wk0 wk0 = u*f0 But here we are specialising for the case where f2 = 0 (since this pretty much always happens when q is prime), so it simplifies to: u = wk2 wk2 = wk1 wk1 = u*f1 + wk0 wk0 = u*f0 This is the case where q is prime, so we can bypass the zech machinery and go straight to machine integers. However, we have to reconstruct the polynomial coefficients, since they were given as zech values. This all turns out to save considerable time, although arguably it's not worth it compared to the time taken by the beta loop later. */ f2 = prime_zech_to_integer(Z, K->f[2]); f1 = prime_zech_to_integer(Z, K->f[1]); f0 = prime_zech_to_integer(Z, K->f[0]); /* MOD_BY_MULT: We do reductions mod q < 2^13, and the values being reduced are less than q^2 < 2^25 (due to QLIMIT: Sqrt(2^25) = 5792+). There will not be overflow since 2*25 + 13 <= 64. */ MOD_INIT(q, 25 + 13); wk2 = 0; wk1 = 0; wk0 = 1; for (k = 1; k < Korder; k++) { u = wk2; wk2 = wk1; wk1 = u*f1 + wk0; MOD_REDUCE(wk1); wk0 = u*f0; MOD_REDUCE(wk0); if (wk2 == 1 || (wk2 == 0 && wk1 == 1)) { if (wk2 == 1) g = wk1; else g = q; a = wk0; base = q*g; if (k & 1) { index = base + ga_counts[2*g]; ga_counts[2*g]++; } else { ga_counts[2*g + 1]++; index = base + q - ga_counts[2*g + 1]; } ga_table[index] = multicounter - adjustment; if (uselarge) ga_table_large[index] = largeadj - largecounter; } ADVANCE_MULTI_ITERATOR(); } STOP_TIMER("gamma-alpha table"); } typedef struct { t_uint64 *B1, *T1; /* ga_table, and temporary */ t_uint32 *B2, *T2; /* ga_table_large, and temporary */ t_uint32 *counts, *pos; t_uint32 p; t_uint64 pmask; /* for extracting values mod p from B1[] */ t_boolean uselarge; /* whether to apply antisort to B2[] */ } str_antisort_data; int uint32_revcmp(const void *ap, const void *bp) { t_uint32 a, b; a = *(t_uint32 *)ap; b = *(t_uint32 *)bp; if (a > b) return -1; if (a < b) return 1; return 0; } void antisort_range(str_antisort_data *ad, t_uint32 start, t_uint32 end) { t_uint64 *B1, *T1; t_uint32 *B2, *T2, *counts, *pos; t_uint32 p; t_uint64 pmask; t_boolean uselarge; t_uint32 cshift, cmask, chigh, i, vb, pstart, vc, vpos, vcount, next; B1 = ad->B1; T1 = ad->T1; B2 = ad->B2; T2 = ad->T2; counts = ad->counts; pos = ad->pos; p = ad->p; pmask = ad->pmask; uselarge = ad->uselarge; /* Store packed tuples in counts[], giving a value mod p and the number of items of B1 with value. (Note that these values are not the same as the corresponding value of the multicounter; all we need is that they are distinct and in [0..p-1].) By putting the number of items in the high component, we can sort easily to get a (reverse) sort on count. */ cshift = 16; cmask = ONES32(cshift); chigh = (1 << cshift); for (i = 0; i < p; i++) counts[i] = i; for (i = start; i < end; i++) { vb = B1[i] & pmask; ASSERT(0 <= vb && vb < p); counts[vb] += chigh; } qsort(counts, p, sizeof(t_uint32), uint32_revcmp); /* Now re-bucket the B[] arrays into T[], using pos[] to determine the positions of each bucket. */ pstart = start; for (i = 0; i < p; i++) { vc = counts[i]; vpos = vc & cmask; vcount = vc >> cshift; pos[vpos] = pstart; pstart += vcount; } ASSERT(pstart == end); for (i = start; i < end; i++) { vb = B1[i] & pmask; vpos = pos[vb]; T1[vpos] = B1[i]; if (uselarge) T2[vpos] = B2[i]; pos[vb] = vpos + 1; } /* And redistribute the T[] arrays back into B[], using one entry from each bucket at a time. */ pstart = start; for (i = 0; i < p; i++) { vcount = counts[i] >> cshift; pos[i] = pstart; pstart += vcount; counts[i] = vcount; } ASSERT(pstart == end); next = 0; for (i = start; i < end; i++) { vpos = pos[next]; B1[i] = T1[vpos]; if (uselarge) B2[i] = T2[vpos]; pos[next] = vpos + 1; counts[next]--; next++; if (counts[next] == 0) next = 0; } } void antisort_ga_tables(t_ffpz K) { t_uint32 q, p, g; t_uint64 pmask; t_boolean uselarge, char2; str_antisort_data ad; START_TIMER(); q = K->Z->q; p = K->primes[0]; if (p == 2) p = K->primes[1]; if (2*p > q) { IF_VERBOSE( printf("Smallest odd prime (%u) is too large -- skipping antisort\n", p); ) return; } pmask = ONES64(K->mi.shift); uselarge = K->mi.uselarge; ad.B1 = ga_table; ad.T1 = ARRAY_ALLOC(q*q + q, t_uint64); if (uselarge) { ad.B2 = ga_table_large; ad.T2 = ARRAY_ALLOC(q*q + q, t_uint32); } ad.counts = ARRAY_ALLOC(p, t_uint32); ad.pos = ARRAY_ALLOC(p, t_uint32); ad.p = p; ad.pmask = pmask; ad.uselarge = uselarge; char2 = ((q & 1) == 0); if (char2) { for (g = 0; g < q + 1; g++) antisort_range(&ad, q*g, q*g + q); } else { for (g = 0; g < q + 1; g++) { t_uint32 oddstart, oddend, evenstart, evenend; oddstart = q*g; oddend = oddstart + ga_counts[2*g]; evenstart = oddend; evenend = evenstart + ga_counts[2*g + 1]; ASSERT(evenend == q*g + q); antisort_range(&ad, oddstart, oddend); antisort_range(&ad, evenstart, evenend); } } ARRAY_FREE(ad.pos); ARRAY_FREE(ad.counts); if (uselarge) ARRAY_FREE(ad.T2); ARRAY_FREE(ad.T1); STOP_TIMER("antisort"); } void build_gamma_alpha_table(t_ffpz K) { switch (K->type) { case FFPZ_CHAR_2: build_gamma_alpha_table_char2(K); break; case FFPZ_P3_F20: build_gamma_alpha_table_p3_f20(K); break; case FFPZ_CHAR_ODD: build_gamma_alpha_table_oddchar(K); break; } if (opt_antisort) antisort_ga_tables(K); } void print_failure(t_ffpz K, t_uint32 q, t_uint64 logbeta, t_uint32 g) { printf("q = %u: FAIL for beta = ", q); if (logbeta == 0) printf("1"); else if (logbeta == 1) printf("w"); else printf("w^%lu", logbeta); printf(", gamma = "); if (K->type == FFPZ_P3_F20) { if (g == q) printf("w"); else if (g == 0) printf("w^2"); else if (g == 1) printf("w^2 + w"); else printf("w^2 + %u*w", g); } else { if (g == q) printf("w"); else if (g == ZECH_ZERO(K->Z)) printf("w^2"); else if (g == ZECH_ONE) printf("w^2 + w"); else if (g == 1) printf("w^2 + z*w"); else printf("w^2 + z^%u*w", g); } printf("\n"); } t_boolean gamma_beta_loop_char2(t_ffpz K, t_uint32 gstart, t_uint32 gend) { t_uint32 q, q2, g, start, end, index, largebit; t_uint64 logbeta, rad, zero, x, M0; t_boolean allgood; allgood = TRUE; q = K->Z->q; q2 = q*q; rad = K->rad; if (rad == 0) rad = K->q - 1; INIT_MULTI_ITERATOR(K->mi); largebit = 0; if (uselarge) largebit = ((t_uint32)1) << (1 + highbitpos32(largeprime)); M0 = multicounter; for (g = gstart; g < gend; g++) { ASSERT(multicounter == M0); if (uselarge) ASSERT(largecounter == 0); start = q*g; end = start + q; for (logbeta = 0; logbeta < rad; logbeta++) { for (index = start; index < end; index++) { x = ga_table[index]; zero = ((x ^ multicounter) + lbit) & hbit; if (!zero) { if (uselarge) { if (ga_table_large[index] == largecounter) continue; } goto NEXT_BETA; } } print_failure(K, q, logbeta, g); allgood = FALSE; if (!opt_find_all) exit(2); NEXT_BETA: ADVANCE_MULTI_ITERATOR(); } } return allgood; } t_boolean gamma_beta_loop_oddchar_small(t_ffpz K, t_uint32 gstart, t_uint32 gend) { t_uint32 q, q2, g, oddstart, oddend, evenstart, evenend, index; t_uint64 logbeta, rad, halfrad, zero, x, M0; t_boolean allgood; allgood = TRUE; q = K->Z->q; q2 = q*q; rad = K->rad; if (rad == 0) rad = K->q - 1; ASSERT((rad % 2) == 0); halfrad = rad / 2; ASSERT((halfrad % 2) == 1); INIT_MULTI_ITERATOR(K->mi); ASSERT(!uselarge); M0 = multicounter; for (g = gstart; g < gend; g++) { ASSERT(haszero); ASSERT(multicounter == M0); oddstart = q*g; oddend = oddstart + ga_counts[2*g]; evenstart = oddend; evenend = evenstart + ga_counts[2*g + 1]; for (logbeta = 0; logbeta < halfrad; logbeta++) { for (index = oddstart; index < oddend; index++) { x = ga_table[index]; zero = ((x ^ multicounter) + lbit) & hbit; if (!zero) goto EVEN_ALPHA; } if (logbeta & 1) print_failure(K, q, logbeta + halfrad, g); else print_failure(K, q, logbeta, g); allgood = FALSE; if (!opt_find_all) exit(2); EVEN_ALPHA: for (index = evenstart; index < evenend; index++) { x = ga_table[index]; zero = ((x ^ multicounter) + lbit) & hbit; if (!zero) goto NEXT_BETA; } if (logbeta & 1) print_failure(K, q, logbeta, g); else print_failure(K, q, logbeta + halfrad, g); allgood = FALSE; if (!opt_find_all) exit(2); NEXT_BETA: ADVANCE_MULTI_ITERATOR_SMALL(); } } return allgood; } t_boolean gamma_beta_loop_oddchar_large(t_ffpz K, t_uint32 gstart, t_uint32 gend) { t_uint32 q, q2, g, oddstart, oddend, evenstart, evenend, index; t_uint64 logbeta, rad, halfrad, zero, x, M0; t_boolean allgood; allgood = TRUE; q = K->Z->q; q2 = q*q; rad = K->rad; if (rad == 0) rad = K->q - 1; ASSERT((rad % 2) == 0); halfrad = rad / 2; ASSERT((halfrad % 2) == 1); INIT_MULTI_ITERATOR(K->mi); ASSERT(uselarge); M0 = multicounter; for (g = gstart; g < gend; g++) { ASSERT(haszero); ASSERT(multicounter == M0); ASSERT(largecounter == 0); oddstart = q*g; oddend = oddstart + ga_counts[2*g]; evenstart = oddend; evenend = evenstart + ga_counts[2*g + 1]; for (logbeta = 0; logbeta < halfrad; logbeta++) { for (index = oddstart; index < oddend; index++) { x = ga_table[index]; zero = ((x ^ multicounter) + lbit) & hbit; if (!zero) { if (ga_table_large[index] == largecounter) continue; goto EVEN_ALPHA; } } if (logbeta & 1) print_failure(K, q, logbeta + halfrad, g); else print_failure(K, q, logbeta, g); allgood = FALSE; if (!opt_find_all) exit(2); EVEN_ALPHA: for (index = evenstart; index < evenend; index++) { x = ga_table[index]; zero = ((x ^ multicounter) + lbit) & hbit; if (!zero) { if (ga_table_large[index] == largecounter) continue; goto NEXT_BETA; } } if (logbeta & 1) print_failure(K, q, logbeta, g); else print_failure(K, q, logbeta + halfrad, g); allgood = FALSE; if (!opt_find_all) exit(2); NEXT_BETA: ADVANCE_MULTI_ITERATOR_LARGE(); } } return allgood; } t_boolean gamma_beta_loop_range(t_ffpz K, t_uint32 gstart, t_uint32 gend) { switch (K->type) { case FFPZ_CHAR_2: return gamma_beta_loop_char2(K, gstart, gend); break; case FFPZ_P3_F20: case FFPZ_CHAR_ODD: if (K->mi.uselarge) return gamma_beta_loop_oddchar_large(K, gstart, gend); else return gamma_beta_loop_oddchar_small(K, gstart, gend); break; } } typedef struct { t_ffpz K; t_uint32 gstart, gend; t_boolean allgood; } str_thread_info; void *gamma_beta_loop_worker(void *vinfo) { str_thread_info *info; info = (str_thread_info *)vinfo; info->allgood = gamma_beta_loop_range(info->K, info->gstart, info->gend); return NULL; } t_boolean gamma_beta_loop(t_ffpz K) { t_uint32 q, i, A, B, gstart, gend; pthread_t *threads; str_thread_info *args; t_boolean allgood; ASSERT(opt_nthreads >= 1); START_TIMER(); q = K->Z->q; IF_VERBOSE( switch (K->type) { case FFPZ_CHAR_2: printf("Gamma-beta loop: Characteristic two (general case)\n"); break; case FFPZ_P3_F20: case FFPZ_CHAR_ODD: if (K->mi.uselarge) printf("Gamma-beta loop: Odd characteristic (large case)\n"); else printf("Gamma-beta loop: Odd characteristic (small case)\n"); break; } if (opt_nthreads == 1) printf("Using 1 thread\n"); else printf("Using %u threads\n", opt_nthreads); ) threads = ARRAY_ALLOC(opt_nthreads, pthread_t); args = ARRAY_ALLOC(opt_nthreads, str_thread_info); A = (q + 1) / opt_nthreads; B = (q + 1) % opt_nthreads; gend = 0; for (i = 0; i < opt_nthreads; i++) { gstart = gend; gend = gstart + A; if (i < B) gend++; args[i].K = K; args[i].gstart = gstart; args[i].gend = gend; } ASSERT(gend == q + 1); /* thread 0 is this main thread */ for (i = 1; i < opt_nthreads; i++) { if (pthread_create(&(threads[i]), NULL, gamma_beta_loop_worker, &(args[i]))) { fprintf(stderr, "Error launching thread %u\n", i); exit(1); } } gamma_beta_loop_worker(&(args[0])); for (i = 1; i < opt_nthreads; i++) pthread_join(threads[i], NULL); allgood = TRUE; for (i = 0; i < opt_nthreads; i++) if (!args[i].allgood) allgood = FALSE; ARRAY_FREE(args); ARRAY_FREE(threads); STOP_TIMER("gamma-beta loop"); return allgood; } /*************** Main driver routine ***************/ void usage(t_boolean extended) { fprintf(stderr, "Usage: L3check [-h] [-m] [-v] [-na] [-t nthreads] [-all] p \n"); fprintf(stderr, " -h extended usage message with (long) explanation of argument format\n"); fprintf(stderr, " -m show Magma code to generate arguments\n"); fprintf(stderr, " -v verbose printing\n"); fprintf(stderr, " -na disable the antisort step\n"); fprintf(stderr, " -t use threads for the main search\n"); fprintf(stderr, " -all continue searching even if a counterexample is found\n"); if (!extended) return; fprintf(stderr, "\n"); fprintf(stderr, "The Fq-poly gives the coefficients of a polynomial over GF(p). The first item is the degree, n, then the next n + 1 items are the polynomial coefficients starting with the constant term. This polynomial is required to be monic and primitive, so that its root is a primitive element of GF(q) (where q = p^n).\n\n"); fprintf(stderr, "In the common case where q is prime, the polynomial will be x - z where z is some primitive element mod p. The coefficients will then be p-z 1, and the full argument list for the polynomial would be 1 p-z 1.\n\n"); fprintf(stderr, "Examples: For q = 7 where 3 is a primitive element, the Fq-poly part of the argument list would be 1 4 1. For q = 8, where the appropriate minimal polynomial is x^3 + x + 1, we would have 3 1 1 0 1.\n\n"); fprintf(stderr, "\n"); fprintf(stderr, "The Fq^3-poly gives the coefficients of a polynomial over GF(q). Again, the first item is the degree (which will always be 3), and the ensuing values are the coefficients starting with the constant term. Again, this polynomial is required to be monic and primitive, so that its root is a primitive element of GF(q^3).\n\n"); fprintf(stderr, "The coefficients are specified in log form with respect to the primitive element of GF(q). That is, if the coefficient c = z^e, then it is e that is used as the argument. To represent 0, the special value q - 1 is used.\n\n"); fprintf(stderr, "Examples: For q = 7, a suitable minimal polynomial is x^3 + 6*x^2 + 4. In GF(7), 1 is 3^0, 6 is 3^3, and 4 is 3^4; 0 is represented by 7 - 1 = 6. The appropriate coefficients are thus 4 6 3 0, and the Fq^3-poly part of the argument list would be 3 4 6 3 0.\n\n"); fprintf(stderr, "For q = 8, a minimal polynomial is x^3 + z*x^2 + z^5*x + z, where z is a root of t^3 + t + 1 = 0 in GF(8). The argument list component is thus 3 1 5 1 0.\n\n"); fprintf(stderr, "\n"); fprintf(stderr, "The prime list gives the primes that divide q^3 - 1. It consists simply of the number of such primes, followed by those primes in ascending order.\n\n"); fprintf(stderr, "Examples: For q = 7, 7^3 - 1 = 342 = 2 * 3^2 * 19, and so the prime list would be 3 2 3 19. For q = 8, 8^3 - 1 = 511 = 7*73, so the prime list would be 2 7 73.\n\n"); fprintf(stderr, "\n"); fprintf(stderr, "Putting all of the above together, we can see that the appropriate invocations for the example values are:\n\n"); fprintf(stderr, " q = 7: L3check 7 1 4 1 3 4 6 3 0 3 2 3 19\n"); fprintf(stderr, " q = 8: L3check 2 3 1 1 0 1 3 1 5 1 0 2 7 73\n\n"); fprintf(stderr, "\n"); fprintf(stderr, "Magma code to automate the generation of these arguments can be displayed by using the '-m' argument\n"); fprintf(stderr, "\n"); } void usage_exit() { usage(FALSE); exit(1); } void show_magma_code() { printf("\ L3args := function(q)\n\ F := GF(q);\n\ p := Characteristic(F);\n\ n := Degree(F);\n\ a := PrimitiveElement(F);\n\ pol1 := MinimalPolynomial(a);\n\ C1 := ChangeUniverse(Eltseq(pol1), Integers());\n\ K := ext;\n\ w := PrimitiveElement(K);\n\ pol2 := MinimalPolynomial(w, F);\n\ C2 := [ IsZero(c) select q-1 else Log(a, c) : c in Eltseq(pol2) ];\n\ primes := [ t[1] : t in Factorisation(q^3 - 1) ];\n\ args := [p,n] cat C1 cat [3] cat C2 cat [#primes] cat primes;\n\ return Prune(&cat [ IntegerToString(x) cat \" \" : x in args ]);\n\ end function;\n\ "); } void init_options() { opt_verbose = FALSE; opt_antisort = TRUE; opt_nthreads = 1; opt_find_all = FALSE; } #define REQUIRE_ARGS(n, code) \ { \ if (argc < n) \ usage_exit(); \ { code } \ argc -= n; \ argv += n; \ } void process_options(int *argcp, char ***argvp) { int argc; char **argv; argc = *argcp; argv = *argvp; while (argc > 0 && argv[0][0] == '-') { if (strcmp(argv[0], "-h") == 0) { usage(TRUE); exit(0); } else if (strcmp(argv[0], "-m") == 0) { show_magma_code(); exit(0); } else if (strcmp(argv[0], "-v") == 0) { REQUIRE_ARGS(1, opt_verbose = TRUE; ) } else if (strcmp(argv[0], "-na") == 0) { REQUIRE_ARGS(1, opt_antisort = FALSE; ) } else if (strcmp(argv[0], "-t") == 0) { REQUIRE_ARGS(2, opt_nthreads = atoi(argv[1]); ) if (opt_nthreads < 1) { fprintf(stderr, "Invalid number of threads specified\n"); usage_exit(); } } else if (strcmp(argv[0], "-all") == 0) { REQUIRE_ARGS(1, opt_find_all = TRUE; ) } else { fprintf(stderr, "Unrecognised option '%s'\n", argv[0]); usage_exit(); } } *argcp = argc; *argvp = argv; } t_uint32 *args_to_array(char *argv[], t_uint32 n) { t_uint32 *arr; t_uint32 i; arr = ARRAY_ALLOC(n, t_uint32); for (i = 0; i < n; i++) arr[i] = atoi(argv[i]); return arr; } t_ffpz args_to_field(int argc, char *argv[]) { t_uint32 p, n, deg; t_uint32 *Fqpoly; t_ffpoly F; t_ffzech Z; t_ffzech_elt *Fq3poly; t_ffpz K; t_uint32 nprimes; t_uint32 *primes; REQUIRE_ARGS(1, /* skip past program name */) process_options(&argc, &argv); REQUIRE_ARGS(2, p = atoi(argv[0]); n = atoi(argv[1]); ) REQUIRE_ARGS(n + 1, Fqpoly = args_to_array(argv, n + 1); if (Fqpoly[n] != 1) usage_exit(); ) F = ffpoly_create(p, n, Fqpoly); ARRAY_FREE(Fqpoly); if (F->q > QLIMIT) { fprintf(stderr, "q = p^n must be at most %u\n", QLIMIT); usage_exit(); } Z = ffzech_create(F); REQUIRE_ARGS(1, deg = atoi(argv[0]); ) REQUIRE_ARGS(deg + 1, Fq3poly = args_to_array(argv, deg + 1); if (Fq3poly[deg] != 0) usage_exit(); ) K = ffpz_create(Z, deg, Fq3poly); ARRAY_FREE(Fq3poly); REQUIRE_ARGS(1, nprimes = atoi(argv[0]); ) REQUIRE_ARGS(nprimes, primes = args_to_array(argv, nprimes); ) ffpz_store_primes(K, nprimes, primes); return K; } int main(int argc, char *argv[]) { t_ffpz K; init_options(); K = args_to_field(argc, argv); build_gamma_alpha_table(K); if (gamma_beta_loop(K)) printf("q = %u: SUCCESS\n", K->Z->q); return 0; }