186d7f5d3SJohn Marino /* mpn_jacobi_base -- limb/limb Jacobi symbol with restricted arguments.
286d7f5d3SJohn Marino
386d7f5d3SJohn Marino THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO
486d7f5d3SJohn Marino INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP.
586d7f5d3SJohn Marino
686d7f5d3SJohn Marino Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
786d7f5d3SJohn Marino
886d7f5d3SJohn Marino This file is part of the GNU MP Library.
986d7f5d3SJohn Marino
1086d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
1186d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
1286d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
1386d7f5d3SJohn Marino option) any later version.
1486d7f5d3SJohn Marino
1586d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
1686d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1786d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1886d7f5d3SJohn Marino License for more details.
1986d7f5d3SJohn Marino
2086d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
2186d7f5d3SJohn Marino along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
2286d7f5d3SJohn Marino
2386d7f5d3SJohn Marino #include "gmp.h"
2486d7f5d3SJohn Marino #include "gmp-impl.h"
2586d7f5d3SJohn Marino #include "longlong.h"
2686d7f5d3SJohn Marino
2786d7f5d3SJohn Marino
/* Select how factors of two are stripped from "a" (see the PROCESS_TWOS
   macros below).  A build may pre-define JACOBI_BASE_METHOD as 1, 2 or 3;
   otherwise the simple loop (method 2) is used.  The generic
   count_trailing_zeros is not very fast, and the extra trickery of method 3
   has proven to be less use than might have been thought.  */
#ifndef JACOBI_BASE_METHOD
#define JACOBI_BASE_METHOD  2
#endif
3486d7f5d3SJohn Marino
3586d7f5d3SJohn Marino
/* Method 1: use count_trailing_zeros.

   Both macros operate on the variables "a", "b" and "result_bit1" of the
   enclosing function: all trailing zero bits are shifted out of "a" in one
   step and the matching sign adjustment from JACOBI_TWOS_U_BIT1 is folded
   into result_bit1.  "a" must be non-zero.

   The body is wrapped in do/while(0) so that an invocation followed by a
   semicolon is exactly one statement, safe in an unbraced if/else.  */
#if JACOBI_BASE_METHOD == 1
#define PROCESS_TWOS_ANY                                \
  do                                                    \
    {                                                   \
      mp_limb_t  twos;                                  \
      count_trailing_zeros (twos, a);                   \
      result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b);      \
      a >>= twos;                                       \
    }                                                   \
  while (0)
#define PROCESS_TWOS_EVEN  PROCESS_TWOS_ANY
#endif
4786d7f5d3SJohn Marino
/* Method 2: use a simple loop.  A disadvantage of this is that there's a
   branch on a 50/50 chance of a 0 or 1 low bit.

   Both macros operate on the variables "a", "b" and "result_bit1" of the
   enclosing function.  PROCESS_TWOS_EVEN requires "a" to be even and
   non-zero: it shifts "a" right one bit at a time until it is odd, toggling
   result_bit1 by JACOBI_TWO_U_BIT1 (b) for every bit removed.
   PROCESS_TWOS_ANY does the same but is a no-op when "a" is already odd.

   Each body is wrapped in do/while(0) so that an invocation followed by a
   semicolon is exactly one statement; in particular the "if" inside
   PROCESS_TWOS_ANY can no longer capture an "else" at the call site.  */
#if JACOBI_BASE_METHOD == 2
#define PROCESS_TWOS_EVEN               \
  do                                    \
    {                                   \
      int  two;                         \
      two = JACOBI_TWO_U_BIT1 (b);      \
      do                                \
        {                               \
          a >>= 1;                      \
          result_bit1 ^= two;           \
          ASSERT (a != 0);              \
        }                               \
      while ((a & 1) == 0);             \
    }                                   \
  while (0)
#define PROCESS_TWOS_ANY                \
  do                                    \
    {                                   \
      if ((a & 1) == 0)                 \
        PROCESS_TWOS_EVEN;              \
    }                                   \
  while (0)
#endif
6786d7f5d3SJohn Marino
/* Method 3: process one bit arithmetically, then a simple loop.  This cuts
   the loop condition down to a 25/75 chance, which should branch predict
   better.  The CPU will need a reasonable variable left shift.

   Both macros operate on the variables "a", "b" and "result_bit1" of the
   enclosing function, and "a" must be non-zero.

   PROCESS_TWOS_EVEN requires "a" to be even.  It always removes the low
   zero bit, and removes a second one without branching when bit 1 is also
   zero ("mask" is 2 in that case).  Two removed bits cancel in the sign, so
   result_bit1 is toggled by "two" only when a single bit was removed, which
   is what two ^ (two & mask) evaluates to.

   PROCESS_TWOS_ANY accepts odd or even "a": "shift" is 1 only when "a" is
   even, and the sign is toggled only in that case.

   Any remaining zero bits are handled by the trailing loop, one at a time.

   Each body is wrapped in do/while(0) so that an invocation followed by a
   semicolon is exactly one statement, safe in an unbraced if/else.  */
#if JACOBI_BASE_METHOD == 3
#define PROCESS_TWOS_EVEN               \
  do                                    \
    {                                   \
      int  two, mask, shift;            \
                                        \
      two = JACOBI_TWO_U_BIT1 (b);      \
      mask = (~a & 2);                  \
      a >>= 1;                          \
                                        \
      shift = (~a & 1);                 \
      a >>= shift;                      \
      result_bit1 ^= two ^ (two & mask);\
                                        \
      while ((a & 1) == 0)              \
        {                               \
          a >>= 1;                      \
          result_bit1 ^= two;           \
          ASSERT (a != 0);              \
        }                               \
    }                                   \
  while (0)
#define PROCESS_TWOS_ANY                \
  do                                    \
    {                                   \
      int  two, mask, shift;            \
                                        \
      two = JACOBI_TWO_U_BIT1 (b);      \
      shift = (~a & 1);                 \
      a >>= shift;                      \
                                        \
      mask = shift << 1;                \
      result_bit1 ^= (two & mask);      \
                                        \
      while ((a & 1) == 0)              \
        {                               \
          a >>= 1;                      \
          result_bit1 ^= two;           \
          ASSERT (a != 0);              \
        }                               \
    }                                   \
  while (0)
#endif
11086d7f5d3SJohn Marino
11186d7f5d3SJohn Marino
11286d7f5d3SJohn Marino /* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but
11386d7f5d3SJohn Marino with a restricted range of inputs accepted, namely b>1, b odd, and a<=b.
11486d7f5d3SJohn Marino
11586d7f5d3SJohn Marino The initial result_bit1 is taken as a parameter for the convenience of
11686d7f5d3SJohn Marino mpz_kronecker_ui() et al. The sign changes both here and in those
11786d7f5d3SJohn Marino routines accumulate nicely in bit 1, see the JACOBI macros.
11886d7f5d3SJohn Marino
11986d7f5d3SJohn Marino The return value here is the normal +1, 0, or -1. Note that +1 and -1
12086d7f5d3SJohn Marino have bit 1 in the "BIT1" sense, which could be useful if the caller is
12186d7f5d3SJohn Marino accumulating it into some extended calculation.
12286d7f5d3SJohn Marino
12386d7f5d3SJohn Marino Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be
12486d7f5d3SJohn Marino possible, but a couple of tests suggest it's not a significant speedup,
12586d7f5d3SJohn Marino and may even be a slowdown, so what's here is good enough for now.
12686d7f5d3SJohn Marino
12786d7f5d3SJohn Marino Future: The code doesn't demand a<=b actually, so maybe this could be
12886d7f5d3SJohn Marino relaxed. All the places this is used currently call with a<=b though. */
12986d7f5d3SJohn Marino
13086d7f5d3SJohn Marino int
mpn_jacobi_base(mp_limb_t a,mp_limb_t b,int result_bit1)13186d7f5d3SJohn Marino mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
13286d7f5d3SJohn Marino {
13386d7f5d3SJohn Marino ASSERT (b & 1); /* b odd */
13486d7f5d3SJohn Marino ASSERT (b != 1);
13586d7f5d3SJohn Marino ASSERT (a <= b);
13686d7f5d3SJohn Marino
13786d7f5d3SJohn Marino if (a == 0)
13886d7f5d3SJohn Marino return 0;
13986d7f5d3SJohn Marino
14086d7f5d3SJohn Marino PROCESS_TWOS_ANY;
14186d7f5d3SJohn Marino if (a == 1)
14286d7f5d3SJohn Marino goto done;
14386d7f5d3SJohn Marino
14486d7f5d3SJohn Marino for (;;)
14586d7f5d3SJohn Marino {
14686d7f5d3SJohn Marino result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);
14786d7f5d3SJohn Marino MP_LIMB_T_SWAP (a, b);
14886d7f5d3SJohn Marino
14986d7f5d3SJohn Marino do
15086d7f5d3SJohn Marino {
15186d7f5d3SJohn Marino /* working on (a/b), a,b odd, a>=b */
15286d7f5d3SJohn Marino ASSERT (a & 1);
15386d7f5d3SJohn Marino ASSERT (b & 1);
15486d7f5d3SJohn Marino ASSERT (a >= b);
15586d7f5d3SJohn Marino
15686d7f5d3SJohn Marino if ((a -= b) == 0)
15786d7f5d3SJohn Marino return 0;
15886d7f5d3SJohn Marino
15986d7f5d3SJohn Marino PROCESS_TWOS_EVEN;
16086d7f5d3SJohn Marino if (a == 1)
16186d7f5d3SJohn Marino goto done;
16286d7f5d3SJohn Marino }
16386d7f5d3SJohn Marino while (a >= b);
16486d7f5d3SJohn Marino }
16586d7f5d3SJohn Marino
16686d7f5d3SJohn Marino done:
16786d7f5d3SJohn Marino return JACOBI_BIT1_TO_PN (result_bit1);
16886d7f5d3SJohn Marino }
169