xref: /dflybsd-src/contrib/gmp/mpz/jacobi.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
186d7f5d3SJohn Marino /* mpz_jacobi, mpz_legendre, mpz_kronecker -- mpz/mpz Jacobi symbols.
286d7f5d3SJohn Marino 
386d7f5d3SJohn Marino Copyright 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
486d7f5d3SJohn Marino 
586d7f5d3SJohn Marino This file is part of the GNU MP Library.
686d7f5d3SJohn Marino 
786d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify it
886d7f5d3SJohn Marino under the terms of the GNU Lesser General Public License as published by the
986d7f5d3SJohn Marino Free Software Foundation; either version 3 of the License, or (at your
1086d7f5d3SJohn Marino option) any later version.
1186d7f5d3SJohn Marino 
1286d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
1386d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1486d7f5d3SJohn Marino FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
1586d7f5d3SJohn Marino for more details.
1686d7f5d3SJohn Marino 
1786d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License along
1886d7f5d3SJohn Marino with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
1986d7f5d3SJohn Marino 
2086d7f5d3SJohn Marino #include <stdio.h>
2186d7f5d3SJohn Marino #include "gmp.h"
2286d7f5d3SJohn Marino #include "gmp-impl.h"
2386d7f5d3SJohn Marino #include "longlong.h"
2486d7f5d3SJohn Marino 
2586d7f5d3SJohn Marino 
2686d7f5d3SJohn Marino /* Change this to "#define TRACE(x) x" for some traces. */
2786d7f5d3SJohn Marino #define TRACE(x)
2886d7f5d3SJohn Marino 
2986d7f5d3SJohn Marino 
3086d7f5d3SJohn Marino #define MPN_RSHIFT_OR_COPY(dst,src,size,shift)                  \
3186d7f5d3SJohn Marino   do {                                                          \
3286d7f5d3SJohn Marino     if ((shift) != 0)                                           \
3386d7f5d3SJohn Marino       {                                                         \
3486d7f5d3SJohn Marino         ASSERT_NOCARRY (mpn_rshift (dst, src, size, shift));    \
3586d7f5d3SJohn Marino         (size) -= ((dst)[(size)-1] == 0);                       \
3686d7f5d3SJohn Marino       }                                                         \
3786d7f5d3SJohn Marino     else                                                        \
3886d7f5d3SJohn Marino       MPN_COPY (dst, src, size);                                \
3986d7f5d3SJohn Marino   } while (0)
4086d7f5d3SJohn Marino 
4186d7f5d3SJohn Marino 
4286d7f5d3SJohn Marino /* This code does triple duty as mpz_jacobi, mpz_legendre and mpz_kronecker.
4386d7f5d3SJohn Marino 
4486d7f5d3SJohn Marino    mpz_jacobi could assume b is odd, but the improvements from that seem
4586d7f5d3SJohn Marino    small compared to other operations, and anything significant should be
4686d7f5d3SJohn Marino    checked at run-time since we'd like odd b to go fast in mpz_kronecker
4786d7f5d3SJohn Marino    too.
4886d7f5d3SJohn Marino 
4986d7f5d3SJohn Marino    mpz_legendre could assume b is an odd prime, but knowing this doesn't
5086d7f5d3SJohn Marino    present any obvious benefits.  Result 0 wouldn't arise (unless "a" is a
5186d7f5d3SJohn Marino    multiple of b), but the checking for that takes little time compared to
5286d7f5d3SJohn Marino    other operations.
5386d7f5d3SJohn Marino 
5486d7f5d3SJohn Marino    The main loop is just a simple binary GCD with the jacobi symbol result
5586d7f5d3SJohn Marino    tracked during the reduction.
5686d7f5d3SJohn Marino 
5786d7f5d3SJohn Marino    The special cases for a or b fitting in one limb let mod_1 or modexact_1
5886d7f5d3SJohn Marino    get used, without any copying, and end up just as efficient as the mixed
5986d7f5d3SJohn Marino    precision mpz_kronecker_ui etc.
6086d7f5d3SJohn Marino 
6186d7f5d3SJohn Marino    When tdiv_qr is called it's not necessary to make "a" odd or make a
6286d7f5d3SJohn Marino    working copy of it, but tdiv_qr is going to be pretty slow so it's not
6386d7f5d3SJohn Marino    worth bothering trying to save anything for that case.
6486d7f5d3SJohn Marino 
6586d7f5d3SJohn Marino    Enhancements:
6686d7f5d3SJohn Marino 
6786d7f5d3SJohn Marino    mpn_bdiv_qr should be used instead of mpn_tdiv_qr.
6886d7f5d3SJohn Marino 
6986d7f5d3SJohn Marino    Some sort of multi-step algorithm should be used.  The current subtract
7086d7f5d3SJohn Marino    and shift for every bit is very inefficient.  Lehmer (per current gcdext)
7186d7f5d3SJohn Marino    would need some low bits included in its calculation to apply the sign
7286d7f5d3SJohn Marino    change for reciprocity.  Binary Lehmer keeps low bits to strip twos
7386d7f5d3SJohn Marino    anyway, so might be better suited.  Maybe the accelerated GCD style k-ary
7486d7f5d3SJohn Marino    reduction would work, if sign changes due to the extra factors it
7586d7f5d3SJohn Marino    introduces can be accounted for (or maybe they can be ignored).  */
7686d7f5d3SJohn Marino 
7786d7f5d3SJohn Marino 
7886d7f5d3SJohn Marino int
mpz_jacobi(mpz_srcptr a,mpz_srcptr b)7986d7f5d3SJohn Marino mpz_jacobi (mpz_srcptr a, mpz_srcptr b)
8086d7f5d3SJohn Marino {
8186d7f5d3SJohn Marino   mp_srcptr  asrcp, bsrcp;
8286d7f5d3SJohn Marino   mp_size_t  asize, bsize;
8386d7f5d3SJohn Marino   mp_ptr     ap, bp;
8486d7f5d3SJohn Marino   mp_limb_t  alow, blow, ahigh, bhigh, asecond, bsecond;
8586d7f5d3SJohn Marino   unsigned   atwos, btwos;
8686d7f5d3SJohn Marino   int        result_bit1;
8786d7f5d3SJohn Marino   TMP_DECL;
8886d7f5d3SJohn Marino 
8986d7f5d3SJohn Marino   TRACE (printf ("start asize=%d bsize=%d\n", SIZ(a), SIZ(b));
9086d7f5d3SJohn Marino          mpz_trace (" a", a);
9186d7f5d3SJohn Marino          mpz_trace (" b", b));
9286d7f5d3SJohn Marino 
9386d7f5d3SJohn Marino   asize = SIZ(a);
9486d7f5d3SJohn Marino   asrcp = PTR(a);
9586d7f5d3SJohn Marino   alow = asrcp[0];
9686d7f5d3SJohn Marino 
9786d7f5d3SJohn Marino   bsize = SIZ(b);
9886d7f5d3SJohn Marino   if (bsize == 0)
9986d7f5d3SJohn Marino     return JACOBI_LS0 (alow, asize);  /* (a/0) */
10086d7f5d3SJohn Marino 
10186d7f5d3SJohn Marino   bsrcp = PTR(b);
10286d7f5d3SJohn Marino   blow = bsrcp[0];
10386d7f5d3SJohn Marino 
10486d7f5d3SJohn Marino   if (asize == 0)
10586d7f5d3SJohn Marino     return JACOBI_0LS (blow, bsize);  /* (0/b) */
10686d7f5d3SJohn Marino 
10786d7f5d3SJohn Marino   /* (even/even)=0 */
10886d7f5d3SJohn Marino   if (((alow | blow) & 1) == 0)
10986d7f5d3SJohn Marino     return 0;
11086d7f5d3SJohn Marino 
11186d7f5d3SJohn Marino   /* account for effect of sign of b, then ignore it */
11286d7f5d3SJohn Marino   result_bit1 = JACOBI_BSGN_SS_BIT1 (asize, bsize);
11386d7f5d3SJohn Marino   bsize = ABS (bsize);
11486d7f5d3SJohn Marino 
11586d7f5d3SJohn Marino   /* low zero limbs on b can be discarded */
11686d7f5d3SJohn Marino   JACOBI_STRIP_LOW_ZEROS (result_bit1, alow, bsrcp, bsize, blow);
11786d7f5d3SJohn Marino 
11886d7f5d3SJohn Marino   count_trailing_zeros (btwos, blow);
11986d7f5d3SJohn Marino   TRACE (printf ("b twos %u\n", btwos));
12086d7f5d3SJohn Marino 
12186d7f5d3SJohn Marino   /* establish shifted blow */
12286d7f5d3SJohn Marino   blow >>= btwos;
12386d7f5d3SJohn Marino   if (bsize > 1)
12486d7f5d3SJohn Marino     {
12586d7f5d3SJohn Marino       bsecond = bsrcp[1];
12686d7f5d3SJohn Marino       if (btwos != 0)
12786d7f5d3SJohn Marino         blow |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;
12886d7f5d3SJohn Marino     }
12986d7f5d3SJohn Marino 
13086d7f5d3SJohn Marino   /* account for effect of sign of a, then ignore it */
13186d7f5d3SJohn Marino   result_bit1 ^= JACOBI_ASGN_SU_BIT1 (asize, blow);
13286d7f5d3SJohn Marino   asize = ABS (asize);
13386d7f5d3SJohn Marino 
13486d7f5d3SJohn Marino   if (bsize == 1 || (bsize == 2 && (bsecond >> btwos) == 0))
13586d7f5d3SJohn Marino     {
13686d7f5d3SJohn Marino       /* special case one limb b, use modexact and no copying */
13786d7f5d3SJohn Marino 
13886d7f5d3SJohn Marino       /* (a/2)=(2/a) with a odd, and if b is even then a is odd here */
13986d7f5d3SJohn Marino       result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
14086d7f5d3SJohn Marino 
14186d7f5d3SJohn Marino       if (blow == 1)   /* (a/1)=1 always */
14286d7f5d3SJohn Marino         return JACOBI_BIT1_TO_PN (result_bit1);
14386d7f5d3SJohn Marino 
14486d7f5d3SJohn Marino       JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, alow, asrcp, asize, blow);
14586d7f5d3SJohn Marino       TRACE (printf ("base (%lu/%lu) with %d\n",
14686d7f5d3SJohn Marino                      alow, blow, JACOBI_BIT1_TO_PN (result_bit1)));
14786d7f5d3SJohn Marino       return mpn_jacobi_base (alow, blow, result_bit1);
14886d7f5d3SJohn Marino     }
14986d7f5d3SJohn Marino 
15086d7f5d3SJohn Marino   /* Discard low zero limbs of a.  Usually there won't be anything to
15186d7f5d3SJohn Marino      strip, hence not bothering with it for the bsize==1 case.  */
15286d7f5d3SJohn Marino   JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, asrcp, asize, alow);
15386d7f5d3SJohn Marino 
15486d7f5d3SJohn Marino   count_trailing_zeros (atwos, alow);
15586d7f5d3SJohn Marino   TRACE (printf ("a twos %u\n", atwos));
15686d7f5d3SJohn Marino   result_bit1 ^= JACOBI_TWOS_U_BIT1 (atwos, blow);
15786d7f5d3SJohn Marino 
15886d7f5d3SJohn Marino   /* establish shifted alow */
15986d7f5d3SJohn Marino   alow >>= atwos;
16086d7f5d3SJohn Marino   if (asize > 1)
16186d7f5d3SJohn Marino     {
16286d7f5d3SJohn Marino       asecond = asrcp[1];
16386d7f5d3SJohn Marino       if (atwos != 0)
16486d7f5d3SJohn Marino         alow |= (asecond << (GMP_NUMB_BITS - atwos)) & GMP_NUMB_MASK;
16586d7f5d3SJohn Marino     }
16686d7f5d3SJohn Marino 
16786d7f5d3SJohn Marino   /* (a/2)=(2/a) with a odd */
16886d7f5d3SJohn Marino   result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
16986d7f5d3SJohn Marino 
17086d7f5d3SJohn Marino   if (asize == 1 || (asize == 2 && (asecond >> atwos) == 0))
17186d7f5d3SJohn Marino     {
17286d7f5d3SJohn Marino       /* another special case with modexact and no copying */
17386d7f5d3SJohn Marino 
17486d7f5d3SJohn Marino       if (alow == 1)  /* (1/b)=1 always */
17586d7f5d3SJohn Marino         return JACOBI_BIT1_TO_PN (result_bit1);
17686d7f5d3SJohn Marino 
17786d7f5d3SJohn Marino       /* b still has its twos, so cancel out their effect */
17886d7f5d3SJohn Marino       result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
17986d7f5d3SJohn Marino 
18086d7f5d3SJohn Marino       result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);  /* now (b/a) */
18186d7f5d3SJohn Marino       JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, blow, bsrcp, bsize, alow);
18286d7f5d3SJohn Marino       TRACE (printf ("base (%lu/%lu) with %d\n",
18386d7f5d3SJohn Marino                      blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
18486d7f5d3SJohn Marino       return mpn_jacobi_base (blow, alow, result_bit1);
18586d7f5d3SJohn Marino     }
18686d7f5d3SJohn Marino 
18786d7f5d3SJohn Marino 
18886d7f5d3SJohn Marino   TMP_MARK;
18986d7f5d3SJohn Marino   TMP_ALLOC_LIMBS_2 (ap, asize, bp, bsize);
19086d7f5d3SJohn Marino 
19186d7f5d3SJohn Marino   MPN_RSHIFT_OR_COPY (ap, asrcp, asize, atwos);
19286d7f5d3SJohn Marino   ASSERT (alow == ap[0]);
19386d7f5d3SJohn Marino   TRACE (mpn_trace ("stripped a", ap, asize));
19486d7f5d3SJohn Marino 
19586d7f5d3SJohn Marino   MPN_RSHIFT_OR_COPY (bp, bsrcp, bsize, btwos);
19686d7f5d3SJohn Marino   ASSERT (blow == bp[0]);
19786d7f5d3SJohn Marino   TRACE (mpn_trace ("stripped b", bp, bsize));
19886d7f5d3SJohn Marino 
19986d7f5d3SJohn Marino   /* swap if necessary to make a longer than b */
20086d7f5d3SJohn Marino   if (asize < bsize)
20186d7f5d3SJohn Marino     {
20286d7f5d3SJohn Marino       TRACE (printf ("swap\n"));
20386d7f5d3SJohn Marino       MPN_PTR_SWAP (ap,asize, bp,bsize);
20486d7f5d3SJohn Marino       MP_LIMB_T_SWAP (alow, blow);
20586d7f5d3SJohn Marino       result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
20686d7f5d3SJohn Marino     }
20786d7f5d3SJohn Marino 
20886d7f5d3SJohn Marino   /* If a is bigger than b then reduce to a mod b.
20986d7f5d3SJohn Marino      Division is much faster than chipping away at "a" bit-by-bit. */
21086d7f5d3SJohn Marino   if (asize > bsize)
21186d7f5d3SJohn Marino     {
21286d7f5d3SJohn Marino       mp_ptr  rp, qp;
21386d7f5d3SJohn Marino 
21486d7f5d3SJohn Marino       TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize));
21586d7f5d3SJohn Marino 
21686d7f5d3SJohn Marino       TMP_ALLOC_LIMBS_2 (rp, bsize, qp, asize-bsize+1);
21786d7f5d3SJohn Marino       mpn_tdiv_qr (qp, rp, (mp_size_t) 0, ap, asize, bp, bsize);
21886d7f5d3SJohn Marino       ap = rp;
21986d7f5d3SJohn Marino       asize = bsize;
22086d7f5d3SJohn Marino       MPN_NORMALIZE (ap, asize);
22186d7f5d3SJohn Marino 
22286d7f5d3SJohn Marino       TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize);
22386d7f5d3SJohn Marino              mpn_trace (" a", ap, asize);
22486d7f5d3SJohn Marino              mpn_trace (" b", bp, bsize));
22586d7f5d3SJohn Marino 
22686d7f5d3SJohn Marino       if (asize == 0)  /* (0/b)=0 for b!=1 */
22786d7f5d3SJohn Marino         goto zero;
22886d7f5d3SJohn Marino 
22986d7f5d3SJohn Marino       alow = ap[0];
23086d7f5d3SJohn Marino       goto strip_a;
23186d7f5d3SJohn Marino     }
23286d7f5d3SJohn Marino 
23386d7f5d3SJohn Marino   for (;;)
23486d7f5d3SJohn Marino     {
23586d7f5d3SJohn Marino       ASSERT (asize >= 1);         /* a,b non-empty */
23686d7f5d3SJohn Marino       ASSERT (bsize >= 1);
23786d7f5d3SJohn Marino       ASSERT (ap[asize-1] != 0);   /* a,b normalized (and hence non-zero) */
23886d7f5d3SJohn Marino       ASSERT (bp[bsize-1] != 0);
23986d7f5d3SJohn Marino       ASSERT (alow == ap[0]);      /* low limb copies should be correct */
24086d7f5d3SJohn Marino       ASSERT (blow == bp[0]);
24186d7f5d3SJohn Marino       ASSERT (alow & 1);           /* a,b odd */
24286d7f5d3SJohn Marino       ASSERT (blow & 1);
24386d7f5d3SJohn Marino 
24486d7f5d3SJohn Marino       TRACE (printf ("top asize=%ld bsize=%ld\n", asize, bsize);
24586d7f5d3SJohn Marino              mpn_trace (" a", ap, asize);
24686d7f5d3SJohn Marino              mpn_trace (" b", bp, bsize));
24786d7f5d3SJohn Marino 
24886d7f5d3SJohn Marino       /* swap if necessary to make a>=b, applying reciprocity
24986d7f5d3SJohn Marino          high limbs are almost always enough to tell which is bigger */
25086d7f5d3SJohn Marino       if (asize < bsize
25186d7f5d3SJohn Marino           || (asize == bsize
25286d7f5d3SJohn Marino               && ((ahigh=ap[asize-1]) < (bhigh=bp[asize-1])
25386d7f5d3SJohn Marino                   || (ahigh == bhigh
25486d7f5d3SJohn Marino                       && mpn_cmp (ap, bp, asize-1) < 0))))
25586d7f5d3SJohn Marino         {
25686d7f5d3SJohn Marino           TRACE (printf ("swap\n"));
25786d7f5d3SJohn Marino           MPN_PTR_SWAP (ap,asize, bp,bsize);
25886d7f5d3SJohn Marino           MP_LIMB_T_SWAP (alow, blow);
25986d7f5d3SJohn Marino           result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
26086d7f5d3SJohn Marino         }
26186d7f5d3SJohn Marino 
26286d7f5d3SJohn Marino       if (asize == 1)
26386d7f5d3SJohn Marino         break;
26486d7f5d3SJohn Marino 
26586d7f5d3SJohn Marino       /* a = a-b */
26686d7f5d3SJohn Marino       ASSERT (asize >= bsize);
26786d7f5d3SJohn Marino       ASSERT_NOCARRY (mpn_sub (ap, ap, asize, bp, bsize));
26886d7f5d3SJohn Marino       MPN_NORMALIZE (ap, asize);
26986d7f5d3SJohn Marino       alow = ap[0];
27086d7f5d3SJohn Marino 
27186d7f5d3SJohn Marino       /* (0/b)=0 for b!=1.  b!=1 when a==0 because otherwise would have had
27286d7f5d3SJohn Marino          a==1 which is asize==1 and would have exited above.  */
27386d7f5d3SJohn Marino       if (asize == 0)
27486d7f5d3SJohn Marino         goto zero;
27586d7f5d3SJohn Marino 
27686d7f5d3SJohn Marino     strip_a:
27786d7f5d3SJohn Marino       /* low zero limbs on a can be discarded */
27886d7f5d3SJohn Marino       JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, ap, asize, alow);
27986d7f5d3SJohn Marino 
28086d7f5d3SJohn Marino       if ((alow & 1) == 0)
28186d7f5d3SJohn Marino         {
28286d7f5d3SJohn Marino           /* factors of 2 from a */
28386d7f5d3SJohn Marino           unsigned  twos;
28486d7f5d3SJohn Marino           count_trailing_zeros (twos, alow);
28586d7f5d3SJohn Marino           TRACE (printf ("twos %u\n", twos));
28686d7f5d3SJohn Marino           result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, blow);
28786d7f5d3SJohn Marino           ASSERT_NOCARRY (mpn_rshift (ap, ap, asize, twos));
28886d7f5d3SJohn Marino           asize -= (ap[asize-1] == 0);
28986d7f5d3SJohn Marino           alow = ap[0];
29086d7f5d3SJohn Marino         }
29186d7f5d3SJohn Marino     }
29286d7f5d3SJohn Marino 
29386d7f5d3SJohn Marino   ASSERT (asize == 1 && bsize == 1);  /* just alow and blow left */
29486d7f5d3SJohn Marino   TMP_FREE;
29586d7f5d3SJohn Marino 
29686d7f5d3SJohn Marino   /* (1/b)=1 always (in this case have b==1 because a>=b) */
29786d7f5d3SJohn Marino   if (alow == 1)
29886d7f5d3SJohn Marino     return JACOBI_BIT1_TO_PN (result_bit1);
29986d7f5d3SJohn Marino 
30086d7f5d3SJohn Marino   /* swap with reciprocity and do (b/a) */
30186d7f5d3SJohn Marino   result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
30286d7f5d3SJohn Marino   TRACE (printf ("base (%lu/%lu) with %d\n",
30386d7f5d3SJohn Marino                  blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
30486d7f5d3SJohn Marino   return mpn_jacobi_base (blow, alow, result_bit1);
30586d7f5d3SJohn Marino 
30686d7f5d3SJohn Marino  zero:
30786d7f5d3SJohn Marino   TMP_FREE;
30886d7f5d3SJohn Marino   return 0;
30986d7f5d3SJohn Marino }
310