1*86d7f5d3SJohn Marino /* gmp_nextprime -- generate small primes reasonably efficiently for internal
2*86d7f5d3SJohn Marino GMP needs.
3*86d7f5d3SJohn Marino
4*86d7f5d3SJohn Marino Contributed to the GNU project by Torbjorn Granlund. Miscellaneous
5*86d7f5d3SJohn Marino improvements by Martin Boij.
6*86d7f5d3SJohn Marino
7*86d7f5d3SJohn Marino THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
8*86d7f5d3SJohn Marino SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
9*86d7f5d3SJohn Marino GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
10*86d7f5d3SJohn Marino
11*86d7f5d3SJohn Marino Copyright 2009 Free Software Foundation, Inc.
12*86d7f5d3SJohn Marino
13*86d7f5d3SJohn Marino This file is part of the GNU MP Library.
14*86d7f5d3SJohn Marino
15*86d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
16*86d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
17*86d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
18*86d7f5d3SJohn Marino option) any later version.
19*86d7f5d3SJohn Marino
20*86d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
21*86d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
22*86d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
23*86d7f5d3SJohn Marino License for more details.
24*86d7f5d3SJohn Marino
25*86d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
26*86d7f5d3SJohn Marino along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
27*86d7f5d3SJohn Marino
28*86d7f5d3SJohn Marino /*
29*86d7f5d3SJohn Marino Optimisation ideas:
30*86d7f5d3SJohn Marino
31*86d7f5d3SJohn Marino 1. Unroll the sieving loops. Should reach 1 write/cycle. That would be a 2x
32*86d7f5d3SJohn Marino improvement.
33*86d7f5d3SJohn Marino
34*86d7f5d3SJohn Marino 2. Separate sieving with primes p < SIEVESIZE and p >= SIEVESIZE. The latter
35*86d7f5d3SJohn Marino will need at most one write, and thus not need any inner loop.
36*86d7f5d3SJohn Marino
37*86d7f5d3SJohn Marino 3. For primes p >= SIEVESIZE, i.e., typically the majority of primes, we
38*86d7f5d3SJohn Marino perform more than one division per sieving write. That might dominate the
39*86d7f5d3SJohn Marino entire run time for the nextprime function. An incrementally initialised
40*86d7f5d3SJohn Marino remainder table of Pi(65536) = 6542 16-bit entries could replace that
41*86d7f5d3SJohn Marino division.
42*86d7f5d3SJohn Marino */
43*86d7f5d3SJohn Marino
44*86d7f5d3SJohn Marino #include "gmp.h"
45*86d7f5d3SJohn Marino #include "gmp-impl.h"
46*86d7f5d3SJohn Marino #include <string.h> /* for memset */
47*86d7f5d3SJohn Marino
48*86d7f5d3SJohn Marino
49*86d7f5d3SJohn Marino unsigned long int
gmp_nextprime(gmp_primesieve_t * ps)50*86d7f5d3SJohn Marino gmp_nextprime (gmp_primesieve_t *ps)
51*86d7f5d3SJohn Marino {
52*86d7f5d3SJohn Marino unsigned long p, d, pi;
53*86d7f5d3SJohn Marino unsigned char *sp;
54*86d7f5d3SJohn Marino static unsigned char addtab[] =
55*86d7f5d3SJohn Marino { 2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,8,6,4,6,2,4,6,2,6,6,4,
56*86d7f5d3SJohn Marino 2,4,6,2,6,4,2,4,2,10,2,10 };
57*86d7f5d3SJohn Marino unsigned char *addp = addtab;
58*86d7f5d3SJohn Marino unsigned long ai;
59*86d7f5d3SJohn Marino
60*86d7f5d3SJohn Marino /* Look for already sieved primes. A sentinel at the end of the sieving
61*86d7f5d3SJohn Marino area allows us to use a very simple loop here. */
62*86d7f5d3SJohn Marino d = ps->d;
63*86d7f5d3SJohn Marino sp = ps->s + d;
64*86d7f5d3SJohn Marino while (*sp != 0)
65*86d7f5d3SJohn Marino sp++;
66*86d7f5d3SJohn Marino if (sp != ps->s + SIEVESIZE)
67*86d7f5d3SJohn Marino {
68*86d7f5d3SJohn Marino d = sp - ps->s;
69*86d7f5d3SJohn Marino ps->d = d + 1;
70*86d7f5d3SJohn Marino return ps->s0 + 2 * d;
71*86d7f5d3SJohn Marino }
72*86d7f5d3SJohn Marino
73*86d7f5d3SJohn Marino /* Handle the number 2 separately. */
74*86d7f5d3SJohn Marino if (ps->s0 < 3)
75*86d7f5d3SJohn Marino {
76*86d7f5d3SJohn Marino ps->s0 = 3 - 2 * SIEVESIZE; /* Tricky */
77*86d7f5d3SJohn Marino return 2;
78*86d7f5d3SJohn Marino }
79*86d7f5d3SJohn Marino
80*86d7f5d3SJohn Marino /* Exhausted computed primes. Resieve, then call ourselves recursively. */
81*86d7f5d3SJohn Marino
82*86d7f5d3SJohn Marino #if 0
83*86d7f5d3SJohn Marino for (sp = ps->s; sp < ps->s + SIEVESIZE; sp++)
84*86d7f5d3SJohn Marino *sp = 0;
85*86d7f5d3SJohn Marino #else
86*86d7f5d3SJohn Marino memset (ps->s, 0, SIEVESIZE);
87*86d7f5d3SJohn Marino #endif
88*86d7f5d3SJohn Marino
89*86d7f5d3SJohn Marino ps->s0 += 2 * SIEVESIZE;
90*86d7f5d3SJohn Marino
91*86d7f5d3SJohn Marino /* Update sqrt_s0 as needed. */
92*86d7f5d3SJohn Marino while ((ps->sqrt_s0 + 1) * (ps->sqrt_s0 + 1) <= ps->s0 + 2 * SIEVESIZE - 1)
93*86d7f5d3SJohn Marino ps->sqrt_s0++;
94*86d7f5d3SJohn Marino
95*86d7f5d3SJohn Marino pi = ((ps->s0 + 3) / 2) % 3;
96*86d7f5d3SJohn Marino if (pi > 0)
97*86d7f5d3SJohn Marino pi = 3 - pi;
98*86d7f5d3SJohn Marino if (ps->s0 + 2 * pi <= 3)
99*86d7f5d3SJohn Marino pi += 3;
100*86d7f5d3SJohn Marino sp = ps->s + pi;
101*86d7f5d3SJohn Marino while (sp < ps->s + SIEVESIZE)
102*86d7f5d3SJohn Marino {
103*86d7f5d3SJohn Marino *sp = 1, sp += 3;
104*86d7f5d3SJohn Marino }
105*86d7f5d3SJohn Marino
106*86d7f5d3SJohn Marino pi = ((ps->s0 + 5) / 2) % 5;
107*86d7f5d3SJohn Marino if (pi > 0)
108*86d7f5d3SJohn Marino pi = 5 - pi;
109*86d7f5d3SJohn Marino if (ps->s0 + 2 * pi <= 5)
110*86d7f5d3SJohn Marino pi += 5;
111*86d7f5d3SJohn Marino sp = ps->s + pi;
112*86d7f5d3SJohn Marino while (sp < ps->s + SIEVESIZE)
113*86d7f5d3SJohn Marino {
114*86d7f5d3SJohn Marino *sp = 1, sp += 5;
115*86d7f5d3SJohn Marino }
116*86d7f5d3SJohn Marino
117*86d7f5d3SJohn Marino pi = ((ps->s0 + 7) / 2) % 7;
118*86d7f5d3SJohn Marino if (pi > 0)
119*86d7f5d3SJohn Marino pi = 7 - pi;
120*86d7f5d3SJohn Marino if (ps->s0 + 2 * pi <= 7)
121*86d7f5d3SJohn Marino pi += 7;
122*86d7f5d3SJohn Marino sp = ps->s + pi;
123*86d7f5d3SJohn Marino while (sp < ps->s + SIEVESIZE)
124*86d7f5d3SJohn Marino {
125*86d7f5d3SJohn Marino *sp = 1, sp += 7;
126*86d7f5d3SJohn Marino }
127*86d7f5d3SJohn Marino
128*86d7f5d3SJohn Marino p = 11;
129*86d7f5d3SJohn Marino ai = 0;
130*86d7f5d3SJohn Marino while (p <= ps->sqrt_s0)
131*86d7f5d3SJohn Marino {
132*86d7f5d3SJohn Marino pi = ((ps->s0 + p) / 2) % p;
133*86d7f5d3SJohn Marino if (pi > 0)
134*86d7f5d3SJohn Marino pi = p - pi;
135*86d7f5d3SJohn Marino if (ps->s0 + 2 * pi <= p)
136*86d7f5d3SJohn Marino pi += p;
137*86d7f5d3SJohn Marino sp = ps->s + pi;
138*86d7f5d3SJohn Marino while (sp < ps->s + SIEVESIZE)
139*86d7f5d3SJohn Marino {
140*86d7f5d3SJohn Marino *sp = 1, sp += p;
141*86d7f5d3SJohn Marino }
142*86d7f5d3SJohn Marino p += addp[ai];
143*86d7f5d3SJohn Marino ai = (ai + 1) % 48;
144*86d7f5d3SJohn Marino }
145*86d7f5d3SJohn Marino ps->d = 0;
146*86d7f5d3SJohn Marino return gmp_nextprime (ps);
147*86d7f5d3SJohn Marino }
148*86d7f5d3SJohn Marino
149*86d7f5d3SJohn Marino void
gmp_init_primesieve(gmp_primesieve_t * ps)150*86d7f5d3SJohn Marino gmp_init_primesieve (gmp_primesieve_t *ps)
151*86d7f5d3SJohn Marino {
152*86d7f5d3SJohn Marino ps->s0 = 0;
153*86d7f5d3SJohn Marino ps->sqrt_s0 = 0;
154*86d7f5d3SJohn Marino ps->d = SIEVESIZE;
155*86d7f5d3SJohn Marino ps->s[SIEVESIZE] = 0; /* sentinel */
156*86d7f5d3SJohn Marino }
157