xref: /netbsd-src/external/lgpl3/gmp/dist/primesieve.c (revision 72c7faa4dbb41dbb0238d6b4a109da0d4b236dd4)
1 /* primesieve (BIT_ARRAY, N) -- Fills the BIT_ARRAY with a mask for primes up to N.
2 
3 Contributed to the GNU project by Marco Bodrato.
4 
5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
6 IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
7 IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
8 DISAPPEAR IN A FUTURE GNU MP RELEASE.
9 
10 Copyright 2010-2012, 2015, 2016 Free Software Foundation, Inc.
11 
12 This file is part of the GNU MP Library.
13 
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of either:
16 
17   * the GNU Lesser General Public License as published by the Free
18     Software Foundation; either version 3 of the License, or (at your
19     option) any later version.
20 
21 or
22 
23   * the GNU General Public License as published by the Free Software
24     Foundation; either version 2 of the License, or (at your option) any
25     later version.
26 
27 or both in parallel, as here.
28 
29 The GNU MP Library is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32 for more details.
33 
34 You should have received copies of the GNU General Public License and the
35 GNU Lesser General Public License along with the GNU MP Library.  If not,
36 see https://www.gnu.org/licenses/.  */
37 
38 #include "gmp-impl.h"
39 
40 #if 0
41 static mp_limb_t
42 bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
43 #endif
44 
45 /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
46 static mp_limb_t
id_to_n(mp_limb_t id)47 id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
48 
49 /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
50 static mp_limb_t
n_to_bit(mp_limb_t n)51 n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
52 
53 #if 0
54 static mp_size_t
55 primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
56 #endif
57 
58 #if GMP_LIMB_BITS > 61
59 #define SIEVE_SEED CNST_LIMB(0x3294C9E069128480)
60 #if GMP_LIMB_BITS == 64
61 /* 110bits pre-sieved mask for primes 5, 11*/
62 #define SIEVE_MASK1 CNST_LIMB(0x81214a1204892058)
63 #define SIEVE_MASKT CNST_LIMB(0xc8130681244)
64 /* 182bits pre-sieved mask for primes 7, 13*/
65 #define SIEVE_2MSK1 CNST_LIMB(0x9402180c40230184)
66 #define SIEVE_2MSK2 CNST_LIMB(0x0285021088402120)
67 #define SIEVE_2MSKT CNST_LIMB(0xa41210084421)
68 #define SEED_LIMIT 210
69 #else
70 #define SEED_LIMIT 202
71 #endif
72 #else
73 #if GMP_LIMB_BITS > 30
74 #define SIEVE_SEED CNST_LIMB(0x69128480)
75 #if GMP_LIMB_BITS == 32
76 /* 70bits pre-sieved mask for primes 5, 7*/
77 #define SIEVE_MASK1 CNST_LIMB(0x12148960)
78 #define SIEVE_MASK2 CNST_LIMB(0x44a120cc)
79 #define SIEVE_MASKT CNST_LIMB(0x1a)
80 #define SEED_LIMIT 120
81 #else
82 #define SEED_LIMIT 114
83 #endif
84 #else
85 #if GMP_LIMB_BITS > 15
86 #define SIEVE_SEED CNST_LIMB(0x8480)
87 #define SEED_LIMIT 54
88 #else
89 #if GMP_LIMB_BITS > 7
90 #define SIEVE_SEED CNST_LIMB(0x80)
91 #define SEED_LIMIT 34
92 #else
93 #define SIEVE_SEED CNST_LIMB(0x0)
94 #define SEED_LIMIT 24
95 #endif /* 7 */
96 #endif /* 15 */
97 #endif /* 30 */
98 #endif /* 61 */
99 
100 #define SET_OFF1(m1, m2, M1, M2, off, BITS)		\
101   if (off) {						\
102     if (off < GMP_LIMB_BITS) {				\
103       m1 = (M1 >> off) | (M2 << (GMP_LIMB_BITS - off));	\
104       if (off <= BITS - GMP_LIMB_BITS) {		\
105 	m2 = M1 << (BITS - GMP_LIMB_BITS - off)		\
106 	  | M2 >> off;					\
107       } else {						\
108 	m1 |= M1 << (BITS - off);			\
109 	m2 = M1 >> (off + GMP_LIMB_BITS - BITS);	\
110       }							\
111     } else {						\
112       m1 = M1 << (BITS - off)				\
113 	| M2 >> (off - GMP_LIMB_BITS);			\
114       m2 = M2 << (BITS - off)				\
115 	| M1 >> (off + GMP_LIMB_BITS - BITS);		\
116     }							\
117   } else {						\
118     m1 = M1; m2 = M2;					\
119   }
120 
121 #define SET_OFF2(m1, m2, m3, M1, M2, M3, off, BITS)	\
122   if (off) {						\
123     if (off <= GMP_LIMB_BITS) {				\
124       m1 = M2 << (GMP_LIMB_BITS - off);			\
125       m2 = M3 << (GMP_LIMB_BITS - off);			\
126       if (off != GMP_LIMB_BITS) {			\
127 	m1 |= (M1 >> off);				\
128 	m2 |= (M2 >> off);				\
129       }							\
130       if (off <= BITS - 2 * GMP_LIMB_BITS) {		\
131 	m3 = M1 << (BITS - 2 * GMP_LIMB_BITS - off)	\
132 	  | M3 >> off;					\
133       } else {						\
134 	m2 |= M1 << (BITS - GMP_LIMB_BITS - off);	\
135 	m3 = M1 >> (off + 2 * GMP_LIMB_BITS - BITS);	\
136       }							\
137     } else if (off < 2 *GMP_LIMB_BITS) {		\
138       m1 = M2 >> (off - GMP_LIMB_BITS)			\
139 	| M3 << (2 * GMP_LIMB_BITS - off);		\
140       if (off <= BITS - GMP_LIMB_BITS) {		\
141 	m2 = M3 >> (off - GMP_LIMB_BITS)		\
142 	  | M1 << (BITS - GMP_LIMB_BITS - off);		\
143 	m3 = M2 << (BITS - GMP_LIMB_BITS - off);	\
144 	if (off != BITS - GMP_LIMB_BITS) {		\
145 	  m3 |= M1 >> (off + 2 * GMP_LIMB_BITS - BITS);	\
146 	}						\
147       } else {						\
148 	m1 |= M1 << (BITS - off);			\
149 	m2 = M2 << (BITS - off)				\
150 	  | M1 >> (GMP_LIMB_BITS - BITS + off);		\
151 	m3 = M2 >> (GMP_LIMB_BITS - BITS + off);	\
152       }							\
153     } else {						\
154       m1 = M1 << (BITS - off)				\
155 	| M3 >> (off - 2 * GMP_LIMB_BITS);		\
156       m2 = M2 << (BITS - off)				\
157 	| M1 >> (off + GMP_LIMB_BITS - BITS);		\
158       m3 = M3 << (BITS - off)				\
159 	| M2 >> (off + GMP_LIMB_BITS - BITS);		\
160     }							\
161   } else {						\
162     m1 = M1; m2 = M2; m3 = M3;				\
163   }
164 
165 #define ROTATE1(m1, m2, BITS)			\
166   do {						\
167     mp_limb_t __tmp;				\
168     __tmp = m1 >> (2 * GMP_LIMB_BITS - BITS);	\
169     m1 = (m1 << (BITS - GMP_LIMB_BITS)) | m2;	\
170     m2 = __tmp;					\
171   } while (0)
172 
173 #define ROTATE2(m1, m2, m3, BITS)		\
174   do {						\
175     mp_limb_t __tmp;				\
176     __tmp = m2 >> (3 * GMP_LIMB_BITS - BITS);	\
177     m2 = m2 << (BITS - GMP_LIMB_BITS * 2)	\
178       | m1 >> (3 * GMP_LIMB_BITS - BITS);	\
179     m1 = m1 << (BITS - GMP_LIMB_BITS * 2) | m3;	\
180     m3 = __tmp;					\
181   } while (0)
182 
183 static mp_limb_t
fill_bitpattern(mp_ptr bit_array,mp_size_t limbs,mp_limb_t offset)184 fill_bitpattern (mp_ptr bit_array, mp_size_t limbs, mp_limb_t offset)
185 {
186 #ifdef SIEVE_2MSK2
187   mp_limb_t m11, m12, m21, m22, m23;
188 
189   if (offset == 0) { /* This branch is not needed. */
190     m11 = SIEVE_MASK1;
191     m12 = SIEVE_MASKT;
192     m21 = SIEVE_2MSK1;
193     m22 = SIEVE_2MSK2;
194     m23 = SIEVE_2MSKT;
195   } else { /* correctly handle offset == 0... */
196     m21 = offset % 110;
197     SET_OFF1 (m11, m12, SIEVE_MASK1, SIEVE_MASKT, m21, 110);
198     offset %= 182;
199     SET_OFF2 (m21, m22, m23, SIEVE_2MSK1, SIEVE_2MSK2, SIEVE_2MSKT, offset, 182);
200   }
201   /* THINK: Consider handling odd values of 'limbs' outside the loop,
202      to have a single exit condition. */
203   do {
204     bit_array[0] = m11 | m21;
205     if (--limbs == 0)
206       break;
207     ROTATE1 (m11, m12, 110);
208     bit_array[1] = m11 | m22;
209     bit_array += 2;
210     ROTATE1 (m11, m12, 110);
211     ROTATE2 (m21, m22, m23, 182);
212   } while (--limbs != 0);
213   return 4;
214 #else
215 #ifdef SIEVE_MASK2
216   mp_limb_t mask, mask2, tail;
217 
218   if (offset == 0) { /* This branch is not needed. */
219     mask = SIEVE_MASK1;
220     mask2 = SIEVE_MASK2;
221     tail = SIEVE_MASKT;
222   } else { /* correctly handle offset == 0... */
223     offset %= 70;
224     SET_OFF2 (mask, mask2, tail, SIEVE_MASK1, SIEVE_MASK2, SIEVE_MASKT, offset, 70);
225   }
226   /* THINK: Consider handling odd values of 'limbs' outside the loop,
227      to have a single exit condition. */
228   do {
229     bit_array[0] = mask;
230     if (--limbs == 0)
231       break;
232     bit_array[1] = mask2;
233     bit_array += 2;
234     ROTATE2 (mask, mask2, tail, 70);
235   } while (--limbs != 0);
236   return 2;
237 #else
238   MPN_FILL (bit_array, limbs, CNST_LIMB(0));
239   return 0;
240 #endif
241 #endif
242 }
243 
244 static void
first_block_primesieve(mp_ptr bit_array,mp_limb_t n)245 first_block_primesieve (mp_ptr bit_array, mp_limb_t n)
246 {
247   mp_size_t bits, limbs;
248   mp_limb_t i;
249 
250   ASSERT (n > 4);
251 
252   bits  = n_to_bit(n);
253   limbs = bits / GMP_LIMB_BITS;
254 
255   if (limbs != 0)
256     i = fill_bitpattern (bit_array + 1, limbs, 0);
257   bit_array[0] = SIEVE_SEED;
258 
259   if ((bits + 1) % GMP_LIMB_BITS != 0)
260     bit_array[limbs] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS);
261 
262   if (n > SEED_LIMIT) {
263     mp_limb_t mask, index;
264 
265     ASSERT (i < GMP_LIMB_BITS);
266 
267     if (n_to_bit (SEED_LIMIT + 1) < GMP_LIMB_BITS)
268       i = 0;
269     mask = CNST_LIMB(1) << i;
270     index = 0;
271     do {
272       ++i;
273       if ((bit_array[index] & mask) == 0)
274 	{
275 	  mp_size_t step, lindex;
276 	  mp_limb_t lmask;
277 	  unsigned  maskrot;
278 
279 	  step = id_to_n(i);
280 /*	  lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */
281 	  lindex = i*(step+1)-1+(-(i&1)&(i+1));
282 /*	  lindex = i*(step+1+(i&1))-1+(i&1); */
283 	  if (lindex > bits)
284 	    break;
285 
286 	  step <<= 1;
287 	  maskrot = step % GMP_LIMB_BITS;
288 
289 	  lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
290 	  do {
291 	    bit_array[lindex / GMP_LIMB_BITS] |= lmask;
292 	    lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
293 	    lindex += step;
294 	  } while (lindex <= bits);
295 
296 /*	  lindex = n_to_bit(id_to_n(i)*bit_to_n(i)); */
297 	  lindex = i*(i*3+6)+(i&1);
298 
299 	  lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
300 	  for ( ; lindex <= bits; lindex += step) {
301 	    bit_array[lindex / GMP_LIMB_BITS] |= lmask;
302 	    lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
303 	  };
304 	}
305       mask = mask << 1 | mask >> (GMP_LIMB_BITS-1);
306       index += mask & 1;
307     } while (1);
308   }
309 }
310 
311 static void
block_resieve(mp_ptr bit_array,mp_size_t limbs,mp_limb_t offset,mp_srcptr sieve)312 block_resieve (mp_ptr bit_array, mp_size_t limbs, mp_limb_t offset,
313 	       mp_srcptr sieve)
314 {
315   mp_size_t bits, off = offset;
316   mp_limb_t mask, index, i;
317 
318   ASSERT (limbs > 0);
319   ASSERT (offset >= GMP_LIMB_BITS);
320 
321   bits = limbs * GMP_LIMB_BITS - 1;
322 
323   i = fill_bitpattern (bit_array, limbs, offset - GMP_LIMB_BITS);
324 
325   ASSERT (i < GMP_LIMB_BITS);
326 
327   mask = CNST_LIMB(1) << i;
328   index = 0;
329   do {
330     ++i;
331     if ((sieve[index] & mask) == 0)
332       {
333 	mp_size_t step, lindex;
334 	mp_limb_t lmask;
335 	unsigned  maskrot;
336 
337 	step = id_to_n(i);
338 
339 /*	lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */
340 	lindex = i*(step+1)-1+(-(i&1)&(i+1));
341 /*	lindex = i*(step+1+(i&1))-1+(i&1); */
342 	if (lindex > bits + off)
343 	  break;
344 
345 	step <<= 1;
346 	maskrot = step % GMP_LIMB_BITS;
347 
348 	if (lindex < off)
349 	  lindex += step * ((off - lindex - 1) / step + 1);
350 
351 	lindex -= off;
352 
353 	lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
354 	for ( ; lindex <= bits; lindex += step) {
355 	  bit_array[lindex / GMP_LIMB_BITS] |= lmask;
356 	  lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
357 	};
358 
359 /*	lindex = n_to_bit(id_to_n(i)*bit_to_n(i)); */
360 	lindex = i*(i*3+6)+(i&1);
361 
362 	if (lindex < off)
363 	  lindex += step * ((off - lindex - 1) / step + 1);
364 
365 	lindex -= off;
366 
367 	lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
368 	for ( ; lindex <= bits; lindex += step) {
369 	  bit_array[lindex / GMP_LIMB_BITS] |= lmask;
370 	  lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
371 	};
372       }
373       mask = mask << 1 | mask >> (GMP_LIMB_BITS-1);
374       index += mask & 1;
375   } while (1);
376 }
377 
378 #define BLOCK_SIZE 2048
379 
380 /* Fills bit_array with the characteristic function of composite
381    numbers up to the parameter n. I.e. a bit set to "1" represent a
382    composite, a "0" represent a prime.
383 
384    The primesieve_size(n) limbs pointed to by bit_array are
385    overwritten. The returned value counts prime integers in the
386    interval [4, n]. Note that n > 4.
387 
388    Even numbers and multiples of 3 are excluded "a priori", only
389    numbers equivalent to +/- 1 mod 6 have their bit in the array.
390 
391    Once sieved, if the bit b is ZERO it represent a prime, the
392    represented prime is bit_to_n(b), if the LSbit is bit 0, or
393    id_to_n(b), if you call "1" the first bit.
394  */
395 
396 mp_limb_t
gmp_primesieve(mp_ptr bit_array,mp_limb_t n)397 gmp_primesieve (mp_ptr bit_array, mp_limb_t n)
398 {
399   mp_size_t size;
400   mp_limb_t bits;
401 
402   ASSERT (n > 4);
403 
404   bits = n_to_bit(n);
405   size = bits / GMP_LIMB_BITS + 1;
406 
407   if (size > BLOCK_SIZE * 2) {
408     mp_size_t off;
409     off = BLOCK_SIZE + (size % BLOCK_SIZE);
410     first_block_primesieve (bit_array, id_to_n (off * GMP_LIMB_BITS));
411     do {
412       block_resieve (bit_array + off, BLOCK_SIZE, off * GMP_LIMB_BITS, bit_array);
413     } while ((off += BLOCK_SIZE) < size);
414   } else {
415     first_block_primesieve (bit_array, n);
416   }
417 
418   if ((bits + 1) % GMP_LIMB_BITS != 0)
419     bit_array[size-1] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS);
420 
421   return size * GMP_LIMB_BITS - mpn_popcount (bit_array, size);
422 }
423 
424 #undef BLOCK_SIZE
425 #undef SEED_LIMIT
426 #undef SIEVE_SEED
427 #undef SIEVE_MASK1
428 #undef SIEVE_MASK2
429 #undef SIEVE_MASKT
430 #undef SIEVE_2MSK1
431 #undef SIEVE_2MSK2
432 #undef SIEVE_2MSKT
433 #undef SET_OFF1
434 #undef SET_OFF2
435 #undef ROTATE1
436 #undef ROTATE2
437