xref: /dflybsd-src/contrib/gmp/mpn/generic/popham.c (revision d365564473a20a528d07c59cad8ee2f4bea5546f)
14b6a78b7SSimon Schubert /* mpn_popcount, mpn_hamdist -- mpn bit population count/hamming distance.
24b6a78b7SSimon Schubert 
3*d2d4b659SJohn Marino Copyright 1994, 1996, 2000, 2001, 2002, 2005, 2011 Free Software Foundation, Inc.
44b6a78b7SSimon Schubert 
54b6a78b7SSimon Schubert This file is part of the GNU MP Library.
64b6a78b7SSimon Schubert 
74b6a78b7SSimon Schubert The GNU MP Library is free software; you can redistribute it and/or modify
84b6a78b7SSimon Schubert it under the terms of the GNU Lesser General Public License as published by
94b6a78b7SSimon Schubert the Free Software Foundation; either version 3 of the License, or (at your
104b6a78b7SSimon Schubert option) any later version.
114b6a78b7SSimon Schubert 
124b6a78b7SSimon Schubert The GNU MP Library is distributed in the hope that it will be useful, but
134b6a78b7SSimon Schubert WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
144b6a78b7SSimon Schubert or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
154b6a78b7SSimon Schubert License for more details.
164b6a78b7SSimon Schubert 
174b6a78b7SSimon Schubert You should have received a copy of the GNU Lesser General Public License
184b6a78b7SSimon Schubert along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
194b6a78b7SSimon Schubert 
204b6a78b7SSimon Schubert #include "gmp.h"
214b6a78b7SSimon Schubert #include "gmp-impl.h"
224b6a78b7SSimon Schubert 
234b6a78b7SSimon Schubert #if OPERATION_popcount
244b6a78b7SSimon Schubert #define FNAME mpn_popcount
254b6a78b7SSimon Schubert #define POPHAM(u,v) u
264b6a78b7SSimon Schubert #endif
274b6a78b7SSimon Schubert 
284b6a78b7SSimon Schubert #if OPERATION_hamdist
294b6a78b7SSimon Schubert #define FNAME mpn_hamdist
304b6a78b7SSimon Schubert #define POPHAM(u,v) u ^ v
314b6a78b7SSimon Schubert #endif
324b6a78b7SSimon Schubert 
3354028e53SJohn Marino mp_bitcnt_t
FNAME(mp_srcptr up,mp_srcptr vp,mp_size_t n)344b6a78b7SSimon Schubert FNAME (mp_srcptr up,
354b6a78b7SSimon Schubert #if OPERATION_hamdist
364b6a78b7SSimon Schubert        mp_srcptr vp,
374b6a78b7SSimon Schubert #endif
38*d2d4b659SJohn Marino        mp_size_t n) __GMP_NOTHROW
394b6a78b7SSimon Schubert {
4054028e53SJohn Marino   mp_bitcnt_t result = 0;
414b6a78b7SSimon Schubert   mp_limb_t p0, p1, p2, p3, x, p01, p23;
424b6a78b7SSimon Schubert   mp_size_t i;
434b6a78b7SSimon Schubert 
444b6a78b7SSimon Schubert   ASSERT (n >= 1);		/* Actually, this code handles any n, but some
454b6a78b7SSimon Schubert 				   assembly implementations do not.  */
464b6a78b7SSimon Schubert 
474b6a78b7SSimon Schubert   for (i = n >> 2; i != 0; i--)
484b6a78b7SSimon Schubert     {
494b6a78b7SSimon Schubert       p0 = POPHAM (up[0], vp[0]);
504b6a78b7SSimon Schubert       p0 -= (p0 >> 1) & MP_LIMB_T_MAX/3;				/* 2 0-2 */
514b6a78b7SSimon Schubert       p0 = ((p0 >> 2) & MP_LIMB_T_MAX/5) + (p0 & MP_LIMB_T_MAX/5);	/* 4 0-4 */
524b6a78b7SSimon Schubert 
534b6a78b7SSimon Schubert       p1 = POPHAM (up[1], vp[1]);
544b6a78b7SSimon Schubert       p1 -= (p1 >> 1) & MP_LIMB_T_MAX/3;				/* 2 0-2 */
554b6a78b7SSimon Schubert       p1 = ((p1 >> 2) & MP_LIMB_T_MAX/5) + (p1 & MP_LIMB_T_MAX/5);	/* 4 0-4 */
564b6a78b7SSimon Schubert 
574b6a78b7SSimon Schubert       p01 = p0 + p1;							/* 8 0-8 */
584b6a78b7SSimon Schubert       p01 = ((p01 >> 4) & MP_LIMB_T_MAX/17) + (p01 & MP_LIMB_T_MAX/17);	/* 8 0-16 */
594b6a78b7SSimon Schubert 
604b6a78b7SSimon Schubert       p2 = POPHAM (up[2], vp[2]);
614b6a78b7SSimon Schubert       p2 -= (p2 >> 1) & MP_LIMB_T_MAX/3;				/* 2 0-2 */
624b6a78b7SSimon Schubert       p2 = ((p2 >> 2) & MP_LIMB_T_MAX/5) + (p2 & MP_LIMB_T_MAX/5);	/* 4 0-4 */
634b6a78b7SSimon Schubert 
644b6a78b7SSimon Schubert       p3 = POPHAM (up[3], vp[3]);
654b6a78b7SSimon Schubert       p3 -= (p3 >> 1) & MP_LIMB_T_MAX/3;				/* 2 0-2 */
664b6a78b7SSimon Schubert       p3 = ((p3 >> 2) & MP_LIMB_T_MAX/5) + (p3 & MP_LIMB_T_MAX/5);	/* 4 0-4 */
674b6a78b7SSimon Schubert 
684b6a78b7SSimon Schubert       p23 = p2 + p3;							/* 8 0-8 */
694b6a78b7SSimon Schubert       p23 = ((p23 >> 4) & MP_LIMB_T_MAX/17) + (p23 & MP_LIMB_T_MAX/17);	/* 8 0-16 */
704b6a78b7SSimon Schubert 
714b6a78b7SSimon Schubert       x = p01 + p23;							/* 8 0-32 */
724b6a78b7SSimon Schubert       x = (x >> 8) + x;							/* 8 0-64 */
734b6a78b7SSimon Schubert       x = (x >> 16) + x;						/* 8 0-128 */
744b6a78b7SSimon Schubert #if GMP_LIMB_BITS > 32
754b6a78b7SSimon Schubert       x = ((x >> 32) & 0xff) + (x & 0xff);				/* 8 0-256 */
764b6a78b7SSimon Schubert       result += x;
774b6a78b7SSimon Schubert #else
784b6a78b7SSimon Schubert       result += x & 0xff;
794b6a78b7SSimon Schubert #endif
804b6a78b7SSimon Schubert       up += 4;
814b6a78b7SSimon Schubert #if OPERATION_hamdist
824b6a78b7SSimon Schubert       vp += 4;
834b6a78b7SSimon Schubert #endif
844b6a78b7SSimon Schubert     }
854b6a78b7SSimon Schubert 
864b6a78b7SSimon Schubert   n &= 3;
874b6a78b7SSimon Schubert   if (n != 0)
884b6a78b7SSimon Schubert     {
894b6a78b7SSimon Schubert       x = 0;
904b6a78b7SSimon Schubert       do
914b6a78b7SSimon Schubert 	{
924b6a78b7SSimon Schubert 	  p0 = POPHAM (up[0], vp[0]);
934b6a78b7SSimon Schubert 	  p0 -= (p0 >> 1) & MP_LIMB_T_MAX/3;				/* 2 0-2 */
944b6a78b7SSimon Schubert 	  p0 = ((p0 >> 2) & MP_LIMB_T_MAX/5) + (p0 & MP_LIMB_T_MAX/5);	/* 4 0-4 */
954b6a78b7SSimon Schubert 	  p0 = ((p0 >> 4) + p0) & MP_LIMB_T_MAX/17;			/* 8 0-8 */
964b6a78b7SSimon Schubert 
974b6a78b7SSimon Schubert 	  x += p0;
984b6a78b7SSimon Schubert 	  up += 1;
994b6a78b7SSimon Schubert #if OPERATION_hamdist
1004b6a78b7SSimon Schubert 	  vp += 1;
1014b6a78b7SSimon Schubert #endif
1024b6a78b7SSimon Schubert 	}
1034b6a78b7SSimon Schubert       while (--n);
1044b6a78b7SSimon Schubert 
1054b6a78b7SSimon Schubert       x = (x >> 8) + x;
1064b6a78b7SSimon Schubert       x = (x >> 16) + x;
1074b6a78b7SSimon Schubert #if GMP_LIMB_BITS > 32
1084b6a78b7SSimon Schubert       x = (x >> 32) + x;
1094b6a78b7SSimon Schubert #endif
1104b6a78b7SSimon Schubert       result += x & 0xff;
1114b6a78b7SSimon Schubert     }
1124b6a78b7SSimon Schubert 
1134b6a78b7SSimon Schubert   return result;
1144b6a78b7SSimon Schubert }
115