xref: /onnv-gate/usr/src/common/crypto/ecc/ecl_gf.c (revision 5697:324be5104707)
/*
 * ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is the elliptic curve math library.
 *
 * The Initial Developer of the Original Code is
 * Sun Microsystems, Inc.
 * Portions created by the Initial Developer are Copyright (C) 2003
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Stephen Fung <fungstep@hotmail.com> and
 *   Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * Sun elects to use this software under the MPL license.
 */
45*5697Smcpowers 
46*5697Smcpowers #pragma ident	"%Z%%M%	%I%	%E% SMI"
47*5697Smcpowers 
48*5697Smcpowers #include "mpi.h"
49*5697Smcpowers #include "mp_gf2m.h"
50*5697Smcpowers #include "ecl-priv.h"
51*5697Smcpowers #include "mpi-priv.h"
52*5697Smcpowers #ifndef _KERNEL
53*5697Smcpowers #include <stdlib.h>
54*5697Smcpowers #endif
55*5697Smcpowers 
56*5697Smcpowers /* Allocate memory for a new GFMethod object. */
57*5697Smcpowers GFMethod *
GFMethod_new(int kmflag)58*5697Smcpowers GFMethod_new(int kmflag)
59*5697Smcpowers {
60*5697Smcpowers 	mp_err res = MP_OKAY;
61*5697Smcpowers 	GFMethod *meth;
62*5697Smcpowers #ifdef _KERNEL
63*5697Smcpowers 	meth = (GFMethod *) kmem_alloc(sizeof(GFMethod), kmflag);
64*5697Smcpowers #else
65*5697Smcpowers 	meth = (GFMethod *) malloc(sizeof(GFMethod));
66*5697Smcpowers 	if (meth == NULL)
67*5697Smcpowers 		return NULL;
68*5697Smcpowers #endif
69*5697Smcpowers 	meth->constructed = MP_YES;
70*5697Smcpowers 	MP_DIGITS(&meth->irr) = 0;
71*5697Smcpowers 	meth->extra_free = NULL;
72*5697Smcpowers 	MP_CHECKOK(mp_init(&meth->irr, kmflag));
73*5697Smcpowers 
74*5697Smcpowers   CLEANUP:
75*5697Smcpowers 	if (res != MP_OKAY) {
76*5697Smcpowers 		GFMethod_free(meth);
77*5697Smcpowers 		return NULL;
78*5697Smcpowers 	}
79*5697Smcpowers 	return meth;
80*5697Smcpowers }
81*5697Smcpowers 
82*5697Smcpowers /* Construct a generic GFMethod for arithmetic over prime fields with
83*5697Smcpowers  * irreducible irr. */
84*5697Smcpowers GFMethod *
GFMethod_consGFp(const mp_int * irr)85*5697Smcpowers GFMethod_consGFp(const mp_int *irr)
86*5697Smcpowers {
87*5697Smcpowers 	mp_err res = MP_OKAY;
88*5697Smcpowers 	GFMethod *meth = NULL;
89*5697Smcpowers 
90*5697Smcpowers 	meth = GFMethod_new(FLAG(irr));
91*5697Smcpowers 	if (meth == NULL)
92*5697Smcpowers 		return NULL;
93*5697Smcpowers 
94*5697Smcpowers 	MP_CHECKOK(mp_copy(irr, &meth->irr));
95*5697Smcpowers 	meth->irr_arr[0] = mpl_significant_bits(irr);
96*5697Smcpowers 	meth->irr_arr[1] = meth->irr_arr[2] = meth->irr_arr[3] =
97*5697Smcpowers 		meth->irr_arr[4] = 0;
98*5697Smcpowers 	switch(MP_USED(&meth->irr)) {
99*5697Smcpowers 	/* maybe we need 1 and 2 words here as well?*/
100*5697Smcpowers 	case 3:
101*5697Smcpowers 		meth->field_add = &ec_GFp_add_3;
102*5697Smcpowers 		meth->field_sub = &ec_GFp_sub_3;
103*5697Smcpowers 		break;
104*5697Smcpowers 	case 4:
105*5697Smcpowers 		meth->field_add = &ec_GFp_add_4;
106*5697Smcpowers 		meth->field_sub = &ec_GFp_sub_4;
107*5697Smcpowers 		break;
108*5697Smcpowers 	case 5:
109*5697Smcpowers 		meth->field_add = &ec_GFp_add_5;
110*5697Smcpowers 		meth->field_sub = &ec_GFp_sub_5;
111*5697Smcpowers 		break;
112*5697Smcpowers 	case 6:
113*5697Smcpowers 		meth->field_add = &ec_GFp_add_6;
114*5697Smcpowers 		meth->field_sub = &ec_GFp_sub_6;
115*5697Smcpowers 		break;
116*5697Smcpowers 	default:
117*5697Smcpowers 		meth->field_add = &ec_GFp_add;
118*5697Smcpowers 		meth->field_sub = &ec_GFp_sub;
119*5697Smcpowers 	}
120*5697Smcpowers 	meth->field_neg = &ec_GFp_neg;
121*5697Smcpowers 	meth->field_mod = &ec_GFp_mod;
122*5697Smcpowers 	meth->field_mul = &ec_GFp_mul;
123*5697Smcpowers 	meth->field_sqr = &ec_GFp_sqr;
124*5697Smcpowers 	meth->field_div = &ec_GFp_div;
125*5697Smcpowers 	meth->field_enc = NULL;
126*5697Smcpowers 	meth->field_dec = NULL;
127*5697Smcpowers 	meth->extra1 = NULL;
128*5697Smcpowers 	meth->extra2 = NULL;
129*5697Smcpowers 	meth->extra_free = NULL;
130*5697Smcpowers 
131*5697Smcpowers   CLEANUP:
132*5697Smcpowers 	if (res != MP_OKAY) {
133*5697Smcpowers 		GFMethod_free(meth);
134*5697Smcpowers 		return NULL;
135*5697Smcpowers 	}
136*5697Smcpowers 	return meth;
137*5697Smcpowers }
138*5697Smcpowers 
139*5697Smcpowers /* Construct a generic GFMethod for arithmetic over binary polynomial
140*5697Smcpowers  * fields with irreducible irr that has array representation irr_arr (see
141*5697Smcpowers  * ecl-priv.h for description of the representation).  If irr_arr is NULL,
142*5697Smcpowers  * then it is constructed from the bitstring representation. */
143*5697Smcpowers GFMethod *
GFMethod_consGF2m(const mp_int * irr,const unsigned int irr_arr[5])144*5697Smcpowers GFMethod_consGF2m(const mp_int *irr, const unsigned int irr_arr[5])
145*5697Smcpowers {
146*5697Smcpowers 	mp_err res = MP_OKAY;
147*5697Smcpowers 	int ret;
148*5697Smcpowers 	GFMethod *meth = NULL;
149*5697Smcpowers 
150*5697Smcpowers 	meth = GFMethod_new(FLAG(irr));
151*5697Smcpowers 	if (meth == NULL)
152*5697Smcpowers 		return NULL;
153*5697Smcpowers 
154*5697Smcpowers 	MP_CHECKOK(mp_copy(irr, &meth->irr));
155*5697Smcpowers 	if (irr_arr != NULL) {
156*5697Smcpowers 		/* Irreducible polynomials are either trinomials or pentanomials. */
157*5697Smcpowers 		meth->irr_arr[0] = irr_arr[0];
158*5697Smcpowers 		meth->irr_arr[1] = irr_arr[1];
159*5697Smcpowers 		meth->irr_arr[2] = irr_arr[2];
160*5697Smcpowers 		if (irr_arr[2] > 0) {
161*5697Smcpowers 			meth->irr_arr[3] = irr_arr[3];
162*5697Smcpowers 			meth->irr_arr[4] = irr_arr[4];
163*5697Smcpowers 		} else {
164*5697Smcpowers 			meth->irr_arr[3] = meth->irr_arr[4] = 0;
165*5697Smcpowers 		}
166*5697Smcpowers 	} else {
167*5697Smcpowers 		ret = mp_bpoly2arr(irr, meth->irr_arr, 5);
168*5697Smcpowers 		/* Irreducible polynomials are either trinomials or pentanomials. */
169*5697Smcpowers 		if ((ret != 5) && (ret != 3)) {
170*5697Smcpowers 			res = MP_UNDEF;
171*5697Smcpowers 			goto CLEANUP;
172*5697Smcpowers 		}
173*5697Smcpowers 	}
174*5697Smcpowers 	meth->field_add = &ec_GF2m_add;
175*5697Smcpowers 	meth->field_neg = &ec_GF2m_neg;
176*5697Smcpowers 	meth->field_sub = &ec_GF2m_add;
177*5697Smcpowers 	meth->field_mod = &ec_GF2m_mod;
178*5697Smcpowers 	meth->field_mul = &ec_GF2m_mul;
179*5697Smcpowers 	meth->field_sqr = &ec_GF2m_sqr;
180*5697Smcpowers 	meth->field_div = &ec_GF2m_div;
181*5697Smcpowers 	meth->field_enc = NULL;
182*5697Smcpowers 	meth->field_dec = NULL;
183*5697Smcpowers 	meth->extra1 = NULL;
184*5697Smcpowers 	meth->extra2 = NULL;
185*5697Smcpowers 	meth->extra_free = NULL;
186*5697Smcpowers 
187*5697Smcpowers   CLEANUP:
188*5697Smcpowers 	if (res != MP_OKAY) {
189*5697Smcpowers 		GFMethod_free(meth);
190*5697Smcpowers 		return NULL;
191*5697Smcpowers 	}
192*5697Smcpowers 	return meth;
193*5697Smcpowers }
194*5697Smcpowers 
195*5697Smcpowers /* Free the memory allocated (if any) to a GFMethod object. */
196*5697Smcpowers void
GFMethod_free(GFMethod * meth)197*5697Smcpowers GFMethod_free(GFMethod *meth)
198*5697Smcpowers {
199*5697Smcpowers 	if (meth == NULL)
200*5697Smcpowers 		return;
201*5697Smcpowers 	if (meth->constructed == MP_NO)
202*5697Smcpowers 		return;
203*5697Smcpowers 	mp_clear(&meth->irr);
204*5697Smcpowers 	if (meth->extra_free != NULL)
205*5697Smcpowers 		meth->extra_free(meth);
206*5697Smcpowers #ifdef _KERNEL
207*5697Smcpowers 	kmem_free(meth, sizeof(GFMethod));
208*5697Smcpowers #else
209*5697Smcpowers 	free(meth);
210*5697Smcpowers #endif
211*5697Smcpowers }
212*5697Smcpowers 
213*5697Smcpowers /* Wrapper functions for generic prime field arithmetic. */
214*5697Smcpowers 
215*5697Smcpowers /* Add two field elements.  Assumes that 0 <= a, b < meth->irr */
216*5697Smcpowers mp_err
ec_GFp_add(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)217*5697Smcpowers ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r,
218*5697Smcpowers 		   const GFMethod *meth)
219*5697Smcpowers {
220*5697Smcpowers 	/* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a + b (mod p) */
221*5697Smcpowers 	mp_err res;
222*5697Smcpowers 
223*5697Smcpowers 	if ((res = mp_add(a, b, r)) != MP_OKAY) {
224*5697Smcpowers 		return res;
225*5697Smcpowers 	}
226*5697Smcpowers 	if (mp_cmp(r, &meth->irr) >= 0) {
227*5697Smcpowers 		return mp_sub(r, &meth->irr, r);
228*5697Smcpowers 	}
229*5697Smcpowers 	return res;
230*5697Smcpowers }
231*5697Smcpowers 
232*5697Smcpowers /* Negates a field element.  Assumes that 0 <= a < meth->irr */
233*5697Smcpowers mp_err
ec_GFp_neg(const mp_int * a,mp_int * r,const GFMethod * meth)234*5697Smcpowers ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth)
235*5697Smcpowers {
236*5697Smcpowers 	/* PRE: 0 <= a < p = meth->irr POST: 0 <= r < p, r = -a (mod p) */
237*5697Smcpowers 
238*5697Smcpowers 	if (mp_cmp_z(a) == 0) {
239*5697Smcpowers 		mp_zero(r);
240*5697Smcpowers 		return MP_OKAY;
241*5697Smcpowers 	}
242*5697Smcpowers 	return mp_sub(&meth->irr, a, r);
243*5697Smcpowers }
244*5697Smcpowers 
245*5697Smcpowers /* Subtracts two field elements.  Assumes that 0 <= a, b < meth->irr */
246*5697Smcpowers mp_err
ec_GFp_sub(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)247*5697Smcpowers ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r,
248*5697Smcpowers 		   const GFMethod *meth)
249*5697Smcpowers {
250*5697Smcpowers 	mp_err res = MP_OKAY;
251*5697Smcpowers 
252*5697Smcpowers 	/* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a - b (mod p) */
253*5697Smcpowers 	res = mp_sub(a, b, r);
254*5697Smcpowers 	if (res == MP_RANGE) {
255*5697Smcpowers 		MP_CHECKOK(mp_sub(b, a, r));
256*5697Smcpowers 		if (mp_cmp_z(r) < 0) {
257*5697Smcpowers 			MP_CHECKOK(mp_add(r, &meth->irr, r));
258*5697Smcpowers 		}
259*5697Smcpowers 		MP_CHECKOK(ec_GFp_neg(r, r, meth));
260*5697Smcpowers 	}
261*5697Smcpowers 	if (mp_cmp_z(r) < 0) {
262*5697Smcpowers 		MP_CHECKOK(mp_add(r, &meth->irr, r));
263*5697Smcpowers 	}
264*5697Smcpowers   CLEANUP:
265*5697Smcpowers 	return res;
266*5697Smcpowers }
267*5697Smcpowers /*
268*5697Smcpowers  * Inline adds for small curve lengths.
269*5697Smcpowers  */
270*5697Smcpowers /* 3 words */
271*5697Smcpowers mp_err
ec_GFp_add_3(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)272*5697Smcpowers ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r,
273*5697Smcpowers 			const GFMethod *meth)
274*5697Smcpowers {
275*5697Smcpowers 	mp_err res = MP_OKAY;
276*5697Smcpowers 	mp_digit a0 = 0, a1 = 0, a2 = 0;
277*5697Smcpowers 	mp_digit r0 = 0, r1 = 0, r2 = 0;
278*5697Smcpowers 	mp_digit carry;
279*5697Smcpowers 
280*5697Smcpowers 	switch(MP_USED(a)) {
281*5697Smcpowers 	case 3:
282*5697Smcpowers 		a2 = MP_DIGIT(a,2);
283*5697Smcpowers 	case 2:
284*5697Smcpowers 		a1 = MP_DIGIT(a,1);
285*5697Smcpowers 	case 1:
286*5697Smcpowers 		a0 = MP_DIGIT(a,0);
287*5697Smcpowers 	}
288*5697Smcpowers 	switch(MP_USED(b)) {
289*5697Smcpowers 	case 3:
290*5697Smcpowers 		r2 = MP_DIGIT(b,2);
291*5697Smcpowers 	case 2:
292*5697Smcpowers 		r1 = MP_DIGIT(b,1);
293*5697Smcpowers 	case 1:
294*5697Smcpowers 		r0 = MP_DIGIT(b,0);
295*5697Smcpowers 	}
296*5697Smcpowers 
297*5697Smcpowers #ifndef MPI_AMD64_ADD
298*5697Smcpowers 	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
299*5697Smcpowers 	MP_ADD_CARRY(a1, r1, r1, carry, carry);
300*5697Smcpowers 	MP_ADD_CARRY(a2, r2, r2, carry, carry);
301*5697Smcpowers #else
302*5697Smcpowers 	__asm__ (
303*5697Smcpowers                 "xorq   %3,%3           \n\t"
304*5697Smcpowers                 "addq   %4,%0           \n\t"
305*5697Smcpowers                 "adcq   %5,%1           \n\t"
306*5697Smcpowers                 "adcq   %6,%2           \n\t"
307*5697Smcpowers                 "adcq   $0,%3           \n\t"
308*5697Smcpowers                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
309*5697Smcpowers                 : "r" (a0), "r" (a1), "r" (a2),
310*5697Smcpowers 		  "0" (r0), "1" (r1), "2" (r2)
311*5697Smcpowers                 : "%cc" );
312*5697Smcpowers #endif
313*5697Smcpowers 
314*5697Smcpowers 	MP_CHECKOK(s_mp_pad(r, 3));
315*5697Smcpowers 	MP_DIGIT(r, 2) = r2;
316*5697Smcpowers 	MP_DIGIT(r, 1) = r1;
317*5697Smcpowers 	MP_DIGIT(r, 0) = r0;
318*5697Smcpowers 	MP_SIGN(r) = MP_ZPOS;
319*5697Smcpowers 	MP_USED(r) = 3;
320*5697Smcpowers 
321*5697Smcpowers 	/* Do quick 'subract' if we've gone over
322*5697Smcpowers 	 * (add the 2's complement of the curve field) */
323*5697Smcpowers 	 a2 = MP_DIGIT(&meth->irr,2);
324*5697Smcpowers 	if (carry ||  r2 >  a2 ||
325*5697Smcpowers 		((r2 == a2) && mp_cmp(r,&meth->irr) != MP_LT)) {
326*5697Smcpowers 		a1 = MP_DIGIT(&meth->irr,1);
327*5697Smcpowers 		a0 = MP_DIGIT(&meth->irr,0);
328*5697Smcpowers #ifndef MPI_AMD64_ADD
329*5697Smcpowers 		MP_SUB_BORROW(r0, a0, r0, 0,     carry);
330*5697Smcpowers 		MP_SUB_BORROW(r1, a1, r1, carry, carry);
331*5697Smcpowers 		MP_SUB_BORROW(r2, a2, r2, carry, carry);
332*5697Smcpowers #else
333*5697Smcpowers 		__asm__ (
334*5697Smcpowers 			"subq   %3,%0           \n\t"
335*5697Smcpowers 			"sbbq   %4,%1           \n\t"
336*5697Smcpowers 			"sbbq   %5,%2           \n\t"
337*5697Smcpowers 			: "=r"(r0), "=r"(r1), "=r"(r2)
338*5697Smcpowers 			: "r" (a0), "r" (a1), "r" (a2),
339*5697Smcpowers 			  "0" (r0), "1" (r1), "2" (r2)
340*5697Smcpowers 			: "%cc" );
341*5697Smcpowers #endif
342*5697Smcpowers 		MP_DIGIT(r, 2) = r2;
343*5697Smcpowers 		MP_DIGIT(r, 1) = r1;
344*5697Smcpowers 		MP_DIGIT(r, 0) = r0;
345*5697Smcpowers 	}
346*5697Smcpowers 
347*5697Smcpowers 	s_mp_clamp(r);
348*5697Smcpowers 
349*5697Smcpowers   CLEANUP:
350*5697Smcpowers 	return res;
351*5697Smcpowers }
352*5697Smcpowers 
353*5697Smcpowers /* 4 words */
354*5697Smcpowers mp_err
ec_GFp_add_4(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)355*5697Smcpowers ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r,
356*5697Smcpowers 			const GFMethod *meth)
357*5697Smcpowers {
358*5697Smcpowers 	mp_err res = MP_OKAY;
359*5697Smcpowers 	mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0;
360*5697Smcpowers 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
361*5697Smcpowers 	mp_digit carry;
362*5697Smcpowers 
363*5697Smcpowers 	switch(MP_USED(a)) {
364*5697Smcpowers 	case 4:
365*5697Smcpowers 		a3 = MP_DIGIT(a,3);
366*5697Smcpowers 	case 3:
367*5697Smcpowers 		a2 = MP_DIGIT(a,2);
368*5697Smcpowers 	case 2:
369*5697Smcpowers 		a1 = MP_DIGIT(a,1);
370*5697Smcpowers 	case 1:
371*5697Smcpowers 		a0 = MP_DIGIT(a,0);
372*5697Smcpowers 	}
373*5697Smcpowers 	switch(MP_USED(b)) {
374*5697Smcpowers 	case 4:
375*5697Smcpowers 		r3 = MP_DIGIT(b,3);
376*5697Smcpowers 	case 3:
377*5697Smcpowers 		r2 = MP_DIGIT(b,2);
378*5697Smcpowers 	case 2:
379*5697Smcpowers 		r1 = MP_DIGIT(b,1);
380*5697Smcpowers 	case 1:
381*5697Smcpowers 		r0 = MP_DIGIT(b,0);
382*5697Smcpowers 	}
383*5697Smcpowers 
384*5697Smcpowers #ifndef MPI_AMD64_ADD
385*5697Smcpowers 	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
386*5697Smcpowers 	MP_ADD_CARRY(a1, r1, r1, carry, carry);
387*5697Smcpowers 	MP_ADD_CARRY(a2, r2, r2, carry, carry);
388*5697Smcpowers 	MP_ADD_CARRY(a3, r3, r3, carry, carry);
389*5697Smcpowers #else
390*5697Smcpowers 	__asm__ (
391*5697Smcpowers                 "xorq   %4,%4           \n\t"
392*5697Smcpowers                 "addq   %5,%0           \n\t"
393*5697Smcpowers                 "adcq   %6,%1           \n\t"
394*5697Smcpowers                 "adcq   %7,%2           \n\t"
395*5697Smcpowers                 "adcq   %8,%3           \n\t"
396*5697Smcpowers                 "adcq   $0,%4           \n\t"
397*5697Smcpowers                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(carry)
398*5697Smcpowers                 : "r" (a0), "r" (a1), "r" (a2), "r" (a3),
399*5697Smcpowers 		  "0" (r0), "1" (r1), "2" (r2), "3" (r3)
400*5697Smcpowers                 : "%cc" );
401*5697Smcpowers #endif
402*5697Smcpowers 
403*5697Smcpowers 	MP_CHECKOK(s_mp_pad(r, 4));
404*5697Smcpowers 	MP_DIGIT(r, 3) = r3;
405*5697Smcpowers 	MP_DIGIT(r, 2) = r2;
406*5697Smcpowers 	MP_DIGIT(r, 1) = r1;
407*5697Smcpowers 	MP_DIGIT(r, 0) = r0;
408*5697Smcpowers 	MP_SIGN(r) = MP_ZPOS;
409*5697Smcpowers 	MP_USED(r) = 4;
410*5697Smcpowers 
411*5697Smcpowers 	/* Do quick 'subract' if we've gone over
412*5697Smcpowers 	 * (add the 2's complement of the curve field) */
413*5697Smcpowers 	 a3 = MP_DIGIT(&meth->irr,3);
414*5697Smcpowers 	if (carry ||  r3 >  a3 ||
415*5697Smcpowers 		((r3 == a3) && mp_cmp(r,&meth->irr) != MP_LT)) {
416*5697Smcpowers 		a2 = MP_DIGIT(&meth->irr,2);
417*5697Smcpowers 		a1 = MP_DIGIT(&meth->irr,1);
418*5697Smcpowers 		a0 = MP_DIGIT(&meth->irr,0);
419*5697Smcpowers #ifndef MPI_AMD64_ADD
420*5697Smcpowers 		MP_SUB_BORROW(r0, a0, r0, 0,     carry);
421*5697Smcpowers 		MP_SUB_BORROW(r1, a1, r1, carry, carry);
422*5697Smcpowers 		MP_SUB_BORROW(r2, a2, r2, carry, carry);
423*5697Smcpowers 		MP_SUB_BORROW(r3, a3, r3, carry, carry);
424*5697Smcpowers #else
425*5697Smcpowers 		__asm__ (
426*5697Smcpowers 			"subq   %4,%0           \n\t"
427*5697Smcpowers 			"sbbq   %5,%1           \n\t"
428*5697Smcpowers 			"sbbq   %6,%2           \n\t"
429*5697Smcpowers 			"sbbq   %7,%3           \n\t"
430*5697Smcpowers 			: "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
431*5697Smcpowers 			: "r" (a0), "r" (a1), "r" (a2), "r" (a3),
432*5697Smcpowers 			  "0" (r0), "1" (r1), "2" (r2), "3" (r3)
433*5697Smcpowers 			: "%cc" );
434*5697Smcpowers #endif
435*5697Smcpowers 		MP_DIGIT(r, 3) = r3;
436*5697Smcpowers 		MP_DIGIT(r, 2) = r2;
437*5697Smcpowers 		MP_DIGIT(r, 1) = r1;
438*5697Smcpowers 		MP_DIGIT(r, 0) = r0;
439*5697Smcpowers 	}
440*5697Smcpowers 
441*5697Smcpowers 	s_mp_clamp(r);
442*5697Smcpowers 
443*5697Smcpowers   CLEANUP:
444*5697Smcpowers 	return res;
445*5697Smcpowers }
446*5697Smcpowers 
447*5697Smcpowers /* 5 words */
448*5697Smcpowers mp_err
ec_GFp_add_5(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)449*5697Smcpowers ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r,
450*5697Smcpowers 			const GFMethod *meth)
451*5697Smcpowers {
452*5697Smcpowers 	mp_err res = MP_OKAY;
453*5697Smcpowers 	mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0;
454*5697Smcpowers 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
455*5697Smcpowers 	mp_digit carry;
456*5697Smcpowers 
457*5697Smcpowers 	switch(MP_USED(a)) {
458*5697Smcpowers 	case 5:
459*5697Smcpowers 		a4 = MP_DIGIT(a,4);
460*5697Smcpowers 	case 4:
461*5697Smcpowers 		a3 = MP_DIGIT(a,3);
462*5697Smcpowers 	case 3:
463*5697Smcpowers 		a2 = MP_DIGIT(a,2);
464*5697Smcpowers 	case 2:
465*5697Smcpowers 		a1 = MP_DIGIT(a,1);
466*5697Smcpowers 	case 1:
467*5697Smcpowers 		a0 = MP_DIGIT(a,0);
468*5697Smcpowers 	}
469*5697Smcpowers 	switch(MP_USED(b)) {
470*5697Smcpowers 	case 5:
471*5697Smcpowers 		r4 = MP_DIGIT(b,4);
472*5697Smcpowers 	case 4:
473*5697Smcpowers 		r3 = MP_DIGIT(b,3);
474*5697Smcpowers 	case 3:
475*5697Smcpowers 		r2 = MP_DIGIT(b,2);
476*5697Smcpowers 	case 2:
477*5697Smcpowers 		r1 = MP_DIGIT(b,1);
478*5697Smcpowers 	case 1:
479*5697Smcpowers 		r0 = MP_DIGIT(b,0);
480*5697Smcpowers 	}
481*5697Smcpowers 
482*5697Smcpowers 	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
483*5697Smcpowers 	MP_ADD_CARRY(a1, r1, r1, carry, carry);
484*5697Smcpowers 	MP_ADD_CARRY(a2, r2, r2, carry, carry);
485*5697Smcpowers 	MP_ADD_CARRY(a3, r3, r3, carry, carry);
486*5697Smcpowers 	MP_ADD_CARRY(a4, r4, r4, carry, carry);
487*5697Smcpowers 
488*5697Smcpowers 	MP_CHECKOK(s_mp_pad(r, 5));
489*5697Smcpowers 	MP_DIGIT(r, 4) = r4;
490*5697Smcpowers 	MP_DIGIT(r, 3) = r3;
491*5697Smcpowers 	MP_DIGIT(r, 2) = r2;
492*5697Smcpowers 	MP_DIGIT(r, 1) = r1;
493*5697Smcpowers 	MP_DIGIT(r, 0) = r0;
494*5697Smcpowers 	MP_SIGN(r) = MP_ZPOS;
495*5697Smcpowers 	MP_USED(r) = 5;
496*5697Smcpowers 
497*5697Smcpowers 	/* Do quick 'subract' if we've gone over
498*5697Smcpowers 	 * (add the 2's complement of the curve field) */
499*5697Smcpowers 	 a4 = MP_DIGIT(&meth->irr,4);
500*5697Smcpowers 	if (carry ||  r4 >  a4 ||
501*5697Smcpowers 		((r4 == a4) && mp_cmp(r,&meth->irr) != MP_LT)) {
502*5697Smcpowers 		a3 = MP_DIGIT(&meth->irr,3);
503*5697Smcpowers 		a2 = MP_DIGIT(&meth->irr,2);
504*5697Smcpowers 		a1 = MP_DIGIT(&meth->irr,1);
505*5697Smcpowers 		a0 = MP_DIGIT(&meth->irr,0);
506*5697Smcpowers 		MP_SUB_BORROW(r0, a0, r0, 0,     carry);
507*5697Smcpowers 		MP_SUB_BORROW(r1, a1, r1, carry, carry);
508*5697Smcpowers 		MP_SUB_BORROW(r2, a2, r2, carry, carry);
509*5697Smcpowers 		MP_SUB_BORROW(r3, a3, r3, carry, carry);
510*5697Smcpowers 		MP_SUB_BORROW(r4, a4, r4, carry, carry);
511*5697Smcpowers 		MP_DIGIT(r, 4) = r4;
512*5697Smcpowers 		MP_DIGIT(r, 3) = r3;
513*5697Smcpowers 		MP_DIGIT(r, 2) = r2;
514*5697Smcpowers 		MP_DIGIT(r, 1) = r1;
515*5697Smcpowers 		MP_DIGIT(r, 0) = r0;
516*5697Smcpowers 	}
517*5697Smcpowers 
518*5697Smcpowers 	s_mp_clamp(r);
519*5697Smcpowers 
520*5697Smcpowers   CLEANUP:
521*5697Smcpowers 	return res;
522*5697Smcpowers }
523*5697Smcpowers 
524*5697Smcpowers /* 6 words */
525*5697Smcpowers mp_err
ec_GFp_add_6(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)526*5697Smcpowers ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
527*5697Smcpowers 			const GFMethod *meth)
528*5697Smcpowers {
529*5697Smcpowers 	mp_err res = MP_OKAY;
530*5697Smcpowers 	mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0, a5 = 0;
531*5697Smcpowers 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
532*5697Smcpowers 	mp_digit carry;
533*5697Smcpowers 
534*5697Smcpowers 	switch(MP_USED(a)) {
535*5697Smcpowers 	case 6:
536*5697Smcpowers 		a5 = MP_DIGIT(a,5);
537*5697Smcpowers 	case 5:
538*5697Smcpowers 		a4 = MP_DIGIT(a,4);
539*5697Smcpowers 	case 4:
540*5697Smcpowers 		a3 = MP_DIGIT(a,3);
541*5697Smcpowers 	case 3:
542*5697Smcpowers 		a2 = MP_DIGIT(a,2);
543*5697Smcpowers 	case 2:
544*5697Smcpowers 		a1 = MP_DIGIT(a,1);
545*5697Smcpowers 	case 1:
546*5697Smcpowers 		a0 = MP_DIGIT(a,0);
547*5697Smcpowers 	}
548*5697Smcpowers 	switch(MP_USED(b)) {
549*5697Smcpowers 	case 6:
550*5697Smcpowers 		r5 = MP_DIGIT(b,5);
551*5697Smcpowers 	case 5:
552*5697Smcpowers 		r4 = MP_DIGIT(b,4);
553*5697Smcpowers 	case 4:
554*5697Smcpowers 		r3 = MP_DIGIT(b,3);
555*5697Smcpowers 	case 3:
556*5697Smcpowers 		r2 = MP_DIGIT(b,2);
557*5697Smcpowers 	case 2:
558*5697Smcpowers 		r1 = MP_DIGIT(b,1);
559*5697Smcpowers 	case 1:
560*5697Smcpowers 		r0 = MP_DIGIT(b,0);
561*5697Smcpowers 	}
562*5697Smcpowers 
563*5697Smcpowers 	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
564*5697Smcpowers 	MP_ADD_CARRY(a1, r1, r1, carry, carry);
565*5697Smcpowers 	MP_ADD_CARRY(a2, r2, r2, carry, carry);
566*5697Smcpowers 	MP_ADD_CARRY(a3, r3, r3, carry, carry);
567*5697Smcpowers 	MP_ADD_CARRY(a4, r4, r4, carry, carry);
568*5697Smcpowers 	MP_ADD_CARRY(a5, r5, r5, carry, carry);
569*5697Smcpowers 
570*5697Smcpowers 	MP_CHECKOK(s_mp_pad(r, 6));
571*5697Smcpowers 	MP_DIGIT(r, 5) = r5;
572*5697Smcpowers 	MP_DIGIT(r, 4) = r4;
573*5697Smcpowers 	MP_DIGIT(r, 3) = r3;
574*5697Smcpowers 	MP_DIGIT(r, 2) = r2;
575*5697Smcpowers 	MP_DIGIT(r, 1) = r1;
576*5697Smcpowers 	MP_DIGIT(r, 0) = r0;
577*5697Smcpowers 	MP_SIGN(r) = MP_ZPOS;
578*5697Smcpowers 	MP_USED(r) = 6;
579*5697Smcpowers 
580*5697Smcpowers 	/* Do quick 'subract' if we've gone over
581*5697Smcpowers 	 * (add the 2's complement of the curve field) */
582*5697Smcpowers 	a5 = MP_DIGIT(&meth->irr,5);
583*5697Smcpowers 	if (carry ||  r5 >  a5 ||
584*5697Smcpowers 		((r5 == a5) && mp_cmp(r,&meth->irr) != MP_LT)) {
585*5697Smcpowers 		a4 = MP_DIGIT(&meth->irr,4);
586*5697Smcpowers 		a3 = MP_DIGIT(&meth->irr,3);
587*5697Smcpowers 		a2 = MP_DIGIT(&meth->irr,2);
588*5697Smcpowers 		a1 = MP_DIGIT(&meth->irr,1);
589*5697Smcpowers 		a0 = MP_DIGIT(&meth->irr,0);
590*5697Smcpowers 		MP_SUB_BORROW(r0, a0, r0, 0,     carry);
591*5697Smcpowers 		MP_SUB_BORROW(r1, a1, r1, carry, carry);
592*5697Smcpowers 		MP_SUB_BORROW(r2, a2, r2, carry, carry);
593*5697Smcpowers 		MP_SUB_BORROW(r3, a3, r3, carry, carry);
594*5697Smcpowers 		MP_SUB_BORROW(r4, a4, r4, carry, carry);
595*5697Smcpowers 		MP_SUB_BORROW(r5, a5, r5, carry, carry);
596*5697Smcpowers 		MP_DIGIT(r, 5) = r5;
597*5697Smcpowers 		MP_DIGIT(r, 4) = r4;
598*5697Smcpowers 		MP_DIGIT(r, 3) = r3;
599*5697Smcpowers 		MP_DIGIT(r, 2) = r2;
600*5697Smcpowers 		MP_DIGIT(r, 1) = r1;
601*5697Smcpowers 		MP_DIGIT(r, 0) = r0;
602*5697Smcpowers 	}
603*5697Smcpowers 
604*5697Smcpowers 	s_mp_clamp(r);
605*5697Smcpowers 
606*5697Smcpowers   CLEANUP:
607*5697Smcpowers 	return res;
608*5697Smcpowers }
609*5697Smcpowers 
610*5697Smcpowers /*
611*5697Smcpowers  * The following subraction functions do in-line subractions based
612*5697Smcpowers  * on our curve size.
613*5697Smcpowers  *
614*5697Smcpowers  * ... 3 words
615*5697Smcpowers  */
616*5697Smcpowers mp_err
ec_GFp_sub_3(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)617*5697Smcpowers ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r,
618*5697Smcpowers 			const GFMethod *meth)
619*5697Smcpowers {
620*5697Smcpowers 	mp_err res = MP_OKAY;
621*5697Smcpowers 	mp_digit b0 = 0, b1 = 0, b2 = 0;
622*5697Smcpowers 	mp_digit r0 = 0, r1 = 0, r2 = 0;
623*5697Smcpowers 	mp_digit borrow;
624*5697Smcpowers 
625*5697Smcpowers 	switch(MP_USED(a)) {
626*5697Smcpowers 	case 3:
627*5697Smcpowers 		r2 = MP_DIGIT(a,2);
628*5697Smcpowers 	case 2:
629*5697Smcpowers 		r1 = MP_DIGIT(a,1);
630*5697Smcpowers 	case 1:
631*5697Smcpowers 		r0 = MP_DIGIT(a,0);
632*5697Smcpowers 	}
633*5697Smcpowers 	switch(MP_USED(b)) {
634*5697Smcpowers 	case 3:
635*5697Smcpowers 		b2 = MP_DIGIT(b,2);
636*5697Smcpowers 	case 2:
637*5697Smcpowers 		b1 = MP_DIGIT(b,1);
638*5697Smcpowers 	case 1:
639*5697Smcpowers 		b0 = MP_DIGIT(b,0);
640*5697Smcpowers 	}
641*5697Smcpowers 
642*5697Smcpowers #ifndef MPI_AMD64_ADD
643*5697Smcpowers 	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
644*5697Smcpowers 	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
645*5697Smcpowers 	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
646*5697Smcpowers #else
647*5697Smcpowers 	__asm__ (
648*5697Smcpowers                 "xorq   %3,%3           \n\t"
649*5697Smcpowers                 "subq   %4,%0           \n\t"
650*5697Smcpowers                 "sbbq   %5,%1           \n\t"
651*5697Smcpowers                 "sbbq   %6,%2           \n\t"
652*5697Smcpowers                 "adcq   $0,%3           \n\t"
653*5697Smcpowers                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r" (borrow)
654*5697Smcpowers                 : "r" (b0), "r" (b1), "r" (b2),
655*5697Smcpowers 		  "0" (r0), "1" (r1), "2" (r2)
656*5697Smcpowers                 : "%cc" );
657*5697Smcpowers #endif
658*5697Smcpowers 
659*5697Smcpowers 	/* Do quick 'add' if we've gone under 0
660*5697Smcpowers 	 * (subtract the 2's complement of the curve field) */
661*5697Smcpowers 	if (borrow) {
662*5697Smcpowers 	 	b2 = MP_DIGIT(&meth->irr,2);
663*5697Smcpowers 		b1 = MP_DIGIT(&meth->irr,1);
664*5697Smcpowers 		b0 = MP_DIGIT(&meth->irr,0);
665*5697Smcpowers #ifndef MPI_AMD64_ADD
666*5697Smcpowers 		MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
667*5697Smcpowers 		MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
668*5697Smcpowers 		MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
669*5697Smcpowers #else
670*5697Smcpowers 		__asm__ (
671*5697Smcpowers 			"addq   %3,%0           \n\t"
672*5697Smcpowers 			"adcq   %4,%1           \n\t"
673*5697Smcpowers 			"adcq   %5,%2           \n\t"
674*5697Smcpowers 			: "=r"(r0), "=r"(r1), "=r"(r2)
675*5697Smcpowers 			: "r" (b0), "r" (b1), "r" (b2),
676*5697Smcpowers   			  "0" (r0), "1" (r1), "2" (r2)
677*5697Smcpowers 			: "%cc" );
678*5697Smcpowers #endif
679*5697Smcpowers 	}
680*5697Smcpowers 
681*5697Smcpowers #ifdef MPI_AMD64_ADD
682*5697Smcpowers 	/* compiler fakeout? */
683*5697Smcpowers 	if ((r2 == b0) && (r1 == b0) && (r0 == b0)) {
684*5697Smcpowers 		MP_CHECKOK(s_mp_pad(r, 4));
685*5697Smcpowers 	}
686*5697Smcpowers #endif
687*5697Smcpowers 	MP_CHECKOK(s_mp_pad(r, 3));
688*5697Smcpowers 	MP_DIGIT(r, 2) = r2;
689*5697Smcpowers 	MP_DIGIT(r, 1) = r1;
690*5697Smcpowers 	MP_DIGIT(r, 0) = r0;
691*5697Smcpowers 	MP_SIGN(r) = MP_ZPOS;
692*5697Smcpowers 	MP_USED(r) = 3;
693*5697Smcpowers 	s_mp_clamp(r);
694*5697Smcpowers 
695*5697Smcpowers   CLEANUP:
696*5697Smcpowers 	return res;
697*5697Smcpowers }
698*5697Smcpowers 
699*5697Smcpowers /* 4 words */
700*5697Smcpowers mp_err
ec_GFp_sub_4(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)701*5697Smcpowers ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r,
702*5697Smcpowers 			const GFMethod *meth)
703*5697Smcpowers {
704*5697Smcpowers 	mp_err res = MP_OKAY;
705*5697Smcpowers 	mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0;
706*5697Smcpowers 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
707*5697Smcpowers 	mp_digit borrow;
708*5697Smcpowers 
709*5697Smcpowers 	switch(MP_USED(a)) {
710*5697Smcpowers 	case 4:
711*5697Smcpowers 		r3 = MP_DIGIT(a,3);
712*5697Smcpowers 	case 3:
713*5697Smcpowers 		r2 = MP_DIGIT(a,2);
714*5697Smcpowers 	case 2:
715*5697Smcpowers 		r1 = MP_DIGIT(a,1);
716*5697Smcpowers 	case 1:
717*5697Smcpowers 		r0 = MP_DIGIT(a,0);
718*5697Smcpowers 	}
719*5697Smcpowers 	switch(MP_USED(b)) {
720*5697Smcpowers 	case 4:
721*5697Smcpowers 		b3 = MP_DIGIT(b,3);
722*5697Smcpowers 	case 3:
723*5697Smcpowers 		b2 = MP_DIGIT(b,2);
724*5697Smcpowers 	case 2:
725*5697Smcpowers 		b1 = MP_DIGIT(b,1);
726*5697Smcpowers 	case 1:
727*5697Smcpowers 		b0 = MP_DIGIT(b,0);
728*5697Smcpowers 	}
729*5697Smcpowers 
730*5697Smcpowers #ifndef MPI_AMD64_ADD
731*5697Smcpowers 	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
732*5697Smcpowers 	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
733*5697Smcpowers 	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
734*5697Smcpowers 	MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
735*5697Smcpowers #else
736*5697Smcpowers 	__asm__ (
737*5697Smcpowers                 "xorq   %4,%4           \n\t"
738*5697Smcpowers                 "subq   %5,%0           \n\t"
739*5697Smcpowers                 "sbbq   %6,%1           \n\t"
740*5697Smcpowers                 "sbbq   %7,%2           \n\t"
741*5697Smcpowers                 "sbbq   %8,%3           \n\t"
742*5697Smcpowers                 "adcq   $0,%4           \n\t"
743*5697Smcpowers                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r" (borrow)
744*5697Smcpowers                 : "r" (b0), "r" (b1), "r" (b2), "r" (b3),
745*5697Smcpowers 		  "0" (r0), "1" (r1), "2" (r2), "3" (r3)
746*5697Smcpowers                 : "%cc" );
747*5697Smcpowers #endif
748*5697Smcpowers 
749*5697Smcpowers 	/* Do quick 'add' if we've gone under 0
750*5697Smcpowers 	 * (subtract the 2's complement of the curve field) */
751*5697Smcpowers 	if (borrow) {
752*5697Smcpowers 	 	b3 = MP_DIGIT(&meth->irr,3);
753*5697Smcpowers 	 	b2 = MP_DIGIT(&meth->irr,2);
754*5697Smcpowers 		b1 = MP_DIGIT(&meth->irr,1);
755*5697Smcpowers 		b0 = MP_DIGIT(&meth->irr,0);
756*5697Smcpowers #ifndef MPI_AMD64_ADD
757*5697Smcpowers 		MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
758*5697Smcpowers 		MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
759*5697Smcpowers 		MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
760*5697Smcpowers 		MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
761*5697Smcpowers #else
762*5697Smcpowers 		__asm__ (
763*5697Smcpowers 			"addq   %4,%0           \n\t"
764*5697Smcpowers 			"adcq   %5,%1           \n\t"
765*5697Smcpowers 			"adcq   %6,%2           \n\t"
766*5697Smcpowers 			"adcq   %7,%3           \n\t"
767*5697Smcpowers 			: "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
768*5697Smcpowers 			: "r" (b0), "r" (b1), "r" (b2), "r" (b3),
769*5697Smcpowers   			  "0" (r0), "1" (r1), "2" (r2), "3" (r3)
770*5697Smcpowers 			: "%cc" );
771*5697Smcpowers #endif
772*5697Smcpowers 	}
773*5697Smcpowers #ifdef MPI_AMD64_ADD
774*5697Smcpowers 	/* compiler fakeout? */
775*5697Smcpowers 	if ((r3 == b0) && (r1 == b0) && (r0 == b0)) {
776*5697Smcpowers 		MP_CHECKOK(s_mp_pad(r, 4));
777*5697Smcpowers 	}
778*5697Smcpowers #endif
779*5697Smcpowers 	MP_CHECKOK(s_mp_pad(r, 4));
780*5697Smcpowers 	MP_DIGIT(r, 3) = r3;
781*5697Smcpowers 	MP_DIGIT(r, 2) = r2;
782*5697Smcpowers 	MP_DIGIT(r, 1) = r1;
783*5697Smcpowers 	MP_DIGIT(r, 0) = r0;
784*5697Smcpowers 	MP_SIGN(r) = MP_ZPOS;
785*5697Smcpowers 	MP_USED(r) = 4;
786*5697Smcpowers 	s_mp_clamp(r);
787*5697Smcpowers 
788*5697Smcpowers   CLEANUP:
789*5697Smcpowers 	return res;
790*5697Smcpowers }
791*5697Smcpowers 
792*5697Smcpowers /* 5 words */
793*5697Smcpowers mp_err
ec_GFp_sub_5(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)794*5697Smcpowers ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r,
795*5697Smcpowers 			const GFMethod *meth)
796*5697Smcpowers {
797*5697Smcpowers 	mp_err res = MP_OKAY;
798*5697Smcpowers 	mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0;
799*5697Smcpowers 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
800*5697Smcpowers 	mp_digit borrow;
801*5697Smcpowers 
802*5697Smcpowers 	switch(MP_USED(a)) {
803*5697Smcpowers 	case 5:
804*5697Smcpowers 		r4 = MP_DIGIT(a,4);
805*5697Smcpowers 	case 4:
806*5697Smcpowers 		r3 = MP_DIGIT(a,3);
807*5697Smcpowers 	case 3:
808*5697Smcpowers 		r2 = MP_DIGIT(a,2);
809*5697Smcpowers 	case 2:
810*5697Smcpowers 		r1 = MP_DIGIT(a,1);
811*5697Smcpowers 	case 1:
812*5697Smcpowers 		r0 = MP_DIGIT(a,0);
813*5697Smcpowers 	}
814*5697Smcpowers 	switch(MP_USED(b)) {
815*5697Smcpowers 	case 5:
816*5697Smcpowers 		b4 = MP_DIGIT(b,4);
817*5697Smcpowers 	case 4:
818*5697Smcpowers 		b3 = MP_DIGIT(b,3);
819*5697Smcpowers 	case 3:
820*5697Smcpowers 		b2 = MP_DIGIT(b,2);
821*5697Smcpowers 	case 2:
822*5697Smcpowers 		b1 = MP_DIGIT(b,1);
823*5697Smcpowers 	case 1:
824*5697Smcpowers 		b0 = MP_DIGIT(b,0);
825*5697Smcpowers 	}
826*5697Smcpowers 
827*5697Smcpowers 	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
828*5697Smcpowers 	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
829*5697Smcpowers 	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
830*5697Smcpowers 	MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
831*5697Smcpowers 	MP_SUB_BORROW(r4, b4, r4, borrow, borrow);
832*5697Smcpowers 
833*5697Smcpowers 	/* Do quick 'add' if we've gone under 0
834*5697Smcpowers 	 * (subtract the 2's complement of the curve field) */
835*5697Smcpowers 	if (borrow) {
836*5697Smcpowers 	 	b4 = MP_DIGIT(&meth->irr,4);
837*5697Smcpowers 	 	b3 = MP_DIGIT(&meth->irr,3);
838*5697Smcpowers 	 	b2 = MP_DIGIT(&meth->irr,2);
839*5697Smcpowers 		b1 = MP_DIGIT(&meth->irr,1);
840*5697Smcpowers 		b0 = MP_DIGIT(&meth->irr,0);
841*5697Smcpowers 		MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
842*5697Smcpowers 		MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
843*5697Smcpowers 		MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
844*5697Smcpowers 		MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
845*5697Smcpowers 	}
846*5697Smcpowers 	MP_CHECKOK(s_mp_pad(r, 5));
847*5697Smcpowers 	MP_DIGIT(r, 4) = r4;
848*5697Smcpowers 	MP_DIGIT(r, 3) = r3;
849*5697Smcpowers 	MP_DIGIT(r, 2) = r2;
850*5697Smcpowers 	MP_DIGIT(r, 1) = r1;
851*5697Smcpowers 	MP_DIGIT(r, 0) = r0;
852*5697Smcpowers 	MP_SIGN(r) = MP_ZPOS;
853*5697Smcpowers 	MP_USED(r) = 5;
854*5697Smcpowers 	s_mp_clamp(r);
855*5697Smcpowers 
856*5697Smcpowers   CLEANUP:
857*5697Smcpowers 	return res;
858*5697Smcpowers }
859*5697Smcpowers 
860*5697Smcpowers /* 6 words */
861*5697Smcpowers mp_err
ec_GFp_sub_6(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)862*5697Smcpowers ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
863*5697Smcpowers 			const GFMethod *meth)
864*5697Smcpowers {
865*5697Smcpowers 	mp_err res = MP_OKAY;
866*5697Smcpowers 	mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0, b5 = 0;
867*5697Smcpowers 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
868*5697Smcpowers 	mp_digit borrow;
869*5697Smcpowers 
870*5697Smcpowers 	switch(MP_USED(a)) {
871*5697Smcpowers 	case 6:
872*5697Smcpowers 		r5 = MP_DIGIT(a,5);
873*5697Smcpowers 	case 5:
874*5697Smcpowers 		r4 = MP_DIGIT(a,4);
875*5697Smcpowers 	case 4:
876*5697Smcpowers 		r3 = MP_DIGIT(a,3);
877*5697Smcpowers 	case 3:
878*5697Smcpowers 		r2 = MP_DIGIT(a,2);
879*5697Smcpowers 	case 2:
880*5697Smcpowers 		r1 = MP_DIGIT(a,1);
881*5697Smcpowers 	case 1:
882*5697Smcpowers 		r0 = MP_DIGIT(a,0);
883*5697Smcpowers 	}
884*5697Smcpowers 	switch(MP_USED(b)) {
885*5697Smcpowers 	case 6:
886*5697Smcpowers 		b5 = MP_DIGIT(b,5);
887*5697Smcpowers 	case 5:
888*5697Smcpowers 		b4 = MP_DIGIT(b,4);
889*5697Smcpowers 	case 4:
890*5697Smcpowers 		b3 = MP_DIGIT(b,3);
891*5697Smcpowers 	case 3:
892*5697Smcpowers 		b2 = MP_DIGIT(b,2);
893*5697Smcpowers 	case 2:
894*5697Smcpowers 		b1 = MP_DIGIT(b,1);
895*5697Smcpowers 	case 1:
896*5697Smcpowers 		b0 = MP_DIGIT(b,0);
897*5697Smcpowers 	}
898*5697Smcpowers 
899*5697Smcpowers 	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
900*5697Smcpowers 	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
901*5697Smcpowers 	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
902*5697Smcpowers 	MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
903*5697Smcpowers 	MP_SUB_BORROW(r4, b4, r4, borrow, borrow);
904*5697Smcpowers 	MP_SUB_BORROW(r5, b5, r5, borrow, borrow);
905*5697Smcpowers 
906*5697Smcpowers 	/* Do quick 'add' if we've gone under 0
907*5697Smcpowers 	 * (subtract the 2's complement of the curve field) */
908*5697Smcpowers 	if (borrow) {
909*5697Smcpowers 	 	b5 = MP_DIGIT(&meth->irr,5);
910*5697Smcpowers 	 	b4 = MP_DIGIT(&meth->irr,4);
911*5697Smcpowers 	 	b3 = MP_DIGIT(&meth->irr,3);
912*5697Smcpowers 	 	b2 = MP_DIGIT(&meth->irr,2);
913*5697Smcpowers 		b1 = MP_DIGIT(&meth->irr,1);
914*5697Smcpowers 		b0 = MP_DIGIT(&meth->irr,0);
915*5697Smcpowers 		MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
916*5697Smcpowers 		MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
917*5697Smcpowers 		MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
918*5697Smcpowers 		MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
919*5697Smcpowers 		MP_ADD_CARRY(b4, r4, r4, borrow, borrow);
920*5697Smcpowers 	}
921*5697Smcpowers 
922*5697Smcpowers 	MP_CHECKOK(s_mp_pad(r, 6));
923*5697Smcpowers 	MP_DIGIT(r, 5) = r5;
924*5697Smcpowers 	MP_DIGIT(r, 4) = r4;
925*5697Smcpowers 	MP_DIGIT(r, 3) = r3;
926*5697Smcpowers 	MP_DIGIT(r, 2) = r2;
927*5697Smcpowers 	MP_DIGIT(r, 1) = r1;
928*5697Smcpowers 	MP_DIGIT(r, 0) = r0;
929*5697Smcpowers 	MP_SIGN(r) = MP_ZPOS;
930*5697Smcpowers 	MP_USED(r) = 6;
931*5697Smcpowers 	s_mp_clamp(r);
932*5697Smcpowers 
933*5697Smcpowers   CLEANUP:
934*5697Smcpowers 	return res;
935*5697Smcpowers }
936*5697Smcpowers 
937*5697Smcpowers 
938*5697Smcpowers /* Reduces an integer to a field element. */
939*5697Smcpowers mp_err
ec_GFp_mod(const mp_int * a,mp_int * r,const GFMethod * meth)940*5697Smcpowers ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
941*5697Smcpowers {
942*5697Smcpowers 	return mp_mod(a, &meth->irr, r);
943*5697Smcpowers }
944*5697Smcpowers 
945*5697Smcpowers /* Multiplies two field elements. */
946*5697Smcpowers mp_err
ec_GFp_mul(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)947*5697Smcpowers ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r,
948*5697Smcpowers 		   const GFMethod *meth)
949*5697Smcpowers {
950*5697Smcpowers 	return mp_mulmod(a, b, &meth->irr, r);
951*5697Smcpowers }
952*5697Smcpowers 
953*5697Smcpowers /* Squares a field element. */
954*5697Smcpowers mp_err
ec_GFp_sqr(const mp_int * a,mp_int * r,const GFMethod * meth)955*5697Smcpowers ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
956*5697Smcpowers {
957*5697Smcpowers 	return mp_sqrmod(a, &meth->irr, r);
958*5697Smcpowers }
959*5697Smcpowers 
960*5697Smcpowers /* Divides two field elements. If a is NULL, then returns the inverse of
961*5697Smcpowers  * b. */
962*5697Smcpowers mp_err
ec_GFp_div(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)963*5697Smcpowers ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r,
964*5697Smcpowers 		   const GFMethod *meth)
965*5697Smcpowers {
966*5697Smcpowers 	mp_err res = MP_OKAY;
967*5697Smcpowers 	mp_int t;
968*5697Smcpowers 
969*5697Smcpowers 	/* If a is NULL, then return the inverse of b, otherwise return a/b. */
970*5697Smcpowers 	if (a == NULL) {
971*5697Smcpowers 		return mp_invmod(b, &meth->irr, r);
972*5697Smcpowers 	} else {
973*5697Smcpowers 		/* MPI doesn't support divmod, so we implement it using invmod and
974*5697Smcpowers 		 * mulmod. */
975*5697Smcpowers 		MP_CHECKOK(mp_init(&t, FLAG(b)));
976*5697Smcpowers 		MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
977*5697Smcpowers 		MP_CHECKOK(mp_mulmod(a, &t, &meth->irr, r));
978*5697Smcpowers 	  CLEANUP:
979*5697Smcpowers 		mp_clear(&t);
980*5697Smcpowers 		return res;
981*5697Smcpowers 	}
982*5697Smcpowers }
983*5697Smcpowers 
984*5697Smcpowers /* Wrapper functions for generic binary polynomial field arithmetic. */
985*5697Smcpowers 
986*5697Smcpowers /* Adds two field elements. */
987*5697Smcpowers mp_err
ec_GF2m_add(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)988*5697Smcpowers ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r,
989*5697Smcpowers 			const GFMethod *meth)
990*5697Smcpowers {
991*5697Smcpowers 	return mp_badd(a, b, r);
992*5697Smcpowers }
993*5697Smcpowers 
994*5697Smcpowers /* Negates a field element. Note that for binary polynomial fields, the
995*5697Smcpowers  * negation of a field element is the field element itself. */
996*5697Smcpowers mp_err
ec_GF2m_neg(const mp_int * a,mp_int * r,const GFMethod * meth)997*5697Smcpowers ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth)
998*5697Smcpowers {
999*5697Smcpowers 	if (a == r) {
1000*5697Smcpowers 		return MP_OKAY;
1001*5697Smcpowers 	} else {
1002*5697Smcpowers 		return mp_copy(a, r);
1003*5697Smcpowers 	}
1004*5697Smcpowers }
1005*5697Smcpowers 
1006*5697Smcpowers /* Reduces a binary polynomial to a field element. */
1007*5697Smcpowers mp_err
ec_GF2m_mod(const mp_int * a,mp_int * r,const GFMethod * meth)1008*5697Smcpowers ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
1009*5697Smcpowers {
1010*5697Smcpowers 	return mp_bmod(a, meth->irr_arr, r);
1011*5697Smcpowers }
1012*5697Smcpowers 
1013*5697Smcpowers /* Multiplies two field elements. */
1014*5697Smcpowers mp_err
ec_GF2m_mul(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)1015*5697Smcpowers ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r,
1016*5697Smcpowers 			const GFMethod *meth)
1017*5697Smcpowers {
1018*5697Smcpowers 	return mp_bmulmod(a, b, meth->irr_arr, r);
1019*5697Smcpowers }
1020*5697Smcpowers 
1021*5697Smcpowers /* Squares a field element. */
1022*5697Smcpowers mp_err
ec_GF2m_sqr(const mp_int * a,mp_int * r,const GFMethod * meth)1023*5697Smcpowers ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
1024*5697Smcpowers {
1025*5697Smcpowers 	return mp_bsqrmod(a, meth->irr_arr, r);
1026*5697Smcpowers }
1027*5697Smcpowers 
1028*5697Smcpowers /* Divides two field elements. If a is NULL, then returns the inverse of
1029*5697Smcpowers  * b. */
1030*5697Smcpowers mp_err
ec_GF2m_div(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)1031*5697Smcpowers ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r,
1032*5697Smcpowers 			const GFMethod *meth)
1033*5697Smcpowers {
1034*5697Smcpowers 	mp_err res = MP_OKAY;
1035*5697Smcpowers 	mp_int t;
1036*5697Smcpowers 
1037*5697Smcpowers 	/* If a is NULL, then return the inverse of b, otherwise return a/b. */
1038*5697Smcpowers 	if (a == NULL) {
1039*5697Smcpowers 		/* The GF(2^m) portion of MPI doesn't support invmod, so we
1040*5697Smcpowers 		 * compute 1/b. */
1041*5697Smcpowers 		MP_CHECKOK(mp_init(&t, FLAG(b)));
1042*5697Smcpowers 		MP_CHECKOK(mp_set_int(&t, 1));
1043*5697Smcpowers 		MP_CHECKOK(mp_bdivmod(&t, b, &meth->irr, meth->irr_arr, r));
1044*5697Smcpowers 	  CLEANUP:
1045*5697Smcpowers 		mp_clear(&t);
1046*5697Smcpowers 		return res;
1047*5697Smcpowers 	} else {
1048*5697Smcpowers 		return mp_bdivmod(a, b, &meth->irr, meth->irr_arr, r);
1049*5697Smcpowers 	}
1050*5697Smcpowers }
1051