1*5697Smcpowers /*
2*5697Smcpowers * ***** BEGIN LICENSE BLOCK *****
3*5697Smcpowers * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4*5697Smcpowers *
5*5697Smcpowers * The contents of this file are subject to the Mozilla Public License Version
6*5697Smcpowers * 1.1 (the "License"); you may not use this file except in compliance with
7*5697Smcpowers * the License. You may obtain a copy of the License at
8*5697Smcpowers * http://www.mozilla.org/MPL/
9*5697Smcpowers *
10*5697Smcpowers * Software distributed under the License is distributed on an "AS IS" basis,
11*5697Smcpowers * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12*5697Smcpowers * for the specific language governing rights and limitations under the
13*5697Smcpowers * License.
14*5697Smcpowers *
15*5697Smcpowers * The Original Code is the elliptic curve math library for prime field curves.
16*5697Smcpowers *
17*5697Smcpowers * The Initial Developer of the Original Code is
18*5697Smcpowers * Sun Microsystems, Inc.
19*5697Smcpowers * Portions created by the Initial Developer are Copyright (C) 2003
20*5697Smcpowers * the Initial Developer. All Rights Reserved.
21*5697Smcpowers *
22*5697Smcpowers * Contributor(s):
23*5697Smcpowers * Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
24*5697Smcpowers *
25*5697Smcpowers * Alternatively, the contents of this file may be used under the terms of
26*5697Smcpowers * either the GNU General Public License Version 2 or later (the "GPL"), or
27*5697Smcpowers * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28*5697Smcpowers * in which case the provisions of the GPL or the LGPL are applicable instead
29*5697Smcpowers * of those above. If you wish to allow use of your version of this file only
30*5697Smcpowers * under the terms of either the GPL or the LGPL, and not to allow others to
31*5697Smcpowers * use your version of this file under the terms of the MPL, indicate your
32*5697Smcpowers * decision by deleting the provisions above and replace them with the notice
33*5697Smcpowers * and other provisions required by the GPL or the LGPL. If you do not delete
34*5697Smcpowers * the provisions above, a recipient may use your version of this file under
35*5697Smcpowers * the terms of any one of the MPL, the GPL or the LGPL.
36*5697Smcpowers *
37*5697Smcpowers * ***** END LICENSE BLOCK ***** */
38*5697Smcpowers /*
39*5697Smcpowers * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
40*5697Smcpowers * Use is subject to license terms.
41*5697Smcpowers *
42*5697Smcpowers * Sun elects to use this software under the MPL license.
43*5697Smcpowers */
44*5697Smcpowers
45*5697Smcpowers #pragma ident "%Z%%M% %I% %E% SMI"
46*5697Smcpowers
47*5697Smcpowers #include "ecp.h"
48*5697Smcpowers #include "mpi.h"
49*5697Smcpowers #include "mplogic.h"
50*5697Smcpowers #include "mpi-priv.h"
51*5697Smcpowers #ifndef _KERNEL
52*5697Smcpowers #include <stdlib.h>
53*5697Smcpowers #endif
54*5697Smcpowers
55*5697Smcpowers #define ECP192_DIGITS ECL_CURVE_DIGITS(192)
56*5697Smcpowers
57*5697Smcpowers /* Fast modular reduction for p192 = 2^192 - 2^64 - 1. a can be r. Uses
58*5697Smcpowers * algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software
59*5697Smcpowers * Implementation of the NIST Elliptic Curves over Prime Fields. */
60*5697Smcpowers mp_err
ec_GFp_nistp192_mod(const mp_int * a,mp_int * r,const GFMethod * meth)61*5697Smcpowers ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
62*5697Smcpowers {
63*5697Smcpowers mp_err res = MP_OKAY;
64*5697Smcpowers mp_size a_used = MP_USED(a);
65*5697Smcpowers mp_digit r3;
66*5697Smcpowers #ifndef MPI_AMD64_ADD
67*5697Smcpowers mp_digit carry;
68*5697Smcpowers #endif
69*5697Smcpowers #ifdef ECL_THIRTY_TWO_BIT
70*5697Smcpowers mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0;
71*5697Smcpowers mp_digit r0a, r0b, r1a, r1b, r2a, r2b;
72*5697Smcpowers #else
73*5697Smcpowers mp_digit a5 = 0, a4 = 0, a3 = 0;
74*5697Smcpowers mp_digit r0, r1, r2;
75*5697Smcpowers #endif
76*5697Smcpowers
77*5697Smcpowers /* reduction not needed if a is not larger than field size */
78*5697Smcpowers if (a_used < ECP192_DIGITS) {
79*5697Smcpowers if (a == r) {
80*5697Smcpowers return MP_OKAY;
81*5697Smcpowers }
82*5697Smcpowers return mp_copy(a, r);
83*5697Smcpowers }
84*5697Smcpowers
85*5697Smcpowers /* for polynomials larger than twice the field size, use regular
86*5697Smcpowers * reduction */
87*5697Smcpowers if (a_used > ECP192_DIGITS*2) {
88*5697Smcpowers MP_CHECKOK(mp_mod(a, &meth->irr, r));
89*5697Smcpowers } else {
90*5697Smcpowers /* copy out upper words of a */
91*5697Smcpowers
92*5697Smcpowers #ifdef ECL_THIRTY_TWO_BIT
93*5697Smcpowers
94*5697Smcpowers /* in all the math below,
95*5697Smcpowers * nXb is most signifiant, nXa is least significant */
96*5697Smcpowers switch (a_used) {
97*5697Smcpowers case 12:
98*5697Smcpowers a5b = MP_DIGIT(a, 11);
99*5697Smcpowers case 11:
100*5697Smcpowers a5a = MP_DIGIT(a, 10);
101*5697Smcpowers case 10:
102*5697Smcpowers a4b = MP_DIGIT(a, 9);
103*5697Smcpowers case 9:
104*5697Smcpowers a4a = MP_DIGIT(a, 8);
105*5697Smcpowers case 8:
106*5697Smcpowers a3b = MP_DIGIT(a, 7);
107*5697Smcpowers case 7:
108*5697Smcpowers a3a = MP_DIGIT(a, 6);
109*5697Smcpowers }
110*5697Smcpowers
111*5697Smcpowers
112*5697Smcpowers r2b= MP_DIGIT(a, 5);
113*5697Smcpowers r2a= MP_DIGIT(a, 4);
114*5697Smcpowers r1b = MP_DIGIT(a, 3);
115*5697Smcpowers r1a = MP_DIGIT(a, 2);
116*5697Smcpowers r0b = MP_DIGIT(a, 1);
117*5697Smcpowers r0a = MP_DIGIT(a, 0);
118*5697Smcpowers
119*5697Smcpowers /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
120*5697Smcpowers MP_ADD_CARRY(r0a, a3a, r0a, 0, carry);
121*5697Smcpowers MP_ADD_CARRY(r0b, a3b, r0b, carry, carry);
122*5697Smcpowers MP_ADD_CARRY(r1a, a3a, r1a, carry, carry);
123*5697Smcpowers MP_ADD_CARRY(r1b, a3b, r1b, carry, carry);
124*5697Smcpowers MP_ADD_CARRY(r2a, a4a, r2a, carry, carry);
125*5697Smcpowers MP_ADD_CARRY(r2b, a4b, r2b, carry, carry);
126*5697Smcpowers r3 = carry; carry = 0;
127*5697Smcpowers MP_ADD_CARRY(r0a, a5a, r0a, 0, carry);
128*5697Smcpowers MP_ADD_CARRY(r0b, a5b, r0b, carry, carry);
129*5697Smcpowers MP_ADD_CARRY(r1a, a5a, r1a, carry, carry);
130*5697Smcpowers MP_ADD_CARRY(r1b, a5b, r1b, carry, carry);
131*5697Smcpowers MP_ADD_CARRY(r2a, a5a, r2a, carry, carry);
132*5697Smcpowers MP_ADD_CARRY(r2b, a5b, r2b, carry, carry);
133*5697Smcpowers r3 += carry;
134*5697Smcpowers MP_ADD_CARRY(r1a, a4a, r1a, 0, carry);
135*5697Smcpowers MP_ADD_CARRY(r1b, a4b, r1b, carry, carry);
136*5697Smcpowers MP_ADD_CARRY(r2a, 0, r2a, carry, carry);
137*5697Smcpowers MP_ADD_CARRY(r2b, 0, r2b, carry, carry);
138*5697Smcpowers r3 += carry;
139*5697Smcpowers
140*5697Smcpowers /* reduce out the carry */
141*5697Smcpowers while (r3) {
142*5697Smcpowers MP_ADD_CARRY(r0a, r3, r0a, 0, carry);
143*5697Smcpowers MP_ADD_CARRY(r0b, 0, r0b, carry, carry);
144*5697Smcpowers MP_ADD_CARRY(r1a, r3, r1a, carry, carry);
145*5697Smcpowers MP_ADD_CARRY(r1b, 0, r1b, carry, carry);
146*5697Smcpowers MP_ADD_CARRY(r2a, 0, r2a, carry, carry);
147*5697Smcpowers MP_ADD_CARRY(r2b, 0, r2b, carry, carry);
148*5697Smcpowers r3 = carry;
149*5697Smcpowers }
150*5697Smcpowers
151*5697Smcpowers /* check for final reduction */
152*5697Smcpowers /*
153*5697Smcpowers * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
154*5697Smcpowers * 0xffffffffffffffff. That means we can only be over and need
155*5697Smcpowers * one more reduction
156*5697Smcpowers * if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
157*5697Smcpowers * and
158*5697Smcpowers * r1 == 0xffffffffffffffffff or
159*5697Smcpowers * r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
160*5697Smcpowers * In all cases, we subtract the field (or add the 2's
161*5697Smcpowers * complement value (1,1,0)). (r0, r1, r2)
162*5697Smcpowers */
163*5697Smcpowers if (((r2b == 0xffffffff) && (r2a == 0xffffffff)
164*5697Smcpowers && (r1b == 0xffffffff) ) &&
165*5697Smcpowers ((r1a == 0xffffffff) ||
166*5697Smcpowers (r1a == 0xfffffffe) && (r0a == 0xffffffff) &&
167*5697Smcpowers (r0b == 0xffffffff)) ) {
168*5697Smcpowers /* do a quick subtract */
169*5697Smcpowers MP_ADD_CARRY(r0a, 1, r0a, 0, carry);
170*5697Smcpowers r0b += carry;
171*5697Smcpowers r1a = r1b = r2a = r2b = 0;
172*5697Smcpowers }
173*5697Smcpowers
174*5697Smcpowers /* set the lower words of r */
175*5697Smcpowers if (a != r) {
176*5697Smcpowers MP_CHECKOK(s_mp_pad(r, 6));
177*5697Smcpowers }
178*5697Smcpowers MP_DIGIT(r, 5) = r2b;
179*5697Smcpowers MP_DIGIT(r, 4) = r2a;
180*5697Smcpowers MP_DIGIT(r, 3) = r1b;
181*5697Smcpowers MP_DIGIT(r, 2) = r1a;
182*5697Smcpowers MP_DIGIT(r, 1) = r0b;
183*5697Smcpowers MP_DIGIT(r, 0) = r0a;
184*5697Smcpowers MP_USED(r) = 6;
185*5697Smcpowers #else
186*5697Smcpowers switch (a_used) {
187*5697Smcpowers case 6:
188*5697Smcpowers a5 = MP_DIGIT(a, 5);
189*5697Smcpowers case 5:
190*5697Smcpowers a4 = MP_DIGIT(a, 4);
191*5697Smcpowers case 4:
192*5697Smcpowers a3 = MP_DIGIT(a, 3);
193*5697Smcpowers }
194*5697Smcpowers
195*5697Smcpowers r2 = MP_DIGIT(a, 2);
196*5697Smcpowers r1 = MP_DIGIT(a, 1);
197*5697Smcpowers r0 = MP_DIGIT(a, 0);
198*5697Smcpowers
199*5697Smcpowers /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
200*5697Smcpowers #ifndef MPI_AMD64_ADD
201*5697Smcpowers MP_ADD_CARRY(r0, a3, r0, 0, carry);
202*5697Smcpowers MP_ADD_CARRY(r1, a3, r1, carry, carry);
203*5697Smcpowers MP_ADD_CARRY(r2, a4, r2, carry, carry);
204*5697Smcpowers r3 = carry;
205*5697Smcpowers MP_ADD_CARRY(r0, a5, r0, 0, carry);
206*5697Smcpowers MP_ADD_CARRY(r1, a5, r1, carry, carry);
207*5697Smcpowers MP_ADD_CARRY(r2, a5, r2, carry, carry);
208*5697Smcpowers r3 += carry;
209*5697Smcpowers MP_ADD_CARRY(r1, a4, r1, 0, carry);
210*5697Smcpowers MP_ADD_CARRY(r2, 0, r2, carry, carry);
211*5697Smcpowers r3 += carry;
212*5697Smcpowers
213*5697Smcpowers #else
214*5697Smcpowers r2 = MP_DIGIT(a, 2);
215*5697Smcpowers r1 = MP_DIGIT(a, 1);
216*5697Smcpowers r0 = MP_DIGIT(a, 0);
217*5697Smcpowers
218*5697Smcpowers /* set the lower words of r */
219*5697Smcpowers __asm__ (
220*5697Smcpowers "xorq %3,%3 \n\t"
221*5697Smcpowers "addq %4,%0 \n\t"
222*5697Smcpowers "adcq %4,%1 \n\t"
223*5697Smcpowers "adcq %5,%2 \n\t"
224*5697Smcpowers "adcq $0,%3 \n\t"
225*5697Smcpowers "addq %6,%0 \n\t"
226*5697Smcpowers "adcq %6,%1 \n\t"
227*5697Smcpowers "adcq %6,%2 \n\t"
228*5697Smcpowers "adcq $0,%3 \n\t"
229*5697Smcpowers "addq %5,%1 \n\t"
230*5697Smcpowers "adcq $0,%2 \n\t"
231*5697Smcpowers "adcq $0,%3 \n\t"
232*5697Smcpowers : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3),
233*5697Smcpowers "=r"(a4), "=r"(a5)
234*5697Smcpowers : "0" (r0), "1" (r1), "2" (r2), "3" (r3),
235*5697Smcpowers "4" (a3), "5" (a4), "6"(a5)
236*5697Smcpowers : "%cc" );
237*5697Smcpowers #endif
238*5697Smcpowers
239*5697Smcpowers /* reduce out the carry */
240*5697Smcpowers while (r3) {
241*5697Smcpowers #ifndef MPI_AMD64_ADD
242*5697Smcpowers MP_ADD_CARRY(r0, r3, r0, 0, carry);
243*5697Smcpowers MP_ADD_CARRY(r1, r3, r1, carry, carry);
244*5697Smcpowers MP_ADD_CARRY(r2, 0, r2, carry, carry);
245*5697Smcpowers r3 = carry;
246*5697Smcpowers #else
247*5697Smcpowers a3=r3;
248*5697Smcpowers __asm__ (
249*5697Smcpowers "xorq %3,%3 \n\t"
250*5697Smcpowers "addq %4,%0 \n\t"
251*5697Smcpowers "adcq %4,%1 \n\t"
252*5697Smcpowers "adcq $0,%2 \n\t"
253*5697Smcpowers "adcq $0,%3 \n\t"
254*5697Smcpowers : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3)
255*5697Smcpowers : "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3)
256*5697Smcpowers : "%cc" );
257*5697Smcpowers #endif
258*5697Smcpowers }
259*5697Smcpowers
260*5697Smcpowers /* check for final reduction */
261*5697Smcpowers /*
262*5697Smcpowers * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
263*5697Smcpowers * 0xffffffffffffffff. That means we can only be over and need
264*5697Smcpowers * one more reduction
265*5697Smcpowers * if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
266*5697Smcpowers * and
267*5697Smcpowers * r1 == 0xffffffffffffffffff or
268*5697Smcpowers * r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
269*5697Smcpowers * In all cases, we subtract the field (or add the 2's
270*5697Smcpowers * complement value (1,1,0)). (r0, r1, r2)
271*5697Smcpowers */
272*5697Smcpowers if (r3 || ((r2 == MP_DIGIT_MAX) &&
273*5697Smcpowers ((r1 == MP_DIGIT_MAX) ||
274*5697Smcpowers ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
275*5697Smcpowers /* do a quick subtract */
276*5697Smcpowers r0++;
277*5697Smcpowers r1 = r2 = 0;
278*5697Smcpowers }
279*5697Smcpowers /* set the lower words of r */
280*5697Smcpowers if (a != r) {
281*5697Smcpowers MP_CHECKOK(s_mp_pad(r, 3));
282*5697Smcpowers }
283*5697Smcpowers MP_DIGIT(r, 2) = r2;
284*5697Smcpowers MP_DIGIT(r, 1) = r1;
285*5697Smcpowers MP_DIGIT(r, 0) = r0;
286*5697Smcpowers MP_USED(r) = 3;
287*5697Smcpowers #endif
288*5697Smcpowers }
289*5697Smcpowers
290*5697Smcpowers CLEANUP:
291*5697Smcpowers return res;
292*5697Smcpowers }
293*5697Smcpowers
294*5697Smcpowers #ifndef ECL_THIRTY_TWO_BIT
295*5697Smcpowers /* Compute the sum of 192 bit curves. Do the work in-line since the
296*5697Smcpowers * number of words are so small, we don't want to overhead of mp function
297*5697Smcpowers * calls. Uses optimized modular reduction for p192.
298*5697Smcpowers */
299*5697Smcpowers mp_err
ec_GFp_nistp192_add(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)300*5697Smcpowers ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r,
301*5697Smcpowers const GFMethod *meth)
302*5697Smcpowers {
303*5697Smcpowers mp_err res = MP_OKAY;
304*5697Smcpowers mp_digit a0 = 0, a1 = 0, a2 = 0;
305*5697Smcpowers mp_digit r0 = 0, r1 = 0, r2 = 0;
306*5697Smcpowers mp_digit carry;
307*5697Smcpowers
308*5697Smcpowers switch(MP_USED(a)) {
309*5697Smcpowers case 3:
310*5697Smcpowers a2 = MP_DIGIT(a,2);
311*5697Smcpowers case 2:
312*5697Smcpowers a1 = MP_DIGIT(a,1);
313*5697Smcpowers case 1:
314*5697Smcpowers a0 = MP_DIGIT(a,0);
315*5697Smcpowers }
316*5697Smcpowers switch(MP_USED(b)) {
317*5697Smcpowers case 3:
318*5697Smcpowers r2 = MP_DIGIT(b,2);
319*5697Smcpowers case 2:
320*5697Smcpowers r1 = MP_DIGIT(b,1);
321*5697Smcpowers case 1:
322*5697Smcpowers r0 = MP_DIGIT(b,0);
323*5697Smcpowers }
324*5697Smcpowers
325*5697Smcpowers #ifndef MPI_AMD64_ADD
326*5697Smcpowers MP_ADD_CARRY(a0, r0, r0, 0, carry);
327*5697Smcpowers MP_ADD_CARRY(a1, r1, r1, carry, carry);
328*5697Smcpowers MP_ADD_CARRY(a2, r2, r2, carry, carry);
329*5697Smcpowers #else
330*5697Smcpowers __asm__ (
331*5697Smcpowers "xorq %3,%3 \n\t"
332*5697Smcpowers "addq %4,%0 \n\t"
333*5697Smcpowers "adcq %5,%1 \n\t"
334*5697Smcpowers "adcq %6,%2 \n\t"
335*5697Smcpowers "adcq $0,%3 \n\t"
336*5697Smcpowers : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
337*5697Smcpowers : "r" (a0), "r" (a1), "r" (a2), "0" (r0),
338*5697Smcpowers "1" (r1), "2" (r2)
339*5697Smcpowers : "%cc" );
340*5697Smcpowers #endif
341*5697Smcpowers
342*5697Smcpowers /* Do quick 'subract' if we've gone over
343*5697Smcpowers * (add the 2's complement of the curve field) */
344*5697Smcpowers if (carry || ((r2 == MP_DIGIT_MAX) &&
345*5697Smcpowers ((r1 == MP_DIGIT_MAX) ||
346*5697Smcpowers ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
347*5697Smcpowers #ifndef MPI_AMD64_ADD
348*5697Smcpowers MP_ADD_CARRY(r0, 1, r0, 0, carry);
349*5697Smcpowers MP_ADD_CARRY(r1, 1, r1, carry, carry);
350*5697Smcpowers MP_ADD_CARRY(r2, 0, r2, carry, carry);
351*5697Smcpowers #else
352*5697Smcpowers __asm__ (
353*5697Smcpowers "addq $1,%0 \n\t"
354*5697Smcpowers "adcq $1,%1 \n\t"
355*5697Smcpowers "adcq $0,%2 \n\t"
356*5697Smcpowers : "=r"(r0), "=r"(r1), "=r"(r2)
357*5697Smcpowers : "0" (r0), "1" (r1), "2" (r2)
358*5697Smcpowers : "%cc" );
359*5697Smcpowers #endif
360*5697Smcpowers }
361*5697Smcpowers
362*5697Smcpowers
363*5697Smcpowers MP_CHECKOK(s_mp_pad(r, 3));
364*5697Smcpowers MP_DIGIT(r, 2) = r2;
365*5697Smcpowers MP_DIGIT(r, 1) = r1;
366*5697Smcpowers MP_DIGIT(r, 0) = r0;
367*5697Smcpowers MP_SIGN(r) = MP_ZPOS;
368*5697Smcpowers MP_USED(r) = 3;
369*5697Smcpowers s_mp_clamp(r);
370*5697Smcpowers
371*5697Smcpowers
372*5697Smcpowers CLEANUP:
373*5697Smcpowers return res;
374*5697Smcpowers }
375*5697Smcpowers
376*5697Smcpowers /* Compute the diff of 192 bit curves. Do the work in-line since the
377*5697Smcpowers * number of words are so small, we don't want to overhead of mp function
378*5697Smcpowers * calls. Uses optimized modular reduction for p192.
379*5697Smcpowers */
380*5697Smcpowers mp_err
ec_GFp_nistp192_sub(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)381*5697Smcpowers ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r,
382*5697Smcpowers const GFMethod *meth)
383*5697Smcpowers {
384*5697Smcpowers mp_err res = MP_OKAY;
385*5697Smcpowers mp_digit b0 = 0, b1 = 0, b2 = 0;
386*5697Smcpowers mp_digit r0 = 0, r1 = 0, r2 = 0;
387*5697Smcpowers mp_digit borrow;
388*5697Smcpowers
389*5697Smcpowers switch(MP_USED(a)) {
390*5697Smcpowers case 3:
391*5697Smcpowers r2 = MP_DIGIT(a,2);
392*5697Smcpowers case 2:
393*5697Smcpowers r1 = MP_DIGIT(a,1);
394*5697Smcpowers case 1:
395*5697Smcpowers r0 = MP_DIGIT(a,0);
396*5697Smcpowers }
397*5697Smcpowers
398*5697Smcpowers switch(MP_USED(b)) {
399*5697Smcpowers case 3:
400*5697Smcpowers b2 = MP_DIGIT(b,2);
401*5697Smcpowers case 2:
402*5697Smcpowers b1 = MP_DIGIT(b,1);
403*5697Smcpowers case 1:
404*5697Smcpowers b0 = MP_DIGIT(b,0);
405*5697Smcpowers }
406*5697Smcpowers
407*5697Smcpowers #ifndef MPI_AMD64_ADD
408*5697Smcpowers MP_SUB_BORROW(r0, b0, r0, 0, borrow);
409*5697Smcpowers MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
410*5697Smcpowers MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
411*5697Smcpowers #else
412*5697Smcpowers __asm__ (
413*5697Smcpowers "xorq %3,%3 \n\t"
414*5697Smcpowers "subq %4,%0 \n\t"
415*5697Smcpowers "sbbq %5,%1 \n\t"
416*5697Smcpowers "sbbq %6,%2 \n\t"
417*5697Smcpowers "adcq $0,%3 \n\t"
418*5697Smcpowers : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow)
419*5697Smcpowers : "r" (b0), "r" (b1), "r" (b2), "0" (r0),
420*5697Smcpowers "1" (r1), "2" (r2)
421*5697Smcpowers : "%cc" );
422*5697Smcpowers #endif
423*5697Smcpowers
424*5697Smcpowers /* Do quick 'add' if we've gone under 0
425*5697Smcpowers * (subtract the 2's complement of the curve field) */
426*5697Smcpowers if (borrow) {
427*5697Smcpowers #ifndef MPI_AMD64_ADD
428*5697Smcpowers MP_SUB_BORROW(r0, 1, r0, 0, borrow);
429*5697Smcpowers MP_SUB_BORROW(r1, 1, r1, borrow, borrow);
430*5697Smcpowers MP_SUB_BORROW(r2, 0, r2, borrow, borrow);
431*5697Smcpowers #else
432*5697Smcpowers __asm__ (
433*5697Smcpowers "subq $1,%0 \n\t"
434*5697Smcpowers "sbbq $1,%1 \n\t"
435*5697Smcpowers "sbbq $0,%2 \n\t"
436*5697Smcpowers : "=r"(r0), "=r"(r1), "=r"(r2)
437*5697Smcpowers : "0" (r0), "1" (r1), "2" (r2)
438*5697Smcpowers : "%cc" );
439*5697Smcpowers #endif
440*5697Smcpowers }
441*5697Smcpowers
442*5697Smcpowers MP_CHECKOK(s_mp_pad(r, 3));
443*5697Smcpowers MP_DIGIT(r, 2) = r2;
444*5697Smcpowers MP_DIGIT(r, 1) = r1;
445*5697Smcpowers MP_DIGIT(r, 0) = r0;
446*5697Smcpowers MP_SIGN(r) = MP_ZPOS;
447*5697Smcpowers MP_USED(r) = 3;
448*5697Smcpowers s_mp_clamp(r);
449*5697Smcpowers
450*5697Smcpowers CLEANUP:
451*5697Smcpowers return res;
452*5697Smcpowers }
453*5697Smcpowers
454*5697Smcpowers #endif
455*5697Smcpowers
456*5697Smcpowers /* Compute the square of polynomial a, reduce modulo p192. Store the
457*5697Smcpowers * result in r. r could be a. Uses optimized modular reduction for p192.
458*5697Smcpowers */
459*5697Smcpowers mp_err
ec_GFp_nistp192_sqr(const mp_int * a,mp_int * r,const GFMethod * meth)460*5697Smcpowers ec_GFp_nistp192_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
461*5697Smcpowers {
462*5697Smcpowers mp_err res = MP_OKAY;
463*5697Smcpowers
464*5697Smcpowers MP_CHECKOK(mp_sqr(a, r));
465*5697Smcpowers MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
466*5697Smcpowers CLEANUP:
467*5697Smcpowers return res;
468*5697Smcpowers }
469*5697Smcpowers
470*5697Smcpowers /* Compute the product of two polynomials a and b, reduce modulo p192.
471*5697Smcpowers * Store the result in r. r could be a or b; a could be b. Uses
472*5697Smcpowers * optimized modular reduction for p192. */
473*5697Smcpowers mp_err
ec_GFp_nistp192_mul(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)474*5697Smcpowers ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r,
475*5697Smcpowers const GFMethod *meth)
476*5697Smcpowers {
477*5697Smcpowers mp_err res = MP_OKAY;
478*5697Smcpowers
479*5697Smcpowers MP_CHECKOK(mp_mul(a, b, r));
480*5697Smcpowers MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
481*5697Smcpowers CLEANUP:
482*5697Smcpowers return res;
483*5697Smcpowers }
484*5697Smcpowers
485*5697Smcpowers /* Divides two field elements. If a is NULL, then returns the inverse of
486*5697Smcpowers * b. */
487*5697Smcpowers mp_err
ec_GFp_nistp192_div(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)488*5697Smcpowers ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r,
489*5697Smcpowers const GFMethod *meth)
490*5697Smcpowers {
491*5697Smcpowers mp_err res = MP_OKAY;
492*5697Smcpowers mp_int t;
493*5697Smcpowers
494*5697Smcpowers /* If a is NULL, then return the inverse of b, otherwise return a/b. */
495*5697Smcpowers if (a == NULL) {
496*5697Smcpowers return mp_invmod(b, &meth->irr, r);
497*5697Smcpowers } else {
498*5697Smcpowers /* MPI doesn't support divmod, so we implement it using invmod and
499*5697Smcpowers * mulmod. */
500*5697Smcpowers MP_CHECKOK(mp_init(&t, FLAG(b)));
501*5697Smcpowers MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
502*5697Smcpowers MP_CHECKOK(mp_mul(a, &t, r));
503*5697Smcpowers MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
504*5697Smcpowers CLEANUP:
505*5697Smcpowers mp_clear(&t);
506*5697Smcpowers return res;
507*5697Smcpowers }
508*5697Smcpowers }
509*5697Smcpowers
510*5697Smcpowers /* Wire in fast field arithmetic and precomputation of base point for
511*5697Smcpowers * named curves. */
512*5697Smcpowers mp_err
ec_group_set_gfp192(ECGroup * group,ECCurveName name)513*5697Smcpowers ec_group_set_gfp192(ECGroup *group, ECCurveName name)
514*5697Smcpowers {
515*5697Smcpowers if (name == ECCurve_NIST_P192) {
516*5697Smcpowers group->meth->field_mod = &ec_GFp_nistp192_mod;
517*5697Smcpowers group->meth->field_mul = &ec_GFp_nistp192_mul;
518*5697Smcpowers group->meth->field_sqr = &ec_GFp_nistp192_sqr;
519*5697Smcpowers group->meth->field_div = &ec_GFp_nistp192_div;
520*5697Smcpowers #ifndef ECL_THIRTY_TWO_BIT
521*5697Smcpowers group->meth->field_add = &ec_GFp_nistp192_add;
522*5697Smcpowers group->meth->field_sub = &ec_GFp_nistp192_sub;
523*5697Smcpowers #endif
524*5697Smcpowers }
525*5697Smcpowers return MP_OKAY;
526*5697Smcpowers }
527