xref: /openbsd-src/sbin/iked/smult_curve25519_ref.c (revision 45135ebc470e964668b4bd6a6d38f0ebdab5c216)
1*45135ebcSreyk /* $OpenBSD: smult_curve25519_ref.c,v 1.1 2014/08/27 10:28:57 reyk Exp $ */
2*45135ebcSreyk /*
3*45135ebcSreyk version 20081011
4*45135ebcSreyk Matthew Dempsky
5*45135ebcSreyk Public domain.
6*45135ebcSreyk Derived from public domain code by D. J. Bernstein.
7*45135ebcSreyk */
8*45135ebcSreyk 
9*45135ebcSreyk int crypto_scalarmult_curve25519(unsigned char *, const unsigned char *, const unsigned char *);
10*45135ebcSreyk 
add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])11*45135ebcSreyk static void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
12*45135ebcSreyk {
13*45135ebcSreyk   unsigned int j;
14*45135ebcSreyk   unsigned int u;
15*45135ebcSreyk   u = 0;
16*45135ebcSreyk   for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; }
17*45135ebcSreyk   u += a[31] + b[31]; out[31] = u;
18*45135ebcSreyk }
19*45135ebcSreyk 
sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])20*45135ebcSreyk static void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
21*45135ebcSreyk {
22*45135ebcSreyk   unsigned int j;
23*45135ebcSreyk   unsigned int u;
24*45135ebcSreyk   u = 218;
25*45135ebcSreyk   for (j = 0;j < 31;++j) {
26*45135ebcSreyk     u += a[j] + 65280 - b[j];
27*45135ebcSreyk     out[j] = u & 255;
28*45135ebcSreyk     u >>= 8;
29*45135ebcSreyk   }
30*45135ebcSreyk   u += a[31] - b[31];
31*45135ebcSreyk   out[31] = u;
32*45135ebcSreyk }
33*45135ebcSreyk 
squeeze(unsigned int a[32])34*45135ebcSreyk static void squeeze(unsigned int a[32])
35*45135ebcSreyk {
36*45135ebcSreyk   unsigned int j;
37*45135ebcSreyk   unsigned int u;
38*45135ebcSreyk   u = 0;
39*45135ebcSreyk   for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
40*45135ebcSreyk   u += a[31]; a[31] = u & 127;
41*45135ebcSreyk   u = 19 * (u >> 7);
42*45135ebcSreyk   for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
43*45135ebcSreyk   u += a[31]; a[31] = u;
44*45135ebcSreyk }
45*45135ebcSreyk 
46*45135ebcSreyk static const unsigned int minusp[32] = {
47*45135ebcSreyk  19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128
48*45135ebcSreyk } ;
49*45135ebcSreyk 
freeze(unsigned int a[32])50*45135ebcSreyk static void freeze(unsigned int a[32])
51*45135ebcSreyk {
52*45135ebcSreyk   unsigned int aorig[32];
53*45135ebcSreyk   unsigned int j;
54*45135ebcSreyk   unsigned int negative;
55*45135ebcSreyk 
56*45135ebcSreyk   for (j = 0;j < 32;++j) aorig[j] = a[j];
57*45135ebcSreyk   add(a,a,minusp);
58*45135ebcSreyk   negative = -((a[31] >> 7) & 1);
59*45135ebcSreyk   for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]);
60*45135ebcSreyk }
61*45135ebcSreyk 
mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])62*45135ebcSreyk static void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
63*45135ebcSreyk {
64*45135ebcSreyk   unsigned int i;
65*45135ebcSreyk   unsigned int j;
66*45135ebcSreyk   unsigned int u;
67*45135ebcSreyk 
68*45135ebcSreyk   for (i = 0;i < 32;++i) {
69*45135ebcSreyk     u = 0;
70*45135ebcSreyk     for (j = 0;j <= i;++j) u += a[j] * b[i - j];
71*45135ebcSreyk     for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j];
72*45135ebcSreyk     out[i] = u;
73*45135ebcSreyk   }
74*45135ebcSreyk   squeeze(out);
75*45135ebcSreyk }
76*45135ebcSreyk 
mult121665(unsigned int out[32],const unsigned int a[32])77*45135ebcSreyk static void mult121665(unsigned int out[32],const unsigned int a[32])
78*45135ebcSreyk {
79*45135ebcSreyk   unsigned int j;
80*45135ebcSreyk   unsigned int u;
81*45135ebcSreyk 
82*45135ebcSreyk   u = 0;
83*45135ebcSreyk   for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; }
84*45135ebcSreyk   u += 121665 * a[31]; out[31] = u & 127;
85*45135ebcSreyk   u = 19 * (u >> 7);
86*45135ebcSreyk   for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; }
87*45135ebcSreyk   u += out[j]; out[j] = u;
88*45135ebcSreyk }
89*45135ebcSreyk 
square(unsigned int out[32],const unsigned int a[32])90*45135ebcSreyk static void square(unsigned int out[32],const unsigned int a[32])
91*45135ebcSreyk {
92*45135ebcSreyk   unsigned int i;
93*45135ebcSreyk   unsigned int j;
94*45135ebcSreyk   unsigned int u;
95*45135ebcSreyk 
96*45135ebcSreyk   for (i = 0;i < 32;++i) {
97*45135ebcSreyk     u = 0;
98*45135ebcSreyk     for (j = 0;j < i - j;++j) u += a[j] * a[i - j];
99*45135ebcSreyk     for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j];
100*45135ebcSreyk     u *= 2;
101*45135ebcSreyk     if ((i & 1) == 0) {
102*45135ebcSreyk       u += a[i / 2] * a[i / 2];
103*45135ebcSreyk       u += 38 * a[i / 2 + 16] * a[i / 2 + 16];
104*45135ebcSreyk     }
105*45135ebcSreyk     out[i] = u;
106*45135ebcSreyk   }
107*45135ebcSreyk   squeeze(out);
108*45135ebcSreyk }
109*45135ebcSreyk 
select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)110*45135ebcSreyk static void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)
111*45135ebcSreyk {
112*45135ebcSreyk   unsigned int j;
113*45135ebcSreyk   unsigned int t;
114*45135ebcSreyk   unsigned int bminus1;
115*45135ebcSreyk 
116*45135ebcSreyk   bminus1 = b - 1;
117*45135ebcSreyk   for (j = 0;j < 64;++j) {
118*45135ebcSreyk     t = bminus1 & (r[j] ^ s[j]);
119*45135ebcSreyk     p[j] = s[j] ^ t;
120*45135ebcSreyk     q[j] = r[j] ^ t;
121*45135ebcSreyk   }
122*45135ebcSreyk }
123*45135ebcSreyk 
mainloop(unsigned int work[64],const unsigned char e[32])124*45135ebcSreyk static void mainloop(unsigned int work[64],const unsigned char e[32])
125*45135ebcSreyk {
126*45135ebcSreyk   unsigned int xzm1[64];
127*45135ebcSreyk   unsigned int xzm[64];
128*45135ebcSreyk   unsigned int xzmb[64];
129*45135ebcSreyk   unsigned int xzm1b[64];
130*45135ebcSreyk   unsigned int xznb[64];
131*45135ebcSreyk   unsigned int xzn1b[64];
132*45135ebcSreyk   unsigned int a0[64];
133*45135ebcSreyk   unsigned int a1[64];
134*45135ebcSreyk   unsigned int b0[64];
135*45135ebcSreyk   unsigned int b1[64];
136*45135ebcSreyk   unsigned int c1[64];
137*45135ebcSreyk   unsigned int r[32];
138*45135ebcSreyk   unsigned int s[32];
139*45135ebcSreyk   unsigned int t[32];
140*45135ebcSreyk   unsigned int u[32];
141*45135ebcSreyk   unsigned int j;
142*45135ebcSreyk   unsigned int b;
143*45135ebcSreyk   int pos;
144*45135ebcSreyk 
145*45135ebcSreyk   for (j = 0;j < 32;++j) xzm1[j] = work[j];
146*45135ebcSreyk   xzm1[32] = 1;
147*45135ebcSreyk   for (j = 33;j < 64;++j) xzm1[j] = 0;
148*45135ebcSreyk 
149*45135ebcSreyk   xzm[0] = 1;
150*45135ebcSreyk   for (j = 1;j < 64;++j) xzm[j] = 0;
151*45135ebcSreyk 
152*45135ebcSreyk   for (pos = 254;pos >= 0;--pos) {
153*45135ebcSreyk     b = e[pos / 8] >> (pos & 7);
154*45135ebcSreyk     b &= 1;
155*45135ebcSreyk     select(xzmb,xzm1b,xzm,xzm1,b);
156*45135ebcSreyk     add(a0,xzmb,xzmb + 32);
157*45135ebcSreyk     sub(a0 + 32,xzmb,xzmb + 32);
158*45135ebcSreyk     add(a1,xzm1b,xzm1b + 32);
159*45135ebcSreyk     sub(a1 + 32,xzm1b,xzm1b + 32);
160*45135ebcSreyk     square(b0,a0);
161*45135ebcSreyk     square(b0 + 32,a0 + 32);
162*45135ebcSreyk     mult(b1,a1,a0 + 32);
163*45135ebcSreyk     mult(b1 + 32,a1 + 32,a0);
164*45135ebcSreyk     add(c1,b1,b1 + 32);
165*45135ebcSreyk     sub(c1 + 32,b1,b1 + 32);
166*45135ebcSreyk     square(r,c1 + 32);
167*45135ebcSreyk     sub(s,b0,b0 + 32);
168*45135ebcSreyk     mult121665(t,s);
169*45135ebcSreyk     add(u,t,b0);
170*45135ebcSreyk     mult(xznb,b0,b0 + 32);
171*45135ebcSreyk     mult(xznb + 32,s,u);
172*45135ebcSreyk     square(xzn1b,c1);
173*45135ebcSreyk     mult(xzn1b + 32,r,work);
174*45135ebcSreyk     select(xzm,xzm1,xznb,xzn1b,b);
175*45135ebcSreyk   }
176*45135ebcSreyk 
177*45135ebcSreyk   for (j = 0;j < 64;++j) work[j] = xzm[j];
178*45135ebcSreyk }
179*45135ebcSreyk 
recip(unsigned int out[32],const unsigned int z[32])180*45135ebcSreyk static void recip(unsigned int out[32],const unsigned int z[32])
181*45135ebcSreyk {
182*45135ebcSreyk   unsigned int z2[32];
183*45135ebcSreyk   unsigned int z9[32];
184*45135ebcSreyk   unsigned int z11[32];
185*45135ebcSreyk   unsigned int z2_5_0[32];
186*45135ebcSreyk   unsigned int z2_10_0[32];
187*45135ebcSreyk   unsigned int z2_20_0[32];
188*45135ebcSreyk   unsigned int z2_50_0[32];
189*45135ebcSreyk   unsigned int z2_100_0[32];
190*45135ebcSreyk   unsigned int t0[32];
191*45135ebcSreyk   unsigned int t1[32];
192*45135ebcSreyk   int i;
193*45135ebcSreyk 
194*45135ebcSreyk   /* 2 */ square(z2,z);
195*45135ebcSreyk   /* 4 */ square(t1,z2);
196*45135ebcSreyk   /* 8 */ square(t0,t1);
197*45135ebcSreyk   /* 9 */ mult(z9,t0,z);
198*45135ebcSreyk   /* 11 */ mult(z11,z9,z2);
199*45135ebcSreyk   /* 22 */ square(t0,z11);
200*45135ebcSreyk   /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9);
201*45135ebcSreyk 
202*45135ebcSreyk   /* 2^6 - 2^1 */ square(t0,z2_5_0);
203*45135ebcSreyk   /* 2^7 - 2^2 */ square(t1,t0);
204*45135ebcSreyk   /* 2^8 - 2^3 */ square(t0,t1);
205*45135ebcSreyk   /* 2^9 - 2^4 */ square(t1,t0);
206*45135ebcSreyk   /* 2^10 - 2^5 */ square(t0,t1);
207*45135ebcSreyk   /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0);
208*45135ebcSreyk 
209*45135ebcSreyk   /* 2^11 - 2^1 */ square(t0,z2_10_0);
210*45135ebcSreyk   /* 2^12 - 2^2 */ square(t1,t0);
211*45135ebcSreyk   /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); }
212*45135ebcSreyk   /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0);
213*45135ebcSreyk 
214*45135ebcSreyk   /* 2^21 - 2^1 */ square(t0,z2_20_0);
215*45135ebcSreyk   /* 2^22 - 2^2 */ square(t1,t0);
216*45135ebcSreyk   /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); }
217*45135ebcSreyk   /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0);
218*45135ebcSreyk 
219*45135ebcSreyk   /* 2^41 - 2^1 */ square(t1,t0);
220*45135ebcSreyk   /* 2^42 - 2^2 */ square(t0,t1);
221*45135ebcSreyk   /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); }
222*45135ebcSreyk   /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0);
223*45135ebcSreyk 
224*45135ebcSreyk   /* 2^51 - 2^1 */ square(t0,z2_50_0);
225*45135ebcSreyk   /* 2^52 - 2^2 */ square(t1,t0);
226*45135ebcSreyk   /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
227*45135ebcSreyk   /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0);
228*45135ebcSreyk 
229*45135ebcSreyk   /* 2^101 - 2^1 */ square(t1,z2_100_0);
230*45135ebcSreyk   /* 2^102 - 2^2 */ square(t0,t1);
231*45135ebcSreyk   /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); }
232*45135ebcSreyk   /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0);
233*45135ebcSreyk 
234*45135ebcSreyk   /* 2^201 - 2^1 */ square(t0,t1);
235*45135ebcSreyk   /* 2^202 - 2^2 */ square(t1,t0);
236*45135ebcSreyk   /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
237*45135ebcSreyk   /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0);
238*45135ebcSreyk 
239*45135ebcSreyk   /* 2^251 - 2^1 */ square(t1,t0);
240*45135ebcSreyk   /* 2^252 - 2^2 */ square(t0,t1);
241*45135ebcSreyk   /* 2^253 - 2^3 */ square(t1,t0);
242*45135ebcSreyk   /* 2^254 - 2^4 */ square(t0,t1);
243*45135ebcSreyk   /* 2^255 - 2^5 */ square(t1,t0);
244*45135ebcSreyk   /* 2^255 - 21 */ mult(out,t1,z11);
245*45135ebcSreyk }
246*45135ebcSreyk 
crypto_scalarmult_curve25519(unsigned char * q,const unsigned char * n,const unsigned char * p)247*45135ebcSreyk int crypto_scalarmult_curve25519(unsigned char *q,
248*45135ebcSreyk   const unsigned char *n,
249*45135ebcSreyk   const unsigned char *p)
250*45135ebcSreyk {
251*45135ebcSreyk   unsigned int work[96];
252*45135ebcSreyk   unsigned char e[32];
253*45135ebcSreyk   unsigned int i;
254*45135ebcSreyk   for (i = 0;i < 32;++i) e[i] = n[i];
255*45135ebcSreyk   e[0] &= 248;
256*45135ebcSreyk   e[31] &= 127;
257*45135ebcSreyk   e[31] |= 64;
258*45135ebcSreyk   for (i = 0;i < 32;++i) work[i] = p[i];
259*45135ebcSreyk   mainloop(work,e);
260*45135ebcSreyk   recip(work + 32,work + 32);
261*45135ebcSreyk   mult(work + 64,work,work + 32);
262*45135ebcSreyk   freeze(work + 64);
263*45135ebcSreyk   for (i = 0;i < 32;++i) q[i] = work[64 + i];
264*45135ebcSreyk   return 0;
265*45135ebcSreyk }
266