1*45135ebcSreyk /* $OpenBSD: smult_curve25519_ref.c,v 1.1 2014/08/27 10:28:57 reyk Exp $ */
2*45135ebcSreyk /*
3*45135ebcSreyk version 20081011
4*45135ebcSreyk Matthew Dempsky
5*45135ebcSreyk Public domain.
6*45135ebcSreyk Derived from public domain code by D. J. Bernstein.
7*45135ebcSreyk */
8*45135ebcSreyk
9*45135ebcSreyk int crypto_scalarmult_curve25519(unsigned char *, const unsigned char *, const unsigned char *);
10*45135ebcSreyk
add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])11*45135ebcSreyk static void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
12*45135ebcSreyk {
13*45135ebcSreyk unsigned int j;
14*45135ebcSreyk unsigned int u;
15*45135ebcSreyk u = 0;
16*45135ebcSreyk for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; }
17*45135ebcSreyk u += a[31] + b[31]; out[31] = u;
18*45135ebcSreyk }
19*45135ebcSreyk
sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])20*45135ebcSreyk static void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
21*45135ebcSreyk {
22*45135ebcSreyk unsigned int j;
23*45135ebcSreyk unsigned int u;
24*45135ebcSreyk u = 218;
25*45135ebcSreyk for (j = 0;j < 31;++j) {
26*45135ebcSreyk u += a[j] + 65280 - b[j];
27*45135ebcSreyk out[j] = u & 255;
28*45135ebcSreyk u >>= 8;
29*45135ebcSreyk }
30*45135ebcSreyk u += a[31] - b[31];
31*45135ebcSreyk out[31] = u;
32*45135ebcSreyk }
33*45135ebcSreyk
squeeze(unsigned int a[32])34*45135ebcSreyk static void squeeze(unsigned int a[32])
35*45135ebcSreyk {
36*45135ebcSreyk unsigned int j;
37*45135ebcSreyk unsigned int u;
38*45135ebcSreyk u = 0;
39*45135ebcSreyk for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
40*45135ebcSreyk u += a[31]; a[31] = u & 127;
41*45135ebcSreyk u = 19 * (u >> 7);
42*45135ebcSreyk for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
43*45135ebcSreyk u += a[31]; a[31] = u;
44*45135ebcSreyk }
45*45135ebcSreyk
46*45135ebcSreyk static const unsigned int minusp[32] = {
47*45135ebcSreyk 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128
48*45135ebcSreyk } ;
49*45135ebcSreyk
freeze(unsigned int a[32])50*45135ebcSreyk static void freeze(unsigned int a[32])
51*45135ebcSreyk {
52*45135ebcSreyk unsigned int aorig[32];
53*45135ebcSreyk unsigned int j;
54*45135ebcSreyk unsigned int negative;
55*45135ebcSreyk
56*45135ebcSreyk for (j = 0;j < 32;++j) aorig[j] = a[j];
57*45135ebcSreyk add(a,a,minusp);
58*45135ebcSreyk negative = -((a[31] >> 7) & 1);
59*45135ebcSreyk for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]);
60*45135ebcSreyk }
61*45135ebcSreyk
mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])62*45135ebcSreyk static void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
63*45135ebcSreyk {
64*45135ebcSreyk unsigned int i;
65*45135ebcSreyk unsigned int j;
66*45135ebcSreyk unsigned int u;
67*45135ebcSreyk
68*45135ebcSreyk for (i = 0;i < 32;++i) {
69*45135ebcSreyk u = 0;
70*45135ebcSreyk for (j = 0;j <= i;++j) u += a[j] * b[i - j];
71*45135ebcSreyk for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j];
72*45135ebcSreyk out[i] = u;
73*45135ebcSreyk }
74*45135ebcSreyk squeeze(out);
75*45135ebcSreyk }
76*45135ebcSreyk
mult121665(unsigned int out[32],const unsigned int a[32])77*45135ebcSreyk static void mult121665(unsigned int out[32],const unsigned int a[32])
78*45135ebcSreyk {
79*45135ebcSreyk unsigned int j;
80*45135ebcSreyk unsigned int u;
81*45135ebcSreyk
82*45135ebcSreyk u = 0;
83*45135ebcSreyk for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; }
84*45135ebcSreyk u += 121665 * a[31]; out[31] = u & 127;
85*45135ebcSreyk u = 19 * (u >> 7);
86*45135ebcSreyk for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; }
87*45135ebcSreyk u += out[j]; out[j] = u;
88*45135ebcSreyk }
89*45135ebcSreyk
square(unsigned int out[32],const unsigned int a[32])90*45135ebcSreyk static void square(unsigned int out[32],const unsigned int a[32])
91*45135ebcSreyk {
92*45135ebcSreyk unsigned int i;
93*45135ebcSreyk unsigned int j;
94*45135ebcSreyk unsigned int u;
95*45135ebcSreyk
96*45135ebcSreyk for (i = 0;i < 32;++i) {
97*45135ebcSreyk u = 0;
98*45135ebcSreyk for (j = 0;j < i - j;++j) u += a[j] * a[i - j];
99*45135ebcSreyk for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j];
100*45135ebcSreyk u *= 2;
101*45135ebcSreyk if ((i & 1) == 0) {
102*45135ebcSreyk u += a[i / 2] * a[i / 2];
103*45135ebcSreyk u += 38 * a[i / 2 + 16] * a[i / 2 + 16];
104*45135ebcSreyk }
105*45135ebcSreyk out[i] = u;
106*45135ebcSreyk }
107*45135ebcSreyk squeeze(out);
108*45135ebcSreyk }
109*45135ebcSreyk
select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)110*45135ebcSreyk static void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)
111*45135ebcSreyk {
112*45135ebcSreyk unsigned int j;
113*45135ebcSreyk unsigned int t;
114*45135ebcSreyk unsigned int bminus1;
115*45135ebcSreyk
116*45135ebcSreyk bminus1 = b - 1;
117*45135ebcSreyk for (j = 0;j < 64;++j) {
118*45135ebcSreyk t = bminus1 & (r[j] ^ s[j]);
119*45135ebcSreyk p[j] = s[j] ^ t;
120*45135ebcSreyk q[j] = r[j] ^ t;
121*45135ebcSreyk }
122*45135ebcSreyk }
123*45135ebcSreyk
mainloop(unsigned int work[64],const unsigned char e[32])124*45135ebcSreyk static void mainloop(unsigned int work[64],const unsigned char e[32])
125*45135ebcSreyk {
126*45135ebcSreyk unsigned int xzm1[64];
127*45135ebcSreyk unsigned int xzm[64];
128*45135ebcSreyk unsigned int xzmb[64];
129*45135ebcSreyk unsigned int xzm1b[64];
130*45135ebcSreyk unsigned int xznb[64];
131*45135ebcSreyk unsigned int xzn1b[64];
132*45135ebcSreyk unsigned int a0[64];
133*45135ebcSreyk unsigned int a1[64];
134*45135ebcSreyk unsigned int b0[64];
135*45135ebcSreyk unsigned int b1[64];
136*45135ebcSreyk unsigned int c1[64];
137*45135ebcSreyk unsigned int r[32];
138*45135ebcSreyk unsigned int s[32];
139*45135ebcSreyk unsigned int t[32];
140*45135ebcSreyk unsigned int u[32];
141*45135ebcSreyk unsigned int j;
142*45135ebcSreyk unsigned int b;
143*45135ebcSreyk int pos;
144*45135ebcSreyk
145*45135ebcSreyk for (j = 0;j < 32;++j) xzm1[j] = work[j];
146*45135ebcSreyk xzm1[32] = 1;
147*45135ebcSreyk for (j = 33;j < 64;++j) xzm1[j] = 0;
148*45135ebcSreyk
149*45135ebcSreyk xzm[0] = 1;
150*45135ebcSreyk for (j = 1;j < 64;++j) xzm[j] = 0;
151*45135ebcSreyk
152*45135ebcSreyk for (pos = 254;pos >= 0;--pos) {
153*45135ebcSreyk b = e[pos / 8] >> (pos & 7);
154*45135ebcSreyk b &= 1;
155*45135ebcSreyk select(xzmb,xzm1b,xzm,xzm1,b);
156*45135ebcSreyk add(a0,xzmb,xzmb + 32);
157*45135ebcSreyk sub(a0 + 32,xzmb,xzmb + 32);
158*45135ebcSreyk add(a1,xzm1b,xzm1b + 32);
159*45135ebcSreyk sub(a1 + 32,xzm1b,xzm1b + 32);
160*45135ebcSreyk square(b0,a0);
161*45135ebcSreyk square(b0 + 32,a0 + 32);
162*45135ebcSreyk mult(b1,a1,a0 + 32);
163*45135ebcSreyk mult(b1 + 32,a1 + 32,a0);
164*45135ebcSreyk add(c1,b1,b1 + 32);
165*45135ebcSreyk sub(c1 + 32,b1,b1 + 32);
166*45135ebcSreyk square(r,c1 + 32);
167*45135ebcSreyk sub(s,b0,b0 + 32);
168*45135ebcSreyk mult121665(t,s);
169*45135ebcSreyk add(u,t,b0);
170*45135ebcSreyk mult(xznb,b0,b0 + 32);
171*45135ebcSreyk mult(xznb + 32,s,u);
172*45135ebcSreyk square(xzn1b,c1);
173*45135ebcSreyk mult(xzn1b + 32,r,work);
174*45135ebcSreyk select(xzm,xzm1,xznb,xzn1b,b);
175*45135ebcSreyk }
176*45135ebcSreyk
177*45135ebcSreyk for (j = 0;j < 64;++j) work[j] = xzm[j];
178*45135ebcSreyk }
179*45135ebcSreyk
recip(unsigned int out[32],const unsigned int z[32])180*45135ebcSreyk static void recip(unsigned int out[32],const unsigned int z[32])
181*45135ebcSreyk {
182*45135ebcSreyk unsigned int z2[32];
183*45135ebcSreyk unsigned int z9[32];
184*45135ebcSreyk unsigned int z11[32];
185*45135ebcSreyk unsigned int z2_5_0[32];
186*45135ebcSreyk unsigned int z2_10_0[32];
187*45135ebcSreyk unsigned int z2_20_0[32];
188*45135ebcSreyk unsigned int z2_50_0[32];
189*45135ebcSreyk unsigned int z2_100_0[32];
190*45135ebcSreyk unsigned int t0[32];
191*45135ebcSreyk unsigned int t1[32];
192*45135ebcSreyk int i;
193*45135ebcSreyk
194*45135ebcSreyk /* 2 */ square(z2,z);
195*45135ebcSreyk /* 4 */ square(t1,z2);
196*45135ebcSreyk /* 8 */ square(t0,t1);
197*45135ebcSreyk /* 9 */ mult(z9,t0,z);
198*45135ebcSreyk /* 11 */ mult(z11,z9,z2);
199*45135ebcSreyk /* 22 */ square(t0,z11);
200*45135ebcSreyk /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9);
201*45135ebcSreyk
202*45135ebcSreyk /* 2^6 - 2^1 */ square(t0,z2_5_0);
203*45135ebcSreyk /* 2^7 - 2^2 */ square(t1,t0);
204*45135ebcSreyk /* 2^8 - 2^3 */ square(t0,t1);
205*45135ebcSreyk /* 2^9 - 2^4 */ square(t1,t0);
206*45135ebcSreyk /* 2^10 - 2^5 */ square(t0,t1);
207*45135ebcSreyk /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0);
208*45135ebcSreyk
209*45135ebcSreyk /* 2^11 - 2^1 */ square(t0,z2_10_0);
210*45135ebcSreyk /* 2^12 - 2^2 */ square(t1,t0);
211*45135ebcSreyk /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); }
212*45135ebcSreyk /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0);
213*45135ebcSreyk
214*45135ebcSreyk /* 2^21 - 2^1 */ square(t0,z2_20_0);
215*45135ebcSreyk /* 2^22 - 2^2 */ square(t1,t0);
216*45135ebcSreyk /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); }
217*45135ebcSreyk /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0);
218*45135ebcSreyk
219*45135ebcSreyk /* 2^41 - 2^1 */ square(t1,t0);
220*45135ebcSreyk /* 2^42 - 2^2 */ square(t0,t1);
221*45135ebcSreyk /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); }
222*45135ebcSreyk /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0);
223*45135ebcSreyk
224*45135ebcSreyk /* 2^51 - 2^1 */ square(t0,z2_50_0);
225*45135ebcSreyk /* 2^52 - 2^2 */ square(t1,t0);
226*45135ebcSreyk /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
227*45135ebcSreyk /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0);
228*45135ebcSreyk
229*45135ebcSreyk /* 2^101 - 2^1 */ square(t1,z2_100_0);
230*45135ebcSreyk /* 2^102 - 2^2 */ square(t0,t1);
231*45135ebcSreyk /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); }
232*45135ebcSreyk /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0);
233*45135ebcSreyk
234*45135ebcSreyk /* 2^201 - 2^1 */ square(t0,t1);
235*45135ebcSreyk /* 2^202 - 2^2 */ square(t1,t0);
236*45135ebcSreyk /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
237*45135ebcSreyk /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0);
238*45135ebcSreyk
239*45135ebcSreyk /* 2^251 - 2^1 */ square(t1,t0);
240*45135ebcSreyk /* 2^252 - 2^2 */ square(t0,t1);
241*45135ebcSreyk /* 2^253 - 2^3 */ square(t1,t0);
242*45135ebcSreyk /* 2^254 - 2^4 */ square(t0,t1);
243*45135ebcSreyk /* 2^255 - 2^5 */ square(t1,t0);
244*45135ebcSreyk /* 2^255 - 21 */ mult(out,t1,z11);
245*45135ebcSreyk }
246*45135ebcSreyk
crypto_scalarmult_curve25519(unsigned char * q,const unsigned char * n,const unsigned char * p)247*45135ebcSreyk int crypto_scalarmult_curve25519(unsigned char *q,
248*45135ebcSreyk const unsigned char *n,
249*45135ebcSreyk const unsigned char *p)
250*45135ebcSreyk {
251*45135ebcSreyk unsigned int work[96];
252*45135ebcSreyk unsigned char e[32];
253*45135ebcSreyk unsigned int i;
254*45135ebcSreyk for (i = 0;i < 32;++i) e[i] = n[i];
255*45135ebcSreyk e[0] &= 248;
256*45135ebcSreyk e[31] &= 127;
257*45135ebcSreyk e[31] |= 64;
258*45135ebcSreyk for (i = 0;i < 32;++i) work[i] = p[i];
259*45135ebcSreyk mainloop(work,e);
260*45135ebcSreyk recip(work + 32,work + 32);
261*45135ebcSreyk mult(work + 64,work,work + 32);
262*45135ebcSreyk freeze(work + 64);
263*45135ebcSreyk for (i = 0;i < 32;++i) q[i] = work[64 + i];
264*45135ebcSreyk return 0;
265*45135ebcSreyk }
266