! xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/sparc/ghash-sparcv9.S (revision cff8db61e4cfa7157ffd9512c0096e31b6cbc8c5)
.section	".text",#alloc,#execinstr

!-----------------------------------------------------------------------
! rem_4bit -- reduction constants for the 4-bit GHASH routines.
!
! 16 entries of 8 bytes each, one per value of the 4 bits shifted out of
! the accumulator.  Each entry is written as two .long words (high word,
! then a zero low word), so an ldx of entry i yields the constant in the
! upper 32 bits of the register.  The consumers index the table with
! (nibble << 3) and xor the result into the high half of the accumulator.
!
! The table is kept in .text so that gcm_ghash_4bit and gcm_gmult_4bit
! can locate it PC-relatively (call .+8 / add %o7,rem_4bit-1b).
!-----------------------------------------------------------------------
.align	64
rem_4bit:
	.long	0,0,471859200,0,943718400,0,610271232,0
	.long	1887436800,0,1822425088,0,1220542464,0,1423966208,0
	.long	3774873600,0,4246732800,0,3644850176,0,3311403008,0
	.long	2441084928,0,2376073216,0,2847932416,0,3051356160,0
.type	rem_4bit,#object
.size	rem_4bit,(.-rem_4bit)
11
!-----------------------------------------------------------------------
! gcm_ghash_4bit -- fold a run of 16-byte input blocks into the GHASH
! state using the 4-bit table method.
!
! Presumably called from C as
!   void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
!                       const u8 *inp, size_t len);
! (prototype not visible in this file -- confirm against the caller).
!
! In (after save):
!   %i0  Xi    16-byte hash state, read and rewritten in place
!   %i1  Htbl  16 entries x 16 bytes, indexed by nibble; the high
!              64 bits of an entry are at +0, the low 64 bits at +8
!   %i2  inp   input bytes, consumed 16 at a time
!   %i3  len   byte count; converted below into the end pointer
!
! The first block is processed unconditionally and the only exit test
! is "inp == end" after inp has advanced by 16, so len must be a
! non-zero multiple of 16.
!
! Register use:
!   %o0:%o1  Z, the 128-bit accumulator (hi:lo)
!   %o2:%o3  T, the table entry for the nibble being folded in (hi:lo)
!   %o4      rem_4bit[] entry for the 4 bits shifted out of Z
!   %o5      the 4 bits moving from Z.hi into the top of Z.lo
!   %l0      high nibble of the current byte, kept in bits 7:4 so it
!            is already a byte offset into the 16-byte-entry table
!   %l1      low nibble of the current byte, shifted left by 4
!   %l2,%l3  Xi bytes that get xored with the matching input bytes
!   %l4      address of rem_4bit
!   %l5      low 4 bits of Z.lo, scaled by 8 to index rem_4bit
!   %l6      Htbl+8 (base for the low halves of the entries)
!   %l7      byte index inside the block, 13 down to 0
!
! One nibble step computes  Z = (Z >> 4) ^ T ^ rem_4bit[Z & 0xf]
! (rem applied to the high half).  The code is software-pipelined: the
! xors of T.hi and rem into Z.hi for one nibble are issued while the
! loads for the next nibble are in flight, so instruction order matters.
!-----------------------------------------------------------------------
.globl	gcm_ghash_4bit
.align	32
gcm_ghash_4bit:
	save	%sp,-112,%sp
	ldub	[%i2+15],%l1		! inp[15]
	ldub	[%i0+15],%l2		! Xi[15]
	ldub	[%i0+14],%l3		! Xi[14]
	add	%i3,%i2,%i3		! %i3 = end of input
	add	%i1,8,%l6		! %l6 = &Htbl[0].lo

	! PC-relative address of rem_4bit: call leaves the address of
	! label 1 in %o7, the delay slot adds the link-time distance.
1:	call	.+8
	add	%o7,rem_4bit-1b,%l4

.Louter:
	! Byte 15: Z starts as Htbl[low nibble of Xi[15]^inp[15]].
	xor	%l2,%l1,%l1
	and	%l1,0xf0,%l0
	and	%l1,0x0f,%l1
	sll	%l1,4,%l1
	ldx	[%l6+%l1],%o1
	ldx	[%i1+%l1],%o0

	ldub	[%i2+14],%l1		! inp[14] for the next byte

	! Byte 15, high nibble: shift Z right by 4 and fold in T.lo.
	ldx	[%l6+%l0],%o3
	and	%o1,0xf,%l5
	ldx	[%i1+%l0],%o2
	sll	%l5,3,%l5
	ldx	[%l4+%l5],%o4
	srlx	%o1,4,%o1
	mov	13,%l7			! inner loop fetches bytes 13..0
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1

	! Split Xi[14]^inp[14] into its two nibbles.
	xor	%l3,%l1,%l1
	and	%o1,0xf,%l5
	and	%l1,0xf0,%l0
	and	%l1,0x0f,%l1
	ba	.Lghash_inner
	sll	%l1,4,%l1		! (delay slot)
.align	32
.Lghash_inner:
	! First half: finish the previous nibble's xors into Z.hi, then
	! fold in the low nibble (%l1).  Also fetch inp[%l7] and Xi[%l7].
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	ldub	[%i2+%l7],%l1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	ldub	[%i0+%l7],%l3
	xor	%o2,%o0,%o0
	and	%o1,0xf,%l5

	! Second half: fold in the high nibble (%l0) and split the byte
	! just fetched into nibbles for the next iteration.
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%l3,%l1,%l1
	srlx	%o0,4,%o0
	and	%l1,0xf0,%l0
	addcc	%l7,-1,%l7		! carry is set unless %l7 was 0
	xor	%o1,%o5,%o1
	and	%l1,0x0f,%l1
	xor	%o3,%o1,%o1
	sll	%l1,4,%l1
	blu	.Lghash_inner		! loop while the index just used was non-zero
	and	%o1,0xf,%l5		! (delay slot)

	! Byte 0, low nibble.
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0

	! Advance to the next block; leave if that was the last one.
	add	%i2,16,%i2
	cmp	%i2,%i3
	be,pn	%icc,.Ldone
	and	%o1,0xf,%l5		! (delay slot, needed on both paths)

	! Byte 0, high nibble, overlapped with starting the next block:
	! fetch its inp[15], store Z to Xi, and take the new Xi[15] and
	! Xi[14] straight from the low two bytes of Z.lo.
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	ldub	[%i2+15],%l1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	stx	%o1,[%i0+8]
	xor	%o4,%o0,%o0
	stx	%o0,[%i0]
	srl	%o1,8,%l3
	and	%o1,0xff,%l2
	ba	.Louter
	and	%l3,0xff,%l3		! (delay slot)
.align	32
.Ldone:
	! Last block: byte 0, high nibble, then write Z back to Xi.
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	stx	%o1,[%i0+8]
	xor	%o4,%o0,%o0
	stx	%o0,[%i0]

	ret
	restore
.type	gcm_ghash_4bit,#function
.size	gcm_ghash_4bit,(.-gcm_ghash_4bit)
!-----------------------------------------------------------------------
! gcm_gmult_4bit -- one GHASH multiplication of the 16-byte state by
! the hash key, using the 4-bit table method.
!
! Presumably called from C as
!   void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
! (prototype not visible in this file -- confirm against the caller).
!
! In (after save):
!   %i0  Xi    16-byte state, read byte by byte (15 down to 0) and
!              overwritten with the result
!   %i1  Htbl  16 entries x 16 bytes, indexed by nibble; the high
!              64 bits of an entry are at +0, the low 64 bits at +8
!
! Register use:
!   %o0:%o1  Z, the 128-bit accumulator (hi:lo)
!   %o2:%o3  T, the table entry for the nibble being folded in (hi:lo)
!   %o4      rem_4bit[] entry for the 4 bits shifted out of Z
!   %o5      the 4 bits moving from Z.hi into the top of Z.lo
!   %l0/%l1  high / low nibble of the current byte as table offsets
!   %l4      address of rem_4bit
!   %l5      low 4 bits of Z.lo, scaled by 8 to index rem_4bit
!   %l6      Htbl+8 (base for the low halves of the entries)
!   %l7      byte index, 13 down to 0
!
! One nibble step computes  Z = (Z >> 4) ^ T ^ rem_4bit[Z & 0xf]
! (rem applied to the high half).  The xors into Z.hi for one nibble
! are interleaved with the loads for the next, so order matters.
!-----------------------------------------------------------------------
.globl	gcm_gmult_4bit
.align	32
gcm_gmult_4bit:
	save	%sp,-112,%sp
	ldub	[%i0+15],%l1		! Xi[15]
	add	%i1,8,%l6		! %l6 = &Htbl[0].lo

	! PC-relative address of rem_4bit: call leaves the address of
	! label 1 in %o7, the delay slot adds the link-time distance.
1:	call	.+8
	add	%o7,rem_4bit-1b,%l4

	! Byte 15: Z starts as Htbl[low nibble].
	and	%l1,0xf0,%l0
	and	%l1,0x0f,%l1
	sll	%l1,4,%l1
	ldx	[%l6+%l1],%o1
	ldx	[%i1+%l1],%o0

	ldub	[%i0+14],%l1		! Xi[14] for the next byte

	! Byte 15, high nibble.
	ldx	[%l6+%l0],%o3
	and	%o1,0xf,%l5
	ldx	[%i1+%l0],%o2
	sll	%l5,3,%l5
	ldx	[%l4+%l5],%o4
	srlx	%o1,4,%o1
	mov	13,%l7			! inner loop fetches bytes 13..0
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1

	! Split Xi[14] into its two nibbles.
	and	%o1,0xf,%l5
	and	%l1,0xf0,%l0
	and	%l1,0x0f,%l1
	ba	.Lgmult_inner
	sll	%l1,4,%l1		! (delay slot)
.align	32
.Lgmult_inner:
	! First half: finish the previous nibble's xors into Z.hi, fold
	! in the low nibble (%l1) and fetch Xi[%l7].
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	ldub	[%i0+%l7],%l1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	and	%o1,0xf,%l5

	! Second half: fold in the high nibble (%l0) and split the byte
	! just fetched into nibbles for the next iteration.
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	srlx	%o0,4,%o0
	and	%l1,0xf0,%l0
	addcc	%l7,-1,%l7		! carry is set unless %l7 was 0
	xor	%o1,%o5,%o1
	and	%l1,0x0f,%l1
	xor	%o3,%o1,%o1
	sll	%l1,4,%l1
	blu	.Lgmult_inner		! loop while the index just used was non-zero
	and	%o1,0xf,%l5		! (delay slot)

	! Byte 0, low nibble.
	ldx	[%l6+%l1],%o3
	sll	%l5,3,%l5
	xor	%o2,%o0,%o0
	ldx	[%i1+%l1],%o2
	srlx	%o1,4,%o1
	xor	%o4,%o0,%o0
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	and	%o1,0xf,%l5

	! Byte 0, high nibble, then write Z back to Xi.
	ldx	[%l6+%l0],%o3
	sll	%l5,3,%l5
	xor	%o4,%o0,%o0
	ldx	[%i1+%l0],%o2
	srlx	%o1,4,%o1
	ldx	[%l4+%l5],%o4
	sllx	%o0,60,%o5
	xor	%o3,%o1,%o1
	srlx	%o0,4,%o0
	xor	%o1,%o5,%o1
	xor	%o2,%o0,%o0
	stx	%o1,[%i0+8]
	xor	%o4,%o0,%o0
	stx	%o0,[%i0]

	ret
	restore
.type	gcm_gmult_4bit,#function
.size	gcm_gmult_4bit,(.-gcm_gmult_4bit)
!-----------------------------------------------------------------------
! gcm_init_vis3 -- prepare the key material used by the VIS3 routines.
!
! Presumably called from C as
!   void gcm_init_vis3(u128 Htable[16], const u64 H[2]);
! (prototype not visible in this file -- confirm against the caller).
!
! In (after save):
!   %i0  output area; 24 bytes are written:
!          +0   high 64 bits of the "twisted" H
!          +8   low 64 bits of the "twisted" H
!          +16  the 8-byte lookup constant 0xA040608020C0E000
!   %i1  H as two 64-bit words (high word at +0, low word at +8)
!
! Twisted H is H shifted left by one bit as a 128-bit value; when the
! bit shifted out was set, 0xC200000000000000 is xored into the high
! word and 1 into the low word.
!
! addxc is a VIS3 instruction and is emitted as a raw .word with the
! mnemonic in the comment (presumably so that assemblers without VIS3
! support still accept the file).
!-----------------------------------------------------------------------
.globl	gcm_init_vis3
.align	32
gcm_init_vis3:
	save	%sp,-112,%sp

	ldx	[%i1+0],%o2		! H.hi
	ldx	[%i1+8],%o1		! H.lo
	mov	0xE1,%o4
	mov	1,%o3
	sllx	%o4,57,%o4		! top bit falls off: 0xC2 << 56
	srax	%o2,63,%g1		! broadcast carry
	addcc	%o1,%o1,%o1		! H<<=1
	.word	0x95b2822a !addxc	%o2,%o2,%o2
	and	%g1,%o3,%o3		! keep the correction terms only if
	and	%g1,%o4,%o4		! the top bit of H was set
	xor	%o3,%o1,%o1
	xor	%o4,%o2,%o2
	stx	%o1,[%i0+8]		! save twisted H
	stx	%o2,[%i0+0]

	! Build the 64-bit constant from two 32-bit halves.  Byte i
	! (counting from the least significant byte, i = 0..7) holds the
	! low 8 bits of the carry-less product 0xE0 * i.
	sethi	%hi(0xA0406080),%g5
	sethi	%hi(0x20C0E000),%l0
	or	%g5,%lo(0xA0406080),%g5
	or	%l0,%lo(0x20C0E000),%l0
	sllx	%g5,32,%g5
	or	%l0,%g5,%g5		! (0xE0*i)&0xff=0xA040608020C0E000
	stx	%g5,[%i0+16]

	ret
	restore
.type	gcm_init_vis3,#function
.size	gcm_init_vis3,.-gcm_init_vis3
281
!-----------------------------------------------------------------------
! gcm_gmult_vis3 -- one GHASH multiplication of Xi by the twisted hash
! key, using the VIS3 carry-less multiply instructions.
!
! Presumably called from C as
!   void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
! (prototype not visible in this file -- confirm against the caller).
!
! In (after save):
!   %i0  Xi: high word at +0, low word at +8; overwritten with the result
!   %i1  key area in the layout written by gcm_init_vis3:
!        twisted H at +0 / +8, lookup constant at +16
!
! Register use:
!   %o1,%o2  twisted H, low / high word
!   %o0      H.hi ^ H.lo (Karatsuba operand)
!   %o3,%o4  Xi, low / high word (reused for partial products)
!   %o5      0xC2 << 56, the reduction multiplier
!   %g5      lookup constant loaded from +16
!   %g1..%g4 partial products / result words
!   %o7      scratch (free after save; the return address is in %i7)
!
! xmulx / xmulxhi are VIS3 instructions and are emitted as raw .word
! values with the mnemonic in the comment.  64-bit values are held only
! in %g and %o registers.
!-----------------------------------------------------------------------
.globl	gcm_gmult_vis3
.align	32
gcm_gmult_vis3:
	save	%sp,-112,%sp

	ldx	[%i0+8],%o3		! load Xi
	ldx	[%i0+0],%o4
	ldx	[%i1+8],%o1	! load twisted H
	ldx	[%i1+0],%o2

	mov	0xE1,%l7
	sllx	%l7,57,%o5		! 57 is not a typo
	ldx	[%i1+16],%g5		! (0xE0*i)&0xff=0xA040608020C0E000

	! Three 64x64 carry-less products: lo*lo, (lo^hi)*(lo^hi), hi*hi.
	xor	%o2,%o1,%o0		! Karatsuba pre-processing
	.word	0x83b2e2a9 !xmulx	%o3,%o1,%g1
	xor	%o3,%o4,%g3		! Karatsuba pre-processing
	.word	0x85b0e2a8 !xmulx	%g3,%o0,%g2
	.word	0x97b2e2c9 !xmulxhi	%o3,%o1,%o3
	.word	0x87b0e2c8 !xmulxhi	%g3,%o0,%g3
	.word	0x89b322ca !xmulxhi	%o4,%o2,%g4
	.word	0x99b322aa !xmulx	%o4,%o2,%o4

	! Table lookup on the low 3 bits of %g1: the shift count uses
	! only the low 6 bits of %o7, and the final shift by 57 keeps
	! only the low 7 bits of the xor.
	sll	%g1,3,%o7
	srlx	%g5,%o7,%o7		! *0xE0 [implicit &(7<<3)]
	xor	%g1,%o7,%o7
	sllx	%o7,57,%o7		! (%g1*0xE1)<<1<<56 [implicit &0x7f]

	xor	%g1,%g2,%g2		! Karatsuba post-processing
	xor	%o3,%g3,%g3
	 xor	%o7,%o3,%o3		! real destination is %g2
	xor	%g4,%g3,%g3
	xor	%o3,%g2,%g2
	xor	%o4,%g3,%g3
	xor	%o4,%g2,%g2

	! Reduction: multiply the low words by %o5 and fold the results
	! into the upper words.
	.word	0x97b062cd !xmulxhi	%g1,%o5,%o3		! *0xE1<<1<<56
	 xor	%g1,%g3,%g3
	.word	0x83b0a2ad !xmulx	%g2,%o5,%g1
	 xor	%g2,%g4,%g4
	.word	0x85b0a2cd !xmulxhi	%g2,%o5,%g2

	xor	%o3,%g3,%g3
	xor	%g1,%g3,%g3
	xor	%g2,%g4,%g4

	stx	%g3,[%i0+8]		! save Xi
	stx	%g4,[%i0+0]

	ret
	restore
.type	gcm_gmult_vis3,#function
.size	gcm_gmult_vis3,.-gcm_gmult_vis3
335
!-----------------------------------------------------------------------
! gcm_ghash_vis3 -- fold a run of 16-byte input blocks into the GHASH
! state using the VIS3 carry-less multiply instructions.
!
! Presumably called from C as
!   void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16],
!                       const u8 *inp, size_t len);
! (prototype not visible in this file -- confirm against the caller).
!
! In (after save):
!   %i0  Xi: high word at +0, low word at +8; updated in place
!   %i1  key area in the layout written by gcm_init_vis3:
!        twisted H at +0 / +8, lookup constant at +16
!   %i2  inp, any byte alignment
!   %i3  len in bytes
!
! The loop subtracts 16 from len per block and stops only when it
! reaches exactly zero, so len must be a non-zero multiple of 16.
!
! Input alignment: inp is rounded down to an 8-byte boundary and the
! misalignment is turned into a bit shift (%l0) and its negation (%l1).
! When the shift is non-zero each block is assembled from three aligned
! 8-byte loads, so up to 8 bytes past the end of the last block are
! read (within the same aligned doubleword).
!
! Register use:
!   %o1,%o2  twisted H, low / high word
!   %o0      H.hi ^ H.lo (Karatsuba operand)
!   %o3,%o4  current input block xored with Xi, low / high word
!   %o5      0xC2 << 56, the reduction multiplier
!   %g5      lookup constant loaded from +16
!   %g3,%g4  Xi, low / high word, carried across iterations
!   %g1,%g2  partial products
!   %o7      scratch (free after save; the return address is in %i7)
!
! xmulx / xmulxhi are VIS3 instructions and are emitted as raw .word
! values with the mnemonic in the comment.
!-----------------------------------------------------------------------
.globl	gcm_ghash_vis3
.align	32
gcm_ghash_vis3:
	save	%sp,-112,%sp
	nop
	! Zero-extend len.  The loop test below (brnz) looks at all 64
	! bits of %i3, while a 32-bit caller only defines the low 32.
	! This used to be spelled "srln", which is the sparc_arch.h macro
	! for this shift and not an instruction; this file does not
	! include that header, and "srl" is the form that matches the
	! 32-bit frame used here.
	srl	%i3,0,%i3		! needed on v8+, "nop" on v9

	ldx	[%i0+8],%g3		! load Xi
	ldx	[%i0+0],%g4
	ldx	[%i1+8],%o1	! load twisted H
	ldx	[%i1+0],%o2

	mov	0xE1,%l7
	sllx	%l7,57,%o5		! 57 is not a typo
	ldx	[%i1+16],%g5		! (0xE0*i)&0xff=0xA040608020C0E000

	and	%i2,7,%l0		! misalignment in bytes
	andn	%i2,7,%i2		! round inp down to 8 bytes
	sll	%l0,3,%l0		! ... as a left-shift count in bits
	prefetch [%i2+63], 20
	sub	%g0,%l0,%l1		! right-shift count (low 6 bits used)

	xor	%o2,%o1,%o0		! Karatsuba pre-processing
.Loop:
	ldx	[%i2+8],%o3
	brz,pt	%l0,1f			! aligned input: skip the merge
	ldx	[%i2+0],%o4		! (delay slot)

	ldx	[%i2+16],%g2		! align data
	srlx	%o3,%l1,%g1
	sllx	%o3,%l0,%o3
	sllx	%o4,%l0,%o4
	srlx	%g2,%l1,%g2
	or	%g1,%o4,%o4
	or	%g2,%o3,%o3
1:
	add	%i2,16,%i2
	sub	%i3,16,%i3
	xor	%g3,%o3,%o3		! Xi ^= block
	xor	%g4,%o4,%o4
	prefetch [%i2+63], 20

	! Three 64x64 carry-less products: lo*lo, (lo^hi)*(lo^hi), hi*hi.
	.word	0x83b2e2a9 !xmulx	%o3,%o1,%g1
	xor	%o3,%o4,%g3		! Karatsuba pre-processing
	.word	0x85b0e2a8 !xmulx	%g3,%o0,%g2
	.word	0x97b2e2c9 !xmulxhi	%o3,%o1,%o3
	.word	0x87b0e2c8 !xmulxhi	%g3,%o0,%g3
	.word	0x89b322ca !xmulxhi	%o4,%o2,%g4
	.word	0x99b322aa !xmulx	%o4,%o2,%o4

	! Table lookup on the low 3 bits of %g1: the shift count uses
	! only the low 6 bits of %o7, and the final shift by 57 keeps
	! only the low 7 bits of the xor.
	sll	%g1,3,%o7
	srlx	%g5,%o7,%o7		! *0xE0 [implicit &(7<<3)]
	xor	%g1,%o7,%o7
	sllx	%o7,57,%o7		! (%g1*0xE1)<<1<<56 [implicit &0x7f]

	xor	%g1,%g2,%g2		! Karatsuba post-processing
	xor	%o3,%g3,%g3
	 xor	%o7,%o3,%o3		! real destination is %g2
	xor	%g4,%g3,%g3
	xor	%o3,%g2,%g2
	xor	%o4,%g3,%g3
	xor	%o4,%g2,%g2

	! Reduction: multiply the low words by %o5 and fold the results
	! into the upper words.
	.word	0x97b062cd !xmulxhi	%g1,%o5,%o3		! *0xE1<<1<<56
	 xor	%g1,%g3,%g3
	.word	0x83b0a2ad !xmulx	%g2,%o5,%g1
	 xor	%g2,%g4,%g4
	.word	0x85b0a2cd !xmulxhi	%g2,%o5,%g2

	xor	%o3,%g3,%g3
	xor	%g1,%g3,%g3
	brnz,pt	%i3,.Loop		! more input left
	xor	%g2,%g4,%g4		! (delay slot)

	stx	%g3,[%i0+8]		! save Xi
	stx	%g4,[%i0+0]

	ret
	restore
.type	gcm_ghash_vis3,#function
.size	gcm_ghash_vis3,.-gcm_ghash_vis3
! Identification string embedded after the code, followed by realignment
! to a 4-byte boundary.
.asciz	"GHASH for SPARCv9/VIS3, CRYPTOGAMS by <appro@openssl.org>"
.align	4
419