xref: /netbsd-src/crypto/external/bsd/openssl.old/lib/libcrypto/arch/sparc64/ghash-sparcv9.S (revision c38e7cc395b1472a774ff828e46123de44c628e9)
! Assembler preamble: declare %g2/%g3 as scratch registers and place
! everything that follows in the allocatable, executable ".text" section.
1.register	%g2,#scratch
2.register	%g3,#scratch
3.section	".text",#alloc,#execinstr
4
! rem_4bit: 16 entries of 8 bytes each, written as pairs of 32-bit words.
! The second word of every pair is 0 and the first is a 16-bit constant
! shifted left by 16 (e.g. 471859200 = 0x1C20 << 16), so with a big-endian
! 64-bit load the constant sits in the top 16 bits of the entry.
! gcm_ghash_4bit and gcm_gmult_4bit fetch entries with ldx at byte offset
! (Z.lo & 0xf) << 3 and xor them into the high half of their running value.
! NOTE(review): presumably these are the GF(2^128) reduction terms for the
! four bits shifted out of Z on each step - confirm against the generator.
5.align	64
6rem_4bit:
7	.long	0,0,471859200,0,943718400,0,610271232,0
8	.long	1887436800,0,1822425088,0,1220542464,0,1423966208,0
9	.long	3774873600,0,4246732800,0,3644850176,0,3311403008,0
10	.long	2441084928,0,2376073216,0,2847932416,0,3051356160,0
11.type	rem_4bit,#object
12.size	rem_4bit,(.-rem_4bit)
13
! gcm_ghash_4bit: fold a run of 16-byte input blocks into the 16-byte
! accumulator at %i0, using 4-bit table lookups.
!
! Arguments as used by the code below (incoming registers after save):
!   %i0  accumulator Xi: single bytes are read from [%i0+15] .. [%i0+0]
!        and the result is written back as 64-bit words at [%i0], [%i0+8]
!   %i1  table of 16-byte entries; the word at +0 of an entry is loaded
!        through %i1 and the word at +8 through %l6 = %i1+8
!        (presumably the table of multiples of H - confirm with caller)
!   %i2  input pointer, advanced by 16 per block
!   %i3  byte count on entry; replaced by the end pointer %i2+%i3
! NOTE(review): the first block is processed unconditionally and the block
! loop exits only when %i2 equals the end pointer exactly, so the length is
! assumed to be a non-zero multiple of 16 - confirm with callers.
!
! Working registers:
!   %o0/%o1  running value Z (high/low 64 bits)
!   %o2/%o3  table entry T (high/low 64 bits)
!   %o4      rem_4bit entry; %l5 is its index (low 4 bits of Z.lo)
!   %o5      low 4 bits of Z.hi moved to the top of Z.lo during the shift
!   %l0      high nibble of the current Xi^input byte (& 0xf0, already a
!            byte offset into the 16-byte-entry table)
!   %l1      current byte, then its low nibble << 4 (byte offset)
!   %l2/%l3  Xi[15] / the next Xi byte to combine with the input
!   %l4      run-time address of rem_4bit
!   %l7      byte index for the inner loop
!
! Every nibble step computes Z = (Z >> 4) ^ T, with the rem_4bit entry
! selected by the four bits shifted out xor-ed into Z.hi. The steps are
! software-pipelined, so loads for the next step are interleaved with the
! xors of the current one; instruction order matters throughout.
14.globl	gcm_ghash_4bit
15.align	32
16gcm_ghash_4bit:
17	save	%sp,-192,%sp
18	ldub	[%i2+15],%l1		! last byte of the first input block
19	ldub	[%i0+15],%l2		! Xi[15]
20	ldub	[%i0+14],%l3		! Xi[14]
21	add	%i3,%i2,%i3		! %i3 = end of input
22	add	%i1,8,%l6		! %l6 -> low 64-bit half of table entries
23
! Label 1 marks the call itself. call stores its own address in %o7 and
! transfers to .+8, i.e. the instruction after the delay slot; the add in
! the delay slot turns %o7 into the address of rem_4bit using only the
! assemble-time distance rem_4bit-1b (no absolute address needed).
241:	call	.+8
25	add	%o7,rem_4bit-1b,%l4
26
! Per-block entry: %l2 = Xi[15], %l3 = Xi[14], %l1 = input byte 15.
27.Louter:
28	xor	%l2,%l1,%l1
29	and	%l1,0xf0,%l0
30	and	%l1,0x0f,%l1
31	sll	%l1,4,%l1
32	ldx	[%l6+%l1],%o1		! Z = table[low nibble]
33	ldx	[%i1+%l1],%o0
34
35	ldub	[%i2+14],%l1		! next input byte
36
37	ldx	[%l6+%l0],%o3		! T = table[high nibble]
38	and	%o1,0xf,%l5
39	ldx	[%i1+%l0],%o2
40	sll	%l5,3,%l5		! scale index to 8-byte rem_4bit entries
41	ldx	[%l4+%l5],%o4
42	srlx	%o1,4,%o1
43	mov	13,%l7			! inner loop fetches bytes 13 down to 0
44	sllx	%o0,60,%o5
45	xor	%o3,%o1,%o1
46	srlx	%o0,4,%o0
47	xor	%o1,%o5,%o1
48
49	xor	%l3,%l1,%l1		! Xi[14] ^ input byte 14
50	and	%o1,0xf,%l5
51	and	%l1,0xf0,%l0
52	and	%l1,0x0f,%l1
53	ba	.Lghash_inner
54	sll	%l1,4,%l1		! delay slot: executed before the branch target
55.align	32
! Inner loop: one pass handles the low and the high nibble of the byte
! prepared by the previous pass and fetches the byte at index %l7 for the
! next pass.
56.Lghash_inner:
57	ldx	[%l6+%l1],%o3
58	sll	%l5,3,%l5
59	xor	%o2,%o0,%o0
60	ldx	[%i1+%l1],%o2
61	srlx	%o1,4,%o1
62	xor	%o4,%o0,%o0
63	ldx	[%l4+%l5],%o4
64	sllx	%o0,60,%o5
65	xor	%o3,%o1,%o1
66	ldub	[%i2+%l7],%l1
67	srlx	%o0,4,%o0
68	xor	%o1,%o5,%o1
69	ldub	[%i0+%l7],%l3
70	xor	%o2,%o0,%o0
71	and	%o1,0xf,%l5
72
73	ldx	[%l6+%l0],%o3
74	sll	%l5,3,%l5
75	xor	%o4,%o0,%o0
76	ldx	[%i1+%l0],%o2
77	srlx	%o1,4,%o1
78	ldx	[%l4+%l5],%o4
79	sllx	%o0,60,%o5
80	xor	%l3,%l1,%l1
81	srlx	%o0,4,%o0
82	and	%l1,0xf0,%l0
83	addcc	%l7,-1,%l7		! sets carry unless %l7 was 0
84	xor	%o1,%o5,%o1
85	and	%l1,0x0f,%l1
86	xor	%o3,%o1,%o1
87	sll	%l1,4,%l1
88	blu	.Lghash_inner		! taken on carry set, i.e. while %l7 was non-zero
89	and	%o1,0xf,%l5		! delay slot
90
! Loop exit: low-nibble step for the last fetched byte (index 0).
91	ldx	[%l6+%l1],%o3
92	sll	%l5,3,%l5
93	xor	%o2,%o0,%o0
94	ldx	[%i1+%l1],%o2
95	srlx	%o1,4,%o1
96	xor	%o4,%o0,%o0
97	ldx	[%l4+%l5],%o4
98	sllx	%o0,60,%o5
99	xor	%o3,%o1,%o1
100	srlx	%o0,4,%o0
101	xor	%o1,%o5,%o1
102	xor	%o2,%o0,%o0
103
104	add	%i2,16,%i2		! advance to the next input block
105	cmp	%i2,%i3
106	be,pn	%xcc,.Ldone		! input exhausted: finish in .Ldone
107	and	%o1,0xf,%l5		! delay slot, executed on both paths
108
! More input follows: final high-nibble step of this block, store Z to Xi,
! and pick up the first byte of the next block (%i2 was already advanced).
109	ldx	[%l6+%l0],%o3
110	sll	%l5,3,%l5
111	xor	%o4,%o0,%o0
112	ldx	[%i1+%l0],%o2
113	srlx	%o1,4,%o1
114	ldx	[%l4+%l5],%o4
115	sllx	%o0,60,%o5
116	xor	%o3,%o1,%o1
117	ldub	[%i2+15],%l1
118	srlx	%o0,4,%o0
119	xor	%o1,%o5,%o1
120	xor	%o2,%o0,%o0
121	stx	%o1,[%i0+8]
122	xor	%o4,%o0,%o0
123	stx	%o0,[%i0]
124	srl	%o1,8,%l3
125	and	%o1,0xff,%l2		! new Xi[15] = lowest byte of Z.lo
126	ba	.Louter
127	and	%l3,0xff,%l3		! delay slot: new Xi[14] = next byte of Z.lo
128.align	32
! Last block: same final high-nibble step, store Z to Xi and return.
129.Ldone:
130	ldx	[%l6+%l0],%o3
131	sll	%l5,3,%l5
132	xor	%o4,%o0,%o0
133	ldx	[%i1+%l0],%o2
134	srlx	%o1,4,%o1
135	ldx	[%l4+%l5],%o4
136	sllx	%o0,60,%o5
137	xor	%o3,%o1,%o1
138	srlx	%o0,4,%o0
139	xor	%o1,%o5,%o1
140	xor	%o2,%o0,%o0
141	stx	%o1,[%i0+8]
142	xor	%o4,%o0,%o0
143	stx	%o0,[%i0]
144
145	ret
146	restore
147.type	gcm_ghash_4bit,#function
148.size	gcm_ghash_4bit,(.-gcm_ghash_4bit)
! gcm_gmult_4bit: transform the 16-byte value at %i0 in place, using the
! same 4-bit table-lookup steps as gcm_ghash_4bit but with the bytes of
! Xi alone as input (no data is xor-ed in).
!
! Arguments as used by the code below:
!   %i0  Xi: bytes read from [%i0+15] down to [%i0+0]; result stored as
!        64-bit words at [%i0] (high) and [%i0+8] (low)
!   %i1  table of 16-byte entries; +0 word loaded through %i1, +8 word
!        through %l6 = %i1+8
!        (presumably the table of multiples of H - confirm with caller)
!
! Working registers:
!   %o0/%o1  running value Z (high/low)    %o2/%o3  table entry T (high/low)
!   %o4      rem_4bit entry, %l5 its index (low 4 bits of Z.lo)
!   %o5      bits carried from Z.hi into Z.lo by the 4-bit right shift
!   %l0/%l1  high nibble (& 0xf0) / low nibble << 4 of the current byte
!   %l4      run-time address of rem_4bit   %l7  byte index counter
149.globl	gcm_gmult_4bit
150.align	32
151gcm_gmult_4bit:
152	save	%sp,-192,%sp
153	ldub	[%i0+15],%l1		! start with the last byte of Xi
154	add	%i1,8,%l6
155
! call stores its own address (label 1) in %o7 and continues at .+8; the
! delay-slot add converts that into the address of rem_4bit.
1561:	call	.+8
157	add	%o7,rem_4bit-1b,%l4
158
159	and	%l1,0xf0,%l0
160	and	%l1,0x0f,%l1
161	sll	%l1,4,%l1
162	ldx	[%l6+%l1],%o1		! Z = table[low nibble]
163	ldx	[%i1+%l1],%o0
164
165	ldub	[%i0+14],%l1		! next byte of Xi
166
167	ldx	[%l6+%l0],%o3		! T = table[high nibble]
168	and	%o1,0xf,%l5
169	ldx	[%i1+%l0],%o2
170	sll	%l5,3,%l5		! scale index to 8-byte rem_4bit entries
171	ldx	[%l4+%l5],%o4
172	srlx	%o1,4,%o1
173	mov	13,%l7			! inner loop fetches bytes 13 down to 0
174	sllx	%o0,60,%o5
175	xor	%o3,%o1,%o1
176	srlx	%o0,4,%o0
177	xor	%o1,%o5,%o1
178
179	and	%o1,0xf,%l5
180	and	%l1,0xf0,%l0
181	and	%l1,0x0f,%l1
182	ba	.Lgmult_inner
183	sll	%l1,4,%l1		! delay slot
184.align	32
! Inner loop: low- and high-nibble step for the byte prepared by the
! previous pass, while fetching Xi[%l7] for the next pass.
185.Lgmult_inner:
186	ldx	[%l6+%l1],%o3
187	sll	%l5,3,%l5
188	xor	%o2,%o0,%o0
189	ldx	[%i1+%l1],%o2
190	srlx	%o1,4,%o1
191	xor	%o4,%o0,%o0
192	ldx	[%l4+%l5],%o4
193	sllx	%o0,60,%o5
194	xor	%o3,%o1,%o1
195	ldub	[%i0+%l7],%l1
196	srlx	%o0,4,%o0
197	xor	%o1,%o5,%o1
198	xor	%o2,%o0,%o0
199	and	%o1,0xf,%l5
200
201	ldx	[%l6+%l0],%o3
202	sll	%l5,3,%l5
203	xor	%o4,%o0,%o0
204	ldx	[%i1+%l0],%o2
205	srlx	%o1,4,%o1
206	ldx	[%l4+%l5],%o4
207	sllx	%o0,60,%o5
208	srlx	%o0,4,%o0
209	and	%l1,0xf0,%l0
210	addcc	%l7,-1,%l7		! sets carry unless %l7 was 0
211	xor	%o1,%o5,%o1
212	and	%l1,0x0f,%l1
213	xor	%o3,%o1,%o1
214	sll	%l1,4,%l1
215	blu	.Lgmult_inner		! taken on carry set, i.e. while %l7 was non-zero
216	and	%o1,0xf,%l5		! delay slot
217
! Loop exit: low-nibble step for the last fetched byte (index 0).
218	ldx	[%l6+%l1],%o3
219	sll	%l5,3,%l5
220	xor	%o2,%o0,%o0
221	ldx	[%i1+%l1],%o2
222	srlx	%o1,4,%o1
223	xor	%o4,%o0,%o0
224	ldx	[%l4+%l5],%o4
225	sllx	%o0,60,%o5
226	xor	%o3,%o1,%o1
227	srlx	%o0,4,%o0
228	xor	%o1,%o5,%o1
229	xor	%o2,%o0,%o0
230	and	%o1,0xf,%l5
231
! Final high-nibble step, then write Z back over Xi.
232	ldx	[%l6+%l0],%o3
233	sll	%l5,3,%l5
234	xor	%o4,%o0,%o0
235	ldx	[%i1+%l0],%o2
236	srlx	%o1,4,%o1
237	ldx	[%l4+%l5],%o4
238	sllx	%o0,60,%o5
239	xor	%o3,%o1,%o1
240	srlx	%o0,4,%o0
241	xor	%o1,%o5,%o1
242	xor	%o2,%o0,%o0
243	stx	%o1,[%i0+8]
244	xor	%o4,%o0,%o0
245	stx	%o0,[%i0]
246
247	ret
248	restore
249.type	gcm_gmult_4bit,#function
250.size	gcm_gmult_4bit,(.-gcm_gmult_4bit)
! gcm_init_vis3: build the 24-byte key-dependent state read by
! gcm_gmult_vis3 and gcm_ghash_vis3.
!
!   %i1  source: two 64-bit words, high at [%i1+0] and low at [%i1+8]
!   %i0  destination:
!          [%i0+0], [%i0+8]  the source shifted left by one bit as a
!                            128-bit value; if the bit shifted out was set,
!                            0xE1<<57 (which is 0xC200000000000000 once
!                            truncated to 64 bits) is xor-ed into the high
!                            word and 1 into the low word ("twisted H")
!          [%i0+16]          the constant 0xA040608020C0E000
!
! The conditional xor is done without branching: srax replicates the top
! bit of the high word into a mask in %g1, which then gates both constants.
251.globl	gcm_init_vis3
252.align	32
253gcm_init_vis3:
254	save	%sp,-192,%sp
255
256	ldx	[%i1+0],%o2
257	ldx	[%i1+8],%o1
258	mov	0xE1,%o4
259	mov	1,%o3
260	sllx	%o4,57,%o4
261	srax	%o2,63,%g1		! broadcast carry
262	addcc	%o1,%o1,%o1		! H<<=1
! add-with-carry for the high word; emitted as a raw opcode word, with the
! intended mnemonic given in the trailing comment.
263	.word	0x95b2822a !addxc	%o2,%o2,%o2
264	and	%g1,%o3,%o3
265	and	%g1,%o4,%o4
266	xor	%o3,%o1,%o1
267	xor	%o4,%o2,%o2
268	stx	%o1,[%i0+8]		! save twisted H
269	stx	%o2,[%i0+0]
270
! Assemble the 64-bit constant from two 32-bit halves (sethi/or pairs).
271	sethi	%hi(0xA0406080),%g5
272	sethi	%hi(0x20C0E000),%l0
273	or	%g5,%lo(0xA0406080),%g5
274	or	%l0,%lo(0x20C0E000),%l0
275	sllx	%g5,32,%g5
276	or	%l0,%g5,%g5		! (0xE0·i)&0xff=0xA040608020C0E000
277	stx	%g5,[%i0+16]
278
279	ret
280	restore
281.type	gcm_init_vis3,#function
282.size	gcm_init_vis3,.-gcm_init_vis3
283
! gcm_gmult_vis3: multiply the 128-bit value at %i0 by the twisted H at
! %i1 and store the reduced 128-bit result back at %i0.
!
!   %i0  Xi: high word at [%i0+0], low word at [%i0+8]; overwritten
!   %i1  state written by gcm_init_vis3: twisted H at +0/+8 and the
!        0xA040608020C0E000 constant at +16
!
! Working registers:
!   %o3/%o4  Xi low/high            %o1/%o2  H low/high
!   %o0      H.hi ^ H.lo            %o5      0xE1<<57
!   %g5      constant loaded from [%i1+16]
!   %g1..%g4 partial products; %g3 (low) and %g4 (high) hold the result
!
! The carry-less multiplies are emitted as raw opcode words; the trailing
! comment on each .word gives the intended xmulx/xmulxhi instruction
! (VIS3: low/high 64 bits of a 64x64 carry-less product). Three products
! (lo*lo, hi*hi, (lo^hi)*(lo^hi)) are combined Karatsuba-style, and the
! result is folded back to 128 bits with further multiplies by %o5.
284.globl	gcm_gmult_vis3
285.align	32
286gcm_gmult_vis3:
287	save	%sp,-192,%sp
288
289	ldx	[%i0+8],%o3		! load Xi
290	ldx	[%i0+0],%o4
291	ldx	[%i1+8],%o1	! load twisted H
292	ldx	[%i1+0],%o2
293
294	mov	0xE1,%l7
295	sllx	%l7,57,%o5		! 57 is not a typo
296	ldx	[%i1+16],%g5		! (0xE0·i)&0xff=0xA040608020C0E000
297
298	xor	%o2,%o1,%o0		! Karatsuba pre-processing
299	.word	0x83b2e2a9 !xmulx	%o3,%o1,%g1
300	xor	%o3,%o4,%g3		! Karatsuba pre-processing
301	.word	0x85b0e2a8 !xmulx	%g3,%o0,%g2
302	.word	0x97b2e2c9 !xmulxhi	%o3,%o1,%o3
303	.word	0x87b0e2c8 !xmulxhi	%g3,%o0,%g3
304	.word	0x89b322ca !xmulxhi	%o4,%o2,%g4
305	.word	0x99b322aa !xmulx	%o4,%o2,%o4
306
! Use the low three bits of %g1 (times 8) as a shift count into the
! constant in %g5 to select one of its eight bytes; see the inline
! comments for how the result is combined and positioned.
307	sll	%g1,3,%o7
308	srlx	%g5,%o7,%o7		! ·0xE0 [implicit &(7<<3)]
309	xor	%g1,%o7,%o7
310	sllx	%o7,57,%o7		! (%g1·0xE1)<<1<<56 [implicit &0x7f]
311
312	xor	%g1,%g2,%g2		! Karatsuba post-processing
313	xor	%o3,%g3,%g3
314	 xor	%o7,%o3,%o3		! real destination is %g2
315	xor	%g4,%g3,%g3
316	xor	%o3,%g2,%g2
317	xor	%o4,%g3,%g3
318	xor	%o4,%g2,%g2
319
! Reduction multiplies by %o5, interleaved with independent xors (the
! lines with the extra leading space).
320	.word	0x97b062cd !xmulxhi	%g1,%o5,%o3		! ·0xE1<<1<<56
321	 xor	%g1,%g3,%g3
322	.word	0x83b0a2ad !xmulx	%g2,%o5,%g1
323	 xor	%g2,%g4,%g4
324	.word	0x85b0a2cd !xmulxhi	%g2,%o5,%g2
325
326	xor	%o3,%g3,%g3
327	xor	%g1,%g3,%g3
328	xor	%g2,%g4,%g4
329
330	stx	%g3,[%i0+8]		! save Xi
331	stx	%g4,[%i0+0]
332
333	ret
334	restore
335.type	gcm_gmult_vis3,#function
336.size	gcm_gmult_vis3,.-gcm_gmult_vis3
337
! gcm_ghash_vis3: for each 16-byte input block, xor the block into Xi and
! multiply the result by the twisted H, using the same multiply/reduce
! sequence as gcm_gmult_vis3. Xi is kept in registers across blocks and
! stored once at the end.
!
!   %i0  Xi: high word at [%i0+0], low word at [%i0+8]; overwritten
!   %i1  state written by gcm_init_vis3 (twisted H at +0/+8, constant at +16)
!   %i2  input pointer; may be unaligned (handled below)
!   %i3  remaining byte count, decremented by 16 per block
! NOTE(review): the loop body runs before any test of %i3 and repeats
! until %i3 is exactly zero, so the length is assumed to be a non-zero
! multiple of 16 - confirm with callers.
!
! Working registers:
!   %g3/%g4  Xi low/high (carried between iterations)
!   %o1/%o2  H low/high, %o0 = H.hi ^ H.lo, %o5 = 0xE1<<57
!   %g5      constant loaded from [%i1+16]
!   %l0      (input address & 7) * 8: left-shift count for realignment
!   %l1      -%l0: right-shift count (srlx uses only the low 6 bits of
!            the count, so this shifts by 64-%l0)
338.globl	gcm_ghash_vis3
339.align	32
340gcm_ghash_vis3:
341	save	%sp,-192,%sp
342	nop
! NOTE(review): srln is not defined anywhere in this file; presumably it
! is a macro provided by a header when the file is preprocessed (a plain
! srl/srlx by 0, going by the inline comment) - confirm the build setup.
343	srln	%i3,0,%i3		! needed on v8+, "nop" on v9
344
345	ldx	[%i0+8],%g3		! load Xi
346	ldx	[%i0+0],%g4
347	ldx	[%i1+8],%o1	! load twisted H
348	ldx	[%i1+0],%o2
349
350	mov	0xE1,%l7
351	sllx	%l7,57,%o5		! 57 is not a typo
352	ldx	[%i1+16],%g5		! (0xE0·i)&0xff=0xA040608020C0E000
353
! Split the input address into an 8-byte-aligned pointer and a bit offset.
354	and	%i2,7,%l0
355	andn	%i2,7,%i2
356	sll	%l0,3,%l0
357	prefetch [%i2+63], 20
358	sub	%g0,%l0,%l1
359
360	xor	%o2,%o1,%o0		! Karatsuba pre-processing
361.Loop:
362	ldx	[%i2+8],%o3
363	brz,pt	%l0,1f			! aligned input: skip the realignment
364	ldx	[%i2+0],%o4		! delay slot, executed on both paths
365
! Unaligned input: fetch a third doubleword and shift/merge so that
! %o4:%o3 hold the 16 bytes starting at the original (unaligned) address.
366	ldx	[%i2+16],%g2		! align data
367	srlx	%o3,%l1,%g1
368	sllx	%o3,%l0,%o3
369	sllx	%o4,%l0,%o4
370	srlx	%g2,%l1,%g2
371	or	%g1,%o4,%o4
372	or	%g2,%o3,%o3
3731:
374	add	%i2,16,%i2
375	sub	%i3,16,%i3
376	xor	%g3,%o3,%o3		! Xi ^= input block
377	xor	%g4,%o4,%o4
378	prefetch [%i2+63], 20
379
! From here to the end of the loop the sequence is the same as in
! gcm_gmult_vis3, operating on %o3/%o4 and leaving the new Xi in %g3/%g4.
! The .word lines are raw encodings of the instruction named in their
! trailing comment.
380	.word	0x83b2e2a9 !xmulx	%o3,%o1,%g1
381	xor	%o3,%o4,%g3		! Karatsuba pre-processing
382	.word	0x85b0e2a8 !xmulx	%g3,%o0,%g2
383	.word	0x97b2e2c9 !xmulxhi	%o3,%o1,%o3
384	.word	0x87b0e2c8 !xmulxhi	%g3,%o0,%g3
385	.word	0x89b322ca !xmulxhi	%o4,%o2,%g4
386	.word	0x99b322aa !xmulx	%o4,%o2,%o4
387
388	sll	%g1,3,%o7
389	srlx	%g5,%o7,%o7		! ·0xE0 [implicit &(7<<3)]
390	xor	%g1,%o7,%o7
391	sllx	%o7,57,%o7		! (%g1·0xE1)<<1<<56 [implicit &0x7f]
392
393	xor	%g1,%g2,%g2		! Karatsuba post-processing
394	xor	%o3,%g3,%g3
395	 xor	%o7,%o3,%o3		! real destination is %g2
396	xor	%g4,%g3,%g3
397	xor	%o3,%g2,%g2
398	xor	%o4,%g3,%g3
399	xor	%o4,%g2,%g2
400
401	.word	0x97b062cd !xmulxhi	%g1,%o5,%o3		! ·0xE1<<1<<56
402	 xor	%g1,%g3,%g3
403	.word	0x83b0a2ad !xmulx	%g2,%o5,%g1
404	 xor	%g2,%g4,%g4
405	.word	0x85b0a2cd !xmulxhi	%g2,%o5,%g2
406
407	xor	%o3,%g3,%g3
408	xor	%g1,%g3,%g3
409	brnz,pt	%i3,.Loop		! more input remains
410	xor	%g2,%g4,%g4		! delay slot: completes Xi.hi on both paths
411
412	stx	%g3,[%i0+8]		! save Xi
413	stx	%g4,[%i0+0]
414
415	ret
416	restore
417.type	gcm_ghash_vis3,#function
418.size	gcm_ghash_vis3,.-gcm_ghash_vis3
! NUL-terminated identification string emitted after the code, followed
! by re-alignment to a 4-byte boundary.
419.asciz	"GHASH for SPARCv9/VIS3, CRYPTOGAMS by <appro@openssl.org>"
420.align	4
421