xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/i386/rc4-586.S (revision 1b3d6f93806f8821fe459e13ad13e605b37c6d43)
1#include <machine/asm.h>
2.text
/*
 * RC4 -- XOR a buffer with the RC4 keystream (32-bit x86, AT&T syntax,
 * all arguments taken from the stack).
 *
 * Arguments as read after the four pushes below:
 *   20(%esp) -> %edi  key state: index x (byte) at +0, index y (byte) at +4,
 *                     permutation table at +8
 *   24(%esp) -> %edx  length in bytes; 0 returns without touching the state
 *   28(%esp) -> %esi  input pointer
 *   32(%esp) -> %ebp  output pointer
 * NOTE(review): presumably the C-level RC4(key, len, in, out) entry point;
 * confirm against the C prototype.
 *
 * Register roles in the loops:
 *   %eax = x (already advanced for the upcoming step), %ebx = y,
 *   %ecx = table[x], %edi = table base, %esi = running input pointer,
 *   %ebp = out - in, so the output for the byte at %esi lives at %ebp+%esi.
 *
 * Scratch storage: the argument slots are overwritten (24(%esp) = input end
 * pointer, 28(%esp) = start of the last whole 4-byte block, 32(%esp) = copy
 * of out - in), and on the 8-byte path the y slot at -4(%edi) temporarily
 * holds the loop bound until .L007done stores the real y back.
 *
 * Three table walkers are present: an 8-bytes-per-iteration loop using
 * %mm0-%mm2, a 4-bytes-per-iteration integer loop, and a 1-byte loop; a
 * separate 1-byte loop handles tables made of byte entries.
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx, %edx and
 * flags are overwritten.
 */
3.globl	RC4
4.type	RC4,@function
5.align	16
6RC4:
7.L_RC4_begin:
	/* Bytes F3 0F 1E FB encode endbr32; assembled only when __CET__ is defined. */
8	#ifdef __CET__
9
10.byte	243,15,30,251
11	#endif
12
	/* Save the registers that are restored at .L000abort. */
13	pushl	%ebp
14	pushl	%ebx
15	pushl	%esi
16	pushl	%edi
	/* Fetch the four stack arguments (offsets account for the 4 pushes). */
17	movl	20(%esp),%edi
18	movl	24(%esp),%edx
19	movl	28(%esp),%esi
20	movl	32(%esp),%ebp
	/* Clear the full index registers; only their low bytes are loaded below. */
21	xorl	%eax,%eax
22	xorl	%ebx,%ebx
	/* Nothing to do for a zero length. */
23	cmpl	$0,%edx
24	je	.L000abort
	/* Load x and y, then advance %edi so it points at the table. */
25	movb	(%edi),%al
26	movb	4(%edi),%bl
27	addl	$8,%edi
	/* %ecx = in + len (end pointer), kept in the old length slot. */
28	leal	(%esi,%edx,1),%ecx
	/* %ebp = out - in, so stores can be addressed relative to %esi. */
29	subl	%esi,%ebp
30	movl	%ecx,24(%esp)
	/* Pre-increment x with 8-bit wrap; undone by decb at .L007done. */
31	incb	%al
	/* A dword of -1 right after 256 bytes of table marks the byte-entry layout. */
32	cmpl	$-1,256(%edi)
33	je	.L001RC4_CHAR
	/* Dword-entry layout from here on: %ecx = table[x]. */
34	movl	(%edi,%eax,4),%ecx
	/* %edx = len rounded down to a multiple of 4; zero means fewer than 4 bytes. */
35	andl	$-4,%edx
36	jz	.L002loop1
	/* Keep out - in in the stack slot because %ebp is reused below. */
37	movl	%ebp,32(%esp)
	/* Fewer than 8 bytes: use the 4-byte loop. */
38	testl	$-8,%edx
39	jz	.L003go4loop4
	/* call/pop obtains the current address for position-independent access. */
40	call	.L004PIC_me_up
41.L004PIC_me_up:
42	popl	%ebp
43	leal	OPENSSL_ia32cap_P-.L004PIC_me_up(%ebp),%ebp
	/*
	 * Bit 26 of the first capability dword gates the 8-byte path.
	 * NOTE(review): presumably the SSE2 feature bit -- confirm against the
	 * OPENSSL_ia32cap documentation.
	 */
44	btl	$26,(%ebp)
45	jnc	.L003go4loop4
	/* Restore %ebp = out - in. */
46	movl	32(%esp),%ebp
	/* Loop bound: start of the last whole 8-byte block, parked in the y slot. */
47	andl	$-8,%edx
48	leal	-8(%esi,%edx,1),%edx
49	movl	%edx,-4(%edi)
	/*
	 * One RC4 step: y += table[x]; swap table[x] and table[y]; advance x;
	 * index = (old table[x] + old table[y]) & 255.  This first step is done
	 * before entering the loop body.
	 */
50	addb	%cl,%bl
51	movl	(%edi,%ebx,4),%edx
52	movl	%ecx,(%edi,%ebx,4)
53	movl	%edx,(%edi,%eax,4)
54	incl	%eax
55	addl	%ecx,%edx
56	movzbl	%al,%eax
57	movzbl	%dl,%edx
	/* %mm0 = 8 input bytes, %ecx = table[x], %mm2 = keystream byte 0. */
58	movq	(%esi),%mm0
59	movl	(%edi,%eax,4),%ecx
60	movd	(%edi,%edx,4),%mm2
61	jmp	.L005loop_mmx_enter
62.align	16
63.L006loop_mmx:
	/*
	 * Loop top: step 1 of the next block, interleaved with finishing the
	 * previous one.  Keystream byte 7 of the previous block is shifted to
	 * bits 56..63 and merged, and the finished 8 bytes are stored at
	 * -8(%ebp,%esi) because %esi has already been advanced by 8.
	 */
64	addb	%cl,%bl
65	psllq	$56,%mm1
66	movl	(%edi,%ebx,4),%edx
67	movl	%ecx,(%edi,%ebx,4)
68	movl	%edx,(%edi,%eax,4)
69	incl	%eax
70	addl	%ecx,%edx
71	movzbl	%al,%eax
72	movzbl	%dl,%edx
73	pxor	%mm1,%mm2
74	movq	(%esi),%mm0
75	movq	%mm2,-8(%ebp,%esi,1)
76	movl	(%edi,%eax,4),%ecx
77	movd	(%edi,%edx,4),%mm2
78.L005loop_mmx_enter:
	/* Step 2: fold the input into %mm2; %mm1 = keystream byte 1. */
79	addb	%cl,%bl
80	movl	(%edi,%ebx,4),%edx
81	movl	%ecx,(%edi,%ebx,4)
82	movl	%edx,(%edi,%eax,4)
83	incl	%eax
84	addl	%ecx,%edx
85	movzbl	%al,%eax
86	movzbl	%dl,%edx
87	pxor	%mm0,%mm2
88	movl	(%edi,%eax,4),%ecx
89	movd	(%edi,%edx,4),%mm1
	/* Step 3: merge keystream byte 1 at bits 8..15; %mm1 = byte 2. */
90	addb	%cl,%bl
91	psllq	$8,%mm1
92	movl	(%edi,%ebx,4),%edx
93	movl	%ecx,(%edi,%ebx,4)
94	movl	%edx,(%edi,%eax,4)
95	incl	%eax
96	addl	%ecx,%edx
97	movzbl	%al,%eax
98	movzbl	%dl,%edx
99	pxor	%mm1,%mm2
100	movl	(%edi,%eax,4),%ecx
101	movd	(%edi,%edx,4),%mm1
	/* Step 4: merge keystream byte 2 at bits 16..23; %mm1 = byte 3. */
102	addb	%cl,%bl
103	psllq	$16,%mm1
104	movl	(%edi,%ebx,4),%edx
105	movl	%ecx,(%edi,%ebx,4)
106	movl	%edx,(%edi,%eax,4)
107	incl	%eax
108	addl	%ecx,%edx
109	movzbl	%al,%eax
110	movzbl	%dl,%edx
111	pxor	%mm1,%mm2
112	movl	(%edi,%eax,4),%ecx
113	movd	(%edi,%edx,4),%mm1
	/* Step 5: merge keystream byte 3 at bits 24..31; %mm1 = byte 4. */
114	addb	%cl,%bl
115	psllq	$24,%mm1
116	movl	(%edi,%ebx,4),%edx
117	movl	%ecx,(%edi,%ebx,4)
118	movl	%edx,(%edi,%eax,4)
119	incl	%eax
120	addl	%ecx,%edx
121	movzbl	%al,%eax
122	movzbl	%dl,%edx
123	pxor	%mm1,%mm2
124	movl	(%edi,%eax,4),%ecx
125	movd	(%edi,%edx,4),%mm1
	/* Step 6: merge keystream byte 4 at bits 32..39; %mm1 = byte 5. */
126	addb	%cl,%bl
127	psllq	$32,%mm1
128	movl	(%edi,%ebx,4),%edx
129	movl	%ecx,(%edi,%ebx,4)
130	movl	%edx,(%edi,%eax,4)
131	incl	%eax
132	addl	%ecx,%edx
133	movzbl	%al,%eax
134	movzbl	%dl,%edx
135	pxor	%mm1,%mm2
136	movl	(%edi,%eax,4),%ecx
137	movd	(%edi,%edx,4),%mm1
	/* Step 7: merge keystream byte 5 at bits 40..47; %mm1 = byte 6. */
138	addb	%cl,%bl
139	psllq	$40,%mm1
140	movl	(%edi,%ebx,4),%edx
141	movl	%ecx,(%edi,%ebx,4)
142	movl	%edx,(%edi,%eax,4)
143	incl	%eax
144	addl	%ecx,%edx
145	movzbl	%al,%eax
146	movzbl	%dl,%edx
147	pxor	%mm1,%mm2
148	movl	(%edi,%eax,4),%ecx
149	movd	(%edi,%edx,4),%mm1
	/* Step 8: merge keystream byte 6 at bits 48..55; %mm1 = byte 7 (merged later). */
150	addb	%cl,%bl
151	psllq	$48,%mm1
152	movl	(%edi,%ebx,4),%edx
153	movl	%ecx,(%edi,%ebx,4)
154	movl	%edx,(%edi,%eax,4)
155	incl	%eax
156	addl	%ecx,%edx
157	movzbl	%al,%eax
158	movzbl	%dl,%edx
159	pxor	%mm1,%mm2
160	movl	(%edi,%eax,4),%ecx
161	movd	(%edi,%edx,4),%mm1
	/*
	 * Rebuild %ebx as its own low byte zero-extended (copy, clear, byte move).
	 * NOTE(review): looks like a partial-register-stall workaround -- confirm.
	 */
162	movl	%ebx,%edx
163	xorl	%ebx,%ebx
164	movb	%dl,%bl
	/* Compare against the bound in the y slot; leal advances %esi without touching flags. */
165	cmpl	-4(%edi),%esi
166	leal	8(%esi),%esi
167	jb	.L006loop_mmx
	/* Finish and store the final 8-byte block, then leave MMX state. */
168	psllq	$56,%mm1
169	pxor	%mm1,%mm2
170	movq	%mm2,-8(%ebp,%esi,1)
171	emms
	/* Done if the input end was reached; otherwise finish the tail bytewise. */
172	cmpl	24(%esp),%esi
173	je	.L007done
174	jmp	.L002loop1
175.align	16
176.L003go4loop4:
	/* Loop bound: start of the last whole 4-byte block, kept in the old input slot. */
177	leal	-4(%esi,%edx,1),%edx
178	movl	%edx,28(%esp)
179.L008loop4:
	/* Step 1 of 4: %ebp = keystream byte 0. */
180	addb	%cl,%bl
181	movl	(%edi,%ebx,4),%edx
182	movl	%ecx,(%edi,%ebx,4)
183	movl	%edx,(%edi,%eax,4)
184	addl	%ecx,%edx
185	incb	%al
186	andl	$255,%edx
187	movl	(%edi,%eax,4),%ecx
188	movl	(%edi,%edx,4),%ebp
	/* Step 2 of 4: rotate the accumulator right by 8 and OR in the next byte. */
189	addb	%cl,%bl
190	movl	(%edi,%ebx,4),%edx
191	movl	%ecx,(%edi,%ebx,4)
192	movl	%edx,(%edi,%eax,4)
193	addl	%ecx,%edx
194	incb	%al
195	andl	$255,%edx
196	rorl	$8,%ebp
197	movl	(%edi,%eax,4),%ecx
198	orl	(%edi,%edx,4),%ebp
	/* Step 3 of 4. */
199	addb	%cl,%bl
200	movl	(%edi,%ebx,4),%edx
201	movl	%ecx,(%edi,%ebx,4)
202	movl	%edx,(%edi,%eax,4)
203	addl	%ecx,%edx
204	incb	%al
205	andl	$255,%edx
206	rorl	$8,%ebp
207	movl	(%edi,%eax,4),%ecx
208	orl	(%edi,%edx,4),%ebp
	/* Step 4 of 4; %ecx is borrowed for out - in instead of table[x]. */
209	addb	%cl,%bl
210	movl	(%edi,%ebx,4),%edx
211	movl	%ecx,(%edi,%ebx,4)
212	movl	%edx,(%edi,%eax,4)
213	addl	%ecx,%edx
214	incb	%al
215	andl	$255,%edx
216	rorl	$8,%ebp
217	movl	32(%esp),%ecx
218	orl	(%edi,%edx,4),%ebp
	/* The fourth rotate leaves keystream byte 0 in the low byte (memory order). */
219	rorl	$8,%ebp
220	xorl	(%esi),%ebp
	/* Flags from this compare reach jb: the movl and leal in between leave them alone. */
221	cmpl	28(%esp),%esi
222	movl	%ebp,(%ecx,%esi,1)
223	leal	4(%esi),%esi
	/* Reload %ecx = table[x] for the next step. */
224	movl	(%edi,%eax,4),%ecx
225	jb	.L008loop4
	/* Done if the input end was reached; otherwise restore %ebp = out - in for the tail. */
226	cmpl	24(%esp),%esi
227	je	.L007done
228	movl	32(%esp),%ebp
229.align	16
230.L002loop1:
	/* One byte per iteration over a dword-entry table. */
231	addb	%cl,%bl
232	movl	(%edi,%ebx,4),%edx
233	movl	%ecx,(%edi,%ebx,4)
234	movl	%edx,(%edi,%eax,4)
235	addl	%ecx,%edx
236	incb	%al
237	andl	$255,%edx
238	movl	(%edi,%edx,4),%edx
239	xorb	(%esi),%dl
240	leal	1(%esi),%esi
241	movl	(%edi,%eax,4),%ecx
	/* Compare with the end pointer; the byte store below does not change flags. */
242	cmpl	24(%esp),%esi
243	movb	%dl,-1(%ebp,%esi,1)
244	jb	.L002loop1
245	jmp	.L007done
246.align	16
247.L001RC4_CHAR:
	/* Byte-entry table: same step as above with 1-byte loads and stores. */
248	movzbl	(%edi,%eax,1),%ecx
249.L009cloop1:
250	addb	%cl,%bl
251	movzbl	(%edi,%ebx,1),%edx
252	movb	%cl,(%edi,%ebx,1)
253	movb	%dl,(%edi,%eax,1)
	/* 8-bit add wraps the keystream index; upper bits of %edx are zero from movzbl. */
254	addb	%cl,%dl
255	movzbl	(%edi,%edx,1),%edx
256	addb	$1,%al
257	xorb	(%esi),%dl
258	leal	1(%esi),%esi
259	movzbl	(%edi,%eax,1),%ecx
260	cmpl	24(%esp),%esi
261	movb	%dl,-1(%ebp,%esi,1)
262	jb	.L009cloop1
263.L007done:
	/* Undo the pre-increment of x, then write y (dword) and x (low byte only) back. */
264	decb	%al
265	movl	%ebx,-4(%edi)
266	movb	%al,-8(%edi)
267.L000abort:
	/* Restore the saved registers in reverse push order. */
268	popl	%edi
269	popl	%esi
270	popl	%ebx
271	popl	%ebp
272	ret
273.size	RC4,.-.L_RC4_begin
/*
 * RC4_set_key -- build the RC4 permutation table from a key
 * (32-bit x86, AT&T syntax, all arguments taken from the stack).
 *
 * Arguments as read after the four pushes below:
 *   20(%esp) -> %edi  key state to fill: x at +0, y at +4, table at +8
 *   24(%esp) -> %ebp  key length in bytes
 *   28(%esp) -> %esi  key bytes
 *
 * Bit 20 of the first dword of OPENSSL_ia32cap_P selects the table layout:
 *   clear: 256 dword entries;
 *   set:   256 byte entries followed by a dword of -1 at table+256, the
 *          marker that RC4 compares against to pick its byte-table loop.
 * NOTE(review): the meaning assigned to bit 20 is defined outside this
 * file -- confirm against the OPENSSL_ia32cap documentation.
 *
 * Both x and y are stored as zero on exit.  A zero key length is not
 * special-cased here.  %ebp, %ebx, %esi and %edi are saved and restored;
 * %eax, %ecx, %edx and flags are overwritten.
 */
274.globl	RC4_set_key
275.type	RC4_set_key,@function
276.align	16
277RC4_set_key:
278.L_RC4_set_key_begin:
	/* Bytes F3 0F 1E FB encode endbr32; assembled only when __CET__ is defined. */
279	#ifdef __CET__
280
281.byte	243,15,30,251
282	#endif
283
284	pushl	%ebp
285	pushl	%ebx
286	pushl	%esi
287	pushl	%edi
	/* Fetch the three stack arguments (offsets account for the 4 pushes). */
288	movl	20(%esp),%edi
289	movl	24(%esp),%ebp
290	movl	28(%esp),%esi
	/* call/pop obtains the current address for position-independent access. */
291	call	.L010PIC_me_up
292.L010PIC_me_up:
293	popl	%edx
294	leal	OPENSSL_ia32cap_P-.L010PIC_me_up(%edx),%edx
	/* %edi = table base; %esi = one past the last key byte; %ebp = -len. */
295	leal	8(%edi),%edi
296	leal	(%esi,%ebp,1),%esi
297	negl	%ebp
298	xorl	%eax,%eax
	/* Park -len in the y slot so the key offset can be reset on wrap. */
299	movl	%ebp,-4(%edi)
300	btl	$20,(%edx)
301	jc	.L011c1stloop
302.align	16
303.L012w1stloop:
	/* Identity fill, dword entries: table[i] = i; the carry out of %al ends the loop after 256 passes. */
304	movl	%eax,(%edi,%eax,4)
305	addb	$1,%al
306	jnc	.L012w1stloop
	/* %ecx = i, %edx = j, both starting at zero. */
307	xorl	%ecx,%ecx
308	xorl	%edx,%edx
309.align	16
310.L013w2ndloop:
	/* j = (j + key byte + table[i]) & 255; the key byte is addressed as end + negative offset. */
311	movl	(%edi,%ecx,4),%eax
312	addb	(%esi,%ebp,1),%dl
313	addb	%al,%dl
	/* Advance the key offset; ZF from this add survives the movl and feeds jnz. */
314	addl	$1,%ebp
315	movl	(%edi,%edx,4),%ebx
316	jnz	.L014wnowrap
	/* Key exhausted: restart from its first byte. */
317	movl	-4(%edi),%ebp
318.L014wnowrap:
	/* Swap table[i] and table[j]; loop until i wraps past 255. */
319	movl	%eax,(%edi,%edx,4)
320	movl	%ebx,(%edi,%ecx,4)
321	addb	$1,%cl
322	jnc	.L013w2ndloop
323	jmp	.L015exit
324.align	16
325.L011c1stloop:
	/* Identity fill, byte entries. */
326	movb	%al,(%edi,%eax,1)
327	addb	$1,%al
328	jnc	.L011c1stloop
	/* %ecx = i, %edx = j; %ebx is cleared because only %bl is loaded below. */
329	xorl	%ecx,%ecx
330	xorl	%edx,%edx
331	xorl	%ebx,%ebx
332.align	16
333.L016c2ndloop:
	/* Same mixing pass as the dword variant, using byte loads and stores. */
334	movb	(%edi,%ecx,1),%al
335	addb	(%esi,%ebp,1),%dl
336	addb	%al,%dl
337	addl	$1,%ebp
338	movb	(%edi,%edx,1),%bl
339	jnz	.L017cnowrap
340	movl	-4(%edi),%ebp
341.L017cnowrap:
342	movb	%al,(%edi,%edx,1)
343	movb	%bl,(%edi,%ecx,1)
344	addb	$1,%cl
345	jnc	.L016c2ndloop
	/* Mark the table as byte-entry layout. */
346	movl	$-1,256(%edi)
347.L015exit:
	/* Store x = 0 and y = 0 (this also overwrites the parked -len). */
348	xorl	%eax,%eax
349	movl	%eax,-8(%edi)
350	movl	%eax,-4(%edi)
351	popl	%edi
352	popl	%esi
353	popl	%ebx
354	popl	%ebp
355	ret
356.size	RC4_set_key,.-.L_RC4_set_key_begin
/*
 * RC4_options -- return in %eax the address of a NUL-terminated string that
 * names the code path selected by the first dword of OPENSSL_ia32cap_P:
 *   bit 20 set                -> "rc4(1x,char)"  (offset 12 into .L019opts)
 *   bit 20 clear, bit 26 set  -> "rc4(8x,mmx)"   (offset 25)
 *   both clear                -> "rc4(4x,int)"   (offset 0)
 * Takes no arguments.  Overwrites %eax, %edx and flags.
 */
357.globl	RC4_options
358.type	RC4_options,@function
359.align	16
360RC4_options:
361.L_RC4_options_begin:
	/* Bytes F3 0F 1E FB encode endbr32; assembled only when __CET__ is defined. */
362	#ifdef __CET__
363
364.byte	243,15,30,251
365	#endif
366
	/* call/pop obtains the current address; %eax = address of the string table. */
367	call	.L018pic_point
368.L018pic_point:
369	popl	%eax
370	leal	.L019opts-.L018pic_point(%eax),%eax
	/* Same idiom again to reach OPENSSL_ia32cap_P; %edx = its first dword. */
371	call	.L020PIC_me_up
372.L020PIC_me_up:
373	popl	%edx
374	leal	OPENSSL_ia32cap_P-.L020PIC_me_up(%edx),%edx
375	movl	(%edx),%edx
	/* Bit 20 takes priority over bit 26. */
376	btl	$20,%edx
377	jc	.L0211xchar
378	btl	$26,%edx
379	jnc	.L022ret
	/* Skip the first two strings (12 + 13 bytes including their NULs). */
380	addl	$25,%eax
381	ret
382.L0211xchar:
	/* Skip the first string (12 bytes including its NUL). */
383	addl	$12,%eax
384.L022ret:
385	ret
386.align	64
387.L019opts:
	/* "rc4(4x,int)" NUL */
388.byte	114,99,52,40,52,120,44,105,110,116,41,0
	/* "rc4(1x,char)" NUL */
389.byte	114,99,52,40,49,120,44,99,104,97,114,41,0
	/* "rc4(8x,mmx)" NUL */
390.byte	114,99,52,40,56,120,44,109,109,120,41,0
	/* "RC4 for x86, CRYPTOGAMS by <appro@openssl.org>" NUL; no code in this block points at it. */
391.byte	82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89
392.byte	80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
393.byte	111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
394.align	64
	/* The size expression is evaluated here, so it spans the string table as well. */
395.size	RC4_options,.-.L_RC4_options_begin
/* Common symbol for the capability vector read above: 16 bytes, alignment 4. */
396.comm	OPENSSL_ia32cap_P,16,4
397
/*
 * ELF note carrying one GNU program property.
 * Layout: name size, descriptor size, note type, name, then the descriptor
 * (property type, data size, data), each part padded to 4 bytes.
 * NOTE(review): this note is emitted unconditionally, whereas the endbr32
 * bytes at the function entries are assembled only when __CET__ is
 * defined -- confirm that this is intended.
 */
398	.section ".note.gnu.property", "a"
399	.p2align 2
	/* Name size: distance between labels 0 and 1, i.e. "GNU" plus its NUL. */
400	.long 1f - 0f
	/* Descriptor size: distance between labels 1 and 4. */
401	.long 4f - 1f
	/* Note type 5 (NT_GNU_PROPERTY_TYPE_0). */
402	.long 5
4030:
404	.asciz "GNU"
4051:
406	.p2align 2
	/* Property type 0xc0000002 (GNU_PROPERTY_X86_FEATURE_1_AND). */
407	.long 0xc0000002
	/* Property data size: distance between labels 2 and 3. */
408	.long 3f - 2f
4092:
	/* Value 3 sets bits 0 and 1 (IBT and SHSTK in the x86 feature word). */
410	.long 3
4113:
412	.p2align 2
4134:
414