/* xref: /netbsd-src/crypto/external/bsd/openssl.old/lib/libcrypto/arch/i386/rc4-586.S (revision 4724848cf0da353df257f730694b7882798e5daf) */
#include <machine/asm.h>
/* Everything that follows is emitted into the code section. */
.text
.globl	RC4
.type	RC4,@function
.align	16
/*
 * RC4 -- XOR a buffer with the RC4 keystream (i386, GNU as AT&T syntax).
 *
 * Stack arguments, as read after the four pushes below:
 *   20(%esp)  key state: dword x at +0, dword y at +4, S-box at +8
 *   24(%esp)  length in bytes; zero returns immediately
 *   28(%esp)  input pointer
 *   32(%esp)  output pointer
 * Presumably matches the C prototype
 *   void RC4(RC4_KEY *key, size_t len, const unsigned char *in,
 *            unsigned char *out)
 * -- TODO confirm against the header; it is not visible in this file.
 *
 * Register roles inside the loops:
 *   %eax  x, kept one ahead of the value stored in the key state
 *   %ebx  y (only %bl is ever written)
 *   %ecx  S[x], preloaded for the next step
 *   %edi  S-box base
 *   %esi  input cursor
 *   %ebp  (output - input), so the output byte lives at (%ebp,%esi)
 *
 * Path selection:
 *   dword at S+256 equals -1      -> byte-entry S-box, 1 byte per pass
 *   otherwise (dword-entry S-box):
 *     length >= 8 and bit 26 of OPENSSL_ia32cap_P set -> 8 bytes per pass (MMX)
 *     length >= 4                                      -> 4 bytes per pass
 *     remaining bytes                                  -> 1 byte per pass
 * NOTE(review): bit 26 is presumably the SSE2 feature flag -- confirm
 * against the code that fills OPENSSL_ia32cap_P.
 *
 * Scratch storage: the 24/28/32(%esp) argument slots and the y slot at
 * -4(%edi) are overwritten while running; y is rewritten at .L007done.
 *
 * Preserves %ebp, %ebx, %esi, %edi.  Clobbers %eax, %ecx, %edx, the flags,
 * and %mm0-%mm2 on the MMX path (emms is executed before leaving it).
 */
RC4:
.L_RC4_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi		/* arg 1: key state */
	movl	24(%esp),%edx		/* arg 2: byte count */
	movl	28(%esp),%esi		/* arg 3: input */
	movl	32(%esp),%ebp		/* arg 4: output */
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	cmpl	$0,%edx
	je	.L000abort		/* zero length: leave the state untouched */
	movb	(%edi),%al		/* x */
	movb	4(%edi),%bl		/* y */
	addl	$8,%edi			/* %edi -> S-box */
	leal	(%esi,%edx,1),%ecx	/* end of input */
	subl	%esi,%ebp		/* %ebp = output - input */
	movl	%ecx,24(%esp)		/* park end-of-input in the length slot */
	incb	%al			/* x runs pre-incremented in the loops */
	cmpl	$-1,256(%edi)		/* -1 marker: byte-entry S-box */
	je	.L001RC4_CHAR
	movl	(%edi,%eax,4),%ecx	/* %ecx = S[x] */
	andl	$-4,%edx		/* count rounded down to a multiple of 4 */
	jz	.L002loop1		/* fewer than 4 bytes: byte loop only */
	movl	%ebp,32(%esp)		/* park output - input; %ebp is reused below */
	testl	$-8,%edx
	jz	.L003go4loop4		/* fewer than 8 bytes: skip the MMX loop */
	call	.L004PIC_me_up		/* call/pop yields our own address (PIC) */
.L004PIC_me_up:
	popl	%ebp
	leal	OPENSSL_ia32cap_P-.L004PIC_me_up(%ebp),%ebp
	btl	$26,(%ebp)		/* capability bit 26 gates the MMX loop */
	jnc	.L003go4loop4
	movl	32(%esp),%ebp		/* restore output - input */
	andl	$-8,%edx		/* count rounded down to a multiple of 8 */
	leal	-8(%esi,%edx,1),%edx	/* address of the last full 8-byte block */
	movl	%edx,-4(%edi)		/* park it in the y slot (scratch) */
	/* First step, done ahead of the loop: produces keystream byte 0. */
	addb	%cl,%bl			/* y += S[x] */
	movl	(%edi,%ebx,4),%edx	/* %edx = S[y] */
	movl	%ecx,(%edi,%ebx,4)	/* swap S[x] and S[y] */
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax		/* x = (x + 1) & 0xff */
	movzbl	%dl,%edx		/* output index = (S[x] + S[y]) & 0xff */
	movq	(%esi),%mm0		/* %mm0 = 8 input bytes */
	movl	(%edi,%eax,4),%ecx	/* S[x] for the next step */
	movd	(%edi,%edx,4),%mm2	/* %mm2 = keystream byte 0 */
	jmp	.L005loop_mmx_enter
.align	16
/*
 * MMX loop, 8 bytes per pass.  %mm2 collects the output block and %mm1
 * carries the newest keystream entry while it is shifted into its lane.
 * The top of the loop finishes byte 7 of the previous block while it
 * computes byte 0 of the next one.
 */
.L006loop_mmx:
	addb	%cl,%bl
	psllq	$56,%mm1		/* previous block: byte 7 into the top lane */
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax
	movzbl	%dl,%edx
	pxor	%mm1,%mm2		/* previous block complete */
	movq	(%esi),%mm0		/* next 8 input bytes (%esi already advanced) */
	movq	%mm2,-8(%ebp,%esi,1)	/* store the previous output block */
	movl	(%edi,%eax,4),%ecx
	movd	(%edi,%edx,4),%mm2	/* new block: keystream byte 0 */
.L005loop_mmx_enter:
	/* step for byte 1 */
	addb	%cl,%bl
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax
	movzbl	%dl,%edx
	pxor	%mm0,%mm2		/* fold the input block into the accumulator */
	movl	(%edi,%eax,4),%ecx
	movd	(%edi,%edx,4),%mm1	/* keystream byte 1 */
	/* step for byte 2 */
	addb	%cl,%bl
	psllq	$8,%mm1			/* byte 1 into lane 1 */
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax
	movzbl	%dl,%edx
	pxor	%mm1,%mm2
	movl	(%edi,%eax,4),%ecx
	movd	(%edi,%edx,4),%mm1	/* keystream byte 2 */
	/* step for byte 3 */
	addb	%cl,%bl
	psllq	$16,%mm1		/* byte 2 into lane 2 */
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax
	movzbl	%dl,%edx
	pxor	%mm1,%mm2
	movl	(%edi,%eax,4),%ecx
	movd	(%edi,%edx,4),%mm1	/* keystream byte 3 */
	/* step for byte 4 */
	addb	%cl,%bl
	psllq	$24,%mm1		/* byte 3 into lane 3 */
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax
	movzbl	%dl,%edx
	pxor	%mm1,%mm2
	movl	(%edi,%eax,4),%ecx
	movd	(%edi,%edx,4),%mm1	/* keystream byte 4 */
	/* step for byte 5 */
	addb	%cl,%bl
	psllq	$32,%mm1		/* byte 4 into lane 4 */
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax
	movzbl	%dl,%edx
	pxor	%mm1,%mm2
	movl	(%edi,%eax,4),%ecx
	movd	(%edi,%edx,4),%mm1	/* keystream byte 5 */
	/* step for byte 6 */
	addb	%cl,%bl
	psllq	$40,%mm1		/* byte 5 into lane 5 */
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax
	movzbl	%dl,%edx
	pxor	%mm1,%mm2
	movl	(%edi,%eax,4),%ecx
	movd	(%edi,%edx,4),%mm1	/* keystream byte 6 */
	/* step for byte 7 */
	addb	%cl,%bl
	psllq	$48,%mm1		/* byte 6 into lane 6 */
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	incl	%eax
	addl	%ecx,%edx
	movzbl	%al,%eax
	movzbl	%dl,%edx
	pxor	%mm1,%mm2
	movl	(%edi,%eax,4),%ecx
	movd	(%edi,%edx,4),%mm1	/* keystream byte 7, merged after the branch */
	/*
	 * Rebuild %ebx from its low byte.  The value does not change, since
	 * only %bl has been written since %ebx was last zeroed.
	 * NOTE(review): presumably a partial-register performance workaround.
	 */
	movl	%ebx,%edx
	xorl	%ebx,%ebx
	movb	%dl,%bl
	cmpl	-4(%edi),%esi		/* was this the last full block? */
	leal	8(%esi),%esi		/* advance input; lea leaves the flags alone */
	jb	.L006loop_mmx
	psllq	$56,%mm1		/* finish the final block */
	pxor	%mm1,%mm2
	movq	%mm2,-8(%ebp,%esi,1)
	emms				/* leave MMX state before integer-only code */
	cmpl	24(%esp),%esi
	je	.L007done		/* no tail bytes */
	jmp	.L002loop1		/* 1..7 tail bytes */
.align	16
.L003go4loop4:
	leal	-4(%esi,%edx,1),%edx	/* address of the last full 4-byte word */
	movl	%edx,28(%esp)		/* park it in the input-pointer slot */
/*
 * Integer loop, 4 bytes per pass.  Keystream bytes are packed into %ebp
 * with ror/or; this relies on every S-box dword having zero upper bytes.
 */
.L008loop4:
	addb	%cl,%bl
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	addl	%ecx,%edx
	incb	%al
	andl	$255,%edx
	movl	(%edi,%eax,4),%ecx
	movl	(%edi,%edx,4),%ebp	/* keystream byte 0 */
	addb	%cl,%bl
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	addl	%ecx,%edx
	incb	%al
	andl	$255,%edx
	rorl	$8,%ebp
	movl	(%edi,%eax,4),%ecx
	orl	(%edi,%edx,4),%ebp	/* keystream byte 1 */
	addb	%cl,%bl
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	addl	%ecx,%edx
	incb	%al
	andl	$255,%edx
	rorl	$8,%ebp
	movl	(%edi,%eax,4),%ecx
	orl	(%edi,%edx,4),%ebp	/* keystream byte 2 */
	addb	%cl,%bl
	movl	(%edi,%ebx,4),%edx
	movl	%ecx,(%edi,%ebx,4)
	movl	%edx,(%edi,%eax,4)
	addl	%ecx,%edx
	incb	%al
	andl	$255,%edx
	rorl	$8,%ebp
	movl	32(%esp),%ecx		/* %ecx = output - input */
	orl	(%edi,%edx,4),%ebp	/* keystream byte 3 */
	rorl	$8,%ebp			/* fourth rotate: bytes now in memory order */
	xorl	(%esi),%ebp		/* combine with 4 input bytes */
	cmpl	28(%esp),%esi		/* was this the last full word? */
	movl	%ebp,(%ecx,%esi,1)	/* store 4 output bytes */
	leal	4(%esi),%esi
	movl	(%edi,%eax,4),%ecx	/* reload S[x] */
	jb	.L008loop4		/* flags from the cmpl: mov and lea keep them */
	cmpl	24(%esp),%esi
	je	.L007done		/* no tail bytes */
	movl	32(%esp),%ebp		/* the byte loop expects output - input in %ebp */
.align	16
/* Byte loop over the dword-entry S-box: one RC4 step per pass. */
.L002loop1:
	addb	%cl,%bl			/* y += S[x] */
	movl	(%edi,%ebx,4),%edx	/* %edx = S[y] */
	movl	%ecx,(%edi,%ebx,4)	/* swap S[x] and S[y] */
	movl	%edx,(%edi,%eax,4)
	addl	%ecx,%edx
	incb	%al			/* x for the next step */
	andl	$255,%edx		/* output index = (S[x] + S[y]) & 0xff */
	movl	(%edi,%edx,4),%edx	/* keystream byte */
	xorb	(%esi),%dl
	leal	1(%esi),%esi
	movl	(%edi,%eax,4),%ecx	/* preload S[x] */
	cmpl	24(%esp),%esi		/* end of input? */
	movb	%dl,-1(%ebp,%esi,1)	/* store the output byte; mov keeps the flags */
	jb	.L002loop1
	jmp	.L007done
.align	16
/* Byte loop over the byte-entry S-box (marker -1 at S+256). */
.L001RC4_CHAR:
	movzbl	(%edi,%eax,1),%ecx	/* %ecx = S[x] */
.L009cloop1:
	addb	%cl,%bl			/* y += S[x] */
	movzbl	(%edi,%ebx,1),%edx	/* %edx = S[y] */
	movb	%cl,(%edi,%ebx,1)	/* swap S[x] and S[y] */
	movb	%dl,(%edi,%eax,1)
	addb	%cl,%dl			/* output index, wraps in 8 bits */
	movzbl	(%edi,%edx,1),%edx	/* keystream byte */
	addb	$1,%al			/* x for the next step */
	xorb	(%esi),%dl
	leal	1(%esi),%esi
	movzbl	(%edi,%eax,1),%ecx	/* preload S[x] */
	cmpl	24(%esp),%esi		/* end of input? */
	movb	%dl,-1(%ebp,%esi,1)	/* store the output byte; mov keeps the flags */
	jb	.L009cloop1
.L007done:
	decb	%al			/* undo the pre-increment of x */
	movl	%ebx,-4(%edi)		/* store y */
	movb	%al,-8(%edi)		/* store x (low byte only) */
.L000abort:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	RC4,.-.L_RC4_begin
.globl	RC4_set_key
.type	RC4_set_key,@function
.align	16
/*
 * RC4_set_key -- initialise the RC4 key state from a key
 * (i386, GNU as AT&T syntax).
 *
 * Stack arguments, as read after the four pushes below:
 *   20(%esp)  key state: dword x at +0, dword y at +4, S-box at +8
 *   24(%esp)  key length in bytes
 *   28(%esp)  key bytes
 * Presumably matches the C prototype
 *   void RC4_set_key(RC4_KEY *key, int len, const unsigned char *data)
 * -- TODO confirm against the header; it is not visible in this file.
 *
 * The S-box is first filled with 0..255 and then permuted in 256 passes,
 * adding one key byte per pass; the key is reused cyclically.
 *
 * Bit 20 of the first dword of OPENSSL_ia32cap_P selects the layout:
 *   set    -> 256 one-byte entries, and -1 is written at S+256 as a marker
 *   clear  -> 256 dword entries
 * NOTE(review): the meaning of bit 20 is not visible here -- confirm
 * against the code that fills OPENSSL_ia32cap_P.
 *
 * x and y are both stored as zero on exit.  The y slot at -4(%edi) is
 * used as scratch for -len until then.
 *
 * A key length of zero is not rejected: %ebp then starts at 0, never
 * returns to 0 within 256 passes, and the loop reads 256 consecutive
 * bytes starting at the key pointer.  Callers must pass len >= 1.
 *
 * Preserves %ebp, %ebx, %esi, %edi.  Clobbers %eax, %ecx, %edx, flags.
 */
RC4_set_key:
.L_RC4_set_key_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi		/* arg 1: key state */
	movl	24(%esp),%ebp		/* arg 2: key length */
	movl	28(%esp),%esi		/* arg 3: key bytes */
	call	.L010PIC_me_up		/* call/pop yields our own address (PIC) */
.L010PIC_me_up:
	popl	%edx
	leal	OPENSSL_ia32cap_P-.L010PIC_me_up(%edx),%edx
	leal	8(%edi),%edi		/* %edi -> S-box */
	leal	(%esi,%ebp,1),%esi	/* %esi -> one past the last key byte */
	negl	%ebp			/* %ebp = -len, an index that counts up to 0 */
	xorl	%eax,%eax
	movl	%ebp,-4(%edi)		/* keep -len for rewinding the key index */
	btl	$20,(%edx)		/* capability bit 20: byte-entry S-box */
	jc	.L011c1stloop
.align	16
/* Dword layout: S[i] = i for i = 0..255. */
.L012w1stloop:
	movl	%eax,(%edi,%eax,4)
	addb	$1,%al
	jnc	.L012w1stloop		/* carry out of %al after i = 255 ends the loop */
	xorl	%ecx,%ecx		/* i = 0 */
	xorl	%edx,%edx		/* j = 0 */
.align	16
/* Dword layout: key-dependent permutation. */
.L013w2ndloop:
	movl	(%edi,%ecx,4),%eax	/* %eax = S[i] */
	addb	(%esi,%ebp,1),%dl	/* j += current key byte */
	addb	%al,%dl			/* j += S[i] */
	addl	$1,%ebp			/* advance the key index; ZF set at end of key */
	movl	(%edi,%edx,4),%ebx	/* %ebx = S[j]; mov leaves ZF intact */
	jnz	.L014wnowrap
	movl	-4(%edi),%ebp		/* end of key: restart at its first byte */
.L014wnowrap:
	movl	%eax,(%edi,%edx,4)	/* swap S[i] and S[j] */
	movl	%ebx,(%edi,%ecx,4)
	addb	$1,%cl
	jnc	.L013w2ndloop		/* carry out of %cl after i = 255 ends the loop */
	jmp	.L015exit
.align	16
/* Byte layout: S[i] = i for i = 0..255. */
.L011c1stloop:
	movb	%al,(%edi,%eax,1)
	addb	$1,%al
	jnc	.L011c1stloop		/* carry out of %al after i = 255 ends the loop */
	xorl	%ecx,%ecx		/* i = 0 */
	xorl	%edx,%edx		/* j = 0 */
	xorl	%ebx,%ebx
.align	16
/* Byte layout: key-dependent permutation. */
.L016c2ndloop:
	movb	(%edi,%ecx,1),%al	/* %al = S[i] */
	addb	(%esi,%ebp,1),%dl	/* j += current key byte */
	addb	%al,%dl			/* j += S[i] */
	addl	$1,%ebp			/* advance the key index; ZF set at end of key */
	movb	(%edi,%edx,1),%bl	/* %bl = S[j]; mov leaves ZF intact */
	jnz	.L017cnowrap
	movl	-4(%edi),%ebp		/* end of key: restart at its first byte */
.L017cnowrap:
	movb	%al,(%edi,%edx,1)	/* swap S[i] and S[j] */
	movb	%bl,(%edi,%ecx,1)
	addb	$1,%cl
	jnc	.L016c2ndloop		/* carry out of %cl after i = 255 ends the loop */
	movl	$-1,256(%edi)		/* marker tested by RC4 to pick the byte loop */
.L015exit:
	xorl	%eax,%eax
	movl	%eax,-8(%edi)		/* x = 0 */
	movl	%eax,-4(%edi)		/* y = 0 (also discards the -len scratch) */
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	RC4_set_key,.-.L_RC4_set_key_begin
.globl	RC4_options
.type	RC4_options,@function
.align	16
/*
 * RC4_options -- return in %eax the address of a NUL-terminated string
 * naming the variant selected by the first dword of OPENSSL_ia32cap_P:
 *   bit 20 set                -> "rc4(1x,char)"  (offset 12 in .L019opts)
 *   bit 20 clear, bit 26 set  -> "rc4(8x,mmx)"   (offset 25)
 *   both clear                -> "rc4(4x,int)"   (offset 0)
 *
 * Takes no arguments.  Clobbers %edx and the flags; no other register
 * is written.
 */
RC4_options:
.L_RC4_options_begin:
	call	.L018pic_point		/* call/pop yields our own address (PIC) */
.L018pic_point:
	popl	%eax
	leal	.L019opts-.L018pic_point(%eax),%eax	/* %eax -> string table */
	call	.L020PIC_me_up
.L020PIC_me_up:
	popl	%edx
	leal	OPENSSL_ia32cap_P-.L020PIC_me_up(%edx),%edx
	movl	(%edx),%edx		/* first capability dword */
	btl	$20,%edx
	jc	.L0211xchar
	btl	$26,%edx
	jnc	.L022ret		/* neither bit: first string */
	addl	$25,%eax		/* skip 12 + 13 bytes: third string */
	ret
.L0211xchar:
	addl	$12,%eax		/* skip 12 bytes: second string */
.L022ret:
	ret
.align	64
.L019opts:
/* "rc4(4x,int)" + NUL, 12 bytes */
.byte	114,99,52,40,52,120,44,105,110,116,41,0
/* "rc4(1x,char)" + NUL, 13 bytes */
.byte	114,99,52,40,49,120,44,99,104,97,114,41,0
/* "rc4(8x,mmx)" + NUL, 12 bytes */
.byte	114,99,52,40,56,120,44,109,109,120,41,0
/*
 * "RC4 for x86, CRYPTOGAMS by <appro@openssl.org>" + NUL.
 * Identification text; no offset returned above points at it.
 */
.byte	82,67,52,32,102,111,114,32,120,56,54,44,32,67,82,89
.byte	80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
.byte	111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
.size	RC4_options,.-.L_RC4_options_begin
/*
 * Common symbol: 16 bytes, 4-byte alignment.  The routines in this file
 * only test bits 20 and 26 of its first dword.
 * NOTE(review): presumably filled in by CPU-detection code elsewhere --
 * nothing in this file writes to it.
 */
.comm	OPENSSL_ia32cap_P,16,4