xref: /netbsd-src/crypto/external/bsd/openssl.old/lib/libcrypto/arch/x86_64/x86_64cpuid.S (revision 4724848cf0da353df257f730694b7882798e5daf)
1#include <machine/asm.h>
2
/*
 * Start-up hook: a call to OPENSSL_cpuid_setup is emitted into the
 * .init section.  PIC_PLT() is not defined in this file; presumably it
 * comes from <machine/asm.h> and routes the call through the PLT when
 * building position-independent code -- confirm against that header.
 */
.globl	OPENSSL_cpuid_setup
.section	.init
	call	PIC_PLT(OPENSSL_cpuid_setup)

/*
 * OPENSSL_ia32cap_P: 16-byte common symbol with 4-byte alignment and
 * hidden visibility.
 */
.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P, 16, 4

/* Everything that follows is ordinary code. */
.text
11
/*
 * OPENSSL_atomic_add
 *
 * Atomically adds a value to a 32-bit word in memory with a
 * lock-cmpxchg retry loop.
 *
 * In:    %rdi = address of the 32-bit word
 *        %rsi = addend (only the low 32 bits of the sum are used)
 * Out:   %rax = the value that was stored, sign-extended to 64 bits
 * Uses:  %r8, flags
 */
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,@function
.align	16
OPENSSL_atomic_add:
.cfi_startproc
	movl	(%rdi), %eax			/* eax = value we expect to find */
.Latomic_add_retry:
	leaq	(%rsi,%rax,1), %r8		/* r8d = expected + addend */
	lock cmpxchgl	%r8d, (%rdi)		/* store r8d iff (%rdi) == eax */
	jne	.Latomic_add_retry		/* lost the race; eax was reloaded */
	movl	%r8d, %eax
	cltq					/* sign-extend eax into rax */
	rep ret					/* two-byte return: 0xf3,0xc3 */
.cfi_endproc
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
27
/*
 * OPENSSL_rdtsc
 *
 * Reads the time-stamp counter.
 *
 * In:    nothing
 * Out:   %rax = (EDX << 32) | EAX as delivered by rdtsc
 * Uses:  %rdx, flags
 */
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,@function
.align	16
OPENSSL_rdtsc:
.cfi_startproc
	rdtsc					/* edx:eax = counter */
	shlq	$32, %rdx			/* move the high half into place */
	orq	%rdx, %rax			/* rax = full 64-bit value */
	rep ret					/* two-byte return: 0xf3,0xc3 */
.cfi_endproc
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc
39
/*
 * OPENSSL_ia32_cpuid
 *
 * Probes the processor with CPUID (and XGETBV when OSXSAVE is reported)
 * and returns a filtered capability vector.
 *
 * In:    %rdi = address of a buffer; only bytes 8..15 are written
 * Out:   %rax = (filtered leaf-1 ECX << 32) | filtered leaf-1 EDX
 *        8(%rdi)  = filtered CPUID.7:EBX (0 when leaf 7 is unavailable)
 *        12(%rdi) = CPUID.7:ECX          (0 when leaf 7 is unavailable)
 * Uses:  %rcx, %rdx, %r8-%r11, %xmm0, flags; %rbx is saved in %r8 and
 *        restored before returning.
 *
 * Register roles after the first CPUID:
 *        %r11d  highest basic CPUID leaf
 *        %r9d   0 only for "GenuineIntel"; later the ECX result word
 *        %r10d  scratch, finally the EDX result word
 *        %xmm0  copy of CPUID.1:EAX (processor signature)
 *
 * Feature names in the comments below follow the Intel/AMD manuals; the
 * code itself only manipulates the bit positions shown.
 */
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,@function
.align	16
OPENSSL_ia32_cpuid:
.cfi_startproc
	movq	%rbx, %r8			/* cpuid overwrites rbx */
.cfi_register	%rbx,%r8

	xorl	%eax, %eax
	movq	%rax, 8(%rdi)			/* clear both leaf-7 words */
	cpuid					/* leaf 0: vendor string */
	movl	%eax, %r11d			/* r11d = highest basic leaf */

	/* r9d = 0 iff EBX:EDX:ECX spells "GenuineIntel" */
	xorl	%eax, %eax
	cmpl	$0x756e6547, %ebx		/* "Genu" */
	setne	%al
	movl	%eax, %r9d
	cmpl	$0x49656e69, %edx		/* "ineI" */
	setne	%al
	orl	%eax, %r9d
	cmpl	$0x6c65746e, %ecx		/* "ntel" */
	setne	%al
	orl	%eax, %r9d
	jz	.Lintel

	/* r10d = 0 iff EBX:EDX:ECX spells "AuthenticAMD" */
	cmpl	$0x68747541, %ebx		/* "Auth" */
	setne	%al
	movl	%eax, %r10d
	cmpl	$0x69746E65, %edx		/* "enti" */
	setne	%al
	orl	%eax, %r10d
	cmpl	$0x444D4163, %ecx		/* "cAMD" */
	setne	%al
	orl	%eax, %r10d
	jnz	.Lintel				/* any other vendor shares that path */

	/* AMD only: consult the extended leaves */
	movl	$0x80000000, %eax
	cpuid
	cmpl	$0x80000001, %eax
	jb	.Lintel
	movl	%eax, %r10d			/* r10d = highest extended leaf */
	movl	$0x80000001, %eax
	cpuid
	orl	%ecx, %r9d
	andl	$0x00000801, %r9d		/* keep bit 0 (set: not Intel) and
						   extended ECX bit 11 (XOP) */

	cmpl	$0x80000008, %r10d
	jb	.Lintel

	movl	$0x80000008, %eax
	cpuid
	movzbq	%cl, %r10
	incq	%r10				/* r10 = ECX[7:0] + 1 */

	movl	$1, %eax
	cpuid
	btl	$28, %edx			/* EDX bit 28 (HTT) */
	jnc	.Lgeneric
	shrl	$16, %ebx			/* bl = EBX[23:16] */
	cmpb	%r10b, %bl
	ja	.Lgeneric			/* more than r10: leave bit 28 set */
	andl	$0xefffffff, %edx		/* otherwise clear bit 28 */
	jmp	.Lgeneric

.Lintel:
	cmpl	$4, %r11d
	movl	$-1, %r10d			/* default when leaf 4 is absent;
						   mov leaves the flags intact */
	jb	.Lnocacheinfo

	movl	$4, %eax
	xorl	%ecx, %ecx			/* sub-leaf 0 */
	cpuid
	movl	%eax, %r10d
	shrl	$14, %r10d
	andl	$0xfff, %r10d			/* r10d = EAX[25:14] */

.Lnocacheinfo:
	movl	$1, %eax
	cpuid
	movd	%eax, %xmm0			/* stash the signature for later */
	andl	$0xbfefffff, %edx		/* clear EDX bits 30 and 20 */
	testl	%r9d, %r9d
	jne	.Lnotintel
	orl	$0x40000000, %edx		/* Intel: set EDX bit 30 */
	andb	$15, %ah			/* ah = family nibble */
	cmpb	$15, %ah
	jne	.LnotP4
	orl	$0x00100000, %edx		/* family 15: set EDX bit 20 */
.LnotP4:
	cmpb	$6, %ah
	jne	.Lnotintel
	andl	$0x0fff0ff0, %eax		/* drop stepping and type fields */
	cmpl	$0x00050670, %eax
	je	.Lknights
	cmpl	$0x00080650, %eax
	jne	.Lnotintel
.Lknights:
	andl	$0xfbffffff, %ecx		/* clear ECX bit 26 */

.Lnotintel:
	btl	$28, %edx
	jnc	.Lgeneric
	andl	$0xefffffff, %edx		/* tentatively clear bit 28 */
	testl	%r10d, %r10d
	je	.Lgeneric			/* r10d == 0: leave it cleared */

	orl	$0x10000000, %edx		/* put bit 28 back ... */
	shrl	$16, %ebx
	cmpb	$1, %bl
	ja	.Lgeneric			/* ... and keep it if EBX[23:16] > 1 */
	andl	$0xefffffff, %edx
.Lgeneric:
	andl	$0x00000800, %r9d		/* keep only the saved bit 11 */
	andl	$0xfffff7ff, %ecx		/* leaf-1 ECX without its bit 11 */
	orl	%ecx, %r9d			/* r9d = ECX result word */

	movl	%edx, %r10d			/* r10d = EDX result word */

	cmpl	$7, %r11d
	jb	.Lno_extended_info
	movl	$7, %eax
	xorl	%ecx, %ecx			/* sub-leaf 0 */
	cpuid
	btl	$26, %r9d			/* was ECX bit 26 cleared above? */
	jc	.Lnotknights
	andl	$0xfff7ffff, %ebx		/* yes: also clear leaf-7 EBX bit 19 */
.Lnotknights:
	movd	%xmm0, %eax			/* recover the signature */
	andl	$0x0fff0ff0, %eax
	cmpl	$0x00050650, %eax
	jne	.Lnotskylakex
	andl	$0xfffeffff, %ebx		/* clear leaf-7 EBX bit 16 */

.Lnotskylakex:
	movl	%ebx, 8(%rdi)
	movl	%ecx, 12(%rdi)
.Lno_extended_info:

	btl	$27, %r9d			/* ECX bit 27 (OSXSAVE) */
	jnc	.Lclear_avx
	xorl	%ecx, %ecx
.byte	0x0f,0x01,0xd0				/* xgetbv, register 0 -> edx:eax */
	andl	$0xe6, %eax
	cmpl	$0xe6, %eax
	je	.Ldone				/* bits 1,2,5,6,7 all enabled */
	andl	$0x3fdeffff, 8(%rdi)		/* clear leaf-7 EBX bits 31,30,21,16 */

	andl	$6, %eax
	cmpl	$6, %eax
	je	.Ldone				/* bits 1 and 2 enabled */
.Lclear_avx:
	andl	$0xefffe7ff, %r9d		/* clear ECX-word bits 28, 12, 11 */
	andl	$0x3fdeffdf, 8(%rdi)		/* clear EBX bits 31,30,21,16 and 5 */
.Ldone:
	shlq	$32, %r9			/* ECX word becomes the high half */
	movl	%r10d, %eax			/* EDX word is the low half */
	movq	%r8, %rbx
.cfi_restore	%rbx
	orq	%r9, %rax
	rep ret					/* two-byte return: 0xf3,0xc3 */
.cfi_endproc
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
208
/*
 * OPENSSL_cleanse
 *
 * Overwrites a memory region with zero bytes.
 *
 * In:    %rdi = start address
 *        %rsi = length in bytes (0 is allowed)
 * Out:   %rax = 0
 * Uses:  %rdi, %rsi, flags
 *
 * Lengths below 15 are handled one byte at a time.  Longer regions are
 * first advanced byte-wise to an 8-byte boundary, then cleared a
 * quadword at a time, and any remainder goes back through the byte loop.
 */
.globl	OPENSSL_cleanse
.type	OPENSSL_cleanse,@function
.align	16
OPENSSL_cleanse:
.cfi_startproc
	xorl	%eax, %eax			/* rax = 0, the fill value */
	cmpq	$15, %rsi
	jae	.Lcleanse_bulk
	testq	%rsi, %rsi
	je	.Lcleanse_ret
.Lcleanse_bytes:
	movb	%al, (%rdi)
	subq	$1, %rsi
	leaq	1(%rdi), %rdi			/* lea keeps the flags from subq */
	jnz	.Lcleanse_bytes
.Lcleanse_ret:
	rep ret					/* two-byte return: 0xf3,0xc3 */
.align	16
.Lcleanse_bulk:
	testq	$7, %rdi			/* on an 8-byte boundary yet? */
	jz	.Lcleanse_qwords
	movb	%al, (%rdi)
	leaq	-1(%rsi), %rsi
	leaq	1(%rdi), %rdi
	jmp	.Lcleanse_bulk
.Lcleanse_qwords:
	movq	%rax, (%rdi)
	leaq	-8(%rsi), %rsi
	testq	$-8, %rsi			/* at least 8 bytes still to go? */
	leaq	8(%rdi), %rdi			/* lea keeps the flags from testq */
	jnz	.Lcleanse_qwords
	testq	%rsi, %rsi
	jne	.Lcleanse_bytes			/* 1..7 bytes left over */
	rep ret
.cfi_endproc
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
245
/*
 * CRYPTO_memcmp
 *
 * Compares two byte regions without an early exit on the first
 * difference: every byte is XORed and the differences are ORed together.
 *
 * In:    %rdi = first region
 *        %rsi = second region
 *        %rdx = length in bytes
 * Out:   %rax = 0 when the regions are identical (or the length is 0),
 *               1 otherwise
 * Uses:  %rdx, %rdi, %rsi, %r10, %r11, flags
 */
.globl	CRYPTO_memcmp
.type	CRYPTO_memcmp,@function
.align	16
CRYPTO_memcmp:
.cfi_startproc
	xorl	%eax, %eax			/* rax = accumulated difference */
	xorl	%r10d, %r10d
	testq	%rdx, %rdx
	je	.Lmemcmp_ret
	cmpq	$16, %rdx
	jne	.Lmemcmp_bytes

	/* exactly 16 bytes: two quadword compares */
	movq	(%rdi), %r10
	movq	8(%rdi), %r11
	movl	$1, %edx
	xorq	(%rsi), %r10
	xorq	8(%rsi), %r11
	orq	%r11, %r10			/* ZF set iff all 16 bytes match */
	cmovnzq	%rdx, %rax
	rep ret					/* two-byte return: 0xf3,0xc3 */

.align	16
.Lmemcmp_bytes:
	movb	(%rdi), %r10b
	leaq	1(%rdi), %rdi
	xorb	(%rsi), %r10b
	leaq	1(%rsi), %rsi
	orb	%r10b, %al
	decq	%rdx
	jnz	.Lmemcmp_bytes
	negq	%rax				/* non-zero becomes negative ... */
	shrq	$63, %rax			/* ... and the sign bit becomes 0/1 */
.Lmemcmp_ret:
	rep ret
.cfi_endproc
.size	CRYPTO_memcmp,.-CRYPTO_memcmp
/*
 * OPENSSL_wipe_cpu
 *
 * Zeroes %xmm0-%xmm15 and the integer registers %rcx, %rdx, %rsi, %rdi
 * and %r8-%r11.
 *
 * In:    nothing
 * Out:   %rax = %rsp + 8, i.e. the address just above the return address
 * Uses:  the registers listed above, flags
 */
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,@function
.align	16
OPENSSL_wipe_cpu:
.cfi_startproc
	/* integer registers */
	xorq	%rcx, %rcx
	xorq	%rdx, %rdx
	xorq	%rsi, %rsi
	xorq	%rdi, %rdi
	xorq	%r8, %r8
	xorq	%r9, %r9
	xorq	%r10, %r10
	xorq	%r11, %r11

	/* vector registers */
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1
	pxor	%xmm2, %xmm2
	pxor	%xmm3, %xmm3
	pxor	%xmm4, %xmm4
	pxor	%xmm5, %xmm5
	pxor	%xmm6, %xmm6
	pxor	%xmm7, %xmm7
	pxor	%xmm8, %xmm8
	pxor	%xmm9, %xmm9
	pxor	%xmm10, %xmm10
	pxor	%xmm11, %xmm11
	pxor	%xmm12, %xmm12
	pxor	%xmm13, %xmm13
	pxor	%xmm14, %xmm14
	pxor	%xmm15, %xmm15

	leaq	8(%rsp), %rax			/* return value: rsp + 8 */
	rep ret					/* two-byte return: 0xf3,0xc3 */
.cfi_endproc
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
/*
 * OPENSSL_instrument_bus
 *
 * For each of the requested 32-bit slots: read the time-stamp counter,
 * flush the slot's cache line, and atomically add the low-32-bit TSC
 * difference since the previous reading to the slot.
 *
 * In:    %rdi = address of the first 32-bit slot
 *        %rsi = number of slots (NOTE(review): a count of 0 is not
 *               special-cased; the loop decrements before testing)
 * Out:   %rax = the slot count that was passed in
 * Uses:  %rcx, %rdx, %r8-%r11, flags
 */
.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,@function
.align	16
OPENSSL_instrument_bus:
.cfi_startproc
	movq	%rdi, %r10			/* r10 = current slot */
	movq	%rsi, %rcx			/* rcx = slots remaining */
	movq	%rsi, %r11			/* r11 = count, kept for the result */

	rdtsc
	movl	%eax, %r8d			/* r8d = previous TSC (low half) */
	movl	$0, %r9d
	clflush	(%r10)
	lock addl	%r9d, (%r10)		/* add 0: touch the first slot */
	jmp	.Lbus_sample
.align	16
.Lbus_sample:
	rdtsc
	movl	%eax, %edx
	subl	%r8d, %eax			/* eax = delta since last reading */
	movl	%edx, %r8d			/* remember this reading */
	movl	%eax, %r9d
	clflush	(%r10)
	lock addl	%eax, (%r10)		/* slot += delta */
	leaq	4(%r10), %r10			/* next slot */
	subq	$1, %rcx
	jnz	.Lbus_sample

	movq	%r11, %rax
	rep ret					/* two-byte return: 0xf3,0xc3 */
.cfi_endproc
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
347
/*
 * OPENSSL_instrument_bus2
 *
 * Like OPENSSL_instrument_bus, but the output cursor only advances when
 * the measured TSC delta differs from the previous one, and the total
 * number of probes is bounded.
 *
 * In:    %rdi = address of the first 32-bit slot
 *        %rsi = number of slots available
 *        %rdx = maximum number of probes (NOTE(review): 0 is not
 *               special-cased; the counter is decremented before the test)
 * Out:   %rax = slot count passed in minus the slots still unused, i.e.
 *               how many times the cursor advanced
 * Uses:  %rcx, %rdx, %r8-%r11, flags
 *
 * The slot count is needed again at the end.  It is taken from %rsi,
 * which nothing in this routine modifies, rather than being parked at
 * 8(%rsp): under the System V ABI that address is in the caller's frame
 * (it is only a scratch "home" slot under the Win64 convention), so
 * storing there could overwrite live data belonging to the caller.
 */
.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,@function
.align	16
OPENSSL_instrument_bus2:
.cfi_startproc
	movq	%rdi, %r10			/* r10 = current slot */
	movq	%rsi, %rcx			/* rcx = slots remaining */
	movq	%rdx, %r11			/* r11 = probes remaining */

	rdtsc
	movl	%eax, %r8d			/* r8d = previous TSC (low half) */
	movl	$0, %r9d

	clflush	(%r10)
	lock addl	%r9d, (%r10)		/* add 0: touch the first slot */

	rdtsc
	movl	%eax, %edx
	subl	%r8d, %eax			/* eax = first delta */
	movl	%edx, %r8d
	movl	%eax, %r9d			/* r9d = previous delta */
.Loop2:
	clflush	(%r10)
	lock addl	%eax, (%r10)		/* slot += delta */

	subq	$1, %r11
	jz	.Ldone2				/* probe budget exhausted */

	rdtsc
	movl	%eax, %edx
	subl	%r8d, %eax			/* eax = new delta */
	movl	%edx, %r8d
	cmpl	%r9d, %eax			/* same as the previous delta? */
	movl	%eax, %r9d
	movl	$0, %edx			/* mov keeps the flags from cmpl */
	setne	%dl				/* rdx = 1 if the delta changed */
	subq	%rdx, %rcx			/* consume a slot only on change */
	leaq	(%r10,%rdx,4), %r10		/* lea keeps the flags from subq */
	jnz	.Loop2				/* until no slots remain */

.Ldone2:
	movq	%rsi, %rax			/* original slot count */
	subq	%rcx, %rax			/* minus the slots left unused */
	rep ret					/* two-byte return: 0xf3,0xc3 */
.cfi_endproc
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
/*
 * OPENSSL_ia32_rdrand_bytes
 *
 * Fills a buffer with output of the RDRAND instruction.  Each 64-bit
 * draw is attempted up to 8 times; if all 8 attempts leave the carry
 * flag clear the routine stops and reports what it has written so far.
 *
 * In:    %rdi = destination buffer
 *        %rsi = number of bytes wanted
 * Out:   %rax = number of bytes actually written
 * Uses:  %rdi, %rsi, %r10 (zeroed on exit), %r11, flags
 *
 * The instruction is emitted as raw bytes, so the caller must make sure
 * the processor implements it.
 */
.globl	OPENSSL_ia32_rdrand_bytes
.type	OPENSSL_ia32_rdrand_bytes,@function
.align	16
OPENSSL_ia32_rdrand_bytes:
.cfi_startproc
	xorl	%eax, %eax			/* rax = bytes written */
	testq	%rsi, %rsi
	je	.Lrdrand_finish

	movl	$8, %r11d			/* attempts left for this draw */
.Lrdrand_draw:
.byte	0x49,0x0f,0xc7,0xf2			/* rdrand %r10 */
	jc	.Lrdrand_store			/* CF set: r10 holds a value */
	decq	%r11
	jnz	.Lrdrand_draw
	jmp	.Lrdrand_finish			/* gave up after 8 attempts */

.align	16
.Lrdrand_store:
	cmpq	$8, %rsi
	jb	.Lrdrand_partial
	movq	%r10, (%rdi)
	leaq	8(%rdi), %rdi
	addq	$8, %rax
	subq	$8, %rsi
	jz	.Lrdrand_finish
	movl	$8, %r11d			/* fresh retry budget */
	jmp	.Lrdrand_draw

.align	16
.Lrdrand_partial:
	/* fewer than 8 bytes wanted: hand out the draw byte by byte */
	movb	%r10b, (%rdi)
	leaq	1(%rdi), %rdi
	incq	%rax
	shrq	$8, %r10
	decq	%rsi
	jnz	.Lrdrand_partial

.Lrdrand_finish:
	xorl	%r10d, %r10d			/* do not leave the value in r10 */
	rep ret					/* two-byte return: 0xf3,0xc3 */
.cfi_endproc
.size	OPENSSL_ia32_rdrand_bytes,.-OPENSSL_ia32_rdrand_bytes
/*
 * OPENSSL_ia32_rdseed_bytes
 *
 * Fills a buffer with output of the RDSEED instruction.  Each 64-bit
 * draw is attempted up to 8 times; if all 8 attempts leave the carry
 * flag clear the routine stops and reports what it has written so far.
 *
 * In:    %rdi = destination buffer
 *        %rsi = number of bytes wanted
 * Out:   %rax = number of bytes actually written
 * Uses:  %rdi, %rsi, %r10 (zeroed on exit), %r11, flags
 *
 * The instruction is emitted as raw bytes, so the caller must make sure
 * the processor implements it.
 */
.globl	OPENSSL_ia32_rdseed_bytes
.type	OPENSSL_ia32_rdseed_bytes,@function
.align	16
OPENSSL_ia32_rdseed_bytes:
.cfi_startproc
	xorl	%eax, %eax			/* rax = bytes written */
	testq	%rsi, %rsi
	je	.Lrdseed_finish

	movl	$8, %r11d			/* attempts left for this draw */
.Lrdseed_draw:
.byte	0x49,0x0f,0xc7,0xfa			/* rdseed %r10 */
	jc	.Lrdseed_store			/* CF set: r10 holds a value */
	decq	%r11
	jnz	.Lrdseed_draw
	jmp	.Lrdseed_finish			/* gave up after 8 attempts */

.align	16
.Lrdseed_store:
	cmpq	$8, %rsi
	jb	.Lrdseed_partial
	movq	%r10, (%rdi)
	leaq	8(%rdi), %rdi
	addq	$8, %rax
	subq	$8, %rsi
	jz	.Lrdseed_finish
	movl	$8, %r11d			/* fresh retry budget */
	jmp	.Lrdseed_draw

.align	16
.Lrdseed_partial:
	/* fewer than 8 bytes wanted: hand out the draw byte by byte */
	movb	%r10b, (%rdi)
	leaq	1(%rdi), %rdi
	incq	%rax
	shrq	$8, %r10
	decq	%rsi
	jnz	.Lrdseed_partial

.Lrdseed_finish:
	xorl	%r10d, %r10d			/* do not leave the value in r10 */
	rep ret					/* two-byte return: 0xf3,0xc3 */
.cfi_endproc
.size	OPENSSL_ia32_rdseed_bytes,.-OPENSSL_ia32_rdseed_bytes
483