! xref: /netbsd-src/crypto/external/bsd/openssl/dist/crypto/sparccpuid.S (revision e8bf6de2c75b9134b81eb782c45222bafa702c77)
! Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
!
! Licensed under the Apache License 2.0 (the "License").  You may not use
! this file except in compliance with the License.  You can obtain a copy
! in the file LICENSE in the source distribution or at
! https://www.openssl.org/source/license.html

#if defined(__SUNPRO_C) && defined(__sparcv9)
# define ABI64  /* They've said -xarch=v9 at command line */
#elif defined(__GNUC__) && defined(__arch64__)
# define ABI64  /* They've said -m64 at command line */
#endif

/*
 * Per-ABI constants used by the routines in this file:
 *   FRAME - stack adjustment passed to the save instruction
 *   BIAS  - offset added to %sp or %fp to form a usable address
 */
#ifdef ABI64
  .register	%g2,#scratch
  .register	%g3,#scratch
# define	FRAME	-192
# define	BIAS	2047
#else
# define	FRAME	-96
# define	BIAS	0
#endif

.text
.align	32
.global	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,#function
! OPENSSL_wipe_cpu: zero the integer and floating-point registers.
!
! Keep in mind that this does not excuse us from wiping the stack!
! This routine wipes registers, but not the backing store [which
! resides on the stack, toward lower addresses]. To facilitate
! stack wiping, a pointer to the top of stack of the *caller* is
! returned.
!
! Out:	%o0 (as seen by the caller) = caller %sp plus the stack bias
OPENSSL_wipe_cpu:
	save	%sp,FRAME,%sp
	nop
#ifdef __sun
#include <sys/trap.h>
	ta	ST_CLEAN_WINDOWS
#else
	call	.walk.reg.wins
#endif
	nop			! delay slot of the call (filler after the trap)
	! Obtain the address of .zero position-independently: the delay
	! slot passes the distance from the call instruction to .zero,
	! and the helper adds %o7 to it.
	call	.PIC.zero.up
	mov	.zero-(.-4),%o0
	ld	[%o0],%f0	! %f0 = %f1 = 0, source for the FP wipe below
	ld	[%o0],%f1

	subcc	%g0,1,%o0
	! Following is V9 "rd %ccr,%o0" instruction. However! V8
	! specification says that it ("rd %asr2,%o0" in V8 terms) does
	! not cause illegal_instruction trap, however it may read from
	! the %y register. It therefore can be used to determine if the
	! CPU the code is executing on is V8- or V9-compliant, as V9
	! returns a distinct value of 0x99, "negative" and "borrow"
	! bits set in both %icc and %xcc. The %y register needs to be
	! set away from 0x99 to avoid false-positive matches.
	!
	wr	0, %y
	.word	0x91408000	!rd	%ccr,%o0
	cmp	%o0,0x99
	bne	.v8
	nop
			! Even though we do not use %fp register bank,
			! we wipe it as memcpy might have used it...
			.word	0xbfa00040	!fmovd	%f0,%f62
			.word	0xbba00040	!...
			.word	0xb7a00040
			.word	0xb3a00040
			.word	0xafa00040
			.word	0xaba00040
			.word	0xa7a00040
			.word	0xa3a00040
			.word	0x9fa00040
			.word	0x9ba00040
			.word	0x97a00040
			.word	0x93a00040
			.word	0x8fa00040
			.word	0x8ba00040
			.word	0x87a00040
			.word	0x83a00040	!fmovd	%f0,%f32
	! Common tail: integer clears interleaved with single-precision
	! FP moves that zero %f2 through %f31.
.v8:			fmovs	%f1,%f31
	clr	%o0
			fmovs	%f0,%f30
	clr	%o1
			fmovs	%f1,%f29
	clr	%o2
			fmovs	%f0,%f28
	clr	%o3
			fmovs	%f1,%f27
	clr	%o4
			fmovs	%f0,%f26
	clr	%o5
			fmovs	%f1,%f25
	clr	%o7
			fmovs	%f0,%f24
	clr	%l0
			fmovs	%f1,%f23
	clr	%l1
			fmovs	%f0,%f22
	clr	%l2
			fmovs	%f1,%f21
	clr	%l3
			fmovs	%f0,%f20
	clr	%l4
			fmovs	%f1,%f19
	clr	%l5
			fmovs	%f0,%f18
	clr	%l6
			fmovs	%f1,%f17
	clr	%l7
			fmovs	%f0,%f16
	clr	%i0
			fmovs	%f1,%f15
	clr	%i1
			fmovs	%f0,%f14
	clr	%i2
			fmovs	%f1,%f13
	clr	%i3
			fmovs	%f0,%f12
	clr	%i4
			fmovs	%f1,%f11
	clr	%i5
			fmovs	%f0,%f10
	clr	%g1
			fmovs	%f1,%f9
	clr	%g2
			fmovs	%f0,%f8
	clr	%g3
			fmovs	%f1,%f7
	clr	%g4
			fmovs	%f0,%f6
	clr	%g5
			fmovs	%f1,%f5
			fmovs	%f0,%f4
			fmovs	%f1,%f3
			fmovs	%f0,%f2

	add	%fp,BIAS,%i0	! return pointer to top of stack of the caller

	ret
	restore

! Two zero words loaded into %f0/%f1 above.
.zero:	.long	0x0,0x0
! Helper: %o0 = %o0 + %o7, turning the offset passed in the delay
! slot of the call into an absolute address.
.PIC.zero.up:
	retl
	add	%o0,%o7,%o0
#ifdef DEBUG
.global	walk_reg_wins
.type	walk_reg_wins,#function
walk_reg_wins:
#endif
! Recursive helper used when the clean-windows trap is not available.
! Each level opens a new register window and clears its out and local
! registers. A level returns immediately, without clearing, when
! %i7 equals %o7; it skips the recursive call when %o7 is zero.
.walk.reg.wins:
	save	%sp,FRAME,%sp
	cmp	%i7,%o7
	be	2f
	clr	%o0		! delay slot, executed on both paths
	cmp	%o7,0	! compiler never cleans %o7...
	be	1f	! could have been a leaf function...
	clr	%o1		! delay slot, executed on both paths
	call	.walk.reg.wins
	nop
1:	clr	%o2
	clr	%o3
	clr	%o4
	clr	%o5
	clr	%o7
	clr	%l0
	clr	%l1
	clr	%l2
	clr	%l3
	clr	%l4
	clr	%l5
	clr	%l6
	clr	%l7
	add	%o0,1,%i0	! used for debugging
2:	ret
	restore
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu

! OPENSSL_atomic_add: atomically add a value to a 32-bit word.
!
! In:	%o0 = address of the 32-bit word
!	%o1 = amount to add
! Out:	%o0 = new value of the word, sign-extended
!
! In a 32-bit build the CPU is probed at run time: V9 processors use
! the cas loop at .v9, anything else uses a swap-based spin lock that
! yields the CPU while the word holds the lock marker. A 64-bit build
! contains only the cas loop.
.global	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,#function
.align	32
OPENSSL_atomic_add:
#ifndef ABI64
	subcc	%g0,1,%o2
	wr	0, %y
	.word	0x95408000	!rd	%ccr,%o2, see comment above
	cmp	%o2,0x99
	be	.v9
	nop
	save	%sp,FRAME,%sp
	ba	.enter
	nop
#ifdef __sun
! Note that you do not have to link with libthread to call thr_yield,
! as libc provides a stub, which is overloaded the moment you link
! with *either* libpthread or libthread...
#define	YIELD_CPU	thr_yield
#else
! applies at least to Linux and FreeBSD... Feedback expected...
#define	YIELD_CPU	sched_yield
#endif
.spin:	call	YIELD_CPU
	nop
	! NOTE(review): the pre-check below compares against -4096 while
	! the marker swapped in is -1; confirm that this is intended.
.enter:	ld	[%i0],%i2
	cmp	%i2,-4096
	be	.spin
	mov	-1,%i2		! delay slot: lock marker to swap in
	swap	[%i0],%i2	! take the word, leave the marker behind
	cmp	%i2,-1
	be	.spin		! marker was already there, try again
	add	%i2,%i1,%i2	! delay slot: new value = old + amount
	stbar
	st	%i2,[%i0]	! publish new value, which releases the word
	sra	%i2,%g0,%i0
	ret
	restore
.v9:
#endif
	ld	[%o0],%o2
1:	add	%o1,%o2,%o3
	.word	0xd7e2100a	!cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
	cmp	%o2,%o3
	bne	1b
	mov	%o3,%o2		! cas is always fetching to dest. register
	add	%o1,%o2,%o0	! OpenSSL expects the new value
	retl
	sra	%o0,%g0,%o0	! we return signed int, remember?
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

! _sparcv9_rdtick: read the %tick counter if the CPU is V9.
!
! Out:	%o0 = %tick value (0 when the V9 probe fails)
!	%o1 = %o0 shifted right by 32 (0 when the V9 probe fails)
.global	_sparcv9_rdtick
.align	32
_sparcv9_rdtick:
	subcc	%g0,1,%o0
	wr	0, %y
	.word	0x91408000	!rd	%ccr,%o0, see comment above
	cmp	%o0,0x99
	bne	.notick
	xor	%o0,%o0,%o0	! delay slot: %o0 = 0 on both paths
	.word	0x91410000	!rd	%tick,%o0
	retl
	.word	0x93323020	!srlx	%o0,32,%o1
.notick:
	retl
	xor	%o1,%o1,%o1
.type	_sparcv9_rdtick,#function
.size	_sparcv9_rdtick,.-_sparcv9_rdtick

! _sparcv9_vis1_probe: execute two VIS1 instructions, a short
! floating-point load from the stack and an fxor.
! NOTE(review): presumably the caller detects VIS1 support by catching
! the trap raised when these are unimplemented; confirm against caller.
.global	_sparcv9_vis1_probe
.align	8
_sparcv9_vis1_probe:
	add	%sp,BIAS+2,%o1
	.word	0xc19a5a40	!ldda	[%o1]ASI_FP16_P,%f0
	retl
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
.type	_sparcv9_vis1_probe,#function
.size	_sparcv9_vis1_probe,.-_sparcv9_vis1_probe

! Probe and instrument VIS1 instruction. Output is number of cycles it
! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit
! is slow (documented to be 6 cycles on T2) and the core is in-order
! single-issue, so it should be possible to distinguish Tx reliably...
! Observed return values are:
!
!	UltraSPARC IIe		7
!	UltraSPARC III		7
!	UltraSPARC T1		24
!	SPARC T4		65(*)
!
! (*)	result has less to do with VIS instruction latencies, rdtick
!	appears to be that slow, but it does the trick in the sense that
!	FP and VIS code paths are still slower than integer-only ones.
!
! Numbers for T2 and SPARC64 V-VII are more than welcome.
!
! It would be possible to detect specifically US-T1 by instrumenting
! fmul8ulx16, which is emulated on T1 and as such accounts for quite
! a lot of %tick-s, couple of thousand on Linux...
!
! Out:	%o0 = smallest of the four measured %tick intervals
.global	_sparcv9_vis1_instrument
.align	8
_sparcv9_vis1_instrument:
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x91410000	!rd	%tick,%o0
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x93410000	!rd	%tick,%o1
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x95410000	!rd	%tick,%o2
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x97410000	!rd	%tick,%o3
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x99410000	!rd	%tick,%o4

	! calculate intervals
	sub	%o1,%o0,%o0
	sub	%o2,%o1,%o1
	sub	%o3,%o2,%o2
	sub	%o4,%o3,%o3

	! find minimum value; each annulled branch executes the mov in
	! its delay slot only when %o0 is the larger of the two
	cmp	%o0,%o1
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o1,%o0
	cmp	%o0,%o2
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o2,%o0
	cmp	%o0,%o3
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o3,%o0

	retl
	nop
.type	_sparcv9_vis1_instrument,#function
.size	_sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument

! _sparcv9_vis2_probe: execute a single bshuffle instruction in the
! delay slot of the return.
.global	_sparcv9_vis2_probe
.align	8
_sparcv9_vis2_probe:
	retl
	.word	0x81b00980	!bshuffle	%f0,%f0,%f0
.type	_sparcv9_vis2_probe,#function
.size	_sparcv9_vis2_probe,.-_sparcv9_vis2_probe

! _sparcv9_fmadd_probe: zero %f0 and %f2 with fxor, then execute a
! single fmaddd instruction in the delay slot of the return.
.global	_sparcv9_fmadd_probe
.align	8
_sparcv9_fmadd_probe:
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	retl
	.word	0x81b80440	!fmaddd	%f0,%f0,%f2,%f0
.type	_sparcv9_fmadd_probe,#function
.size	_sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe

! _sparcv9_rdcfr: read ancillary state register 26.
!
! Out:	%o0 = value of %asr26
.global	_sparcv9_rdcfr
.align	8
_sparcv9_rdcfr:
	retl
	.word	0x91468000	!rd	%asr26,%o0
.type	_sparcv9_rdcfr,#function
.size	_sparcv9_rdcfr,.-_sparcv9_rdcfr

! _sparcv9_vis3_probe: execute a single xmulx instruction, with %g0
! as destination, in the delay slot of the return.
.global	_sparcv9_vis3_probe
.align	8
_sparcv9_vis3_probe:
	retl
	.word	0x81b022a0	!xmulx	%g0,%g0,%g0
.type	_sparcv9_vis3_probe,#function
.size	_sparcv9_vis3_probe,.-_sparcv9_vis3_probe

! _sparcv9_random: execute the random instruction in the delay slot
! of the return.
!
! Out:	%o0 = value produced by the random instruction
.global	_sparcv9_random
.align	8
_sparcv9_random:
	retl
	.word	0x91b002a0	!random	%o0
.type	_sparcv9_random,#function
! The size is measured from the start of this routine, not from
! _sparcv9_vis3_probe.
.size	_sparcv9_random,.-_sparcv9_random

! _sparcv9_fjaesx_probe: execute a single faesencx instruction and
! return.
.global	_sparcv9_fjaesx_probe
.align	8
_sparcv9_fjaesx_probe:
	.word	0x81b09206	!faesencx %f2,%f6,%f0
	retl
	nop
.type	_sparcv9_fjaesx_probe,#function
.size	_sparcv9_fjaesx_probe,.-_sparcv9_fjaesx_probe

! OPENSSL_cleanse: overwrite a memory region with zero bytes.
!
! In:	%o0 = start address
!	%o1 = length in bytes
!
! Lengths up to 14 are cleared one byte at a time. Longer regions are
! first brought to 8-byte alignment (4-byte on a V8 CPU in a 32-bit
! build) with byte stores, then cleared with wide stores, and the
! remaining tail is handed back to the byte loop.
.global	OPENSSL_cleanse
.align	32
OPENSSL_cleanse:
	cmp	%o1,14
	nop
#ifdef ABI64
	bgu	%xcc,.Lot
#else
	bgu	.Lot
#endif
	cmp	%o1,0		! delay slot: test for empty region
	bne	.Little
	nop
	retl
	nop

	! Byte-at-a-time loop; entered only with a non-zero length.
.Little:
	stb	%g0,[%o0]
	subcc	%o1,1,%o1
	bnz	.Little
	add	%o0,1,%o0
	retl
	nop
.align	32
.Lot:
#ifndef ABI64
	subcc	%g0,1,%g1
	! see above for explanation
	wr	0, %y
	.word	0x83408000	!rd	%ccr,%g1, see comment above
	cmp	%g1,0x99
	bne	.v8lot
	nop
#endif

	! V9 path: byte stores until the address is 8-byte aligned.
.v9lot:	andcc	%o0,7,%g0
	bz	.v9aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v9lot
	add	%o0,1,%o0
.align	16,0x01000000
	! 8 bytes per iteration while at least 8 bytes remain.
.v9aligned:
	.word	0xc0720000	!stx	%g0,[%o0]
	sub	%o1,8,%o1
	andcc	%o1,-8,%g0
#ifdef ABI64
	.word	0x126ffffd	!bnz	%xcc,.v9aligned
#else
	.word	0x124ffffd	!bnz	%icc,.v9aligned
#endif
	add	%o0,8,%o0

	cmp	%o1,0
	bne	.Little		! clear the tail, if any
	nop
	retl
	nop
#ifndef ABI64
	! V8 path: byte stores until the address is 4-byte aligned.
.v8lot:	andcc	%o0,3,%g0
	bz	.v8aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v8lot
	add	%o0,1,%o0
	nop
	! 4 bytes per iteration while at least 4 bytes remain.
.v8aligned:
	st	%g0,[%o0]
	sub	%o1,4,%o1
	andcc	%o1,-4,%g0
	bnz	.v8aligned
	add	%o0,4,%o0

	cmp	%o1,0
	bne	.Little		! clear the tail, if any
	nop
	retl
	nop
#endif
.type	OPENSSL_cleanse,#function
.size	OPENSSL_cleanse,.-OPENSSL_cleanse

! CRYPTO_memcmp: compare two byte regions without an early exit.
!
! In:	%o0 = first region
!	%o1 = second region
!	%o2 = length in bytes
! Out:	%o0 = 0 if all bytes match (or the length is zero), 1 otherwise
!
! The loop always walks the whole length, OR-ing the XOR of each byte
! pair into %g1; there is no branch on the data being compared.
.global	CRYPTO_memcmp
.align	16
CRYPTO_memcmp:
	cmp	%o2,0
#ifdef ABI64
	beq,pn	%xcc,.Lno_data
#else
	beq	.Lno_data
#endif
	xor	%g1,%g1,%g1	! delay slot: accumulator = 0
	nop

.Loop_cmp:
	ldub	[%o0],%o3
	add	%o0,1,%o0
	ldub	[%o1],%o4
	add	%o1,1,%o1
	subcc	%o2,1,%o2
	xor	%o3,%o4,%o4
#ifdef ABI64
	bnz	%xcc,.Loop_cmp
#else
	bnz	.Loop_cmp
#endif
	or	%o4,%g1,%g1	! delay slot: fold difference into accumulator

	sub	%g0,%g1,%g1	! negate: bit 31 is set iff accumulator != 0
	srl	%g1,31,%g1	! reduce to 0 or 1
.Lno_data:
	retl
	mov	%g1,%o0
.type	CRYPTO_memcmp,#function
.size	CRYPTO_memcmp,.-CRYPTO_memcmp

! _sparcv9_vis1_instrument_bus: add successive %tick differences to an
! array of 32-bit words.
!
! In:	%o0 = output array of 32-bit words
!	%o1 = number of entries (cnt)
! Out:	%o0 = cnt, as passed in
!
! Each step does a block load and a block store with commit on the
! 64-byte-aligned line holding the current entry, then adds the tick
! difference to the entry with a load/add/cas sequence.
.global	_sparcv9_vis1_instrument_bus
.align	8
_sparcv9_vis1_instrument_bus:
	mov	%o1,%o3					! save cnt
	.word	0x99410000	!rd	%tick,%o4	! tick
	mov	%o4,%o5					! lasttick = tick
	set	0,%g4					! diff

	! Warm-up pass on the first entry with diff = 0.
	andn	%o0,63,%g1
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

.Loop:	.word	0x99410000	!rd	%tick,%o4
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick

	andn	%o0,63,%g1
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4
	subcc	%o1,1,%o1				! --$cnt
	bnz	.Loop
	add	%o0,4,%o0				! ++$out

	retl
	mov	%o3,%o0
.type	_sparcv9_vis1_instrument_bus,#function
.size	_sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus

! _sparcv9_vis1_instrument_bus2: like _sparcv9_vis1_instrument_bus,
! but the output pointer advances only when the tick difference
! changes from the previous one.
!
! In:	%o0 = output array of 32-bit words
!	%o1 = number of entries (cnt)
!	%o2 = upper bound on loop iterations (max)
! Out:	%o0 = cnt minus the entries still remaining when the loop ended
.global	_sparcv9_vis1_instrument_bus2
.align	8
_sparcv9_vis1_instrument_bus2:
	mov	%o1,%o3					! save cnt
	sll	%o1,2,%o1				! cnt*=4

	.word	0x99410000	!rd	%tick,%o4	! tick
	mov	%o4,%o5					! lasttick = tick
	set	0,%g4					! diff

	andn	%o0,63,%g1
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

	.word	0x99410000	!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick
	mov	%g4,%g5					! lastdiff=diff
.Loop2:
	andn	%o0,63,%g1
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

	subcc	%o2,1,%o2				! --max
	bz	.Ldone2
	nop

	.word	0x99410000	!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick
	cmp	%g4,%g5
	mov	%g4,%g5					! lastdiff=diff

	! Turn the comparison result into a step of 0 or 4 bytes:
	! 4 when diff differs from lastdiff, 0 when they are equal.
	.word	0x83408000	!rd	%ccr,%g1
	and	%g1,4,%g1				! isolate zero flag
	xor	%g1,4,%g1				! flip zero flag

	subcc	%o1,%g1,%o1				! conditional --$cnt
	bnz	.Loop2
	add	%o0,%g1,%o0				! conditional ++$out

.Ldone2:
	srl	%o1,2,%o1				! bytes left back to entries
	retl
	sub	%o3,%o1,%o0
.type	_sparcv9_vis1_instrument_bus2,#function
.size	_sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2

! Fragment placed in the .init section: calls OPENSSL_cpuid_setup,
! which is defined outside this file.
.section	".init",#alloc,#execinstr
	call	OPENSSL_cpuid_setup
	nop
