xref: /onnv-gate/usr/src/uts/intel/amd64/ml/amd64.il (revision 9171:ee979187414d)
10Sstevel@tonic-gate/*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
53446Smrj * Common Development and Distribution License (the "License").
63446Smrj * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate/*
22*9171Sxiuyan.wang@Sun.COM * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate * Use is subject to license terms.
240Sstevel@tonic-gate */
250Sstevel@tonic-gate
260Sstevel@tonic-gate/
270Sstevel@tonic-gate/ In-line functions for amd64 kernels.
280Sstevel@tonic-gate/
290Sstevel@tonic-gate
/*
 * threadp: return the current thread pointer in %rax.
 *
 * NOTE: the value is loaded from %gs:0x18.  The "0x18" should be replaced
 *	by the computed value of the offset of "cpu_thread" from the
 *	beginning of the struct cpu.  Including "assym.h" does not work,
 *	however, since that stuff is PSM-specific and is only visible to
 *	the 'unix' build anyway.  The current cpu pointer template has the
 *	same problem: its "0x10" should be replaced by the computed value
 *	of the offset of "cpu_self".
 *	Ugh -- what a disaster.
 */
	.inline	threadp,0
	movq	%gs:0x18, %rax
	.end
440Sstevel@tonic-gate
/*
 * curcpup: return the current cpu pointer in %rax.
 *
 * The pointer is loaded from %gs:0x10; the hard-coded 0x10 stands in for
 * the computed offset of "cpu_self" within the struct cpu.
 */
	.inline	curcpup,0
	movq	%gs:0x10, %rax
	.end
510Sstevel@tonic-gate
/*
 * caller: return caller.
 *
 * Loads the quadword stored at 8(%rbp) into %rax.
 * NOTE(review): presumably that slot holds the return address of the
 * function the template is expanded in, which requires that function to
 * keep a frame pointer in %rbp -- confirm against the build flags.
 */
	.inline	caller,0
	movq	8(%rbp), %rax
	.end
580Sstevel@tonic-gate
/*
 * ipltospl: convert ipl to spl.
 *
 * This is the identity function for i86: the argument in %rdi is copied
 * to %rax unchanged.
 */
	.inline	ipltospl,0
	movq	%rdi, %rax
	.end
650Sstevel@tonic-gate
/*
 * lowbit: find the low order bit in a word.
 *
 * %rax is preloaded with -1, bsfq scans the argument in %rdi into %rax,
 * and the result is then incremented, so a lowest set bit at index N is
 * returned as N + 1.
 *
 * NOTE(review): a zero argument yields 0 only if bsfq leaves %rax
 * untouched when its source operand is zero.  AMD documents that
 * behaviour; the Intel manual describes the destination as undefined in
 * that case -- confirm for the processors this kernel supports.
 */
	.inline	lowbit,4
	movq	$-1, %rax
	bsfq	%rdi, %rax
	incq	%rax
	.end
740Sstevel@tonic-gate
750Sstevel@tonic-gate/
760Sstevel@tonic-gate/ Networking byte order functions (too bad, Intel has the wrong byte order)
770Sstevel@tonic-gate/
780Sstevel@tonic-gate
/*
 * htonll: return the 64-bit argument in %rdi with its byte order
 * reversed (result in %rax).
 */
	.inline	htonll,4
	movq	%rdi, %rax
	bswapq	%rax
	.end
837421SDaniel.Anderson@Sun.COM
/*
 * ntohll: return the 64-bit argument in %rdi with its byte order
 * reversed (result in %rax).  Same instruction sequence as htonll.
 */
	.inline	ntohll,4
	movq	%rdi, %rax
	bswapq	%rax
	.end
887421SDaniel.Anderson@Sun.COM
/*
 * htonl: return the 32-bit argument in %edi with its byte order
 * reversed (result in %eax).
 */
	.inline	htonl,4
	movl	%edi, %eax
	bswap	%eax
	.end
930Sstevel@tonic-gate
/*
 * ntohl: return the 32-bit argument in %edi with its byte order
 * reversed (result in %eax).  Same instruction sequence as htonl.
 */
	.inline	ntohl,4
	movl	%edi, %eax
	bswap	%eax
	.end
980Sstevel@tonic-gate
/*
 * htons: return the low 16 bits of the argument in %edi with their two
 * bytes exchanged (result in %eax).
 *
 * bswap reverses all four bytes of %eax, which leaves the swapped 16-bit
 * value in the upper half of the register; the logical shift right by 16
 * moves it down and clears the upper half.
 */
	.inline	htons,4
	movl	%edi, %eax
	bswap	%eax
	shrl	$16, %eax
	.end
1040Sstevel@tonic-gate
/*
 * ntohs: return the low 16 bits of the argument in %edi with their two
 * bytes exchanged (result in %eax).  Same instruction sequence as htons:
 * a full 32-bit bswap followed by a logical shift right by 16.
 */
	.inline	ntohs,4
	movl	%edi, %eax
	bswap	%eax
	shrl	$16, %eax
	.end
1100Sstevel@tonic-gate
/*
 * multiply two long numbers and yield a u_longlong_t result
 * Provided to manipulate hrtime_t values.
 */
1150Sstevel@tonic-gate	/* XX64 These don't work correctly with SOS9 build 13.0 yet
1160Sstevel@tonic-gate	.inline mul32, 8
1170Sstevel@tonic-gate	xorl	%edx, %edx
1180Sstevel@tonic-gate	movl	%edi, %eax
1190Sstevel@tonic-gate	mull	%esi
1200Sstevel@tonic-gate	shlq	$32, %rdx
1210Sstevel@tonic-gate	orq	%rdx, %rax
1220Sstevel@tonic-gate	ret
1230Sstevel@tonic-gate	.end
1240Sstevel@tonic-gate	*/
/*
 * unlock_hres_lock: unlock hres_lock and increment the count value.
 * (See clock.h)
 *
 * Both effects come from a single lock-prefixed 32-bit increment of the
 * hres_lock symbol.
 */
	.inline	unlock_hres_lock,0
	lock
	incl	hres_lock
	.end
1320Sstevel@tonic-gate
/*
 * atomic_orb: OR a byte into memory with a lock-prefixed instruction.
 *
 * %rdi holds the target address; the second argument arrives in %esi and
 * is staged through %eax so that its low byte (%al) can be used as the
 * source operand of orb.
 */
	.inline	atomic_orb,8
	movl	%esi, %eax
	lock
	orb	%al, (%rdi)
	.end
1380Sstevel@tonic-gate
/*
 * atomic_andb: AND a byte into memory with a lock-prefixed instruction.
 *
 * %rdi holds the target address; the second argument arrives in %esi and
 * is staged through %eax so that its low byte (%al) can be used as the
 * source operand of andb.
 */
	.inline	atomic_andb,8
	movl	%esi, %eax
	lock
	andb	%al, (%rdi)
	.end
1440Sstevel@tonic-gate
/*
 * atomic inc/dec operations on 16-bit values.
 *	void atomic_inc16(uint16_t *addr) { ++*addr; }
 *	void atomic_dec16(uint16_t *addr) { --*addr; }
 *
 * atomic_inc16: lock-prefixed increment of the word at the address
 * passed in %rdi.
 */
	.inline	atomic_inc16,4
	lock
	incw	(%rdi)
	.end
1540Sstevel@tonic-gate
/*
 * atomic_dec16: lock-prefixed decrement of the word at the address
 * passed in %rdi.
 */
	.inline	atomic_dec16,4
	lock
	decw	(%rdi)
	.end
1590Sstevel@tonic-gate
/*
 * atomic_btr32: atomic bit clear.
 *
 * %rdi holds the address of the 32-bit word and %esi the bit number.
 * The lock-prefixed btrl copies the selected bit into the carry flag and
 * clears it in memory; setc then turns the carry flag into 0 or 1 in %al.
 *
 * setc writes only the low byte, so the upper 24 bits of %eax would
 * otherwise keep whatever the surrounding code last left there.  The
 * movzbl clears them so that the result is exactly 0 or 1 at any width
 * up to 32 bits.
 * NOTE(review): the C prototype is not visible here; if it returns a
 * type wider than one byte the zero-extension is required, otherwise it
 * is merely harmless -- confirm against the declaration.
 */
	.inline	atomic_btr32,8
	lock
	btrl	%esi, (%rdi)
	setc	%al
	movzbl	%al, %eax
	.end
1680Sstevel@tonic-gate
/*
 * ht_pause: execute the pause instruction.
 *
 * To the Pentium 4 Xeon processor, pause acts as a hint that the code
 * sequence is a busy spin-wait loop.  Without a pause instruction in
 * these loops, the P4 Xeon processor may suffer a severe penalty when
 * exiting the loop because the processor detects a possible memory order
 * violation.  Inserting the pause instruction significantly reduces the
 * likelihood of a memory order violation, improving performance.
 * The pause instruction is a NOP on all other IA-32 processors.
 */
	.inline	ht_pause,0
	pause
	.end
1810Sstevel@tonic-gate
/*
 * inlines for update_sregs().
 *
 * __set_ds: load the %ds segment register from the low 16 bits of the
 * first argument (%di).
 */
	.inline	__set_ds,0
	movw	%di, %ds
	.end
1883446Smrj
/*
 * __set_es: load the %es segment register from the low 16 bits of the
 * first argument (%di).
 */
	.inline	__set_es,0
	movw	%di, %es
	.end
1923446Smrj
/*
 * __set_fs: load the %fs segment register from the low 16 bits of the
 * first argument (%di).
 */
	.inline	__set_fs,0
	movw	%di, %fs
	.end
1963446Smrj
/*
 * __set_gs: load the %gs segment register from the low 16 bits of the
 * first argument (%di).
 */
	.inline	__set_gs,0
	movw	%di, %gs
	.end
2003446Smrj
/*
 * __swapgs: execute swapgs.
 *
 * The mfence ahead of swapgs is there because OPTERON_ERRATUM_88
 * requires it.
 */
	.inline	__swapgs,0
	mfence
	swapgs
	.end
2088286SDave.Plauger@Sun.COM
209*9171Sxiuyan.wang@Sun.COM/*
210*9171Sxiuyan.wang@Sun.COM * prefetch 64 bytes
211*9171Sxiuyan.wang@Sun.COM */
212*9171Sxiuyan.wang@Sun.COM
/*
 * prefetch_read_many: issue prefetcht0 for the address passed in %rdi
 * and for the address 32 bytes beyond it.
 */
	.inline	prefetch_read_many,8
	prefetcht0	(%rdi)
	prefetcht0	32(%rdi)
	.end
217*9171Sxiuyan.wang@Sun.COM
/*
 * prefetch_read_once: issue prefetchnta for the address passed in %rdi
 * and for the address 32 bytes beyond it.
 */
	.inline	prefetch_read_once,8
	prefetchnta	(%rdi)
	prefetchnta	32(%rdi)
	.end
222*9171Sxiuyan.wang@Sun.COM
/*
 * prefetch_write_many: issue prefetcht0 for the address passed in %rdi
 * and for the address 32 bytes beyond it.
 */
	.inline	prefetch_write_many,8
	prefetcht0	(%rdi)
	prefetcht0	32(%rdi)
	.end
227*9171Sxiuyan.wang@Sun.COM
/*
 * prefetch_write_once: issue prefetcht0 for the address passed in %rdi
 * and for the address 32 bytes beyond it.  The instruction sequence is
 * the same as prefetch_write_many.
 */
	.inline	prefetch_write_once,8
	prefetcht0	(%rdi)
	prefetcht0	32(%rdi)
	.end
232