xref: /netbsd-src/lib/libc/arch/x86_64/string/swab.S (revision 92d1d4411b9f8cbaff3d5eb0b336fcc1c32deb29)
110f59d7bSfvdl/*
299410184Ssalo * Written by J.T. Conklin <jtc@NetBSD.org>.
310f59d7bSfvdl * Public domain.
410f59d7bSfvdl */
510f59d7bSfvdl
610f59d7bSfvdl#include <machine/asm.h>
710f59d7bSfvdl
810f59d7bSfvdl#if defined(LIBC_SCCS)
9*92d1d441Suebayasi	RCSID("$NetBSD: swab.S,v 1.4 2014/05/22 15:01:57 uebayasi Exp $")
1010f59d7bSfvdl#endif
1110f59d7bSfvdl
/*
 * LOAD_SWAP_STORE_WORD
 *
 * Move one 16-bit word from (%rsi) to (%rdi) with its two bytes
 * exchanged.  lodsw and stosw step %rsi and %rdi past the word, so the
 * macro can simply be repeated to unroll a copy loop.
 *
 * The swap is a 16-bit rotate by 8, the same idiom the word-at-a-time
 * loop in swab uses, which avoids touching the %ah high-byte register.
 *
 * Clobbers: %ax, flags (CF/OF from the rotate).
 */
#define LOAD_SWAP_STORE_WORD \
	lodsw	; \
	rorw	$8,%ax ; \
	stosw
1610f59d7bSfvdl
/*
 * void swab(const void *from, void *to, ssize_t len)
 *
 * Copy len bytes from `from' to `to', exchanging the two bytes of every
 * 16-bit word on the way.  Only len / 2 whole words are processed; a
 * trailing odd byte is neither read nor written.  A zero or negative
 * len is a no-op (POSIX requires swab() to do nothing when the count
 * is negative).
 *
 * In:    %rdi = from, %rsi = to, %rdx = len
 * Out:   nothing
 * Clobb: %rax, %rdx, %rsi, %rdi, flags
 */
ENTRY(swab)
	/*
	 * len is signed.  Reject len <= 0 before the logical shift
	 * below, which would otherwise turn a negative length into an
	 * enormous word count.
	 */
	testq	%rdx,%rdx
	jle	.Lswab_done

	xchgq	%rdi,%rsi		/* lodsw reads (%rsi), stosw writes (%rdi) */
	cld				/* string instructions walk forward */

	shrq	$1,%rdx			/* %rdx = number of 16-bit words */
	testq	$7,%rdx			/* count already a multiple of 8? */
	jz	.Lswab_blocks

	/* Peel off (count % 8) words one at a time. */
.Lswab_head:
	lodsw
	rorw	$8,%ax			/* exchange the two bytes of the word */
	stosw
	decq	%rdx
	testq	$7,%rdx
	jnz	.Lswab_head

.Lswab_blocks:
	shrq	$3,%rdx			/* %rdx = number of 8-word blocks left */
	jz	.Lswab_done

	/* Main loop: eight words per iteration. */
.Lswab_unrolled:
	LOAD_SWAP_STORE_WORD
	LOAD_SWAP_STORE_WORD
	LOAD_SWAP_STORE_WORD
	LOAD_SWAP_STORE_WORD
	LOAD_SWAP_STORE_WORD
	LOAD_SWAP_STORE_WORD
	LOAD_SWAP_STORE_WORD
	LOAD_SWAP_STORE_WORD

	decq	%rdx
	jnz	.Lswab_unrolled

.Lswab_done:
	ret
END(swab)
48