xref: /netbsd-src/lib/libc/arch/i386/string/swab.S (revision 0d34bfa26d596b4f9bcb49175a92020d91572850)
10b9f5089Scgd/*
299410184Ssalo * Written by J.T. Conklin <jtc@NetBSD.org>.
3954b7961Sjtc * Public domain.
40b9f5089Scgd */
50b9f5089Scgd
6f28455f7Sjtc#include <machine/asm.h>
70b9f5089Scgd
8c95cd47bSjtc#if defined(LIBC_SCCS)
9*0d34bfa2Suebayasi	RCSID("$NetBSD: swab.S,v 1.14 2014/05/23 02:34:19 uebayasi Exp $")
10c95cd47bSjtc#endif
11c95cd47bSjtc
/*
 * On the i486, this code is negligibly faster than the code generated
 * by gcc, at about half the size.  If my i386 databook is correct, it
 * should be considerably faster than the gcc code on an i386.
 */
170b9f5089Scgd
/*
 * void swab(const void *from, void *to, ssize_t len)
 *
 * Copy len/2 16-bit words from "from" to "to", exchanging the two bytes
 * of every word.  If len is odd the final byte is not copied.  If len is
 * negative nothing is copied, as POSIX requires.
 *
 * ABI:   i386, arguments passed on the stack.
 * Regs:  %esi = source cursor, %edi = destination cursor,
 *        %ecx = words (later: 8-word groups) still to copy,
 *        %ax  = the word currently being swapped.
 *        %esi and %edi are saved and restored; %eax, %ecx and the
 *        flags are clobbered.
 * Note:  lodsw/stosw only walk forward while the direction flag is
 *        clear.  No cld is issued, so DF=0 on entry is relied upon.
 */
ENTRY(swab)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		# from: 8 bytes of saves + return address
	movl	16(%esp),%edi		# to
	movl	20(%esp),%ecx		# len, in bytes (signed)

	testl	%ecx,%ecx		# a negative len must copy nothing; the
	js	L4			# logical shift below would make it huge.

	shrl	$1,%ecx			# bytes -> whole words
	testl	$7,%ecx			# copy first group of 1 to 7 words
	jz	L2			# while swapping alternate bytes.
	_ALIGN_TEXT,0x90
L1:	lodsw
	rorw	$8,%ax			# exchange low and high byte
	stosw
	decl	%ecx
	testl	$7,%ecx			# stop once the count is a multiple of 8
	jnz	L1

L2:	shrl	$3,%ecx			# copy remainder 8 words at a time
	jz	L4			# while swapping alternate bytes.
	_ALIGN_TEXT,0x90
L3:	lodsw				# word 1 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 2 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 3 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 4 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 5 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 6 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 7 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 8 of 8
	rorw	$8,%ax
	stosw
	decl	%ecx			# one more 8-word group done
	jnz	L3

L4:	popl	%edi			# restore in reverse push order
	popl	%esi
	ret
END(swab)
70