/*
 * Written by J.T. Conklin <jtc@NetBSD.org>.
 * Public domain.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: swab.S,v 1.14 2014/05/23 02:34:19 uebayasi Exp $")
#endif

/*
 * On the i486, this code is negligibly faster than the code generated
 * by gcc at about half the size.  If my i386 databook is correct, it
 * should be considerably faster than the gcc code on a i386.
 */

/*
 * swab(from, to, nbytes)
 *
 * Copy nbytes/2 16-bit words from `from' to `to', exchanging the two
 * bytes of every word on the way.
 *
 * Syntax: AT&T (GAS), preprocessed by cpp.  Target: i386, arguments
 * passed on the stack.
 *
 * Stack on entry (after the two pushes below):
 *	12(%esp)  from    source buffer
 *	16(%esp)  to      destination buffer
 *	20(%esp)  nbytes  signed byte count
 *
 * Register roles:
 *	%esi  read cursor (advanced by lodsw)
 *	%edi  write cursor (advanced by stosw)
 *	%ecx  words remaining, later groups-of-8 remaining
 *	%ax   word currently being swapped
 *
 * Clobbers %eax, %ecx and flags; %esi and %edi are saved and restored.
 * No value is returned.
 *
 * Notes:
 *  - nbytes <= 0 is a no-op.  The count is halved with an unsigned
 *    shift, so without the explicit sign check a negative count would
 *    turn into a huge positive word count.
 *  - When nbytes is odd the final byte is neither read nor written
 *    (the low bit is discarded by the shift).
 *  - lodsw/stosw are used without cld; this assumes the direction flag
 *    is clear on entry, as the i386 calling convention is expected to
 *    guarantee.
 */
ENTRY(swab)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* esi = from */
	movl	16(%esp),%edi		/* edi = to */
	movl	20(%esp),%ecx		/* ecx = nbytes (signed) */

	testl	%ecx,%ecx		/* nothing to do for zero or */
	jle	L4			/* negative byte counts. */

	shrl	$1,%ecx			/* ecx = number of whole words */
	testl	$7,%ecx			/* copy first group of 1 to 7 words */
	jz	L2			/* while swapping alternate bytes. */
	_ALIGN_TEXT,0x90
L1:	lodsw
	rorw	$8,%ax			/* exchange high and low byte */
	stosw
	decl	%ecx
	testl	$7,%ecx			/* stop once count is a multiple of 8 */
	jnz	L1

L2:	shrl	$3,%ecx			/* copy remainder 8 words at a time */
	jz	L4			/* while swapping alternate bytes. */
	_ALIGN_TEXT,0x90
L3:	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	decl	%ecx			/* one group of 8 words done */
	jnz	L3

L4:	popl	%edi
	popl	%esi
	ret
END(swab)