/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: strcat.S,v 1.2 2014/03/22 19:16:34 jakllsch Exp $")
#endif

/*
 * strcat -- append the NUL-terminated string at %rsi to the end of the
 * NUL-terminated string at %rdi.
 *
 * In:       %rdi = destination string, %rsi = source string
 * Out:      %rax = the value %rdi had on entry
 * Clobbers: %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
 * Stack:    not used
 *
 * The routine runs in two phases:
 *   1. scan: advance %rdi to the terminating NUL of the destination;
 *   2. copy: copy the source, including its terminating NUL, to %rdi.
 *
 * Both phases step one byte at a time until the pointer being read
 * (%rdi while scanning, %rsi while copying) is a multiple of 8, then
 * switch to 8-byte loads.  A loaded word w is screened with
 *
 *	(w - 0x0101010101010101) & 0x8080808080808080
 *
 * which is non-zero whenever some byte of w is zero.  It can also be
 * non-zero when no byte is zero (a byte of 0x81 or above still has its
 * top bit set after the subtraction), so a non-zero result is always
 * followed by a byte-by-byte recheck that resumes the word loop if no
 * NUL is actually present.
 */
ENTRY(strcat)
	movq	%rdi,%rax			/* return value: original dst */
	movabsq	$0x0101010101010101,%r8		/* 0x01 in every byte */
	movabsq	$0x8080808080808080,%r9		/* 0x80 in every byte */

	/*
	 * Align destination to word boundary.
	 * Consider unrolling loop?
	 */
.Lscan:
.Lscan_align:
	testb	$7,%dil
	je	.Lscan_aligned
	cmpb	$0,(%rdi)
	je	.Lcopy				/* NUL found before alignment */
	incq	%rdi
	jmp	.Lscan_align

	_ALIGN_TEXT
.Lscan_aligned:
.Lscan_loop:
	movq	(%rdi),%rdx
	addq	$8,%rdi				/* %rdi now points past the word */
	subq	%r8,%rdx
	testq	%r9,%rdx
	je	.Lscan_loop			/* no byte can be zero: next word */

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * %rdi was already advanced by 8, so the bytes of the word just
	 * examined live at -8(%rdi) .. -1(%rdi).  On a hit, %rdi is
	 * backed up so that it points at the NUL itself.
	 */

	cmpb	$0,-8(%rdi)	/* 1st byte == 0? */
	jne	1f
	subq	$8,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-7(%rdi)	/* 2nd byte == 0? */
	jne	1f
	subq	$7,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-6(%rdi)	/* 3rd byte == 0? */
	jne	1f
	subq	$6,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-5(%rdi)	/* 4th byte == 0? */
	jne	1f
	subq	$5,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-4(%rdi)	/* 5th byte == 0? */
	jne	1f
	subq	$4,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-3(%rdi)	/* 6th byte == 0? */
	jne	1f
	subq	$3,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-2(%rdi)	/* 7th byte == 0? */
	jne	1f
	subq	$2,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-1(%rdi)	/* 8th byte == 0? */
	jne	.Lscan_loop	/* false positive: keep scanning */
	subq	$1,%rdi

	/*
	 * Align source to a word boundary.
	 * Consider unrolling loop?
	 *
	 * Here %rdi points at the terminating NUL of the destination,
	 * which the first byte copied overwrites.
	 */
.Lcopy:
.Lcopy_align:
	testb	$7,%sil
	je	.Lcopy_aligned
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jne	.Lcopy_align
	ret				/* copied the NUL while aligning */

	_ALIGN_TEXT
.Lcopy_loop:
	movq	%rdx,(%rdi)		/* word holds no NUL: store it whole */
	addq	$8,%rdi
.Lcopy_aligned:
	movq	(%rsi),%rdx
	movq	%rdx,%rcx		/* screen a copy, keep %rdx intact */
	addq	$8,%rsi
	subq	%r8,%rcx
	testq	%r9,%rcx
	je	.Lcopy_loop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * The word in %rdx is stored one byte at a time, lowest byte
	 * first, shifting the next byte into %dl after each store, and
	 * stopping right after the NUL has been written.
	 */

	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 1st byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 2nd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 3rd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 4th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 5th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 6th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 7th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 8th byte == 0? */
	jne	.Lcopy_aligned	/* false positive: all 8 bytes copied */

.Ldone:
	ret
END(strcat)