/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 *
 * $NetBSD: strcat.S,v 1.4 2004/07/26 18:51:21 drochner Exp $
 * $FreeBSD: src/lib/libc/amd64/string/strcat.S,v 1.2 2008/11/02 01:10:54 peter Exp $
 */

#include <machine/asm.h>

/*
 * char *strcat(char *dst, const char *src)
 *
 * Syntax: AT&T (GAS), x86-64.
 * In:     %rdi = dst, %rsi = src
 * Out:    %rax = dst (the value %rdi had on entry)
 * Uses:   %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags; no stack.
 *
 * Both phases work on 8-byte words once the pointer being read is
 * 8-byte aligned.  A word w is tested with
 *
 *	(w - 0x0101010101010101) & 0x8080808080808080
 *
 * which is non-zero whenever w contains a zero byte, but may also be
 * non-zero for some words that contain none.  Every hit is therefore
 * confirmed byte by byte, and the word loop is resumed when the hit
 * turns out to be a false alarm.
 *
 * Word loads are only issued from 8-byte aligned addresses; they may
 * fetch bytes that lie after the terminating NUL inside that same word.
 */
ENTRY(strcat)
	movq	%rdi,%rax			/* return value is dst */
	movabsq	$0x0101010101010101,%r8		/* 0x01 in each byte */
	movabsq	$0x8080808080808080,%r9		/* 0x80 in each byte */

	/*
	 * Phase 1: find the NUL terminating dst.
	 * Walk single bytes until %rdi is 8-byte aligned.
	 */
.Lend_bytewise:
	testb	$7,%dil
	je	.Lend_wordwise
	cmpb	$0,(%rdi)
	je	.Lappend
	incq	%rdi
	jmp	.Lend_bytewise

	.align	4
.Lend_wordwise:
	movq	(%rdi),%rdx
	subq	%r8,%rdx
	addq	$8,%rdi
	testq	%r9,%rdx
	je	.Lend_wordwise

	/*
	 * The word just read (now at -8(%rdi)) looked like it holds a
	 * zero byte.  Check its bytes from lowest address to highest;
	 * on the first zero, move %rdi back onto it and start appending.
	 * If no byte is zero, go back to the word loop.
	 */
	.irp	back,8,7,6,5,4,3,2
	cmpb	$0,-\back(%rdi)
	jne	1f
	subq	$\back,%rdi
	jmp	.Lappend
1:
	.endr
	cmpb	$0,-1(%rdi)
	jne	.Lend_wordwise
	decq	%rdi

	/*
	 * Phase 2: copy src, including its NUL, to the end of dst.
	 * Move single bytes until %rsi is 8-byte aligned.
	 */
.Lappend:
	testb	$7,%sil
	je	.Lappend_load
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
	incq	%rsi
	incq	%rdi
	testb	%dl,%dl
	jne	.Lappend
	ret					/* NUL copied while aligning */

	.align	4
.Lappend_store:
	movq	%rdx,(%rdi)			/* whole word, no zero byte */
	addq	$8,%rdi
.Lappend_load:
	movq	(%rsi),%rdx
	addq	$8,%rsi
	movq	%rdx,%rcx			/* test a copy, keep the data */
	subq	%r8,%rcx
	testq	%r9,%rcx
	je	.Lappend_store

	/*
	 * The word in %rdx may contain the terminating NUL.  Store it
	 * one byte at a time, lowest byte first, and stop right after
	 * a zero byte has been written.  If all eight bytes turn out
	 * to be non-zero, continue with the next source word.
	 */
	.rept	7
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	je	.Lreturn
	shrq	$8,%rdx
	.endr
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jne	.Lappend_load

.Lreturn:
	ret
END(strcat)

	.section .note.GNU-stack,"",%progbits