xref: /freebsd-src/contrib/arm-optimized-routines/string/arm/strcpy.c (revision 072a4ba82a01476eaee33781ccd241033eefcf0b)
/*
 * strcpy
 *
 * Copyright (c) 2008-2020, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#if defined (__thumb2__) && !defined (__thumb__)

/* For GLIBC:
#include <string.h>
#include <memcopy.h>

#undef strcmp
*/

/* Operand text for the two constants of the "does this word contain a
   zero byte" test.  The expansions are string literals that get spliced
   into an asm template by adjacent-string concatenation.

     magic1 -> 0x01010101
     magic2 -> 0x80808080

   Thumb-2: both constants are written as immediates and REG is ignored.
   Otherwise: REG is stringified and must name a register that the asm
   has already loaded with 0x01010101; magic2 then expands to
   "REG, lsl #7", i.e. 0x01010101 << 7 == 0x80808080.  */
#ifdef __thumb2__
#define magic1(REG) "#0x01010101"
#define magic2(REG) "#0x80808080"
#else
#define magic1(REG) #REG
#define magic2(REG) #REG ", lsl #7"
#endif

2531914882SAlex Richardson char* __attribute__((naked))
__strcpy_arm(char * dst,const char * src)2631914882SAlex Richardson __strcpy_arm (char* dst, const char* src)
2731914882SAlex Richardson {
2831914882SAlex Richardson   __asm__ (
2931914882SAlex Richardson        "pld	[r1, #0]\n\t"
3031914882SAlex Richardson        "eor	r2, r0, r1\n\t"
3131914882SAlex Richardson        "mov	ip, r0\n\t"
3231914882SAlex Richardson        "tst	r2, #3\n\t"
3331914882SAlex Richardson        "bne	4f\n\t"
3431914882SAlex Richardson        "tst	r1, #3\n\t"
3531914882SAlex Richardson        "bne	3f\n"
3631914882SAlex Richardson   "5:\n\t"
3731914882SAlex Richardson # ifndef __thumb2__
3831914882SAlex Richardson        "str	r5, [sp, #-4]!\n\t"
3931914882SAlex Richardson        "mov	r5, #0x01\n\t"
4031914882SAlex Richardson        "orr	r5, r5, r5, lsl #8\n\t"
4131914882SAlex Richardson        "orr	r5, r5, r5, lsl #16\n\t"
4231914882SAlex Richardson # endif
4331914882SAlex Richardson 
4431914882SAlex Richardson        "str	r4, [sp, #-4]!\n\t"
4531914882SAlex Richardson        "tst	r1, #4\n\t"
4631914882SAlex Richardson        "ldr	r3, [r1], #4\n\t"
4731914882SAlex Richardson        "beq	2f\n\t"
4831914882SAlex Richardson        "sub	r2, r3, "magic1(r5)"\n\t"
4931914882SAlex Richardson        "bics	r2, r2, r3\n\t"
5031914882SAlex Richardson        "tst	r2, "magic2(r5)"\n\t"
5131914882SAlex Richardson        "itt	eq\n\t"
5231914882SAlex Richardson        "streq	r3, [ip], #4\n\t"
5331914882SAlex Richardson        "ldreq	r3, [r1], #4\n"
5431914882SAlex Richardson        "bne	1f\n\t"
5531914882SAlex Richardson        /* Inner loop.  We now know that r1 is 64-bit aligned, so we
5631914882SAlex Richardson 	  can safely fetch up to two words.  This allows us to avoid
5731914882SAlex Richardson 	  load stalls.  */
5831914882SAlex Richardson        ".p2align 2\n"
5931914882SAlex Richardson   "2:\n\t"
6031914882SAlex Richardson        "pld	[r1, #8]\n\t"
6131914882SAlex Richardson        "ldr	r4, [r1], #4\n\t"
6231914882SAlex Richardson        "sub	r2, r3, "magic1(r5)"\n\t"
6331914882SAlex Richardson        "bics	r2, r2, r3\n\t"
6431914882SAlex Richardson        "tst	r2, "magic2(r5)"\n\t"
6531914882SAlex Richardson        "sub	r2, r4, "magic1(r5)"\n\t"
6631914882SAlex Richardson        "bne	1f\n\t"
6731914882SAlex Richardson        "str	r3, [ip], #4\n\t"
6831914882SAlex Richardson        "bics	r2, r2, r4\n\t"
6931914882SAlex Richardson        "tst	r2, "magic2(r5)"\n\t"
7031914882SAlex Richardson        "itt	eq\n\t"
7131914882SAlex Richardson        "ldreq	r3, [r1], #4\n\t"
7231914882SAlex Richardson        "streq	r4, [ip], #4\n\t"
7331914882SAlex Richardson        "beq	2b\n\t"
7431914882SAlex Richardson        "mov	r3, r4\n"
7531914882SAlex Richardson   "1:\n\t"
7631914882SAlex Richardson # ifdef __ARMEB__
7731914882SAlex Richardson        "rors	r3, r3, #24\n\t"
7831914882SAlex Richardson # endif
7931914882SAlex Richardson        "strb	r3, [ip], #1\n\t"
8031914882SAlex Richardson        "tst	r3, #0xff\n\t"
8131914882SAlex Richardson # ifdef __ARMEL__
8231914882SAlex Richardson        "ror	r3, r3, #8\n\t"
8331914882SAlex Richardson # endif
8431914882SAlex Richardson        "bne	1b\n\t"
8531914882SAlex Richardson        "ldr	r4, [sp], #4\n\t"
8631914882SAlex Richardson # ifndef __thumb2__
8731914882SAlex Richardson        "ldr	r5, [sp], #4\n\t"
8831914882SAlex Richardson # endif
8931914882SAlex Richardson        "BX LR\n"
9031914882SAlex Richardson 
9131914882SAlex Richardson        /* Strings have the same offset from word alignment, but it's
9231914882SAlex Richardson 	  not zero.  */
9331914882SAlex Richardson   "3:\n\t"
9431914882SAlex Richardson        "tst	r1, #1\n\t"
9531914882SAlex Richardson        "beq	1f\n\t"
9631914882SAlex Richardson        "ldrb	r2, [r1], #1\n\t"
9731914882SAlex Richardson        "strb	r2, [ip], #1\n\t"
9831914882SAlex Richardson        "cmp	r2, #0\n\t"
9931914882SAlex Richardson        "it	eq\n"
10031914882SAlex Richardson        "BXEQ LR\n"
10131914882SAlex Richardson   "1:\n\t"
10231914882SAlex Richardson        "tst	r1, #2\n\t"
10331914882SAlex Richardson        "beq	5b\n\t"
10431914882SAlex Richardson        "ldrh	r2, [r1], #2\n\t"
10531914882SAlex Richardson # ifdef __ARMEB__
10631914882SAlex Richardson        "tst	r2, #0xff00\n\t"
10731914882SAlex Richardson        "iteet	ne\n\t"
10831914882SAlex Richardson        "strneh	r2, [ip], #2\n\t"
10931914882SAlex Richardson        "lsreq	r2, r2, #8\n\t"
11031914882SAlex Richardson        "streqb	r2, [ip]\n\t"
11131914882SAlex Richardson        "tstne	r2, #0xff\n\t"
11231914882SAlex Richardson # else
11331914882SAlex Richardson        "tst	r2, #0xff\n\t"
11431914882SAlex Richardson        "itet	ne\n\t"
11531914882SAlex Richardson        "strneh	r2, [ip], #2\n\t"
11631914882SAlex Richardson        "streqb	r2, [ip]\n\t"
11731914882SAlex Richardson        "tstne	r2, #0xff00\n\t"
11831914882SAlex Richardson # endif
11931914882SAlex Richardson        "bne	5b\n\t"
12031914882SAlex Richardson        "BX LR\n"
12131914882SAlex Richardson 
12231914882SAlex Richardson        /* src and dst do not have a common word-alignement.  Fall back to
12331914882SAlex Richardson 	  byte copying.  */
12431914882SAlex Richardson   "4:\n\t"
12531914882SAlex Richardson        "ldrb	r2, [r1], #1\n\t"
12631914882SAlex Richardson        "strb	r2, [ip], #1\n\t"
12731914882SAlex Richardson        "cmp	r2, #0\n\t"
12831914882SAlex Richardson        "bne	4b\n\t"
12931914882SAlex Richardson        "BX LR");
13031914882SAlex Richardson }
/* For GLIBC: libc_hidden_builtin_def (strcpy) */

#endif /* defined (__thumb2__) && !defined (__thumb__)  */