/*-
 * Copyright (c) 2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: strcpy_arm.S,v 1.7 2024/02/08 20:51:24 andvar Exp $")

/*
 * Select which routine this file assembles into.  Defining STRLCPY or
 * STRNCPY builds that variant; with neither defined the result is strcpy.
 * When _LIBC is defined the routine gets an underscore-prefixed name and
 * the public name is emitted as a weak alias for it.
 */
#ifdef STRLCPY
#ifdef _LIBC
WEAK_ALIAS(strlcpy, _strlcpy)
# define FUNCNAME	_strlcpy
# else
# define FUNCNAME	strlcpy
# endif
#elif defined(STRNCPY)
# ifdef _LIBC
WEAK_ALIAS(strncpy, _strncpy)
# define FUNCNAME	_strncpy
# else
# define FUNCNAME	strncpy
# endif
#else
# ifdef _LIBC
WEAK_ALIAS(strcpy, _strcpy)
# define FUNCNAME	_strcpy
# else
# define FUNCNAME	strcpy
# endif
#endif

/*
 * Endian-neutral helpers.  BYTEn is the mask of the byte that lives at
 * address offset n within a loaded word; lslo/lshi shift word contents
 * towards the lower/higher address respectively.
 */
#ifdef __ARMEL__
#define	lslo		lsr	/* shift to lower address */
#define	lshi		lsl	/* shift to higher address */
#define	BYTE0		0x000000ff
#define	BYTE1		0x0000ff00
#define	BYTE2		0x00ff0000
#define	BYTE3		0xff000000
#else
#define	lslo		lsl	/* shift to lower address */
#define	lshi		lsr	/* shift to higher address */
#define	BYTE0		0xff000000
#define	BYTE1		0x00ff0000
#define	BYTE2		0x0000ff00
#define	BYTE3		0x000000ff
#endif

/*
 * On armv6 and later, to quickly determine if a word contains a NUL (0) byte,
 * we add 254 to each byte using the UQADD8 (unsigned saturating add 8)
 * instruction.  For every non-NUL byte, the result for that byte will become
 * 255.  For NUL, it will be 254.  When we complement the result of all 4 adds,
 * if the result is non-0 then we must have encountered a NUL.
 *
 * For earlier architecture, we just use tst on all 4 bytes.  There are other
 * algorithms to detect NULs but they take longer and use more instructions.
 */

/*
 * char *strcpy(char *dst, const char *src);
 * char *strncpy(char *dst, const char *src, size_t len);
 * size_t strlcpy(char *dst, const char *src, size_t len);
 *
 * Register usage:
 *	r0	dst write pointer (argument 0)
 *	r1	src read pointer (argument 1)
 *	r2	strlcpy/strncpy only: space remaining in dst (argument 2);
 *		strlcpy reserves one byte of it for the NUL on entry
 *	r3	scratch; bit offset of the NUL within the current word
 *	r4	scratch; the dst word being assembled on the incongruent path
 *	r5	the byte/word most recently loaded from src
 *	r6	value used to form the return value: the original dst for
 *		strcpy/strncpy; for strlcpy, src + 1 (plus 3 more once words
 *		are being read) so that r1 - r6 is the source length
 *	r7	armv6+ only: 0xfefefefe, the UQADD8 constant
 *	r8	incongruent path: insertion shift (32 - r9)
 *	r9	incongruent path: discard shift (8 * (src & 3))
 *	ip	scratch; number of bits to store in .Llast_bytes
 *
 * r4-r9 are saved on entry and restored before every return.
 *
 * NOTE(review): the remaining-space count in r2 is tested with signed
 * conditions (bmi/movlt/bge/ble), so len presumably has to fit in a signed
 * 32-bit value -- confirm against callers.
 */

	.text
ENTRY(FUNCNAME)
#if defined(STRLCPY)
	cmp	r2, #1			/* is length 1 or less? */
	bhi	1f			/* no, do normal */
	moveq	r3, #0			/* = 1? load NUL */
	strbeq	r3, [r0]		/* = 1? write NUL to dst */
	mov	r0, r1			/* move src to r0 */
	b	PLT_SYM(_C_LABEL(strlen)) /* and tailcall strlen */
1:
	sub	r2, r2, #1		/* leave one byte for NUL */
#endif
#if defined(STRNCPY)
	cmp	r2, #0			/* 0 length? */
	RETc(eq)			/* yes, just return */
#endif
	push	{r4-r9}			/* save some registers */
#ifdef _ARM_ARCH_6
#ifdef _ARM_ARCH_7
	movw	r7, #0xfefe		/* magic constant; 254 in each byte */
#else
	mov	r7, #0xfe		/* put 254 in low byte */
	orr	r7, r7, r7, lsl #8	/* move to next byte */
#endif
	orr	r7, r7, r7, lsl #16	/* move to next halfword */
#endif

#if defined(STRLCPY)
	add	r6, r1, #1		/* save for return (deal with NUL) */
#else
	mov	r6, r0			/* save for return */
#endif

	/*
	 * Copy a byte at a time until dst is word aligned (or the string or
	 * the available space ends first).
	 */
.Ldst_align:
	tst	r0, #3			/* check for dst alignment */
	beq	.Ldst_aligned		/* ok, proceed to next check */
	ldrb	r5, [r1], #1		/* load a byte */
#if defined(STRNCPY)
	subs	r2, r2, #1		/* subtract out from count */
	bmi	.Ldst_full		/* negative? the dst has no more room */
#endif
	strb	r5, [r0], #1		/* store a byte */
	teq	r5, #0			/* was it a NUL? */
	beq	.Lend_of_string		/* yes, we are done */
#if defined(STRLCPY)
	subs	r2, r2, #1		/* subtract one from count */
	strbeq	r2, [r0], #1		/* zero? write trailing NUL */
	beq	.Ldst_full		/* zero? the dst has no more room */
#endif
	b	.Ldst_align		/* loop around for next byte */
.Ldst_aligned:
	tst	r1, #3			/* get the misalignment of src */
	bne	.Lincongruent		/* !=? incongruent (slower) */

	/*   =?   congruent (faster) */

.Lcongruent:
#if defined(STRLCPY)
	add	r6, r6, #3		/* compensate for word post-inc */
#endif
	b	.Lcongruent_mainloop_load
.Lcongruent_mainloop:
#if defined(STRLCPY) || defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from the count */
	bmi	.Lno_more_room		/* negative? cannot store a whole word */
#endif
	str	r5, [r0], #4		/* store word into dst */
#if defined(STRLCPY)
	beq	.Lno_more_room		/* count is 0? no room in dst */
#endif
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/* count is 0? no room in dst */
#endif
.Lcongruent_mainloop_load:
	ldr	r5, [r1], #4		/* load word from source */
#if defined(_ARM_ARCH_6)
	uqadd8	r3, r5, r7		/* magic happens here */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lcongruent_mainloop	/* yes, no NULs, do it again */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/* no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/* no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/* no, does byte 3 contain a NUL? */
	bne	.Lcongruent_mainloop	/* no, no NULs, do it again */
#endif
#if defined(STRLCPY) && 0
	sub	r1, r1, #3		/* back up src pointer */
#endif
	/*
	 * A NUL is somewhere in r5.  Compute its bit offset within the
	 * word (0, 8, 16 or 24) into r3.
	 */
#if defined(_ARM_ARCH_6)
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ needs BE data */
#endif
	clz	r3, r3			/* count leading zeros */
#else
	mov	r3, #0			/* assume NUL is in byte 0 */
	tst	r5, #BYTE0		/* is NUL in byte 0? */
	beq	.Lcongruent_last_bytes	/* yes, done searching. */
	mov	r3, #8			/* assume NUL is in byte 1 */
	tst	r5, #BYTE1		/* is NUL in byte 1? */
	beq	.Lcongruent_last_bytes	/* yes, done searching. */
	mov	r3, #16			/* assume NUL is in byte 2 */
	tst	r5, #BYTE2		/* is NUL in byte 2? */
#if !defined(STRLCPY)
	beq	.Lcongruent_last_bytes	/* yes, done searching. */
	mov	r3, #24			/* NUL must be in byte 3 */
#else
	movne	r3, #24			/* no, then NUL is in byte 3 */
#endif
#endif /* _ARM_ARCH_6 */
#if defined(STRLCPY)
.Lcongruent_last_bytes:
#endif
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* position to point at NUL + 4 */
#endif
	b	.Llast_bytes		/* store the last bytes */


.Lincongruent:
	/*
	 * At this point dst is word aligned but src is not.  Round src down
	 * to a word boundary and read whole words from there, discarding
	 * the bytes of the first word that precede the real start of src.
	 */
	and	r3, r1, #3		/* extract misalignment */
	mov	r9, r3, lsl #3		/* calculate discard shift */
	rsb	r8, r9, #32		/* calculate insertion shift */
#if defined(STRLCPY)
	add	r6, r6, #3		/* compensate for word post-inc */
#endif
	bic	r1, r1, #3		/* word align src */
	ldr	r5, [r1], #4		/* load word from src */
	mov	r4, r5, lslo r9		/* discard lo bytes from src */
	tst	r4, #BYTE0		/* does byte 0 contain a NUL? */
#if defined(STRNCPY)
	beq	.Lend_of_string		/* yes, zero fill rest of string */
#else
	moveq	r3, r9			/* yes, set offset */
	beq	.Lincongruent_end_of_string /* yes, deal with the last bytes */
#endif
	/*
	 * So that our test for NULs below does not generate false positives,
	 * fill the bytes in the word we don't want to match with all 1s.
	 */
	mvn	r3, #0			/* create a mask */
	mov	r3, r3, lslo r8		/* zero out bytes being kept */
	orr	r5, r5, r3		/* merge src and mask */
#ifdef _ARM_ARCH_6
	uqadd8	r3, r5, r7		/* NUL detection magic happens */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lincongruent_mainloop_load /* yes, no NUL encountered! */
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ wants BE input */
#endif
	clz	r3, r3			/* count leading zeros */
#else
	/*
	 * We already tested for byte 0 above so we don't need to do it again.
	 * Once a tst finds a NUL the remaining tstne are skipped, so every
	 * following subeq still executes and r3 ends up as 8, 16 or 24.
	 */
	mov	r3, #24			/* assume NUL is in byte 3 */
	tst	r5, #BYTE1		/* did we find a NUL in byte 1? */
	subeq	r3, r3, #8		/* yes, decrement byte position */
	tstne	r5, #BYTE2		/* no, did we find a NUL in byte 2? */
	subeq	r3, r3, #8		/* yes, decrement byte position */
	tstne	r5, #BYTE3		/* no, did we find a NUL in byte 3? */
	bne	.Lincongruent_mainloop_load /* no, no NUL encountered! */
#endif
	mov	r5, r4			/* discard already dealt with bytes */
.Lincongruent_end_of_string:
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* then add offset to NUL */
#endif
	sub	r3, r3, r9		/* adjust NUL offset */
	b	.Llast_bytes		/* NUL encountered! finish up */

#if defined(STRLCPY) || defined(STRNCPY)
.Lincongruent_no_more_room:
	mov	r5, r4			/* move data to be stored to r5 */
	b	.Lno_more_room		/* fill remaining space */
#endif /* STRLCPY || STRNCPY */

	/*
	 * At this point both dst and src are word aligned and r4 contains
	 * partial contents from src.
	 */
.Lincongruent_mainloop:
	orr	r4, r4, r5, lshi r8	/* put new src data into dst word */
#if defined(STRLCPY) || defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bmi	.Lincongruent_no_more_room /* count < 0? dst will be full */
#endif
	str	r4, [r0], #4		/* store word in dst */
#if defined(STRLCPY)
	beq	.Lno_more_room		/* space left is 0? stop copy */
#endif
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/* space left is 0? stop copy */
#endif
	mov	r4, r5, lslo r9		/* move rest of src into dst word */
.Lincongruent_mainloop_load:
	ldr	r5, [r1], #4		/* read src */
#ifdef _ARM_ARCH_6
	uqadd8	r3, r5, r7		/* magic happens here */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lincongruent_mainloop	/* yes, no NUL encountered! */
	/*
	 * fall into this since we encountered a NUL.  At this point we have
	 * from 1-5 bytes (excluding trailing NUL) to write.
	 */
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ works on BE data */
#endif
	clz	r3, r3			/* count leading zeroes */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/* no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/* no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/* no, does byte 3 contain a NUL? */
	bne	.Lincongruent_mainloop	/* no, no NUL encountered! */
	/*
	 * fall into this since we encountered a NUL.  At this point we have
	 * from 1-5 bytes (excluding trailing NUL) to write.
	 */
	mov	r3, #0			/* assume a NUL is in byte 0 */
	tst	r5, #BYTE0		/* is there a NUL in byte 0? */
	beq	1f			/* yes, found a NUL! */
	mov	r3, #8			/* assume a NUL is in byte 1 */
	tst	r5, #BYTE1		/* is there a NUL in byte 1? */
	beq	1f			/* yes, found a NUL! */
	tst	r5, #BYTE2		/* is there a NUL in byte 2? */
	moveq	r3, #16			/* yes, mark its position */
	movne	r3, #24			/* no, it must be in byte 3 */
1:
#endif
	orr	r4, r4, r5, lshi r8	/* merge new and old src words */
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* adjust src to point to NUL */
#endif
	add	r3, r3, r8		/* add remainder bytes worth */
	cmp	r3, #32			/* do we have at least one word to write? */
	movlt	r5, r4			/* no, move source bytes to expected reg */
	blt	.Llast_bytes		/* no, deal with them */
#if defined(STRLCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bpl	1f			/* we have space for at least 4 */
	/*
	 * Since the space just went minus, we don't have enough room to
	 * write all 4 bytes.  In fact, the most we can write is 3 so
	 * just lie and say we have 3 bytes to write and discard the rest.
	 */
	add	r2, r2, #4		/* add 4 back */
	mov	r3, #24			/* say we have 3 bytes */
	mov	r5, r4			/* discard the bytes we can't store */
	b	.Llast_bytes		/* and treat this as our last word */
1:
#elif defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bmi	.Lincongruent_no_more_room /* count < 0? dst will be full */
#endif
	str	r4, [r0], #4		/* store dst word */
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/* space left is 0? stop copy */
#endif
#if defined(STRLCPY)
	bne	1f			/* we still have space remaining */
	strb	r2, [r0]		/* write final NUL */
	b	.Lend_of_string		/* we are done */
1:
#endif
	/*
	 * Subtract the 32 bits just written from the number of bits left
	 * to write.  If 0 bits are left and not doing strncpy, just write
	 * the trailing NUL and be done.
	 */
	subs	r3, r3, #32		/* we wrote one word */
#if !defined(STRNCPY)
	bne	1f			/* more data? yes, go store it */
	strb	r3, [r0]		/* write final NUL */
	b	.Lend_of_string		/* we are done */
1:
#endif
	/*
	 * At this point after writing 4 bytes, we have 0 or 1 bytes left to
	 * write (excluding the trailing NUL).
	 */
	mov	r5, r5, lslo r9		/* get remainder of src */

	/* fall into .Llast_bytes */

#if !defined(STRLCPY)
.Lcongruent_last_bytes:
#endif
.Llast_bytes:
	/*
	 * r5 contains the last word and is in host byte order.
	 * r3 contains number of bits left to copy (0..31).
	 * r1 should point to the NUL + 4.
	 */
	bics	ip, r3, #7		/* truncate bits, is result 0? */
#if !defined(STRNCPY)
	bne	1f			/* no, have to write some bytes */
	strb	ip, [r0]		/* yes, write trailing NUL */
	b	.Lend_of_string		/* yes, and we are the end */
1:
#endif
#if defined(STRLCPY) || defined(STRNCPY)
	cmp	r2, ip, lsr #3		/* is there enough room? */
	movlt	ip, r2, lsl #3		/* no, only fill remaining space */
#endif
	mvn	r3, #0			/* create a mask */
	mov	r3, r3, lshi ip		/* clear leading bytes */
	bic	r5, r5, r3		/* clear trailing bytes */
#if defined(STRNCPY)
	cmp	r2, #4			/* room for 4 bytes? */
	movge	ip, #32			/* yes, we will write 4 bytes */
	bge	2f			/* yes, and go do it */
	mvn	r3, #0			/* create a mask (again) */
	mov	ip, r2, lsl #3		/* remaining space bytes -> bits */
	mov	r3, r3, lshi ip		/* clear remaining bytes */
#elif defined(STRLCPY)
	cmp	r2, #3			/* do we have room for 3 bytes & NUL? */
	bge	2f			/* yes, just clear out dst */
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#else
	cmp	ip, #24			/* are we writing 3 bytes & a NUL? */
	bge	2f			/* yes, just overwrite dst */
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#endif /* !STRNCPY */
	ldr	r4, [r0]		/* fetch dst word */
	and	r4, r4, r3		/* preserve trailing bytes */
	orr	r5, r5, r4		/* merge dst with src */
2:	str	r5, [r0], #4		/* store last word */
#if defined(STRNCPY)
	subs	r2, r2, ip, lsr #3	/* subtract bytes cleared from count */
	beq	.Ldst_full_word_aligned	/* nothing left to zero fill */
#endif
	b	.Lend_of_string

#if defined(STRLCPY) || defined(STRNCPY)
	/*
	 * dst cannot take another whole word.  r5 holds the word that was
	 * about to be stored and r2 holds the (zero or negative) count left
	 * after 4 was subtracted from it.
	 */
.Lno_more_room:
#if defined(STRLCPY)
	cmp	r2, #-1			/* tried to write 3 bytes? */
	blt	1f			/* less, partial word write */
	cmp	r2, #0			/* no space left? */
	strbeq	r2, [r0]		/* write the final NUL */
	bicne	r5, r5, #BYTE3		/* clear trailing NUL */
	strne	r5, [r0]		/* write last word */
	b	.Ldst_full_word_aligned	/* the dst buffer is full */
1:
#endif /* STRLCPY */
	add	r2, r2, #4		/* restore remaining space */
	ldr	r4, [r0]		/* load dst */
	mvn	r3, #0			/* create a mask */
	mov	r2, r2, lsl #3		/* bytes -> bits */
	mov	r3, r3, lshi r2		/* clear leading bytes */
	bic	r5, r5, r3		/* clear trailing bytes from src */
#if defined(STRLCPY)
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#endif /* STRLCPY */
	and	r4, r4, r3		/* preserve trailing bytes in dst */
	orr	r4, r4, r5		/* merge src with dst */
	str	r4, [r0], #4		/* write last word */
	b	.Ldst_full_word_aligned
#endif /* STRLCPY || STRNCPY */

#if defined(STRLCPY)
	/*
	 * Destination was filled (and NUL terminated).
	 * All that's left is count the number of bytes left in src.
	 */
.Ldst_full:
1:	tst	r1, #3			/* src word aligned? */
	beq	2f			/* yes, so do it word by word */
	ldrb	r5, [r1], #1		/* load next byte */
	teq	r5, #0			/* is it a NUL? */
	bne	1b			/* no, check alignment */
	b	.Lend_of_string		/* and return */
2:	add	r6, r6, #3		/* compensate for post-inc */
.Ldst_full_word_aligned:
3:	ldr	r5, [r1], #4		/* load word from src */
#ifdef _ARM_ARCH_6
	uqadd8	r5, r5, r7		/* perform NUL magic */
	mvns	r5, r5			/* complement all 0s? */
	beq	3b			/* yes, no NUL so get next word */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/* no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/* no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/* no, does byte 3 contain a NUL? */
	bne	3b			/* no, no NUL encountered! */
#endif
#ifdef _ARM_ARCH_6
#ifdef __ARMEL__
	rev	r5, r5			/* CLZ needs BE data */
#endif
	clz	r5, r5			/* count leading zeros */
	add	r1, r1, r5, lsr #3	/* add offset to NUL to src pointer */
#else
	tst	r5, #BYTE0		/* is there a NUL in byte 0? */
	beq	4f			/* yes, don't check any further */
	add	r1, r1, #1		/* no, advance src pointer by 1 */
	tst	r5, #BYTE1		/* is there a NUL in byte 1? */
	beq	4f			/* yes, don't check any further */
	add	r1, r1, #1		/* no, advance src pointer by 1 */
	tst	r5, #BYTE2		/* is there a NUL in byte 2? */
	addne	r1, r1, #1		/* no, there must be in byte 3 */
4:
#endif /* _ARM_ARCH_6 */
.Lend_of_string:
	sub	r0, r1, r6		/* subtract start from finish */
	pop	{r4-r9}			/* restore registers */
	RET
#elif defined(STRNCPY)
	/*
	 * The source string ended; r0 points just past the last byte stored
	 * and r2 is the number of dst bytes still to be zero filled.
	 */
.Lend_of_string:
	teq	r2, #0			/* any bytes left to zero? */
	beq	3f			/* no, just return. */
	mov	r1, #0			/* yes, prepare to zero */
	cmp	r2, #16			/* some, but not a lot? */
	ble	1f			/* yes, clear them a byte at a time */
	mov	r4, lr			/* preserve lr */
	bl	PLT_SYM(_C_LABEL(memset)) /* no, a lot: let memset do it */
	mov	lr, r4			/* restore lr */
	b	3f			/* return */
1:	add	ip, r0, r2		/* calculate stopping point */
2:	strb	r1, [r0], #1		/* clear a byte */
	cmp	r0, ip			/* done? */
	/*
	 * r0 and ip are addresses, so the comparison has to be unsigned;
	 * a signed test would end the loop early whenever the region being
	 * cleared crosses 0x80000000.
	 */
	blo	2b			/* no, clear next byte */
3:	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
.Ldst_full:
.Ldst_full_word_aligned:
	/*
	 * Destination was filled (but not NUL terminated).
	 * All that's left is return the start of dst
	 */
	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
#else
.Lend_of_string:
	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
#endif
END(FUNCNAME)