/*-
 * Copyright (c) 2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
28 */ 29 30#include <machine/asm.h> 31 32RCSID("$NetBSD: strcpy_arm.S,v 1.5 2017/01/14 03:00:13 christos Exp $") 33 34#ifdef STRLCPY 35#ifdef _LIBC 36WEAK_ALIAS(strlcpy, _strlcpy) 37#endif 38#define FUNCNAME _strlcpy 39#elif defined(STRNCPY) 40WEAK_ALIAS(strncpy, _strncpy) 41#define FUNCNAME _strncpy 42#else 43WEAK_ALIAS(strcpy, _strcpy) 44#define FUNCNAME _strcpy 45#endif 46 47#ifdef __ARMEL__ 48#define lslo lsr /* shift to lower address */ 49#define lshi lsl /* shift to higher address */ 50#define BYTE0 0x000000ff 51#define BYTE1 0x0000ff00 52#define BYTE2 0x00ff0000 53#define BYTE3 0xff000000 54#else 55#define lslo lsl /* shift to lower address */ 56#define lshi lsr /* shift to higher address */ 57#define BYTE0 0xff000000 58#define BYTE1 0x00ff0000 59#define BYTE2 0x0000ff00 60#define BYTE3 0x000000ff 61#endif 62 63/* 64 * On armv6 and later, to quickly determine if a word contains a NUL (0) byte, 65 * we add 254 to each byte using the UQADD8 (unsigned saturating add 8) 66 * instruction. For every non-NUL byte, the result for that byte will become 67 * 255. For NUL, it will be 254. When we complement the result of all 4 adds, 68 * if the result is non-0 then we must have encountered a NUL. 69 * 70 * For earlier architecture, we just use tst on all 4 bytes. There are other 71 * algorithms to detect NULs but they take longer and use more instructions. 72 */ 73 74/* 75 * char *strcpy(char *dst, const char *src); 76 * char *strncpy(char *dst, const char *src, size_t len); 77 * size_t strlcpy(char *dst, const char *src, size_t len); 78 */ 79 80 .text 81ENTRY(FUNCNAME) 82#if defined(STRLCPY) 83 cmp r2, #1 /* is length 1 or less? */ 84 bhi 1f /* no, do normal */ 85 moveq r3, #0 /* = 1? load NUL */ 86 strbeq r3, [r0] /* = 1? write NUL to dst */ 87 mov r0, r1 /* move src to r0 */ 88 b PLT_SYM(_C_LABEL(strlen)) /* and tailcall strlen */ 891: 90 sub r2, r2, #1 /* leave one byte for NUL */ 91#endif 92#if defined(STRNCPY) 93 cmp r2, #0 /* 0 length? 
*/ 94 RETc(eq) /* yes, just return */ 95#endif 96 push {r4-r9} /* save some registers */ 97#ifdef _ARM_ARCH_6 98#ifdef _ARM_ARCH_7 99 movw r7, #0xfefe /* magic constant; 254 in each byte */ 100#else 101 mov r7, #0xfe /* put 254 in low byte */ 102 orr r7, r7, r7, lsl #8 /* move to next byte */ 103#endif 104 orr r7, r7, r7, lsl #16 /* move to next halfword */ 105#endif 106 107#if defined(STRLCPY) 108 add r6, r1, #1 /* save for return (deal with NUL) */ 109#else 110 mov r6, r0 /* save for return */ 111#endif 112 113.Ldst_align: 114 tst r0, #3 /* check for dst alignment */ 115 beq .Ldst_aligned /* ok, proceed to next check */ 116 ldrb r5, [r1], #1 /* load a byte */ 117#if defined(STRNCPY) 118 subs r2, r2, #1 /* subtract out from count */ 119 bmi .Ldst_full /* zero? the dst has no more room */ 120#endif 121 strb r5, [r0], #1 /* store a byte */ 122 teq r5, #0 /* was it a NUL? */ 123 beq .Lend_of_string /* yes, we are done */ 124#if defined(STRLCPY) 125 subs r2, r2, #1 /* subtract one from count */ 126 strbeq r2, [r0], #1 /* zero? write trailing NUL */ 127 beq .Ldst_full /* zero? the dst has no more room */ 128#endif 129 b .Ldst_align /* loop around for next byte */ 130.Ldst_aligned: 131 tst r1, #3 /* get the misalignment of src */ 132 bne .Lincongruent /* !=? incongruent (slower) */ 133 134 /* =? congruent (faster) */ 135 136.Lcongruent: 137#if defined(STRLCPY) 138 add r6, r6, #3 /* compensate for word post-inc */ 139#endif 140 b .Lcongruent_mainloop_load 141.Lcongruent_mainloop: 142#if defined(STRLCPY) || defined(STRNCPY) 143 subs r2, r2, #4 /* subtract 4 from the count */ 144 bmi .Lno_more_room 145#endif 146 str r5, [r0], #4 /* store word into dst */ 147#if defined(STRLCPY) 148 beq .Lno_more_room /* count is 0? no room in dst */ 149#endif 150#if defined(STRNCPY) 151 beq .Ldst_full_word_aligned /* count is 0? 
no room in dst */ 152#endif 153.Lcongruent_mainloop_load: 154 ldr r5, [r1], #4 /* load word from source */ 155#if defined(_ARM_ARCH_6) 156 uqadd8 r3, r5, r7 /* magic happens here */ 157 mvns r3, r3 /* is the complemented result 0? */ 158 beq .Lcongruent_mainloop /* yes, no NULs, do it again */ 159#else 160 tst r5, #BYTE0 /* does byte 0 contain a NUL? */ 161 tstne r5, #BYTE1 /* no, does byte 1 contain a NUL? */ 162 tstne r5, #BYTE2 /* no, does byte 2 contain a NUL? */ 163 tstne r5, #BYTE3 /* no, does byte 3 contain a NUL? */ 164 bne .Lcongruent_mainloop /* yes, no NULs, do it again */ 165#endif 166#if defined(STRLCPY) && 0 167 sub r1, r1, #3 /* back up src pointer */ 168#endif 169#if defined(_ARM_ARCH_6) 170#ifdef __ARMEL__ 171 rev r3, r3 /* CLZ needs BE data */ 172#endif 173 clz r3, r3 /* count leading zeros */ 174#else 175 mov r3, #0 /* assume NUL is in byte 0 */ 176 tst r5, #BYTE0 /* is NUL in byte 2? */ 177 beq .Lcongruent_last_bytes /* yes, done searching. */ 178 mov r3, #8 /* assume NUL is in byte 1 */ 179 tst r5, #BYTE1 /* is NUL in byte 2? */ 180 beq .Lcongruent_last_bytes /* yes, done searching. */ 181 mov r3, #16 /* assume NUL is in byte 2 */ 182 tst r5, #BYTE2 /* is NUL in byte 2? */ 183#if !defined(STRLCPY) 184 beq .Lcongruent_last_bytes /* yes, done searching. */ 185 mov r3, #24 /* NUL must be in byte 3 */ 186#else 187 movne r3, #24 /* no, then NUL is in byte 3 */ 188#endif 189#endif /* _ARM_ARCH_6 */ 190#if defined(STRLCPY) 191.Lcongruent_last_bytes: 192#endif 193#if defined(STRLCPY) 194 add r1, r1, r3, lsr #3 /* position to point at NUL + 4 */ 195#endif 196 b .Llast_bytes /* store the last bytes */ 197 198 199.Lincongruent: 200 /* 201 * At this point dst is word aligned by src is not. Read bytes 202 * from src until it is read aligned. 
203 */ 204 and r3, r1, #3 /* extract misalignment */ 205 mov r9, r3, lsl #3 /* calculate discard shift */ 206 rsb r8, r9, #32 /* calculate insertion shift */ 207#if defined(STRLCPY) 208 add r6, r6, #3 /* compensate for word post-inc */ 209#endif 210 bic r1, r1, #3 /* word align src */ 211 ldr r5, [r1], #4 /* load word frm src */ 212 mov r4, r5, lslo r9 /* discard lo bytes from src */ 213 tst r4, #BYTE0 /* does byte 0 contain a NUL? */ 214#if defined(STRNCPY) 215 beq .Lend_of_string /* yes, zero fill rest of string */ 216#else 217 moveq r3, r9 /* yes, set offset */ 218 beq .Lincongruent_end_of_string /* yes, deal with the last bytes */ 219#endif 220 /* 221 * To make our test for NULs below do not generate false positives, 222 * fill the bytes in the word we don't want to match with all 1s. 223 */ 224 mvn r3, #0 /* create a mask */ 225 mov r3, r3, lslo r8 /* zero out bytes being kept */ 226 orr r5, r5, r3 /* merge src and mask */ 227#ifdef _ARM_ARCH_6 228 uqadd8 r3, r5, r7 /* NUL detection magic happens */ 229 mvns r3, r3 /* is the complemented result 0? */ 230 beq .Lincongruent_mainloop_load /* yes, no NUL encountered! */ 231#ifdef __ARMEL__ 232 rev r3, r3 /* CLZ wants BE input */ 233#endif 234 clz r3, r3 /* count leading zeros */ 235#else 236 /* 237 * We already tested for byte 0 above so we don't need to it again. 238 */ 239 mov r3, #24 /* assume NUL is in byte 3 */ 240 tst r5, #BYTE1 /* did we find a NUL in byte 1? */ 241 subeq r3, r3, #8 /* yes, decremnt byte position */ 242 tstne r5, #BYTE2 /* no, did we find a NUL in byte 2? */ 243 subeq r3, r3, #8 /* yes, decremnt byte position */ 244 tstne r5, #BYTE3 /* no, did we find a NUL in byte 3? */ 245 bne .Lincongruent_mainloop_load /* no, no NUL encountered! 
*/ 246#endif 247 mov r5, r4 /* discard already dealt with bytes */ 248.Lincongruent_end_of_string: 249#if defined(STRLCPY) 250 add r1, r1, r3, lsr #3 /* then add offset to NUL */ 251#endif 252 sub r3, r3, r9 /* adjust NUL offset */ 253 b .Llast_bytes /* NUL encountered! finish up */ 254 255#if defined(STRLCPY) || defined(STRNCPY) 256.Lincongruent_no_more_room: 257 mov r5, r4 /* move data to be stored to r5 */ 258 b .Lno_more_room /* fill remaining space */ 259#endif /* STRLCPY || STRNCPY */ 260 261 /* 262 * At this point both dst and src are word aligned and r4 contains 263 * partial contents from src. 264 */ 265.Lincongruent_mainloop: 266 orr r4, r4, r5, lshi r8 /* put new src data into dst word */ 267#if defined(STRLCPY) || defined(STRNCPY) 268 subs r2, r2, #4 /* subtract 4 from count */ 269 bmi .Lincongruent_no_more_room /* count < 0? dst will be full */ 270#endif 271 str r4, [r0], #4 /* store word in dst */ 272#if defined(STRLCPY) 273 beq .Lno_more_room /* space left is 0? stop copy */ 274#endif 275#if defined(STRNCPY) 276 beq .Ldst_full_word_aligned /* space left is 0? stop copy */ 277#endif 278 mov r4, r5, lslo r9 /* move rest of src into dst word */ 279.Lincongruent_mainloop_load: 280 ldr r5, [r1], #4 /* read src */ 281#ifdef _ARM_ARCH_6 282 uqadd8 r3, r5, r7 /* magic happens here */ 283 mvns r3, r3 /* is the complemented result 0? */ 284 beq .Lincongruent_mainloop /* yes, no NUL encountered! */ 285 /* 286 * fall into this since we encountered a NULL. At this point we have 287 * from 1-5 bytes (excluding trailing NUL) to write. 288 */ 289#ifdef __ARMEL__ 290 rev r3, r3 /* CLZ works on BE data */ 291#endif 292 clz r3, r3 /* count leading zeroes */ 293#else 294 tst r5, #BYTE0 /* does byte 0 contain a NUL? */ 295 tstne r5, #BYTE1 /* no, does byte 1 contain a NUL? */ 296 tstne r5, #BYTE2 /* no, does byte 2 contain a NUL? */ 297 tstne r5, #BYTE3 /* no, does byte 3 contain a NUL? */ 298 bne .Lincongruent_mainloop /* no, no NUL encountered! 
*/ 299 /* 300 * fall into this since we encountered a NULL. At this point we have 301 * from 1-5 bytes (excluding trailing NUL) to write. 302 */ 303 mov r3, #0 /* assume a NUL is in byte 0 */ 304 tst r5, #BYTE0 /* is there a NUL in byte 0? */ 305 beq 1f /* yes, found a NUL! */ 306 mov r3, #8 /* assume a NUL is in byte 1 */ 307 tst r5, #BYTE1 /* is there a NUL in byte 0? */ 308 beq 1f /* yes, found a NUL! */ 309 tst r5, #BYTE2 /* is there a NUL in byte 2? */ 310 moveq r3, #16 /* yes, mark its position */ 311 movne r3, #24 /* no, it must be in byte 3 */ 3121: 313#endif 314 orr r4, r4, r5, lshi r8 /* merge new and old src words */ 315#if defined(STRLCPY) 316 add r1, r1, r3, lsr #3 /* adjust src to point to NUL */ 317#endif 318 add r3, r3, r8 /* add remainder bytes worth */ 319 cmp r3, #32 /* do we have at least one word to write? */ 320 movlt r5, r4 /* no, move source bytes to expected reg */ 321 blt .Llast_bytes /* no, deal with them */ 322#if defined(STRLCPY) 323 subs r2, r2, #4 /* subtract 4 from count */ 324 bpl 1f /* we have space for at least 4 */ 325 /* 326 * Since the space just went minus, we don't have enough room to 327 * write all 4 bytes. In fact, the most we can write is 3 so just 328 * just lie and say we have 3 bytes to write and discard the rest. 329 */ 330 add r2, r2, #4 /* add 4 back */ 331 mov r3, #24 /* say we have 3 bytes */ 332 mov r5, r4 /* discard the bytes we can't store */ 333 b .Llast_bytes /* and treat this as our last word */ 3341: 335#elif defined(STRNCPY) 336 subs r2, r2, #4 /* subtract 4 from count */ 337 bmi .Lincongruent_no_more_room /* count < 0? dst will be full */ 338#endif 339 str r4, [r0], #4 /* store dst word */ 340#if defined(STRNCPY) 341 beq .Ldst_full_word_aligned /* space left is 0? 
stop copy */ 342#endif 343#if defined(STRLCPY) 344 bne 1f /* we still have space remaining */ 345 strb r2, [r0] /* write final NUL */ 346 b .Lend_of_string /* we are done */ 3471: 348#endif 349 /* 350 * Subtract the 32 bits just written from the number of bits left 351 * to write. If 0 bits are left and not doing strncpy, just write 352 * the trailing NUL and be done. 353 */ 354 subs r3, r3, #32 /* we wrote one word */ 355#if !defined(STRNCPY) 356 bne 1f /* no more data? */ 357 strb r3, [r0] /* write final NUL */ 358 b .Lend_of_string /* we are done */ 3591: 360#endif 361 /* 362 * At this point after writing 4 bytes, we have 0 or 1 bytes left to 363 * write (excluding the trailing NUL). 364 */ 365 mov r5, r5, lslo r9 /* get remainder of src */ 366 367 /* fall into .Llast_bytes */ 368 369#if !defined(STRLCPY) 370.Lcongruent_last_bytes: 371#endif 372.Llast_bytes: 373 /* 374 * r5 contains the last word and is in host byte order. 375 * r3 contains number of bits left to copy (0..31). 376 * r1 should point to the NUL + 4. 377 */ 378 bics ip, r3, #7 /* truncate bits, is result 0? */ 379#if !defined(STRNCPY) 380 bne 1f /* no, have to write some bytes */ 381 strb ip, [r0] /* yes, write trailing NUL */ 382 b .Lend_of_string /* yes, and we are the end */ 3831: 384#endif 385#if defined(STRLCPY) || defined(STRNCPY) 386 cmp r2, ip, lsr #3 /* is there enough room? */ 387 movlt ip, r2, lsl #3 /* no, only fill remaining space */ 388#endif 389 mvn r3, #0 /* create a mask */ 390 mov r3, r3, lshi ip /* clear leading bytes */ 391 bic r5, r5, r3 /* clear trailing bytes */ 392#if defined(STRNCPY) 393 cmp r2, #4 /* room for 4 bytes? */ 394 movge ip, #32 /* yes, we will write 4 bytes */ 395 bge 2f /* yes, and go do it */ 396 mvn r3, #0 /* create a mask (again) */ 397 mov ip, r2, lsl #3 /* remaining space bytes -> bits */ 398 mov r3, r3, lshi ip /* clear remaining bytes */ 399#elif defined(STRLCPY) 400 cmp r2, #3 /* do we have room for 3 bytes & NUL? 
*/ 401 bge 2f /* yes, just clear out dst */ 402 mov r3, r3, lshi #8 /* mask out trailing NUL */ 403#else 404 cmp ip, #24 /* are we writing 3 bytes & a NUL? */ 405 bge 2f /* yes, just overwrite dst */ 406 mov r3, r3, lshi #8 /* mask out trailing NUL */ 407#endif /* !STRNCPY */ 408 ldr r4, [r0] /* fetch dst word */ 409 and r4, r4, r3 /* preserve trailing bytes */ 410 orr r5, r5, r4 /* merge dst with src */ 4112: str r5, [r0], #4 /* store last word */ 412#if defined(STRNCPY) 413 subs r2, r2, ip, lsr #3 /* subtract bytes cleared from count */ 414 beq .Ldst_full_word_aligned 415#endif 416 b .Lend_of_string 417 418#if defined(STRLCPY) || defined(STRNCPY) 419.Lno_more_room: 420#if defined(STRLCPY) 421 cmp r2, #-1 /* tried to write 3 bytes? */ 422 blt 1f /* less, partial word write */ 423 cmp r2, #0 /* no space left? */ 424 strbeq r2, [r0] /* write the final NUL */ 425 bicne r5, r5, #BYTE3 /* clear trailing NUL */ 426 strne r5, [r0] /* write last word */ 427 b .Ldst_full_word_aligned /* the dst buffer is full */ 4281: 429#endif /* STRLCPY */ 430 add r2, r2, #4 /* restore remaining space */ 431 ldr r4, [r0] /* load dst */ 432 mvn r3, #0 /* create a mask */ 433 mov r2, r2, lsl #3 /* bytes -> bits */ 434 mov r3, r3, lshi r2 /* clear leading bytes */ 435 bic r5, r5, r3 /* clear trailing bytes from src */ 436#if defined(STRLCPY) 437 mov r3, r3, lshi #8 /* mask out trailing NUL */ 438#endif /* STRLCPY */ 439 and r4, r4, r3 /* preserve trailing bytes in dst */ 440 orr r4, r4, r5 /* merge src with dst */ 441 str r4, [r0], #4 /* write last word */ 442 b .Ldst_full_word_aligned 443#endif /* STRLCPY || STRNCPY */ 444 445#if defined(STRLCPY) 446 /* 447 * Destination was filled (and NUL terminated). 448 * All that's left is count the number of bytes left in src. 449 */ 450.Ldst_full: 4511: tst r1, #3 /* dst word aligned? */ 452 beq 2f /* yes, so do it word by word */ 453 ldrb r5, [r1], #1 /* load next byte */ 454 teq r5, #0 /* is it a NUL? 
*/ 455 bne 1b /* no, check alignment */ 456 b .Lend_of_string /* and return */ 4572: add r6, r6, #3 /* compensate for post-inc */ 458.Ldst_full_word_aligned: 4593: ldr r5, [r1], #4 /* load word from src */ 460#ifdef _ARM_ARCH_6 461 uqadd8 r5, r5, r7 /* perform NUL magic */ 462 mvns r5, r5 /* complement all 0s? */ 463 beq 3b /* yes, no NUL so get next word */ 464#else 465 tst r5, #BYTE0 /* does byte 0 contain a NUL? */ 466 tstne r5, #BYTE1 /* no, does byte 1 contain a NUL? */ 467 tstne r5, #BYTE2 /* no, does byte 2 contain a NUL? */ 468 tstne r5, #BYTE3 /* no, does byte 3 contain a NUL? */ 469 bne 3b /* no, no NUL encountered! */ 470#endif 471#ifdef _ARM_ARCH_6 472#ifdef __ARMEL__ 473 rev r5, r5 /* CLZ needs BE data */ 474#endif 475 clz r5, r5 /* count leading zeros */ 476 add r1, r1, r5, lsr #3 /* add offset to NUL to src pointer */ 477#else 478 tst r5, #BYTE0 /* is there a NUL in byte 0? */ 479 beq 4f /* yes, don't check any further */ 480 add r1, r1, #1 /* no, advance src pointer by 1 */ 481 tst r5, #BYTE1 /* is there a NUL in byte 1? */ 482 beq 4f /* yes, don't check any further */ 483 add r1, r1, #1 /* no, advance src pointer by 1 */ 484 tst r5, #BYTE2 /* is there a NUL in byte 2? */ 485 addne r1, r1, #1 /* no, there must be in byte 3 */ 4864: 487#endif /* _ARM_ARCH_6 */ 488.Lend_of_string: 489 sub r0, r1, r6 /* subtract start from finish */ 490 pop {r4-r9} /* restore registers */ 491 RET 492#elif defined(STRNCPY) 493.Lend_of_string: 494 teq r2, #0 /* any bytes left to zero? */ 495 beq 3f /* no, just return. */ 496 mov r1, #0 /* yes, prepare to zero */ 497 cmp r2, #16 /* some, but not a lot? */ 498 ble 1f 499 mov r4, lr /* preserve lr */ 500 bl PLT_SYM(_C_LABEL(memset)) /* yes, and let memset do it */ 501 mov lr, r4 /* restore lr */ 502 b 3f /* return */ 5031: add ip, r0, r2 /* calculate stopping point */ 5042: strb r1, [r0], #1 /* clear a byte */ 505 cmp r0, ip /* done? 
*/ 506 blt 2b /* no, clear next byte */ 5073: mov r0, r6 /* restore dst pointer */ 508 pop {r4-r9} /* restore registers */ 509 RET 510.Ldst_full: 511.Ldst_full_word_aligned: 512 /* 513 * Destination was filled (but not NUL terminated). 514 * All that's left is return the start of dst 515 */ 516 mov r0, r6 /* restore dst pointer */ 517 pop {r4-r9} /* restore registers */ 518 RET 519#else 520.Lend_of_string: 521 mov r0, r6 /* restore dst pointer */ 522 pop {r4-r9} /* restore registers */ 523 RET 524#endif 525END(FUNCNAME) 526