/*-
 * Copyright (c) 2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: strcpy_arm.S,v 1.4 2017/01/13 13:14:54 christos Exp $")

/*
 * One source, three routines: the including build selects the variant by
 * defining STRLCPY, STRNCPY, or neither (plain strcpy).
 */
#ifdef STRLCPY
#ifdef _LIBC
WEAK_ALIAS(strlcpy, _strlcpy)
#endif
#define	FUNCNAME	_strlcpy
#elif defined(STRNCPY)
#define	FUNCNAME	_strncpy
#else
#define	FUNCNAME	_strcpy
#endif

/*
 * Endian-neutral helpers.  "lslo" shifts data towards the byte at the
 * lowest address of a word, "lshi" towards the byte at the highest address.
 * BYTEn is the mask of the byte at address offset n within a loaded word.
 */
#ifdef __ARMEL__
#define	lslo		lsr		/* shift to lower address */
#define	lshi		lsl		/* shift to higher address */
#define	BYTE0		0x000000ff
#define	BYTE1		0x0000ff00
#define	BYTE2		0x00ff0000
#define	BYTE3		0xff000000
#else
#define	lslo		lsl		/* shift to lower address */
#define	lshi		lsr		/* shift to higher address */
#define	BYTE0		0xff000000
#define	BYTE1		0x00ff0000
#define	BYTE2		0x0000ff00
#define	BYTE3		0x000000ff
#endif

/*
 * On armv6 and later, to quickly determine if a word contains a NUL (0) byte,
 * we add 254 to each byte using the UQADD8 (unsigned saturating add 8)
 * instruction.  For every non-NUL byte, the result for that byte will become
 * 255.  For NUL, it will be 254.  When we complement the result of all 4 adds,
 * if the result is non-0 then we must have encountered a NUL.
 *
 * For earlier architectures, we just use tst on all 4 bytes.  There are other
 * algorithms to detect NULs but they take longer and use more instructions.
 */

/*
 * char *strcpy(char *dst, const char *src);
 * char *strncpy(char *dst, const char *src, size_t len);
 * size_t strlcpy(char *dst, const char *src, size_t len);
 */

/*
 * Register usage:
 *	r0	dst cursor; holds the return value on exit
 *	r1	src cursor
 *	r2	STRLCPY/STRNCPY only: space remaining in dst (for STRLCPY
 *		this excludes the byte reserved for the trailing NUL)
 *	r3	scratch: bit offset of the NUL within a word, or a byte mask
 *	r4	scratch: dst word being assembled or merged
 *	r5	most recently loaded src byte/word
 *	r6	STRLCPY: bias subtracted from the final src cursor to yield
 *		the source length; otherwise the original dst to return
 *	r7	armv6+: 0xfefefefe constant for the UQADD8 NUL test
 *	r8	incongruent copy: insertion shift (32 - 8 * misalignment)
 *	r9	incongruent copy: discard shift (8 * misalignment)
 *	ip	.Llast_bytes: number of bits of r5 to store
 *
 * r4-r9 are pushed after the early-out checks and popped before every
 * return that follows the push.
 */

	.text
ENTRY(FUNCNAME)
#if defined(STRLCPY)
	cmp	r2, #1			/* is length 1 or less? */
	bhi	1f			/* no, do normal */
	moveq	r3, #0			/* = 1? load NUL */
	strbeq	r3, [r0]		/* = 1? write NUL to dst */
	mov	r0, r1			/* move src to r0 */
	b	PLT_SYM(_C_LABEL(strlen)) /* and tailcall strlen */
1:
	sub	r2, r2, #1		/* leave one byte for NUL */
#endif
#if defined(STRNCPY)
	cmp	r2, #0			/* 0 length? */
	RETc(eq)			/* yes, just return */
#endif
	push	{r4-r9}			/* save some registers */
#ifdef _ARM_ARCH_6
#ifdef _ARM_ARCH_7
	movw	r7, #0xfefe		/* magic constant; 254 in each byte */
#else
	mov	r7, #0xfe		/* put 254 in low byte */
	orr	r7, r7, r7, lsl #8	/* move to next byte */
#endif
	orr	r7, r7, r7, lsl #16	/* move to next halfword */
#endif

#if defined(STRLCPY)
	add	r6, r1, #1		/* save for return (deal with NUL) */
#else
	mov	r6, r0			/* save for return */
#endif

	/*
	 * Copy a byte at a time until dst is word aligned (or the string
	 * or the available space ends first).
	 */
.Ldst_align:
	tst	r0, #3			/* check for dst alignment */
	beq	.Ldst_aligned		/* ok, proceed to next check */
	ldrb	r5, [r1], #1		/* load a byte */
#if defined(STRNCPY)
	subs	r2, r2, #1		/* subtract out from count */
	bmi	.Ldst_full		/* negative? the dst has no more room */
#endif
	strb	r5, [r0], #1		/* store a byte */
	teq	r5, #0			/* was it a NUL? */
	beq	.Lend_of_string		/* yes, we are done */
#if defined(STRLCPY)
	subs	r2, r2, #1		/* subtract one from count */
	strbeq	r2, [r0], #1		/* zero? write trailing NUL */
	beq	.Ldst_full		/* zero? the dst has no more room */
#endif
	b	.Ldst_align		/* loop around for next byte */
.Ldst_aligned:
	tst	r1, #3			/* get the misalignment of src */
	bne	.Lincongruent		/* !=? incongruent (slower) */

	/* =? congruent (faster) */

	/*
	 * Both src and dst are word aligned: copy whole words until a word
	 * containing a NUL is loaded or the space runs out.
	 */
.Lcongruent:
#if defined(STRLCPY)
	add	r6, r6, #3		/* compensate for word post-inc */
#endif
	b	.Lcongruent_mainloop_load
.Lcongruent_mainloop:
#if defined(STRLCPY) || defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from the count */
	bmi	.Lno_more_room
#endif
	str	r5, [r0], #4		/* store word into dst */
	/* the flags tested below are still those set by the subs above */
#if defined(STRLCPY)
	beq	.Lno_more_room		/* count is 0? no room in dst */
#endif
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/* count is 0? no room in dst */
#endif
.Lcongruent_mainloop_load:
	ldr	r5, [r1], #4		/* load word from source */
#if defined(_ARM_ARCH_6)
	uqadd8	r3, r5, r7		/* magic happens here */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lcongruent_mainloop	/* yes, no NULs, do it again */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/* no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/* no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/* no, does byte 3 contain a NUL? */
	bne	.Lcongruent_mainloop	/* no NULs found, do it again */
#endif
	/*
	 * A NUL is somewhere in r5.  Compute in r3 the bit offset of the
	 * NUL from the lowest-addressed byte (the low 3 bits of r3 are
	 * ignored by the consumers, which shift or mask them away).
	 */
#if defined(_ARM_ARCH_6)
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ needs BE data */
#endif
	clz	r3, r3			/* count leading zeros */
#else
	mov	r3, #0			/* assume NUL is in byte 0 */
	tst	r5, #BYTE0		/* is NUL in byte 0? */
	beq	.Lcongruent_last_bytes	/* yes, done searching. */
	mov	r3, #8			/* assume NUL is in byte 1 */
	tst	r5, #BYTE1		/* is NUL in byte 1? */
	beq	.Lcongruent_last_bytes	/* yes, done searching. */
	mov	r3, #16			/* assume NUL is in byte 2 */
	tst	r5, #BYTE2		/* is NUL in byte 2? */
#if !defined(STRLCPY)
	beq	.Lcongruent_last_bytes	/* yes, done searching. */
	mov	r3, #24			/* NUL must be in byte 3 */
#else
	movne	r3, #24			/* no, then NUL is in byte 3 */
#endif
#endif /* _ARM_ARCH_6 */
#if defined(STRLCPY)
.Lcongruent_last_bytes:
	add	r1, r1, r3, lsr #3	/* position to point at NUL + 4 */
#endif
	b	.Llast_bytes		/* store the last bytes */


.Lincongruent:
	/*
	 * At this point dst is word aligned but src is not.  Round src down
	 * to a word boundary, load that word and discard the bytes that
	 * precede the original src address.
	 */
	and	r3, r1, #3		/* extract misalignment */
	mov	r9, r3, lsl #3		/* calculate discard shift */
	rsb	r8, r9, #32		/* calculate insertion shift */
#if defined(STRLCPY)
	add	r6, r6, #3		/* compensate for word post-inc */
#endif
	bic	r1, r1, #3		/* word align src */
	ldr	r5, [r1], #4		/* load word from src */
	mov	r4, r5, lslo r9		/* discard lo bytes from src */
	tst	r4, #BYTE0		/* does byte 0 contain a NUL? */
#if defined(STRNCPY)
	beq	.Lend_of_string		/* yes, zero fill rest of string */
#else
	moveq	r3, r9			/* yes, set offset */
	beq	.Lincongruent_end_of_string /* yes, deal with the last bytes */
#endif
	/*
	 * To keep our test for NULs below from generating false positives,
	 * fill the bytes in the word we don't want to match with all 1s.
	 */
	mvn	r3, #0			/* create a mask */
	mov	r3, r3, lslo r8		/* zero out bytes being kept */
	orr	r5, r5, r3		/* merge src and mask */
#ifdef _ARM_ARCH_6
	uqadd8	r3, r5, r7		/* NUL detection magic happens */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lincongruent_mainloop_load /* yes, no NUL encountered! */
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ wants BE input */
#endif
	clz	r3, r3			/* count leading zeros */
#else
	/*
	 * The first wanted byte was already tested above so we don't need
	 * to test it again.  Once a tst sets EQ, the remaining tstne are
	 * skipped and every following subeq executes, so r3 ends up as the
	 * bit offset of the first NUL byte.
	 */
	mov	r3, #24			/* assume NUL is in byte 3 */
	tst	r5, #BYTE1		/* did we find a NUL in byte 1? */
	subeq	r3, r3, #8		/* yes, decrement byte position */
	tstne	r5, #BYTE2		/* no, did we find a NUL in byte 2? */
	subeq	r3, r3, #8		/* yes, decrement byte position */
	tstne	r5, #BYTE3		/* no, did we find a NUL in byte 3? */
	bne	.Lincongruent_mainloop_load /* no, no NUL encountered! */
#endif
	mov	r5, r4			/* discard already dealt with bytes */
.Lincongruent_end_of_string:
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* then add offset to NUL */
#endif
	sub	r3, r3, r9		/* adjust NUL offset */
	b	.Llast_bytes		/* NUL encountered! finish up */

#if defined(STRLCPY) || defined(STRNCPY)
.Lincongruent_no_more_room:
	mov	r5, r4			/* move data to be stored to r5 */
	b	.Lno_more_room		/* fill remaining space */
#endif /* STRLCPY || STRNCPY */

	/*
	 * At this point both dst and src are word aligned and r4 contains
	 * partial contents from src.
	 */
.Lincongruent_mainloop:
	orr	r4, r4, r5, lshi r8	/* put new src data into dst word */
#if defined(STRLCPY) || defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bmi	.Lincongruent_no_more_room /* count < 0? dst will be full */
#endif
	str	r4, [r0], #4		/* store word in dst */
	/* the flags tested below are still those set by the subs above */
#if defined(STRLCPY)
	beq	.Lno_more_room		/* space left is 0? stop copy */
#endif
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/* space left is 0? stop copy */
#endif
	mov	r4, r5, lslo r9		/* move rest of src into dst word */
.Lincongruent_mainloop_load:
	ldr	r5, [r1], #4		/* read src */
#ifdef _ARM_ARCH_6
	uqadd8	r3, r5, r7		/* magic happens here */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lincongruent_mainloop	/* yes, no NUL encountered! */
	/*
	 * Fall into this since we encountered a NUL.  At this point we have
	 * from 1 to 6 bytes (excluding trailing NUL) to write: 1-3 pending
	 * in r4 plus 0-3 preceding the NUL in r5.
	 */
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ works on BE data */
#endif
	clz	r3, r3			/* count leading zeroes */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/* no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/* no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/* no, does byte 3 contain a NUL? */
	bne	.Lincongruent_mainloop	/* no, no NUL encountered! */
	/*
	 * Fall into this since we encountered a NUL.  At this point we have
	 * from 1 to 6 bytes (excluding trailing NUL) to write: 1-3 pending
	 * in r4 plus 0-3 preceding the NUL in r5.
	 */
	mov	r3, #0			/* assume a NUL is in byte 0 */
	tst	r5, #BYTE0		/* is there a NUL in byte 0? */
	beq	1f			/* yes, found a NUL! */
	mov	r3, #8			/* assume a NUL is in byte 1 */
	tst	r5, #BYTE1		/* is there a NUL in byte 1? */
	beq	1f			/* yes, found a NUL! */
	tst	r5, #BYTE2		/* is there a NUL in byte 2? */
	moveq	r3, #16			/* yes, mark its position */
	movne	r3, #24			/* no, it must be in byte 3 */
1:
#endif
	orr	r4, r4, r5, lshi r8	/* merge new and old src words */
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* adjust src to point to NUL */
#endif
	add	r3, r3, r8		/* add remainder bytes worth */
	cmp	r3, #32			/* do we have at least one word to write? */
	movlt	r5, r4			/* no, move source bytes to expected reg */
	blt	.Llast_bytes		/* no, deal with them */
#if defined(STRLCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bpl	1f			/* we have space for at least 4 */
	/*
	 * Since the space just went minus, we don't have enough room to
	 * write all 4 bytes.  In fact, the most we can write is 3 so
	 * just lie and say we have 3 bytes to write and discard the rest.
	 */
	add	r2, r2, #4		/* add 4 back */
	mov	r3, #24			/* say we have 3 bytes */
	mov	r5, r4			/* discard the bytes we can't store */
	b	.Llast_bytes		/* and treat this as our last word */
1:
#elif defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bmi	.Lincongruent_no_more_room /* count < 0? dst will be full */
#endif
	str	r4, [r0], #4		/* store dst word */
	/* the flags tested below are still those set by the subs above */
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/* space left is 0? stop copy */
#endif
#if defined(STRLCPY)
	bne	1f			/* we still have space remaining */
	strb	r2, [r0]		/* write final NUL */
	b	.Lend_of_string		/* we are done */
1:
#endif
	/*
	 * Subtract the 32 bits just written from the number of bits left
	 * to write.  If 0 bits are left and not doing strncpy, just write
	 * the trailing NUL and be done.
	 */
	subs	r3, r3, #32		/* we wrote one word */
#if !defined(STRNCPY)
	bne	1f			/* no more data? */
	strb	r3, [r0]		/* write final NUL */
	b	.Lend_of_string		/* we are done */
1:
#endif
	/*
	 * At this point after writing 4 bytes, we have 0 to 2 bytes left to
	 * write (excluding the trailing NUL).
	 */
	mov	r5, r5, lslo r9		/* get remainder of src */

	/* fall into .Llast_bytes */

#if !defined(STRLCPY)
.Lcongruent_last_bytes:
#endif
.Llast_bytes:
	/*
	 * r5 contains the last word and is in host byte order.
	 * r3 contains number of bits left to copy (0..31).
	 * r1 should point to the NUL + 4.
	 */
	bics	ip, r3, #7		/* truncate bits, is result 0? */
#if !defined(STRNCPY)
	bne	1f			/* no, have to write some bytes */
	strb	ip, [r0]		/* yes, write trailing NUL */
	b	.Lend_of_string		/* yes, and we are the end */
1:
#endif
#if defined(STRLCPY) || defined(STRNCPY)
	cmp	r2, ip, lsr #3		/* is there enough room? */
	movlt	ip, r2, lsl #3		/* no, only fill remaining space */
#endif
	mvn	r3, #0			/* create a mask */
	mov	r3, r3, lshi ip		/* clear leading bytes */
	bic	r5, r5, r3		/* clear trailing bytes */
#if defined(STRNCPY)
	cmp	r2, #4			/* room for 4 bytes? */
	movge	ip, #32			/* yes, we will write 4 bytes */
	bge	2f			/* yes, and go do it */
	mvn	r3, #0			/* create a mask (again) */
	mov	ip, r2, lsl #3		/* remaining space bytes -> bits */
	mov	r3, r3, lshi ip		/* clear remaining bytes */
#elif defined(STRLCPY)
	cmp	r2, #3			/* do we have room for 3 bytes & NUL? */
	bge	2f			/* yes, just clear out dst */
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#else
	cmp	ip, #24			/* are we writing 3 bytes & a NUL? */
	bge	2f			/* yes, just overwrite dst */
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#endif /* !STRNCPY */
	ldr	r4, [r0]		/* fetch dst word */
	and	r4, r4, r3		/* preserve trailing bytes */
	orr	r5, r5, r4		/* merge dst with src */
2:	str	r5, [r0], #4		/* store last word */
#if defined(STRNCPY)
	subs	r2, r2, ip, lsr #3	/* subtract bytes cleared from count */
	beq	.Ldst_full_word_aligned
#endif
	b	.Lend_of_string

#if defined(STRLCPY) || defined(STRNCPY)
	/*
	 * The space in dst ran out before a NUL was seen in src.  r5 holds
	 * the word that did not (completely) fit and r2 holds the remaining
	 * space minus 4 (or 0 if the word was already stored).
	 */
.Lno_more_room:
#if defined(STRLCPY)
	cmp	r2, #-1			/* tried to write 3 bytes? */
	blt	1f			/* less, partial word write */
	cmp	r2, #0			/* no space left? */
	strbeq	r2, [r0]		/* write the final NUL */
	bicne	r5, r5, #BYTE3		/* clear trailing NUL */
	strne	r5, [r0]		/* write last word */
	b	.Ldst_full_word_aligned	/* the dst buffer is full */
1:
#endif /* STRLCPY */
	add	r2, r2, #4		/* restore remaining space */
	ldr	r4, [r0]		/* load dst */
	mvn	r3, #0			/* create a mask */
	mov	r2, r2, lsl #3		/* bytes -> bits */
	mov	r3, r3, lshi r2		/* clear leading bytes */
	bic	r5, r5, r3		/* clear trailing bytes from src */
#if defined(STRLCPY)
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#endif /* STRLCPY */
	and	r4, r4, r3		/* preserve trailing bytes in dst */
	orr	r4, r4, r5		/* merge src with dst */
	str	r4, [r0], #4		/* write last word */
	b	.Ldst_full_word_aligned
#endif /* STRLCPY || STRNCPY */

#if defined(STRLCPY)
	/*
	 * Destination was filled (and NUL terminated).
	 * All that's left is count the number of bytes left in src.
	 */
.Ldst_full:
1:	tst	r1, #3			/* src word aligned? */
	beq	2f			/* yes, so do it word by word */
	ldrb	r5, [r1], #1		/* load next byte */
	teq	r5, #0			/* is it a NUL? */
	bne	1b			/* no, check alignment */
	b	.Lend_of_string		/* and return */
2:	add	r6, r6, #3		/* compensate for post-inc */
.Ldst_full_word_aligned:
3:	ldr	r5, [r1], #4		/* load word from src */
#ifdef _ARM_ARCH_6
	uqadd8	r5, r5, r7		/* perform NUL magic */
	mvns	r5, r5			/* complement all 0s? */
	beq	3b			/* yes, no NUL so get next word */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/* no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/* no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/* no, does byte 3 contain a NUL? */
	bne	3b			/* no, no NUL encountered! */
#endif
#ifdef _ARM_ARCH_6
#ifdef __ARMEL__
	rev	r5, r5			/* CLZ needs BE data */
#endif
	clz	r5, r5			/* count leading zeros */
	add	r1, r1, r5, lsr #3	/* add offset to NUL to src pointer */
#else
	tst	r5, #BYTE0		/* is there a NUL in byte 0? */
	beq	4f			/* yes, don't check any further */
	add	r1, r1, #1		/* no, advance src pointer by 1 */
	tst	r5, #BYTE1		/* is there a NUL in byte 1? */
	beq	4f			/* yes, don't check any further */
	add	r1, r1, #1		/* no, advance src pointer by 1 */
	tst	r5, #BYTE2		/* is there a NUL in byte 2? */
	addne	r1, r1, #1		/* no, there must be in byte 3 */
4:
#endif /* _ARM_ARCH_6 */
.Lend_of_string:
	sub	r0, r1, r6		/* subtract start from finish */
	pop	{r4-r9}			/* restore registers */
	RET
#elif defined(STRNCPY)
	/*
	 * The NUL has been copied; zero the r2 bytes of dst that remain,
	 * inline for small counts and via memset otherwise.
	 */
.Lend_of_string:
	teq	r2, #0			/* any bytes left to zero? */
	beq	3f			/* no, just return. */
	mov	r1, #0			/* yes, prepare to zero */
	cmp	r2, #16			/* some, but not a lot? */
	ble	1f
	mov	r4, lr			/* preserve lr */
	bl	PLT_SYM(_C_LABEL(memset)) /* yes, and let memset do it */
	mov	lr, r4			/* restore lr */
	b	3f			/* return */
1:	add	ip, r0, r2		/* calculate stopping point */
2:	strb	r1, [r0], #1		/* clear a byte */
	cmp	r0, ip			/* done? */
	blt	2b			/* no, clear next byte */
3:	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
.Ldst_full:
.Ldst_full_word_aligned:
	/*
	 * Destination was filled (but not NUL terminated).
	 * All that's left is return the start of dst
	 */
	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
#else
.Lend_of_string:
	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
#endif
END(FUNCNAME)