/*-
 * Copyright (c) 2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: strcpy_arm.S,v 1.3 2013/08/11 04:56:32 matt Exp $")

#ifdef STRLCPY
#ifdef _LIBC
WEAK_ALIAS(strlcpy, _strlcpy)
#endif
#define	FUNCNAME	strlcpy
#elif defined(STRNCPY)
#define	FUNCNAME	strncpy
#else
#define	FUNCNAME	strcpy
#endif

#ifdef _LIBC
#include "namespace.h"
#endif

#ifdef __ARMEL__
#define	lslo		lsr	/* shift to lower address */
#define	lshi		lsl	/* shift to higher address */
#define	BYTE0		0x000000ff
#define	BYTE1		0x0000ff00
#define	BYTE2		0x00ff0000
#define	BYTE3		0xff000000
#else
#define	lslo		lsl	/* shift to lower address */
#define	lshi		lsr	/* shift to higher address */
#define	BYTE0		0xff000000
#define	BYTE1		0x00ff0000
#define	BYTE2		0x0000ff00
#define	BYTE3		0x000000ff
#endif

/*
 * On armv6 and later, to quickly determine if a word contains a NUL (0) byte,
 * we add 254 to each byte using the UQADD8 (unsigned saturating add 8)
 * instruction.  For every non-NUL byte, the result for that byte will become
 * 255.  For NUL, it will be 254.  When we complement the result of all 4 adds,
 * if the result is non-0 then we must have encountered a NUL.
 *
 * For earlier architectures, we just use tst on all 4 bytes.  There are other
 * algorithms to detect NULs but they take longer and use more instructions.
 */

/*
 * char *strcpy(char *dst, const char *src);
 * char *strncpy(char *dst, const char *src, size_t len);
 * size_t strlcpy(char *dst, const char *src, size_t len);
 *
 * Register usage after the prologue:
 *	r0	dst cursor
 *	r1	src cursor
 *	r2	space remaining in dst (strncpy/strlcpy only; for strlcpy
 *		one byte has already been set aside for the trailing NUL)
 *	r3	scratch; bit offset of the NUL within the current word
 *	r4	partially assembled dst word (incongruent path) / scratch
 *	r5	most recently loaded src byte or word
 *	r6	base for the return value: original dst for strcpy/strncpy,
 *		biased src for strlcpy so that (r1 - r6) is the src length
 *	r7	0xfefefefe NUL-detection constant (armv6 and later only)
 *	r8	insertion shift, 32 - r9 (incongruent path)
 *	r9	discard shift, 8 * (src & 3) (incongruent path)
 *	ip	scratch bit count
 *
 * NOTE(review): the remaining-space count in r2 is tested with signed
 * conditions (bmi/blt/bge), so lengths of 2GB or more are presumably not
 * handled; confirm whether callers can pass such values.
 */

	.text
ENTRY(FUNCNAME)
#if defined(STRLCPY)
	cmp	r2, #1			/* is length 1 or less? */
	bhi	1f			/* no, do normal */
	moveq	r3, #0			/* = 1? load NUL */
	strbeq	r3, [r0]		/* = 1? write NUL to dst */
	mov	r0, r1			/* move src to r0 */
	b	PLT_SYM(_C_LABEL(strlen)) /* and tailcall strlen */
1:
	sub	r2, r2, #1		/* leave one byte for NUL */
#endif
#if defined(STRNCPY)
	cmp	r2, #0			/* 0 length? */
	RETc(eq)			/* yes, just return */
#endif
	push	{r4-r9}			/* save some registers */
#ifdef _ARM_ARCH_6
#ifdef _ARM_ARCH_7
	movw	r7, #0xfefe		/* magic constant; 254 in each byte */
#else
	mov	r7, #0xfe		/* put 254 in low byte */
	orr	r7, r7, r7, lsl #8	/* move to next byte */
#endif
	orr	r7, r7, r7, lsl #16	/* move to next halfword */
#endif

#if defined(STRLCPY)
	add	r6, r1, #1		/* save for return (deal with NUL) */
#else
	mov	r6, r0			/* save for return */
#endif

.Ldst_align:
	tst	r0, #3			/* check for dst alignment */
	beq	.Ldst_aligned		/* ok, proceed to next check */
	ldrb	r5, [r1], #1		/* load a byte */
#if defined(STRNCPY)
	subs	r2, r2, #1		/* subtract out from count */
	bmi	.Ldst_full		/* negative? the dst has no more room */
#endif
	strb	r5, [r0], #1		/* store a byte */
	teq	r5, #0			/* was it a NUL? */
	beq	.Lend_of_string		/* yes, we are done */
#if defined(STRLCPY)
	subs	r2, r2, #1		/* subtract one from count */
	strbeq	r2, [r0], #1		/* zero? write trailing NUL */
	beq	.Ldst_full		/* zero? the dst has no more room */
#endif
	b	.Ldst_align		/* loop around for next byte */
.Ldst_aligned:
	tst	r1, #3			/* get the misalignment of src */
	bne	.Lincongruent		/* !=? incongruent (slower) */

	/*   =?   congruent (faster) */

.Lcongruent:
#if defined(STRLCPY)
	add	r6, r6, #3		/* compensate for word post-inc */
#endif
	b	.Lcongruent_mainloop_load
.Lcongruent_mainloop:
#if defined(STRLCPY) || defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from the count */
	bmi	.Lno_more_room		/* negative? can't store a whole word */
#endif
	str	r5, [r0], #4		/* store word into dst */
#if defined(STRLCPY)
	beq	.Lno_more_room		/* count is 0? no room in dst */
#endif
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/* count is 0? no room in dst */
#endif
.Lcongruent_mainloop_load:
	ldr	r5, [r1], #4		/* load word from source */
#if defined(_ARM_ARCH_6)
	uqadd8	r3, r5, r7		/* magic happens here */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lcongruent_mainloop	/* yes, no NULs, do it again */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/* no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/* no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/* no, does byte 3 contain a NUL? */
	bne	.Lcongruent_mainloop	/* no, no NULs, do it again */
#endif
#if defined(STRLCPY) && 0
	sub	r1, r1, #3		/* back up src pointer */
#endif
#if defined(_ARM_ARCH_6)
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ needs BE data */
#endif
	clz	r3, r3			/* count leading zeros */
#else
	mov	r3, #0			/* assume NUL is in byte 0 */
	tst	r5, #BYTE0		/* is NUL in byte 0? */
	beq	.Lcongruent_last_bytes	/* yes, done searching. */
	mov	r3, #8			/* assume NUL is in byte 1 */
	tst	r5, #BYTE1		/* is NUL in byte 1? */
	beq	.Lcongruent_last_bytes	/* yes, done searching. */
	mov	r3, #16			/* assume NUL is in byte 2 */
	tst	r5, #BYTE2		/* is NUL in byte 2? */
#if !defined(STRLCPY)
	beq	.Lcongruent_last_bytes	/* yes, done searching. */
	mov	r3, #24			/* NUL must be in byte 3 */
#else
	movne	r3, #24			/* no, then NUL is in byte 3 */
#endif
#endif /* _ARM_ARCH_6 */
#if defined(STRLCPY)
.Lcongruent_last_bytes:
#endif
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* position to point at NUL + 4 */
#endif
	b	.Llast_bytes		/* store the last bytes */


.Lincongruent:
	/*
	 * At this point dst is word aligned but src is not.  Round src down
	 * to a word boundary, load that word and discard the bytes that
	 * precede the original src address.
	 */
	and	r3, r1, #3		/* extract misalignment */
	mov	r9, r3, lsl #3		/* calculate discard shift */
	rsb	r8, r9, #32		/* calculate insertion shift */
#if defined(STRLCPY)
	add	r6, r6, #3		/* compensate for word post-inc */
#endif
	bic	r1, r1, #3		/* word align src */
	ldr	r5, [r1], #4		/* load word from src */
	mov	r4, r5, lslo r9		/* discard lo bytes from src */
	tst	r4, #BYTE0		/* does byte 0 contain a NUL? */
#if defined(STRNCPY)
	beq	.Lend_of_string		/* yes, zero fill rest of string */
#else
	moveq	r3, r9			/* yes, set offset */
	beq	.Lincongruent_end_of_string /* yes, deal with the last bytes */
#endif
	/*
	 * To keep the NUL test below from generating false positives,
	 * fill the bytes in the word we don't want to match with all 1s.
	 */
	mvn	r3, #0			/* create a mask */
	mov	r3, r3, lslo r8		/* zero out bytes being kept */
	orr	r5, r5, r3		/* merge src and mask */
#ifdef _ARM_ARCH_6
	uqadd8	r3, r5, r7		/* NUL detection magic happens */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lincongruent_mainloop_load /* yes, no NUL encountered! */
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ wants BE input */
#endif
	clz	r3, r3			/* count leading zeros */
#else
	/*
	 * We already tested for byte 0 above so we don't need to do it again.
	 */
	mov	r3, #24			/* assume NUL is in byte 3 */
	tst	r5, #BYTE1		/* did we find a NUL in byte 1? */
	subeq	r3, r3, #8		/* yes, decrement byte position */
	tstne	r5, #BYTE2		/* no, did we find a NUL in byte 2? */
	subeq	r3, r3, #8		/* yes, decrement byte position */
	tstne	r5, #BYTE3		/* no, did we find a NUL in byte 3? */
	bne	.Lincongruent_mainloop_load /* no, no NUL encountered! */
#endif
	mov	r5, r4			/* discard already dealt with bytes */
.Lincongruent_end_of_string:
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* then add offset to NUL */
#endif
	sub	r3, r3, r9		/* adjust NUL offset */
	b	.Llast_bytes		/* NUL encountered! finish up */

#if defined(STRLCPY) || defined(STRNCPY)
.Lincongruent_no_more_room:
	mov	r5, r4			/* move data to be stored to r5 */
	b	.Lno_more_room		/* fill remaining space */
#endif /* STRLCPY || STRNCPY */

	/*
	 * At this point both dst and src are word aligned and r4 contains
	 * partial contents from src.
	 */
.Lincongruent_mainloop:
	orr	r4, r4, r5, lshi r8	/* put new src data into dst word */
#if defined(STRLCPY) || defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bmi	.Lincongruent_no_more_room /* count < 0? dst will be full */
#endif
	str	r4, [r0], #4		/* store word in dst */
#if defined(STRLCPY)
	beq	.Lno_more_room		/* space left is 0? stop copy */
#endif
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/* space left is 0? stop copy */
#endif
	mov	r4, r5, lslo r9		/* move rest of src into dst word */
.Lincongruent_mainloop_load:
	ldr	r5, [r1], #4		/* read src */
#ifdef _ARM_ARCH_6
	uqadd8	r3, r5, r7		/* magic happens here */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lincongruent_mainloop	/* yes, no NUL encountered! */
	/*
	 * fall into this since we encountered a NUL.  At this point we have
	 * from 1-5 bytes (excluding trailing NUL) to write.
	 */
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ works on BE data */
#endif
	clz	r3, r3			/* count leading zeroes */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/* no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/* no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/* no, does byte 3 contain a NUL? */
	bne	.Lincongruent_mainloop	/* no, no NUL encountered! */
	/*
	 * fall into this since we encountered a NUL.  At this point we have
	 * from 1-5 bytes (excluding trailing NUL) to write.
	 */
	mov	r3, #0			/* assume a NUL is in byte 0 */
	tst	r5, #BYTE0		/* is there a NUL in byte 0? */
	beq	1f			/* yes, found a NUL! */
	mov	r3, #8			/* assume a NUL is in byte 1 */
	tst	r5, #BYTE1		/* is there a NUL in byte 1? */
	beq	1f			/* yes, found a NUL! */
	tst	r5, #BYTE2		/* is there a NUL in byte 2? */
	moveq	r3, #16			/* yes, mark its position */
	movne	r3, #24			/* no, it must be in byte 3 */
1:
#endif
	orr	r4, r4, r5, lshi r8	/* merge new and old src words */
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* adjust src to point to NUL */
#endif
	add	r3, r3, r8		/* add remainder bytes worth */
	cmp	r3, #32			/* do we have at least one word to write? */
	movlt	r5, r4			/* no, move source bytes to expected reg */
	blt	.Llast_bytes		/* no, deal with them */
#if defined(STRLCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bpl	1f			/* we have space for at least 4 */
	/*
	 * Since the space just went minus, we don't have enough room to
	 * write all 4 bytes.  In fact, the most we can write is 3 so
	 * just lie and say we have 3 bytes to write and discard the rest.
	 */
	add	r2, r2, #4		/* add 4 back */
	mov	r3, #24			/* say we have 3 bytes */
	mov	r5, r4			/* discard the bytes we can't store */
	b	.Llast_bytes		/* and treat this as our last word */
1:
#elif defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bmi	.Lincongruent_no_more_room /* count < 0? dst will be full */
#endif
	str	r4, [r0], #4		/* store dst word */
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/* space left is 0? stop copy */
#endif
#if defined(STRLCPY)
	bne	1f			/* we still have space remaining */
	strb	r2, [r0]		/* write final NUL */
	b	.Lend_of_string		/* we are done */
1:
#endif
	/*
	 * Subtract the 32 bits just written from the number of bits left
	 * to write.  If 0 bits are left and not doing strncpy, just write
	 * the trailing NUL and be done.
	 */
	subs	r3, r3, #32		/* we wrote one word */
#if !defined(STRNCPY)
	bne	1f			/* no more data? */
	strb	r3, [r0]		/* write final NUL */
	b	.Lend_of_string		/* we are done */
1:
#endif
	/*
	 * At this point after writing 4 bytes, we have 0 or 1 bytes left to
	 * write (excluding the trailing NUL).
	 */
	mov	r5, r5, lslo r9		/* get remainder of src */

	/* fall into .Llast_bytes */

#if !defined(STRLCPY)
.Lcongruent_last_bytes:
#endif
.Llast_bytes:
	/*
	 * r5 contains the last word and is in host byte order.
	 * r3 contains number of bits left to copy (0..31).
	 * r1 should point to the NUL + 4.
	 */
	bics	ip, r3, #7		/* truncate bits, is result 0? */
#if !defined(STRNCPY)
	bne	1f			/* no, have to write some bytes */
	strb	ip, [r0]		/* yes, write trailing NUL */
	b	.Lend_of_string		/* yes, and we are the end */
1:
#endif
#if defined(STRLCPY) || defined(STRNCPY)
	cmp	r2, ip, lsr #3		/* is there enough room? */
	movlt	ip, r2, lsl #3		/* no, only fill remaining space */
#endif
	mvn	r3, #0			/* create a mask */
	mov	r3, r3, lshi ip		/* clear leading bytes */
	bic	r5, r5, r3		/* clear trailing bytes */
#if defined(STRNCPY)
	cmp	r2, #4			/* room for 4 bytes? */
	movge	ip, #32			/* yes, we will write 4 bytes */
	bge	2f			/* yes, and go do it */
	mvn	r3, #0			/* create a mask (again) */
	mov	ip, r2, lsl #3		/* remaining space bytes -> bits */
	mov	r3, r3, lshi ip		/* clear remaining bytes */
#elif defined(STRLCPY)
	cmp	r2, #3			/* do we have room for 3 bytes & NUL? */
	bge	2f			/* yes, just clear out dst */
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#else
	cmp	ip, #24			/* are we writing 3 bytes & a NUL? */
	bge	2f			/* yes, just overwrite dst */
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#endif /* !STRNCPY */
	ldr	r4, [r0]		/* fetch dst word */
	and	r4, r4, r3		/* preserve trailing bytes */
	orr	r5, r5, r4		/* merge dst with src */
2:	str	r5, [r0], #4		/* store last word */
#if defined(STRNCPY)
	subs	r2, r2, ip, lsr #3	/* subtract bytes cleared from count */
	beq	.Ldst_full_word_aligned
#endif
	b	.Lend_of_string

#if defined(STRLCPY) || defined(STRNCPY)
.Lno_more_room:
#if defined(STRLCPY)
	cmp	r2, #-1			/* was there room for 3 bytes? */
	blt	1f			/* less, partial word write */
	cmp	r2, #0			/* no space left? */
	strbeq	r2, [r0]		/* write the final NUL */
	bicne	r5, r5, #BYTE3		/* clear trailing NUL */
	strne	r5, [r0]		/* write last word */
	b	.Ldst_full_word_aligned	/* the dst buffer is full */
1:
#endif /* STRLCPY */
	add	r2, r2, #4		/* restore remaining space */
	ldr	r4, [r0]		/* load dst */
	mvn	r3, #0			/* create a mask */
	mov	r2, r2, lsl #3		/* bytes -> bits */
	mov	r3, r3, lshi r2		/* clear leading bytes */
	bic	r5, r5, r3		/* clear trailing bytes from src */
#if defined(STRLCPY)
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#endif /* STRLCPY */
	and	r4, r4, r3		/* preserve trailing bytes in dst */
	orr	r4, r4, r5		/* merge src with dst */
	str	r4, [r0], #4		/* write last word */
	b	.Ldst_full_word_aligned
#endif /* STRLCPY || STRNCPY */

#if defined(STRLCPY)
	/*
	 * Destination was filled (and NUL terminated).
	 * All that's left is count the number of bytes left in src.
	 */
.Ldst_full:
1:	tst	r1, #3			/* src word aligned? */
	beq	2f			/* yes, so do it word by word */
	ldrb	r5, [r1], #1		/* load next byte */
	teq	r5, #0			/* is it a NUL? */
	bne	1b			/* no, check alignment */
	b	.Lend_of_string		/* and return */
2:	add	r6, r6, #3		/* compensate for post-inc */
.Ldst_full_word_aligned:
3:	ldr	r5, [r1], #4		/* load word from src */
#ifdef _ARM_ARCH_6
	uqadd8	r5, r5, r7		/* perform NUL magic */
	mvns	r5, r5			/* complement all 0s? */
	beq	3b			/* yes, no NUL so get next word */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/* no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/* no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/* no, does byte 3 contain a NUL? */
	bne	3b			/* no, no NUL encountered! */
#endif
#ifdef _ARM_ARCH_6
#ifdef __ARMEL__
	rev	r5, r5			/* CLZ needs BE data */
#endif
	clz	r5, r5			/* count leading zeros */
	add	r1, r1, r5, lsr #3	/* add offset to NUL to src pointer */
#else
	tst	r5, #BYTE0		/* is there a NUL in byte 0? */
	beq	4f			/* yes, don't check any further */
	add	r1, r1, #1		/* no, advance src pointer by 1 */
	tst	r5, #BYTE1		/* is there a NUL in byte 1? */
	beq	4f			/* yes, don't check any further */
	add	r1, r1, #1		/* no, advance src pointer by 1 */
	tst	r5, #BYTE2		/* is there a NUL in byte 2? */
	addne	r1, r1, #1		/* no, there must be in byte 3 */
4:
#endif /* _ARM_ARCH_6 */
.Lend_of_string:
	sub	r0, r1, r6		/* subtract start from finish */
	pop	{r4-r9}			/* restore registers */
	RET
#elif defined(STRNCPY)
.Lend_of_string:
	teq	r2, #0			/* any bytes left to zero? */
	beq	3f			/* no, just return. */
	mov	r1, #0			/* yes, prepare to zero */
	cmp	r2, #16			/* some, but not a lot? */
	ble	1f
	mov	r4, lr			/* preserve lr */
	bl	PLT_SYM(_C_LABEL(memset)) /* yes, and let memset do it */
	mov	lr, r4			/* restore lr */
	b	3f			/* return */
1:	add	ip, r0, r2		/* calculate stopping point */
2:	strb	r1, [r0], #1		/* clear a byte */
	cmp	r0, ip			/* done? */
	/*
	 * r0 and ip are addresses, so the comparison must be unsigned;
	 * a signed test stops early when dst straddles 0x80000000.
	 */
	blo	2b			/* no, clear next byte */
3:	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
.Ldst_full:
.Ldst_full_word_aligned:
	/*
	 * Destination was filled (but not NUL terminated).
	 * All that's left is return the start of dst
	 */
	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
#else
.Lend_of_string:
	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
#endif
END(FUNCNAME)