/*	$NetBSD: memmove.S,v 1.4 2013/01/28 06:23:44 matt Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

#if defined(__ARM_EABI__) && !defined(_BCOPY)
STRONG_ALIAS(__aeabi_memmove, memmove)
#endif

#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(const void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
#endif
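
/*
 * A rough C sketch of the dispatch logic that follows (illustrative
 * only, not the exact code generated here; "dist" is hypothetical):
 *
 *	if (dst == src)
 *		return dst;			-- nothing to do
 *	dist = (dst > src) ? dst - src : src - dst;
 *	if (dist >= len)
 *		return memcpy(dst, src, len);	-- no overlap, memcpy is safe
 *	if (dst < src)
 *		copy forwards (ascending addresses);
 *	else
 *		copy backwards (descending addresses);
 */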
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETc(eq)			/* Bail now if src/dst are the same */
	subhs	r3, r0, r1		/* if (dst > src) r3 = dst - src */
	sublo	r3, r1, r0		/* if (src > dst) r3 = src - dst */
	cmp	r3, r2			/* if (r3 < len) we have an overlap */
	bhs	PIC_SYM(_C_LABEL(memcpy), PLT)

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards

	moveq	r0, #0			/* Quick abort for len=0 */
	RETc(eq)

	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* give r4 back */

.Lmemmove_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmeqia	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
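
/*
 * Unaligned-source forward copy.  The trick (shown here for
 * little-endian; __ARMEB__ swaps the shift directions): word-align
 * the source pointer, keep the partially-consumed word in lr, and
 * build each output word from two adjacent source words.  For a
 * source 1 byte past alignment, each output word is roughly
 *
 *	out = (cur >> 8) | (next << 24);
 *
 * The fsrcul1/fsrcul2/fsrcul3 paths below handle source offsets of
 * 1, 2 and 3 bytes with 8/24, 16/16 and 24/8 bit shift pairs.
 */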
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1
	b	.Lmemmove_fl4
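
/*
 * Backwards copy: taken when the destination overlaps the source
 * from above (src < dst < src + len).  Both pointers are first
 * advanced past the end of their buffers, then the same alignment
 * cases as the forward path are handled with descending
 * (ldmdb/stmdb) transfers, so the high source bytes are moved
 * before they can be overwritten.
 */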
.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgedb	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmgedb	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmgedb	r1!, {r3, r12}
	stmgedb	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETc(eq)

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3
	b	.Lmemmove_bl4
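
/*
 * bsrcul2 and bsrcul1 below mirror bsrcul3 for source offsets of
 * 2 and 1 bytes, pairing 16/16 and 24/8 bit shifts.  On
 * little-endian, the offset-2 case builds each output word as
 * roughly
 *
 *	out = (cur << 16) | (lower >> 16);
 *
 * where "lower" is the next word down from the source.
 */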
.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* NB: despite its name, this loop moves 16 bytes per iteration */
.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemmove_bl4