/*	$NetBSD: memmove.S,v 1.2 2007/06/20 22:31:21 scw Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/*
 * void *memmove(void *dst, const void *src, size_t len)
 *
 * ARM32 memmove.  Register contract on entry (AAPCS argument registers):
 *	r0 = dst, r1 = src, r2 = len.  Returns dst in r0.
 *	r3, r12 (ip) and lr are used as scratch; r4/r5 are borrowed and
 *	saved/restored on the stack around the big block-copy loops.
 *
 * Strategy visible below:
 *	- If the two buffers do not actually overlap (|dst - src| >= len),
 *	  tail-call memcpy and let it do the work.
 *	- Otherwise copy forwards (ascending, ".Lmemmove_f*" labels) when
 *	  dst < src, or backwards (descending, ".Lmemmove_b*" labels) when
 *	  dst > src, so overlapping bytes are never clobbered before use.
 *	- Each direction first byte-copies until dst is word-aligned, then
 *	  either block-copies with ldm/stm (source also aligned) or
 *	  synthesizes unaligned source words with shift/OR pairs
 *	  (".L*srcul*" sections, one per possible src misalignment 1/2/3;
 *	  shift direction depends on endianness, hence the __ARMEB__ blocks).
 *
 * When built with -D_BCOPY this file instead provides
 * bcopy(src, dst, len), which is the same code with r0/r1 swapped.
 */
#ifndef _BCOPY
/* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
ENTRY(memmove)
#else
/* bcopy = memcpy/memmove with arguments reversed. */
/* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
ENTRY(bcopy)
	/* switch the source and destination registers (three-XOR swap,
	 * no scratch register needed) */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
#endif
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETc(eq)			/* Bail now if src/dst are the same */
	subhs	r3, r0, r1		/* if (dst > src) r3 = dst - src */
	sublo	r3, r1, r0		/* if (src > dst) r3 = src - dst */
	cmp	r3, r2			/* if (r3 >= len) we have an overlap */
	bhs	PIC_SYM(_C_LABEL(memcpy), PLT)	/* no overlap: memcpy does it */

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards	/* src < dst: must copy descending */

	/* NOTE(review): eq here reflects the "cmp r1, r0" above, and the
	 * src == dst case already returned via RETc(eq) earlier, so this
	 * pair looks unreachable; retained from the original code. */
	moveq	r0, #0			/* Quick abort for len=0 */
	RETc(eq)

	/* ---- forward (ascending) copy: dst < src ---- */
	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul	/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10		/* ge iff at least 16 bytes remain */
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14		/* undo the 0x14 bias; ge primes loop */

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	/* 4..11 bytes left: copy one word (lt) or two words (ge) */
	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmeqia	sp!, {r0, pc}		/* done: restore dst and return */

	/* copy the crud byte at a time: cmp primes ge/gt so exactly
	 * r2 (1, 2 or 3) bytes are moved */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dst */
	cmp	r12, #2

	/* align destination with byte copies (1, 2 or 3 of them) */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ...
	 * Round src down to a word boundary, preload one aligned word
	 * into lr, then build each output word from two adjacent aligned
	 * words with shift/OR.  r12 = src & 3 selects which variant
	 * (1, 2 or 3 byte offset); shift direction is endian-dependent. */
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* src offset 1: each output word = 3 bytes of prev word + 1 byte
	 * of next; 16 bytes per iteration */
.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3		/* point src back at next unread byte */
	b	.Lmemmove_fl4

	/* src offset 2: halfword + halfword per output word */
.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2		/* point src back at next unread byte */
	b	.Lmemmove_fl4

	/* src offset 3: 1 byte of prev word + 3 bytes of next */
.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1		/* point src back at next unread byte */
	b	.Lmemmove_fl4

	/* ---- backward (descending) copy: dst > src ----
	 * Both pointers are moved past the end, then everything proceeds
	 * as the forward case but with descending ldmdb/stmdb and
	 * pre-decrement byte accesses.  r0 already holds the return value
	 * (dst), so no {r0, lr} save is needed here. */
.Lmemmove_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul	/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10		/* ge iff at least 16 bytes remain */
	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgedb	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmgedb	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	/* 4..11 bytes left: copy one word (lt) or two words (ge) */
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmgedb	r1!, {r3, r12}
	stmgedb	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETc(eq)

	/* copy the crud byte at a time (1, 2 or 3 bytes, descending) */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2			/* r12 = dst & 3 = bytes to align */

	/* align destination with byte copies (1, 2 or 3 of them) */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ...
	 * Mirror of .Lmemmove_fsrcul: round src down to a word boundary,
	 * preload the word containing the last unread bytes into r3, then
	 * build each output word from two adjacent aligned words with
	 * shift/OR, working downwards.  r12 = src & 3 selects the variant. */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* src offset 3, descending, 16 bytes per iteration */
.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3		/* point src back at last unread byte */
	b	.Lmemmove_bl4

	/* src offset 2, descending */
.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2		/* point src back at last unread byte */
	b	.Lmemmove_bl4

	/* src offset 1, descending */
.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* NOTE(review): label says "loop32" but the loop moves 16 bytes per
	 * iteration (subs #0x10 below); name retained from the original. */
.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1		/* point src back at last unread byte */
	b	.Lmemmove_bl4