/*	$NetBSD: memcpy_xscale.S,v 1.1 2005/12/20 19:28:49 christos Exp $	*/

/*
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
	pld	[r1]
	cmp	r2, #0x0c
	ble	.Lmemcpy_short		/* <= 12 bytes */
	mov	r3, r0			/* We must not clobber r0 */

	/* Word-align the destination buffer */
	ands	ip, r3, #0x03		/* Already word aligned? */
	beq	.Lmemcpy_wordaligned	/* Yup */
	cmp	ip, #0x02
	ldrb	ip, [r1], #0x01
	sub	r2, r2, #0x01
	strb	ip, [r3], #0x01
	ldrleb	ip, [r1], #0x01
	suble	r2, r2, #0x01
	strleb	ip, [r3], #0x01
	ldrltb	ip, [r1], #0x01
	sublt	r2, r2, #0x01
	strltb	ip, [r3], #0x01

	/* Destination buffer is now word aligned */
.Lmemcpy_wordaligned:
	ands	ip, r1, #0x03		/* Is src also word-aligned? */
	bne	.Lmemcpy_bad_align	/* Nope. Things just got bad */

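	/*
	 * Both pointers are now word aligned.  The fast path below first
	 * bumps the destination out to an 8-byte boundary so that strd
	 * (which stores a register pair to a doubleword-aligned address)
	 * can be used, then copies 128 bytes per iteration, issuing pld
	 * prefetches ahead of the loads.  Smaller residues fall through
	 * to a 32-byte loop and an unrolled tail.
	 */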
	/* Quad-align the destination buffer */
	tst	r3, #0x07		/* Already quad aligned? */
	ldrne	ip, [r1], #0x04
	stmfd	sp!, {r4-r9}		/* Free up some registers */
	subne	r2, r2, #0x04
	strne	ip, [r3], #0x04

	/* Destination buffer quad aligned, source is at least word aligned */
	subs	r2, r2, #0x80
	blt	.Lmemcpy_w_lessthan128

	/* Copy 128 bytes at a time */
.Lmemcpy_w_loop128:
	ldr	r4, [r1], #0x04		/* LD:00-03 */
	ldr	r5, [r1], #0x04		/* LD:04-07 */
	pld	[r1, #0x18]		/* Prefetch 0x20 */
	ldr	r6, [r1], #0x04		/* LD:08-0b */
	ldr	r7, [r1], #0x04		/* LD:0c-0f */
	ldr	r8, [r1], #0x04		/* LD:10-13 */
	ldr	r9, [r1], #0x04		/* LD:14-17 */
	strd	r4, [r3], #0x08		/* ST:00-07 */
	ldr	r4, [r1], #0x04		/* LD:18-1b */
	ldr	r5, [r1], #0x04		/* LD:1c-1f */
	strd	r6, [r3], #0x08		/* ST:08-0f */
	ldr	r6, [r1], #0x04		/* LD:20-23 */
	ldr	r7, [r1], #0x04		/* LD:24-27 */
	pld	[r1, #0x18]		/* Prefetch 0x40 */
	strd	r8, [r3], #0x08		/* ST:10-17 */
	ldr	r8, [r1], #0x04		/* LD:28-2b */
	ldr	r9, [r1], #0x04		/* LD:2c-2f */
	strd	r4, [r3], #0x08		/* ST:18-1f */
	ldr	r4, [r1], #0x04		/* LD:30-33 */
	ldr	r5, [r1], #0x04		/* LD:34-37 */
	strd	r6, [r3], #0x08		/* ST:20-27 */
	ldr	r6, [r1], #0x04		/* LD:38-3b */
	ldr	r7, [r1], #0x04		/* LD:3c-3f */
	strd	r8, [r3], #0x08		/* ST:28-2f */
	ldr	r8, [r1], #0x04		/* LD:40-43 */
	ldr	r9, [r1], #0x04		/* LD:44-47 */
	pld	[r1, #0x18]		/* Prefetch 0x60 */
	strd	r4, [r3], #0x08		/* ST:30-37 */
	ldr	r4, [r1], #0x04		/* LD:48-4b */
	ldr	r5, [r1], #0x04		/* LD:4c-4f */
	strd	r6, [r3], #0x08		/* ST:38-3f */
	ldr	r6, [r1], #0x04		/* LD:50-53 */
	ldr	r7, [r1], #0x04		/* LD:54-57 */
	strd	r8, [r3], #0x08		/* ST:40-47 */
	ldr	r8, [r1], #0x04		/* LD:58-5b */
	ldr	r9, [r1], #0x04		/* LD:5c-5f */
	strd	r4, [r3], #0x08		/* ST:48-4f */
	ldr	r4, [r1], #0x04		/* LD:60-63 */
	ldr	r5, [r1], #0x04		/* LD:64-67 */
	pld	[r1, #0x18]		/* Prefetch 0x80 */
	strd	r6, [r3], #0x08		/* ST:50-57 */
	ldr	r6, [r1], #0x04		/* LD:68-6b */
	ldr	r7, [r1], #0x04		/* LD:6c-6f */
	strd	r8, [r3], #0x08		/* ST:58-5f */
	ldr	r8, [r1], #0x04		/* LD:70-73 */
	ldr	r9, [r1], #0x04		/* LD:74-77 */
	strd	r4, [r3], #0x08		/* ST:60-67 */
	ldr	r4, [r1], #0x04		/* LD:78-7b */
	ldr	r5, [r1], #0x04		/* LD:7c-7f */
	strd	r6, [r3], #0x08		/* ST:68-6f */
	strd	r8, [r3], #0x08		/* ST:70-77 */
	subs	r2, r2, #0x80
	strd	r4, [r3], #0x08		/* ST:78-7f */
	bge	.Lmemcpy_w_loop128

.Lmemcpy_w_lessthan128:
	adds	r2, r2, #0x80		/* Adjust for extra sub */
	ldmeqfd	sp!, {r4-r9}
	bxeq	lr			/* Return now if done */
	subs	r2, r2, #0x20
	blt	.Lmemcpy_w_lessthan32

	/* Copy 32 bytes at a time */
.Lmemcpy_w_loop32:
	ldr	r4, [r1], #0x04
	ldr	r5, [r1], #0x04
	pld	[r1, #0x18]
	ldr	r6, [r1], #0x04
	ldr	r7, [r1], #0x04
	ldr	r8, [r1], #0x04
	ldr	r9, [r1], #0x04
	strd	r4, [r3], #0x08
	ldr	r4, [r1], #0x04
	ldr	r5, [r1], #0x04
	strd	r6, [r3], #0x08
	strd	r8, [r3], #0x08
	subs	r2, r2, #0x20
	strd	r4, [r3], #0x08
	bge	.Lmemcpy_w_loop32

.Lmemcpy_w_lessthan32:
	adds	r2, r2, #0x20		/* Adjust for extra sub */
	ldmeqfd	sp!, {r4-r9}
	bxeq	lr			/* Return now if done */

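	/*
	 * 0-31 bytes remain.  Bits 3-4 of the count (r2 & 0x18) give the
	 * number of whole 8-byte chunks left (0-3).  Each unrolled chunk
	 * below is four instructions (16 bytes) and pc reads as the
	 * address of the addne plus 8, so adding (0x18 - (r2 & 0x18)) * 2
	 * to pc skips over the chunks that are not needed; when all three
	 * chunks are needed the rsbs result is zero, the addne is not
	 * taken and execution falls through the nop into the first chunk.
	 */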
	and	r4, r2, #0x18
	rsbs	r4, r4, #0x18
	addne	pc, pc, r4, lsl #1
	nop

	/* At least 24 bytes remaining */
	ldr	r4, [r1], #0x04
	ldr	r5, [r1], #0x04
	sub	r2, r2, #0x08
	strd	r4, [r3], #0x08

	/* At least 16 bytes remaining */
	ldr	r4, [r1], #0x04
	ldr	r5, [r1], #0x04
	sub	r2, r2, #0x08
	strd	r4, [r3], #0x08

	/* At least 8 bytes remaining */
	ldr	r4, [r1], #0x04
	ldr	r5, [r1], #0x04
	subs	r2, r2, #0x08
	strd	r4, [r3], #0x08

	/* Less than 8 bytes remaining */
	ldmfd	sp!, {r4-r9}
	bxeq	lr			/* Return now if done */
	subs	r2, r2, #0x04
	ldrge	ip, [r1], #0x04
	strge	ip, [r3], #0x04
	bxeq	lr			/* Return now if done */
	addlt	r2, r2, #0x04
	ldrb	ip, [r1], #0x01
	cmp	r2, #0x02
	ldrgeb	r2, [r1], #0x01
	strb	ip, [r3], #0x01
	ldrgtb	ip, [r1]
	strgeb	r2, [r3], #0x01
	strgtb	ip, [r3]
	bx	lr


/*
 * At this point, it has not been possible to word align both buffers.
 * The destination buffer is word aligned, but the source buffer is not.
 */
.Lmemcpy_bad_align:
	stmfd	sp!, {r4-r7}
	bic	r1, r1, #0x03
	cmp	ip, #2
	ldr	ip, [r1], #0x04
	bgt	.Lmemcpy_bad3
	beq	.Lmemcpy_bad2
	b	.Lmemcpy_bad1

.Lmemcpy_bad1_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	r5, [r1], #0x04
	pld	[r1, #0x018]
	ldr	r6, [r1], #0x04
	ldr	r7, [r1], #0x04
	ldr	ip, [r1], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r6, lsr #24
	mov	r6, r6, lsl #8
	orr	r6, r6, r7, lsr #24
	mov	r7, r7, lsl #8
	orr	r7, r7, ip, lsr #24
#else
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r6, lsl #24
	mov	r6, r6, lsr #8
	orr	r6, r6, r7, lsl #24
	mov	r7, r7, lsr #8
	orr	r7, r7, ip, lsl #24
#endif
	str	r4, [r3], #0x04
	str	r5, [r3], #0x04
	str	r6, [r3], #0x04
	str	r7, [r3], #0x04
.Lmemcpy_bad1:
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bad1_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	bxeq	lr			/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r1, r1, #0x03
	blt	.Lmemcpy_bad_done

.Lmemcpy_bad1_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #8
#else
	mov	r4, ip, lsr #8
#endif
	ldr	ip, [r1], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #24
#else
	orr	r4, r4, ip, lsl #24
#endif
	str	r4, [r3], #0x04
	bge	.Lmemcpy_bad1_loop4
	sub	r1, r1, #0x03
	b	.Lmemcpy_bad_done

.Lmemcpy_bad2_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	r5, [r1], #0x04
	pld	[r1, #0x018]
	ldr	r6, [r1], #0x04
	ldr	r7, [r1], #0x04
	ldr	ip, [r1], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r6, lsr #16
	mov	r6, r6, lsl #16
	orr	r6, r6, r7, lsr #16
	mov	r7, r7, lsl #16
	orr	r7, r7, ip, lsr #16
#else
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r6, lsl #16
	mov	r6, r6, lsr #16
	orr	r6, r6, r7, lsl #16
	mov	r7, r7, lsr #16
	orr	r7, r7, ip, lsl #16
#endif
	str	r4, [r3], #0x04
	str	r5, [r3], #0x04
	str	r6, [r3], #0x04
	str	r7, [r3], #0x04
.Lmemcpy_bad2:
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bad2_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	bxeq	lr			/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r1, r1, #0x02
	blt	.Lmemcpy_bad_done

.Lmemcpy_bad2_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #16
#else
	mov	r4, ip, lsr #16
#endif
	ldr	ip, [r1], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #16
#else
	orr	r4, r4, ip, lsl #16
#endif
	str	r4, [r3], #0x04
	bge	.Lmemcpy_bad2_loop4
	sub	r1, r1, #0x02
	b	.Lmemcpy_bad_done

.Lmemcpy_bad3_loop16:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	r5, [r1], #0x04
	pld	[r1, #0x018]
	ldr	r6, [r1], #0x04
	ldr	r7, [r1], #0x04
	ldr	ip, [r1], #0x04
#ifdef __ARMEB__
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r6, lsr #8
	mov	r6, r6, lsl #24
	orr	r6, r6, r7, lsr #8
	mov	r7, r7, lsl #24
	orr	r7, r7, ip, lsr #8
#else
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r6, lsl #8
	mov	r6, r6, lsr #24
	orr	r6, r6, r7, lsl #8
	mov	r7, r7, lsr #24
	orr	r7, r7, ip, lsl #8
#endif
	str	r4, [r3], #0x04
	str	r5, [r3], #0x04
	str	r6, [r3], #0x04
	str	r7, [r3], #0x04
.Lmemcpy_bad3:
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bad3_loop16

	adds	r2, r2, #0x10
	ldmeqfd	sp!, {r4-r7}
	bxeq	lr			/* Return now if done */
	subs	r2, r2, #0x04
	sublt	r1, r1, #0x01
	blt	.Lmemcpy_bad_done

.Lmemcpy_bad3_loop4:
#ifdef __ARMEB__
	mov	r4, ip, lsl #24
#else
	mov	r4, ip, lsr #24
#endif
	ldr	ip, [r1], #0x04
	subs	r2, r2, #0x04
#ifdef __ARMEB__
	orr	r4, r4, ip, lsr #8
#else
	orr	r4, r4, ip, lsl #8
#endif
	str	r4, [r3], #0x04
	bge	.Lmemcpy_bad3_loop4
	sub	r1, r1, #0x01

.Lmemcpy_bad_done:
	ldmfd	sp!, {r4-r7}
	adds	r2, r2, #0x04
	bxeq	lr
	ldrb	ip, [r1], #0x01
	cmp	r2, #0x02
	ldrgeb	r2, [r1], #0x01
	strb	ip, [r3], #0x01
	ldrgtb	ip, [r1]
	strgeb	r2, [r3], #0x01
	strgtb	ip, [r3]
	bx	lr


/*
 * Handle short copies (less than 16 bytes), possibly misaligned.
 * Some of these are *very* common, thanks to the network stack,
 * and so are handled specially.
 */
.Lmemcpy_short:
#ifndef _STANDALONE
	add	pc, pc, r2, lsl #2
	nop
	bx	lr			/* 0x00 */
	b	.Lmemcpy_bytewise	/* 0x01 */
	b	.Lmemcpy_bytewise	/* 0x02 */
	b	.Lmemcpy_bytewise	/* 0x03 */
	b	.Lmemcpy_4		/* 0x04 */
	b	.Lmemcpy_bytewise	/* 0x05 */
	b	.Lmemcpy_6		/* 0x06 */
	b	.Lmemcpy_bytewise	/* 0x07 */
	b	.Lmemcpy_8		/* 0x08 */
	b	.Lmemcpy_bytewise	/* 0x09 */
	b	.Lmemcpy_bytewise	/* 0x0a */
	b	.Lmemcpy_bytewise	/* 0x0b */
	b	.Lmemcpy_c		/* 0x0c */
#endif
.Lmemcpy_bytewise:
	mov	r3, r0			/* We must not clobber r0 */
	ldrb	ip, [r1], #0x01
1:	subs	r2, r2, #0x01
	strb	ip, [r3], #0x01
	ldrneb	ip, [r1], #0x01
	bne	1b
	bx	lr

#ifndef _STANDALONE
/******************************************************************************
 * Special case for 4 byte copies
 */
#define	LMEMCPY_4_LOG2	6	/* 64 bytes */
#define	LMEMCPY_4_PAD	.align LMEMCPY_4_LOG2
	LMEMCPY_4_PAD
.Lmemcpy_4:
	and	r2, r1, #0x03
	orr	r2, r2, r0, lsl #2
	ands	r2, r2, #0x0f
	sub	r3, pc, #0x14
	addne	pc, r3, r2, lsl #LMEMCPY_4_LOG2

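/*
 * The five instructions above build a 4-bit case index from the low two
 * bits of dst (r0) and src (r1): dst alignment in bits 3:2, src alignment
 * in bits 1:0.  r3 is the address of .Lmemcpy_4 itself, and because every
 * case below is padded out to 64 bytes (LMEMCPY_4_PAD) the addne lands on
 * r3 + index * 64; index 0 (both pointers word aligned) simply falls
 * through.  The 6-, 8- and 12-byte copies further down repeat the same
 * dispatch pattern.
 *
 * For the misaligned-source cases, the aligned words overlapping the
 * wanted bytes are loaded (hence offsets such as [r1, #-1] or [r1, #3])
 * and merged with shifts; the extra bytes read share an aligned word with
 * bytes belonging to the buffer, so those loads cannot fault.  Misaligned
 * destinations are written with strb/strh pieces, since word stores must
 * be naturally aligned on these cores.
 */
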
/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	str	r2, [r0]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #-1]		/* BE:r3 = x012 LE:r3 = 210x */
	ldr	r2, [r1, #3]		/* BE:r2 = 3xxx LE:r2 = xxx3 */
#ifdef __ARMEB__
	mov	r3, r3, lsl #8		/* r3 = 012. */
	orr	r3, r3, r2, lsr #24	/* r3 = 0123 */
#else
	mov	r3, r3, lsr #8		/* r3 = .210 */
	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
#endif
	str	r3, [r0]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
#ifdef __ARMEB__
	ldrh	r3, [r1]
	ldrh	r2, [r1, #0x02]
#else
	ldrh	r3, [r1, #0x02]
	ldrh	r2, [r1]
#endif
	orr	r3, r2, r3, lsl #16
	str	r3, [r0]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #-3]		/* BE:r3 = xxx0 LE:r3 = 0xxx */
	ldr	r2, [r1, #1]		/* BE:r2 = 123x LE:r2 = x321 */
#ifdef __ARMEB__
	mov	r3, r3, lsl #24		/* r3 = 0... */
	orr	r3, r3, r2, lsr #8	/* r3 = 0123 */
#else
	mov	r3, r3, lsr #24		/* r3 = ...0 */
	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
#endif
	str	r3, [r0]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
#ifdef __ARMEB__
	strb	r2, [r0, #0x03]
	mov	r3, r2, lsr #8
	mov	r1, r2, lsr #24
	strb	r1, [r0]
#else
	strb	r2, [r0]
	mov	r3, r2, lsr #8
	mov	r1, r2, lsr #24
	strb	r1, [r0, #0x03]
#endif
	strh	r3, [r0, #0x01]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x03]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01 LE:r2 = ..10 */
	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23 LE:r3 = ..32 */
#ifdef __ARMEB__
	mov	r1, r2, lsr #8		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r2, r2, lsl #8		/* r2 = .01. */
	orr	r2, r2, r3, lsr #8	/* r2 = .012 */
#else
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
	mov	r3, r3, lsr #8		/* r3 = ...3 */
#endif
	strh	r2, [r0, #0x01]
	strb	r3, [r0, #0x03]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x03]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
#ifdef __ARMEB__
	strh	r2, [r0, #0x02]
	mov	r3, r2, lsr #16
	strh	r3, [r0]
#else
	strh	r2, [r0]
	mov	r3, r2, lsr #16
	strh	r3, [r0, #0x02]
#endif
	bx	lr
	LMEMCPY_4_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012 LE:r2 = 210x */
	ldr	r3, [r1, #3]		/* BE:r3 = 3xxx LE:r3 = xxx3 */
	mov	r1, r2, lsr #8		/* BE:r1 = .x01 LE:r1 = .210 */
	strh	r1, [r0]
#ifdef __ARMEB__
	mov	r2, r2, lsl #8		/* r2 = 012. */
	orr	r2, r2, r3, lsr #24	/* r2 = 0123 */
#else
	mov	r2, r2, lsr #24		/* r2 = ...2 */
	orr	r2, r2, r3, lsl #8	/* r2 = xx32 */
#endif
	strh	r2, [r0, #0x02]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldrh	r3, [r1, #0x02]
	strh	r2, [r0]
	strh	r3, [r0, #0x02]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #1]		/* BE:r3 = 123x LE:r3 = x321 */
	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0 LE:r2 = 0xxx */
	mov	r1, r3, lsr #8		/* BE:r1 = .123 LE:r1 = .x32 */
	strh	r1, [r0, #0x02]
#ifdef __ARMEB__
	mov	r3, r3, lsr #24		/* r3 = ...1 */
	orr	r3, r3, r2, lsl #8	/* r3 = xx01 */
#else
	mov	r3, r3, lsl #8		/* r3 = 321. */
	orr	r3, r3, r2, lsr #24	/* r3 = 3210 */
#endif
	strh	r3, [r0]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123 LE:r2 = 3210 */
#ifdef __ARMEB__
	strb	r2, [r0, #0x03]
	mov	r3, r2, lsr #8
	mov	r1, r2, lsr #24
	strh	r3, [r0, #0x01]
	strb	r1, [r0]
#else
	strb	r2, [r0]
	mov	r3, r2, lsr #8
	mov	r1, r2, lsr #24
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
#endif
	bx	lr
	LMEMCPY_4_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x03]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
	bx	lr
	LMEMCPY_4_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
#ifdef __ARMEB__
	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23 LE:r3 = ..32 */
	ldrh	r2, [r1]		/* BE:r2 = ..01 LE:r2 = ..10 */
	strb	r3, [r0, #0x03]
	mov	r3, r3, lsr #8		/* r3 = ...2 */
	orr	r3, r3, r2, lsl #8	/* r3 = ..12 */
	strh	r3, [r0, #0x01]
	mov	r2, r2, lsr #8		/* r2 = ...0 */
	strb	r2, [r0]
#else
	ldrh	r2, [r1]		/* BE:r2 = ..01 LE:r2 = ..10 */
	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23 LE:r3 = ..32 */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
	strh	r2, [r0, #0x01]
	mov	r3, r3, lsr #8		/* r3 = ...3 */
	strb	r3, [r0, #0x03]
#endif
	bx	lr
	LMEMCPY_4_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x03]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strb	r1, [r0, #0x03]
	bx	lr
	LMEMCPY_4_PAD


/******************************************************************************
 * Special case for 6 byte copies
 */
#define	LMEMCPY_6_LOG2	6	/* 64 bytes */
#define	LMEMCPY_6_PAD	.align LMEMCPY_6_LOG2
	LMEMCPY_6_PAD
.Lmemcpy_6:
	and	r2, r1, #0x03
	orr	r2, r2, r0, lsl #2
	ands	r2, r2, #0x0f
	sub	r3, pc, #0x14
	addne	pc, r3, r2, lsl #LMEMCPY_6_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldrh	r3, [r1, #0x04]
	str	r2, [r0]
	strh	r3, [r0, #0x04]
	bx	lr
	LMEMCPY_6_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012 LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 345x LE:r3 = x543 */
#ifdef __ARMEB__
	mov	r2, r2, lsl #8		/* r2 = 012. */
	orr	r2, r2, r3, lsr #24	/* r2 = 0123 */
#else
	mov	r2, r2, lsr #8		/* r2 = .210 */
	orr	r2, r2, r3, lsl #24	/* r2 = 3210 */
#endif
	mov	r3, r3, lsr #8		/* BE:r3 = .345 LE:r3 = .x54 */
	str	r2, [r0]
	strh	r3, [r0, #0x04]
	bx	lr
	LMEMCPY_6_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345 LE:r3 = 5432 */
	ldrh	r2, [r1]		/* BE:r2 = ..01 LE:r2 = ..10 */
#ifdef __ARMEB__
	mov	r1, r3, lsr #16		/* r1 = ..23 */
	orr	r1, r1, r2, lsl #16	/* r1 = 0123 */
	str	r1, [r0]
	strh	r3, [r0, #0x04]
#else
	mov	r1, r3, lsr #16		/* r1 = ..54 */
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	str	r2, [r0]
	strh	r1, [r0, #0x04]
#endif
	bx	lr
	LMEMCPY_6_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0 LE:r2 = 0xxx */
	ldr	r3, [r1, #1]		/* BE:r3 = 1234 LE:r3 = 4321 */
	ldr	r1, [r1, #5]		/* BE:r1 = 5xxx LE:r1 = xxx5 */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 0... */
	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
	mov	r3, r3, lsl #8		/* r3 = 234. */
	orr	r1, r3, r1, lsr #24	/* r1 = 2345 */
#else
	mov	r2, r2, lsr #24		/* r2 = ...0 */
	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
	mov	r1, r1, lsl #8		/* r1 = xx5. */
	orr	r1, r1, r3, lsr #24	/* r1 = xx54 */
#endif
	str	r2, [r0]
	strh	r1, [r0, #0x04]
	bx	lr
	LMEMCPY_6_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1]		/* BE:r3 = 0123 LE:r3 = 3210 */
	ldrh	r2, [r1, #0x04]		/* BE:r2 = ..45 LE:r2 = ..54 */
	mov	r1, r3, lsr #8		/* BE:r1 = .012 LE:r1 = .321 */
	strh	r1, [r0, #0x01]
#ifdef __ARMEB__
	mov	r1, r3, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r3, r3, lsl #8		/* r3 = 123. */
	orr	r3, r3, r2, lsr #8	/* r3 = 1234 */
#else
	strb	r3, [r0]
	mov	r3, r3, lsr #24		/* r3 = ...3 */
	orr	r3, r3, r2, lsl #8	/* r3 = .543 */
	mov	r2, r2, lsr #8		/* r2 = ...5 */
#endif
	strh	r3, [r0, #0x03]
	strb	r2, [r0, #0x05]
	bx	lr
	LMEMCPY_6_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrh	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x05]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strh	ip, [r0, #0x03]
	strb	r1, [r0, #0x05]
	bx	lr
	LMEMCPY_6_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01 LE:r2 = ..10 */
	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345 LE:r1 = 5432 */
#ifdef __ARMEB__
	mov	r3, r2, lsr #8		/* r3 = ...0 */
	strb	r3, [r0]
	strb	r1, [r0, #0x05]
	mov	r3, r1, lsr #8		/* r3 = .234 */
	strh	r3, [r0, #0x03]
	mov	r3, r2, lsl #8		/* r3 = .01. */
	orr	r3, r3, r1, lsr #24	/* r3 = .012 */
	strh	r3, [r0, #0x01]
#else
	strb	r2, [r0]
	mov	r3, r1, lsr #24
	strb	r3, [r0, #0x05]
	mov	r3, r1, lsr #8		/* r3 = .543 */
	strh	r3, [r0, #0x03]
	mov	r3, r2, lsr #8		/* r3 = ...1 */
	orr	r3, r3, r1, lsl #8	/* r3 = 4321 */
	strh	r3, [r0, #0x01]
#endif
	bx	lr
	LMEMCPY_6_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrh	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x05]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strh	ip, [r0, #0x03]
	strb	r1, [r0, #0x05]
	bx	lr
	LMEMCPY_6_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
#ifdef __ARMEB__
	ldr	r2, [r1]		/* r2 = 0123 */
	ldrh	r3, [r1, #0x04]		/* r3 = ..45 */
	mov	r1, r2, lsr #16		/* r1 = ..01 */
	orr	r3, r3, r2, lsl #16	/* r3 = 2345 */
	strh	r1, [r0]
	str	r3, [r0, #0x02]
#else
	ldrh	r2, [r1, #0x04]		/* r2 = ..54 */
	ldr	r3, [r1]		/* r3 = 3210 */
	mov	r2, r2, lsl #16		/* r2 = 54.. */
	orr	r2, r2, r3, lsr #16	/* r2 = 5432 */
	strh	r3, [r0]
	str	r2, [r0, #0x02]
#endif
	bx	lr
	LMEMCPY_6_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #-1]		/* BE:r3 = x012 LE:r3 = 210x */
	ldr	r2, [r1, #3]		/* BE:r2 = 345x LE:r2 = x543 */
	mov	r1, r3, lsr #8		/* BE:r1 = .x01 LE:r1 = .210 */
#ifdef __ARMEB__
	mov	r2, r2, lsr #8		/* r2 = .345 */
	orr	r2, r2, r3, lsl #24	/* r2 = 2345 */
#else
	mov	r2, r2, lsl #8		/* r2 = 543. */
	orr	r2, r2, r3, lsr #24	/* r2 = 5432 */
#endif
	strh	r1, [r0]
	str	r2, [r0, #0x02]
	bx	lr
	LMEMCPY_6_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	r3, [r1, #0x02]
	strh	r2, [r0]
	str	r3, [r0, #0x02]
	bx	lr
	LMEMCPY_6_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234 LE:r2 = 4321 */
	ldrb	r1, [r1, #0x05]		/* r1 = ...5 */
#ifdef __ARMEB__
	mov	r3, r3, lsl #8		/* r3 = ..0. */
	orr	r3, r3, r2, lsr #24	/* r3 = ..01 */
	orr	r1, r1, r2, lsl #8	/* r1 = 2345 */
#else
	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
	mov	r1, r1, lsl #24		/* r1 = 5... */
	orr	r1, r1, r2, lsr #8	/* r1 = 5432 */
#endif
	strh	r3, [r0]
	str	r1, [r0, #0x02]
	bx	lr
	LMEMCPY_6_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123 LE:r2 = 3210 */
	ldrh	r1, [r1, #0x04]		/* BE:r1 = ..45 LE:r1 = ..54 */
#ifdef __ARMEB__
	mov	r3, r2, lsr #24		/* r3 = ...0 */
	strb	r3, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, r1, lsr #8	/* r2 = 1234 */
#else
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = .321 */
	orr	r2, r2, r1, lsl #24	/* r2 = 4321 */
	mov	r1, r1, lsr #8		/* r1 = ...5 */
#endif
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x05]
	bx	lr
	LMEMCPY_6_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldrh	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x05]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	strh	ip, [r0, #0x03]
	strb	r1, [r0, #0x05]
	bx	lr
	LMEMCPY_6_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01 LE:r2 = ..10 */
	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345 LE:r1 = 5432 */
#ifdef __ARMEB__
	mov	r3, r2, lsr #8		/* r3 = ...0 */
	strb	r3, [r0]
	mov	r2, r2, lsl #24		/* r2 = 1... */
	orr	r2, r2, r1, lsr #8	/* r2 = 1234 */
#else
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r1, lsl #8	/* r2 = 4321 */
	mov	r1, r1, lsr #24		/* r1 = ...5 */
#endif
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x05]
	bx	lr
	LMEMCPY_6_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]
	ldrb	r1, [r1, #0x05]
	strb	r2, [r0]
	str	r3, [r0, #0x01]
	strb	r1, [r0, #0x05]
	bx	lr
	LMEMCPY_6_PAD


/******************************************************************************
 * Special case for 8 byte copies
 */
#define	LMEMCPY_8_LOG2	6	/* 64 bytes */
#define	LMEMCPY_8_PAD	.align LMEMCPY_8_LOG2
	LMEMCPY_8_PAD
.Lmemcpy_8:
	and	r2, r1, #0x03
	orr	r2, r2, r0, lsl #2
	ands	r2, r2, #0x0f
	sub	r3, pc, #0x14
	addne	pc, r3, r2, lsl #LMEMCPY_8_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldr	r3, [r1, #0x04]
	str	r2, [r0]
	str	r3, [r0, #0x04]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #-1]		/* BE:r3 = x012 LE:r3 = 210x */
	ldr	r2, [r1, #0x03]		/* BE:r2 = 3456 LE:r2 = 6543 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
#ifdef __ARMEB__
	mov	r3, r3, lsl #8		/* r3 = 012. */
	orr	r3, r3, r2, lsr #24	/* r3 = 0123 */
	orr	r2, r1, r2, lsl #8	/* r2 = 4567 */
#else
	mov	r3, r3, lsr #8		/* r3 = .210 */
	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
	mov	r1, r1, lsl #24		/* r1 = 7... */
	orr	r2, r1, r2, lsr #8	/* r2 = 7654 */
#endif
	str	r3, [r0]
	str	r2, [r0, #0x04]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01 LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345 LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67 LE:r1 = ..76 */
#ifdef __ARMEB__
	mov	r2, r2, lsl #16		/* r2 = 01.. */
	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
	orr	r3, r1, r3, lsl #16	/* r3 = 4567 */
#else
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	mov	r3, r3, lsr #16		/* r3 = ..54 */
	orr	r3, r3, r1, lsl #16	/* r3 = 7654 */
#endif
	str	r2, [r0]
	str	r3, [r0, #0x04]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234 LE:r2 = 4321 */
	ldr	r1, [r1, #0x05]		/* BE:r1 = 567x LE:r1 = x765 */
#ifdef __ARMEB__
	mov	r3, r3, lsl #24		/* r3 = 0... */
	orr	r3, r3, r2, lsr #8	/* r3 = 0123 */
	mov	r2, r2, lsl #24		/* r2 = 4... */
	orr	r2, r2, r1, lsr #8	/* r2 = 4567 */
#else
	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
	mov	r2, r2, lsr #24		/* r2 = ...4 */
	orr	r2, r2, r1, lsl #8	/* r2 = 7654 */
#endif
	str	r3, [r0]
	str	r2, [r0, #0x04]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 0100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1]		/* BE:r3 = 0123 LE:r3 = 3210 */
	ldr	r2, [r1, #0x04]		/* BE:r2 = 4567 LE:r2 = 7654 */
#ifdef __ARMEB__
	mov	r1, r3, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r1, r3, lsr #8		/* r1 = .012 */
	strb	r2, [r0, #0x07]
	mov	r3, r3, lsl #24		/* r3 = 3... */
	orr	r3, r3, r2, lsr #8	/* r3 = 3456 */
#else
	strb	r3, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...7 */
	strb	r1, [r0, #0x07]
	mov	r1, r3, lsr #8		/* r1 = .321 */
	mov	r3, r3, lsr #24		/* r3 = ...3 */
	orr	r3, r3, r2, lsl #8	/* r3 = 6543 */
#endif
	strh	r1, [r0, #0x01]
	str	r3, [r0, #0x03]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 0101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldr	ip, [r1, #0x03]
	ldrb	r1, [r1, #0x07]
	strb	r2, [r0]
	strh	r3, [r0, #0x01]
	str	ip, [r0, #0x03]
	strb	r1, [r0, #0x07]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 0110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01 LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345 LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67 LE:r1 = ..76 */
#ifdef __ARMEB__
	mov	ip, r2, lsr #8		/* ip = ...0 */
	strb	ip, [r0]
	mov	ip, r2, lsl #8		/* ip = .01. */
	orr	ip, ip, r3, lsr #24	/* ip = .012 */
	strb	r1, [r0, #0x07]
	mov	r3, r3, lsl #8		/* r3 = 345. */
	orr	r3, r3, r1, lsr #8	/* r3 = 3456 */
#else
	strb	r2, [r0]		/* 0 */
	mov	ip, r1, lsr #8		/* ip = ...7 */
	strb	ip, [r0, #0x07]		/* 7 */
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r3, r3, lsr #8		/* r3 = .543 */
	orr	r3, r3, r1, lsl #24	/* r3 = 6543 */
#endif
	strh	ip, [r0, #0x01]
	str	r3, [r0, #0x03]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 0111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234 LE:ip = 4321 */
	ldrh	r2, [r1, #0x05]		/* BE:r2 = ..56 LE:r2 = ..65 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..12 LE:r3 = ..43 */
#ifdef __ARMEB__
	strh	r3, [r0, #0x01]
	orr	r2, r2, ip, lsl #16	/* r2 = 3456 */
#else
	strh	ip, [r0, #0x01]
	orr	r2, r3, r2, lsl #16	/* r2 = 6543 */
#endif
	str	r2, [r0, #0x03]
	strb	r1, [r0, #0x07]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123 LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567 LE:r3 = 7654 */
	mov	r1, r2, lsr #16		/* BE:r1 = ..01 LE:r1 = ..32 */
#ifdef __ARMEB__
	strh	r1, [r0]
	mov	r1, r3, lsr #16		/* r1 = ..45 */
	orr	r2, r1, r2, lsl #16	/* r2 = 2345 */
#else
	strh	r2, [r0]
	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
#endif
	str	r2, [r0, #0x02]
	strh	r3, [r0, #0x06]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012 LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456 LE:r3 = 6543 */
	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
	mov	r1, r2, lsr #8		/* BE:r1 = .x01 LE:r1 = .210 */
	strh	r1, [r0]
#ifdef __ARMEB__
	mov	r1, r2, lsl #24		/* r1 = 2... */
	orr	r1, r1, r3, lsr #8	/* r1 = 2345 */
	orr	r3, ip, r3, lsl #8	/* r3 = 4567 */
#else
	mov	r1, r2, lsr #24		/* r1 = ...2 */
	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
#endif
	str	r1, [r0, #0x02]
	strh	r3, [r0, #0x06]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	ip, [r1, #0x02]
	ldrh	r3, [r1, #0x06]
	strh	r2, [r0]
	str	ip, [r0, #0x02]
	strh	r3, [r0, #0x06]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned
 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x LE:r3 = x765 */
	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234 LE:r2 = 4321 */
	ldrb	ip, [r1]		/* ip = ...0 */
	mov	r1, r3, lsr #8		/* BE:r1 = .567 LE:r1 = .x76 */
	strh	r1, [r0, #0x06]
#ifdef __ARMEB__
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, r2, lsl #8	/* r3 = 2345 */
	mov	r2, r2, lsr #24		/* r2 = ...1 */
	orr	r2, r2, ip, lsl #8	/* r2 = ..01 */
#else
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
#endif
	str	r3, [r0, #0x02]
	strh	r2, [r0]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1100: dst is 8-bit aligned, src is 32-bit aligned
 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567 LE:r3 = 7654 */
	ldr	r2, [r1]		/* BE:r2 = 0123 LE:r2 = 3210 */
	mov	r1, r3, lsr #8		/* BE:r1 = .456 LE:r1 = .765 */
	strh	r1, [r0, #0x05]
#ifdef __ARMEB__
	strb	r3, [r0, #0x07]
	mov	r1, r2, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, r3, lsr #24	/* r2 = 1234 */
	str	r2, [r0, #0x01]
#else
	strb	r2, [r0]
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	strb	r1, [r0, #0x07]
	mov	r2, r2, lsr #8		/* r2 = .321 */
	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
	str	r2, [r0, #0x01]
#endif
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1101: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r3, [r1]		/* r3 = ...0 */
	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12 LE:r2 = ..21 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456 LE:ip = 6543 */
	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
	strb	r3, [r0]
	mov	r3, ip, lsr #16		/* BE:r3 = ..34 LE:r3 = ..65 */
#ifdef __ARMEB__
	strh	ip, [r0, #0x05]
	orr	r2, r3, r2, lsl #16	/* r2 = 1234 */
#else
	strh	r3, [r0, #0x05]
	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
#endif
	str	r2, [r0, #0x01]
	strb	r1, [r0, #0x07]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1110: dst is 8-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01 LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345 LE:r3 = 5432 */
	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67 LE:r1 = ..76 */
#ifdef __ARMEB__
	mov	ip, r2, lsr #8		/* ip = ...0 */
	strb	ip, [r0]
	mov	ip, r2, lsl #24		/* ip = 1... */
	orr	ip, ip, r3, lsr #8	/* ip = 1234 */
	strb	r1, [r0, #0x07]
	mov	r1, r1, lsr #8		/* r1 = ...6 */
	orr	r1, r1, r3, lsl #8	/* r1 = 3456 */
#else
	strb	r2, [r0]
	mov	ip, r2, lsr #8		/* ip = ...1 */
	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
	mov	r2, r1, lsr #8		/* r2 = ...7 */
	strb	r2, [r0, #0x07]
	mov	r1, r1, lsl #8		/* r1 = .76. */
	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
#endif
	str	ip, [r0, #0x01]
	strh	r1, [r0, #0x05]
	bx	lr
	LMEMCPY_8_PAD

/*
 * 1111: dst is 8-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]
	ldr	ip, [r1, #0x01]
	ldrh	r3, [r1, #0x05]
	ldrb	r1, [r1, #0x07]
	strb	r2, [r0]
	str	ip, [r0, #0x01]
	strh	r3, [r0, #0x05]
	strb	r1, [r0, #0x07]
	bx	lr
	LMEMCPY_8_PAD

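/*
 * The 12-byte cases below use the same alignment dispatch as the 4-, 6-
 * and 8-byte tables above, but with a 128-byte stride (LMEMCPY_C_LOG2 is
 * 7), since some of the cases need more than sixteen instructions and no
 * longer fit in 64 bytes.  Twelve bytes is also the largest length routed
 * here: the entry code sends anything bigger down the general copy path.
 */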
/******************************************************************************
 * Special case for 12 byte copies
 */
#define	LMEMCPY_C_LOG2	7	/* 128 bytes */
#define	LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
	LMEMCPY_C_PAD
.Lmemcpy_c:
	and	r2, r1, #0x03
	orr	r2, r2, r0, lsl #2
	ands	r2, r2, #0x0f
	sub	r3, pc, #0x14
	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2

/*
 * 0000: dst is 32-bit aligned, src is 32-bit aligned
 */
	ldr	r2, [r1]
	ldr	r3, [r1, #0x04]
	ldr	r1, [r1, #0x08]
	str	r2, [r0]
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0001: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A LE:ip = A987 */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456 LE:r3 = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012 LE:r1 = 210x */
#ifdef __ARMEB__
	orr	r2, r2, ip, lsl #8	/* r2 = 89AB */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsr #24		/* r2 = ...7 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4567 */
	mov	r1, r1, lsl #8		/* r1 = 012. */
	orr	r1, r1, r3, lsr #24	/* r1 = 0123 */
#else
	mov	r2, r2, lsl #24		/* r2 = B... */
	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
	str	r2, [r0, #0x08]
	mov	r2, ip, lsl #24		/* r2 = 7... */
	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
#endif
	str	r2, [r0, #0x04]
	str	r1, [r0]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0010: dst is 32-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01 LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345 LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789 LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB LE:r1 = ..BA */
#ifdef __ARMEB__
	mov	r2, r2, lsl #16		/* r2 = 01.. */
	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
	str	r2, [r0]
	mov	r3, r3, lsl #16		/* r3 = 45.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 4567 */
	orr	r1, r1, ip, lsl #16	/* r1 = 89AB */
#else
	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #16		/* r3 = ..54 */
	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
	mov	r1, r1, lsl #16		/* r1 = BA.. */
	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
#endif
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0011: dst is 32-bit aligned, src is 8-bit aligned
 */
	ldrb	r2, [r1]		/* r2 = ...0 */
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234 LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678 LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx LE:r1 = xBA9 */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 0... */
	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
	str	r2, [r0]
	mov	r3, r3, lsl #24		/* r3 = 4... */
	orr	r3, r3, ip, lsr #8	/* r3 = 4567 */
	mov	r1, r1, lsr #8		/* r1 = .9AB */
	orr	r1, r1, ip, lsl #24	/* r1 = 89AB */
#else
	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
	str	r2, [r0]
	mov	r3, r3, lsr #24		/* r3 = ...4 */
	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
	mov	r1, r1, lsl #8		/* r1 = BA9. */
	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
#endif
	str	r3, [r0, #0x04]
	str	r1, [r0, #0x08]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123 LE:r2 = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567 LE:r3 = 7654 */
	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB LE:ip = BA98 */
	mov	r1, r2, lsr #8		/* BE:r1 = .012 LE:r1 = .321 */
	strh	r1, [r0, #0x01]
#ifdef __ARMEB__
	mov	r1, r2, lsr #24		/* r1 = ...0 */
	strb	r1, [r0]
	mov	r1, r2, lsl #24		/* r1 = 3... */
	orr	r2, r1, r3, lsr #8	/* r2 = 3456 */
	mov	r1, r3, lsl #24		/* r1 = 7... */
	orr	r1, r1, ip, lsr #8	/* r1 = 789A */
#else
	strb	r2, [r0]
	mov	r1, r2, lsr #24		/* r1 = ...3 */
	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
	mov	r1, r3, lsr #24		/* r1 = ...7 */
	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
	mov	ip, ip, lsr #24		/* ip = ...B */
#endif
	str	r2, [r0, #0x03]
	str	r1, [r0, #0x07]
	strb	ip, [r0, #0x0b]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1]
	ldrh	r3, [r1, #0x01]
	ldr	ip, [r1, #0x03]
	strb	r2, [r0]
	ldr	r2, [r1, #0x07]
	ldrb	r1, [r1, #0x0b]
	strh	r3, [r0, #0x01]
	str	ip, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
 */
	ldrh	r2, [r1]		/* BE:r2 = ..01 LE:r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345 LE:r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* BE:ip = 6789 LE:ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB LE:r1 = ..BA */
#ifdef __ARMEB__
	mov	r2, r2, ror #8		/* r2 = 1..0 */
	strb	r2, [r0]
	mov	r2, r2, lsr #16		/* r2 = ..1. */
	orr	r2, r2, r3, lsr #24	/* r2 = ..12 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsl #8		/* r2 = 345. */
	orr	r3, r2, ip, lsr #24	/* r3 = 3456 */
	mov	r2, ip, lsl #8		/* r2 = 789. */
	orr	r2, r2, r1, lsr #8	/* r2 = 789A */
#else
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	strh	r2, [r0, #0x01]
	mov	r2, r3, lsr #8		/* r2 = .543 */
	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
	mov	r2, ip, lsr #8		/* r2 = .987 */
	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
#endif
	str	r3, [r0, #0x03]
	str	r2, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234 LE:r3 = 4321 */
	ldr	ip, [r1, #0x05]		/* BE:ip = 5678 LE:ip = 8765 */
	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx LE:r1 = xBA9 */
	strb	r2, [r0]
#ifdef __ARMEB__
	mov	r2, r3, lsr #16		/* r2 = ..12 */
	strh	r2, [r0, #0x01]
	mov	r3, r3, lsl #16		/* r3 = 34.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 3456 */
	mov	ip, ip, lsl #16		/* ip = 78.. */
	orr	ip, ip, r1, lsr #16	/* ip = 789A */
	mov	r1, r1, lsr #8		/* r1 = .9AB */
#else
	strh	r3, [r0, #0x01]
	mov	r3, r3, lsr #16		/* r3 = ..43 */
	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
	mov	ip, ip, lsr #16		/* ip = ..87 */
	orr	ip, ip, r1, lsl #16	/* ip = A987 */
	mov	r1, r1, lsr #16		/* r1 = ..xB */
#endif
	str	r3, [r0, #0x03]
	str	ip, [r0, #0x07]
	strb	r1, [r0, #0x0b]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1000: dst is 16-bit aligned, src is 32-bit aligned
 */
	ldr	ip, [r1]		/* BE:ip = 0123 LE:ip = 3210 */
	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567 LE:r3 = 7654 */
	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB LE:r2 = BA98 */
	mov	r1, ip, lsr #16		/* BE:r1 = ..01 LE:r1 = ..32 */
#ifdef __ARMEB__
	strh	r1, [r0]
	mov	r1, ip, lsl #16		/* r1 = 23.. */
	orr	r1, r1, r3, lsr #16	/* r1 = 2345 */
	mov	r3, r3, lsl #16		/* r3 = 67.. */
	orr	r3, r3, r2, lsr #16	/* r3 = 6789 */
#else
	strh	ip, [r0]
	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
	mov	r3, r3, lsr #16		/* r3 = ..76 */
	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
	mov	r2, r2, lsr #16		/* r2 = ..BA */
#endif
	str	r1, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r2, [r0, #0x0a]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
 */
	ldr	r2, [r1, #-1]		/* BE:r2 = x012 LE:r2 = 210x */
	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456 LE:r3 = 6543 */
	mov	ip, r2, lsr #8		/* BE:ip = .x01 LE:ip = .210 */
	strh	ip, [r0]
	ldr	ip, [r1, #0x07]		/* BE:ip = 789A LE:ip = A987 */
	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
#ifdef __ARMEB__
	mov	r2, r2, lsl #24		/* r2 = 2... */
	orr	r2, r2, r3, lsr #8	/* r2 = 2345 */
	mov	r3, r3, lsl #24		/* r3 = 6... */
	orr	r3, r3, ip, lsr #8	/* r3 = 6789 */
	orr	r1, r1, ip, lsl #8	/* r1 = 89AB */
#else
	mov	r2, r2, lsr #24		/* r2 = ...2 */
	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
	mov	r3, r3, lsr #24		/* r3 = ...6 */
	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
	mov	r1, r1, lsl #8		/* r1 = ..B. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
#endif
	str	r2, [r0, #0x02]
	str	r3, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1010: dst is 16-bit aligned, src is 16-bit aligned
 */
	ldrh	r2, [r1]
	ldr	r3, [r1, #0x02]
	ldr	ip, [r1, #0x06]
	ldrh	r1, [r1, #0x0a]
	strh	r2, [r0]
	str	r3, [r0, #0x02]
	str	ip, [r0, #0x06]
	strh	r1, [r0, #0x0a]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
 */
	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx LE:r2 = xBA9 */
	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678 LE:r3 = 8765 */
	mov	ip, r2, lsr #8		/* BE:ip = .9AB LE:ip = .xBA */
	strh	ip, [r0, #0x0a]
	ldr	ip, [r1, #0x01]		/* BE:ip = 1234 LE:ip = 4321 */
	ldrb	r1, [r1]		/* r1 = ...0 */
#ifdef __ARMEB__
	mov	r2, r2, lsr #24		/* r2 = ...9 */
	orr	r2, r2, r3, lsl #8	/* r2 = 6789 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 2345 */
	mov	r1, r1, lsl #8		/* r1 = ..0. */
	orr	r1, r1, ip, lsr #24	/* r1 = ..01 */
#else
	mov	r2, r2, lsl #24		/* r2 = 9... */
	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
	mov	r3, r3, lsl #24		/* r3 = 5... */
	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
#endif
	str	r2, [r0, #0x06]
	str	r3, [r0, #0x02]
	strh	r1, [r0]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 */
	ldr	r2, [r1]		/* BE:r2 = 0123 LE:r2 = 3210 */
	ldr	ip, [r1, #0x04]		/* BE:ip = 4567 LE:ip = 7654 */
	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB LE:r1 = BA98 */
#ifdef __ARMEB__
	mov	r3, r2, lsr #24		/* r3 = ...0 */
	strb	r3, [r0]
	mov	r2, r2, lsl #8		/* r2 = 123. */
	orr	r2, r2, ip, lsr #24	/* r2 = 1234 */
	str	r2, [r0, #0x01]
	mov	r2, ip, lsl #8		/* r2 = 567. */
	orr	r2, r2, r1, lsr #24	/* r2 = 5678 */
	str	r2, [r0, #0x05]
	mov	r2, r1, lsr #8		/* r2 = ..9A */
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
#else
	strb	r2, [r0]
	mov	r3, r2, lsr #8		/* r3 = .321 */
	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
	str	r3, [r0, #0x01]
	mov	r3, ip, lsr #8		/* r3 = .765 */
	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
	str	r3, [r0, #0x05]
	mov	r1, r1, lsr #8		/* r1 = .BA9 */
	strh	r1, [r0, #0x09]
	mov	r1, r1, lsr #16		/* r1 = ...B */
	strb	r1, [r0, #0x0b]
#endif
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 */
	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A LE:r3 = A987 */
	ldr	ip, [r1, #0x03]		/* BE:ip = 3456 LE:ip = 6543 */
	ldr	r1, [r1, #-1]		/* BE:r1 = x012 LE:r1 = 210x */
	strb	r2, [r0, #0x0b]
#ifdef __ARMEB__
	strh	r3, [r0, #0x09]
	mov	r3, r3, lsr #16		/* r3 = ..78 */
	orr	r3, r3, ip, lsl #16	/* r3 = 5678 */
	mov	ip, ip, lsr #16		/* ip = ..34 */
	orr	ip, ip, r1, lsl #16	/* ip = 1234 */
	mov	r1, r1, lsr #16		/* r1 = ..x0 */
#else
	mov	r2, r3, lsr #16		/* r2 = ..A9 */
	strh	r2, [r0, #0x09]
	mov	r3, r3, lsl #16		/* r3 = 87.. */
	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
	mov	ip, ip, lsl #16		/* ip = 43.. */
	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
	mov	r1, r1, lsr #8		/* r1 = .210 */
#endif
	str	r3, [r0, #0x05]
	str	ip, [r0, #0x01]
	strb	r1, [r0]
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 */
#ifdef __ARMEB__
	ldrh	r2, [r1, #0x0a]		/* r2 = ..AB */
	ldr	ip, [r1, #0x06]		/* ip = 6789 */
	ldr	r3, [r1, #0x02]		/* r3 = 2345 */
	ldrh	r1, [r1]		/* r1 = ..01 */
	strb	r2, [r0, #0x0b]
	mov	r2, r2, lsr #8		/* r2 = ...A */
	orr	r2, r2, ip, lsl #8	/* r2 = 789A */
	mov	ip, ip, lsr #8		/* ip = .678 */
	orr	ip, ip, r3, lsl #24	/* ip = 5678 */
	mov	r3, r3, lsr #8		/* r3 = .234 */
	orr	r3, r3, r1, lsl #24	/* r3 = 1234 */
	mov	r1, r1, lsr #8		/* r1 = ...0 */
	strb	r1, [r0]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
#else
	ldrh	r2, [r1]		/* r2 = ..10 */
	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
	ldr	ip, [r1, #0x06]		/* ip = 9876 */
	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
	strb	r2, [r0]
	mov	r2, r2, lsr #8		/* r2 = ...1 */
	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
	mov	r3, r3, lsr #24		/* r3 = ...5 */
	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
	mov	ip, ip, lsr #24		/* ip = ...9 */
	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
	mov	r1, r1, lsr #8		/* r1 = ...B */
	str	r2, [r0, #0x01]
	str	r3, [r0, #0x05]
	strh	ip, [r0, #0x09]
	strb	r1, [r0, #0x0b]
#endif
	bx	lr
	LMEMCPY_C_PAD

/*
 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 */
	ldrb	r2, [r1]
	ldr	r3, [r1, #0x01]
	ldr	ip, [r1, #0x05]
	strb	r2, [r0]
	ldrh	r2, [r1, #0x09]
	ldrb	r1, [r1, #0x0b]
	str	r3, [r0, #0x01]
	str	ip, [r0, #0x05]
	strh	r2, [r0, #0x09]
	strb	r1, [r0, #0x0b]
	bx	lr
#endif /* !_STANDALONE */