/*	$OpenBSD: memcpy.S,v 1.6 2015/06/08 14:22:05 jsg Exp $	*/
/*	$NetBSD: memcpy.S,v 1.2 2001/11/20 00:29:20 chris Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it :
 *
 * The core code is implemented here and is entered through both the
 * memcpy() and memmove() entry points.
 *
 * All local labels are prefixed with Lmemcpy_
 * Following the prefix a label starting f is used in the forward copy code
 * while a label using b is used in the backwards copy code
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy.
 * unaligned source address
 * unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3 although r0-r2 have defined uses i.e. dest, src, len through out.
 * Additional registers are preserved prior to use i.e. r4, r5 & lr
 *
 * Register use:
 *   In:    r0 = dest, r1 = src, r2 = len
 *   Out:   r0 = dest as passed in (saved on the stack at entry and
 *          reloaded by every return that follows the save)
 *   Trash: r1-r3, r12, flags
 *   Saved: r4, r5 (pushed only around the loops that borrow them),
 *          lr (pushed at entry; the return pops it straight into pc)
 *
 * Within the copy code r2 is kept biased below the number of bytes that
 * remain; the comment at each label states the bias in force there.
 */

.syntax unified

ENTRY(memcpy)
ENTRY_NP(memmove)
	/* Determine copy direction */
	cmp	r1, r0

	/* src == dest: nothing to move, r0 already holds the result */
	moveq	pc, lr

	/* save leaf functions having to store this away */
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	/* flags are still those of the cmp: src below dest copies backwards */
	bcc	Lmemcpy_backwards

	/* start of forwards copy */
	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	Lmemcpy_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	Lmemcpy_fdestul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	Lmemcpy_fsrcul		/* oh unaligned source addr */

Lmemcpy_ft8:
	/* We have aligned source and destination; r2 = remaining - 4 */
	subs	r2, r2, #8
	blt	Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time; r2 = remaining - 32 */
	/* XXX for really big copies perhaps we should use more registers */
Lmemcpy_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	Lmemcpy_floop32

	cmn	r2, #0x10		/* at least 16 bytes still to go? */
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

Lmemcpy_fl32:
	adds	r2, r2, #0x14		/* r2 = remaining - 12 */

	/* blat 12 bytes at a time */
Lmemcpy_floop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	Lmemcpy_floop12

Lmemcpy_fl12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	Lmemcpy_fl4

	/* 4..11 bytes left: move one word if fewer than 8, else two */
	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

Lmemcpy_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = remaining (0..3) */
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time: 1 always, 2nd if >= 2, 3rd if 3 */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
Lmemcpy_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes up to next word boundary */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	Lmemcpy_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	Lmemcpy_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * r12 = src & 3 (1..3).  r1 is rounded down to a word boundary and
	 * the word holding the first source bytes is preloaded into lr.
	 * Each destination word is then assembled from the top of the
	 * previous source word and the bottom of the next one.
	 * NOTE(review): the shift directions used match a little-endian
	 * byte layout - confirm for the target.
	 */
Lmemcpy_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	Lmemcpy_fsrcul3
	beq	Lmemcpy_fsrcul2

	/* source is 1 byte past a word boundary */
	cmp	r2, #0x0c
	blt	Lmemcpy_fsrcul1loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

Lmemcpy_fsrcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	Lmemcpy_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	Lmemcpy_fsrcul1l4

Lmemcpy_fsrcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	Lmemcpy_fsrcul1loop4

Lmemcpy_fsrcul1l4:
	/* r1 is a word past the last load; step back to the next unread byte */
	sub	r1, r1, #3
	b	Lmemcpy_fl4

	/* source is 2 bytes past a word boundary */
Lmemcpy_fsrcul2:
	cmp	r2, #0x0c
	blt	Lmemcpy_fsrcul2loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

Lmemcpy_fsrcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	Lmemcpy_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	Lmemcpy_fsrcul2l4

Lmemcpy_fsrcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	Lmemcpy_fsrcul2loop4

Lmemcpy_fsrcul2l4:
	/* step r1 back to the next unread source byte */
	sub	r1, r1, #2
	b	Lmemcpy_fl4

	/* source is 3 bytes past a word boundary */
Lmemcpy_fsrcul3:
	cmp	r2, #0x0c
	blt	Lmemcpy_fsrcul3loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

Lmemcpy_fsrcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	Lmemcpy_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	Lmemcpy_fsrcul3l4

Lmemcpy_fsrcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	Lmemcpy_fsrcul3loop4

Lmemcpy_fsrcul3l4:
	/* step r1 back to the next unread source byte */
	sub	r1, r1, #1
	b	Lmemcpy_fl4

Lmemcpy_backwards:
	/* point r1/r0 just past the end of each buffer and work downwards */
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	Lmemcpy_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	Lmemcpy_bdestul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	Lmemcpy_bsrcul		/* oh unaligned source addr */

Lmemcpy_bt8:
	/* We have aligned source and destination; r2 = remaining - 4 */
	subs	r2, r2, #8
	blt	Lmemcpy_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */
	subs	r2, r2, #0x14
	blt	Lmemcpy_bl32		/* less than 32 bytes (12 from above) */

	/* blat 32 bytes at a time; r2 = remaining - 32 */
	/* XXX for really big copies perhaps we should use more registers */
Lmemcpy_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	Lmemcpy_bloop32

Lmemcpy_bl32:
	cmn	r2, #0x10		/* at least 16 bytes still to go? */
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14		/* r2 = remaining - 12 */
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4}		/* return r4 */

Lmemcpy_bl12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	Lmemcpy_bl4

	/* 4..11 bytes left: move one word if fewer than 8, else two */
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

Lmemcpy_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = remaining (0..3) */
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time: 1 always, 2nd if >= 2, 3rd if 3 */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
Lmemcpy_bdestul:
	/* r12 = dest & 3 = bytes down to the previous word boundary */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	Lmemcpy_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	Lmemcpy_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * r12 = src & 3 (1..3).  r1 is rounded down to a word boundary and
	 * the word holding the last source bytes is preloaded into r3.
	 * Each destination word is then assembled from the bottom of the
	 * previous source word and the top of the next lower one.
	 * NOTE(review): the shift directions used match a little-endian
	 * byte layout - confirm for the target.
	 */
Lmemcpy_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	Lmemcpy_bsrcul1
	beq	Lmemcpy_bsrcul2

	/* source end is 3 bytes past a word boundary */
	cmp	r2, #0x0c
	blt	Lmemcpy_bsrcul3loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

Lmemcpy_bsrcul3loop16:
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	Lmemcpy_bsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	Lmemcpy_bsrcul3l4

Lmemcpy_bsrcul3loop4:
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	Lmemcpy_bsrcul3loop4

Lmemcpy_bsrcul3l4:
	/* r1 is at the last word loaded; step up past its unread bytes */
	add	r1, r1, #3
	b	Lmemcpy_bl4

	/* source end is 2 bytes past a word boundary */
Lmemcpy_bsrcul2:
	cmp	r2, #0x0c
	blt	Lmemcpy_bsrcul2loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

Lmemcpy_bsrcul2loop16:
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	Lmemcpy_bsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	Lmemcpy_bsrcul2l4

Lmemcpy_bsrcul2loop4:
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	Lmemcpy_bsrcul2loop4

Lmemcpy_bsrcul2l4:
	/* step r1 up past the unread bytes of the last word loaded */
	add	r1, r1, #2
	b	Lmemcpy_bl4

	/* source end is 1 byte past a word boundary */
Lmemcpy_bsrcul1:
	cmp	r2, #0x0c
	blt	Lmemcpy_bsrcul1loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

Lmemcpy_bsrcul1loop16:
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	Lmemcpy_bsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	Lmemcpy_bsrcul1l4

Lmemcpy_bsrcul1loop4:
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	Lmemcpy_bsrcul1loop4

Lmemcpy_bsrcul1l4:
	/* step r1 up past the unread byte of the last word loaded */
	add	r1, r1, #1
	b	Lmemcpy_bl4