/*	$OpenBSD: _memcpy.S,v 1.7 2017/10/29 02:21:33 guenther Exp $	*/
/*	$NetBSD: _memcpy.S,v 1.4 2003/04/05 23:08:52 bjh21 Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "DEFS.h"

/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it :
 *
 * The core code is implemented here with simple stubs for memcpy()
 * memmove() and bcopy().
 *
 * All local labels are prefixed with Lmemcpy_
 * Following the prefix a label starting f is used in the forward copy code
 * while a label using b is used in the backwards copy code
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy.
 * unaligned source address
 * unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3 although r0-r2 have defined uses i.e. dest, src, len through out.
 * Additional registers are preserved prior to use i.e. r4, r5 & lr
 *
 * Apologies for the state of the comments ;-)
 */

.syntax unified

.hidden _memcpy

/*
 * _memcpy
 *
 * In:	r0 = destination address
 *	r1 = source address
 *	r2 = length in bytes
 *
 * Direction: when src < dest (unsigned compare) the copy runs from the
 * highest address downwards, otherwise from the lowest address upwards.
 * When src == dest nothing is copied.
 *
 * Out:	forward path:     r0 is reloaded with the destination saved on entry
 *	src == dest path: r0 = 0
 *	backward path:    r0 is not reloaded
 *	NOTE(review): callers presumably keep their own copy of the
 *	destination instead of relying on r0 -- confirm against the
 *	memcpy()/memmove()/bcopy() stubs.
 *
 * Scratch: r1-r3, r12 (ip) and the flags may be changed.  r4 and r5 are
 * pushed before use and popped afterwards.  On the forward path the return
 * address lives on the stack (lr itself is used as a data register); on
 * the backward path lr is pushed/popped around every use because the
 * routine returns through it.
 *
 * Throughout the body r2 holds "bytes remaining minus a bias"; the bias
 * is noted at each stage so that the sign of r2 answers "is there at
 * least one more chunk of this size?".
 */
ENTRY(_memcpy)
	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemcpy_backwards	/* src < dest: copy from the top down */

	/*
	 * Flags still reflect "cmp r1, r0": EQ means src == dest, so there
	 * is nothing to copy.  r2 has not been looked at yet; a zero length
	 * is handled further down by the "less than 4 bytes" tails.
	 */
	moveq	r0, #0
	moveq	pc, lr

	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */
	subs	r2, r2, #4		/* r2 = remaining - 4 */
	blt	.Lmemcpy_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_fsrcul		/* oh unaligned source addr */

.Lmemcpy_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8		/* r2 = remaining - 12 */
	blt	.Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14		/* r2 = remaining - 32 */
	blt	.Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_floop32

	cmn	r2, #0x10		/* GE when at least 16 bytes remain */
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_fl32:
	adds	r2, r2, #0x14		/* r2 = remaining - 12 */

	/* blat 12 bytes at a time */
	/* (entered with the flags from the adds above, hence the ge forms) */
.Lmemcpy_floop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_floop12

.Lmemcpy_fl12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemcpy_fl4

	/* 4..11 bytes left: move one word (LT) or two words (GE) */
	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = remaining (0..3) */
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2			/* 1 byte always, 2nd if GE, 3rd if GT */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_fdestul:
	rsb	r12, r12, #4		/* r12 = bytes up to the next word boundary */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_fsrcul:
	/*
	 * r12 = src & 3 (1..3).  Round r1 down to a word boundary and
	 * preload the word holding the first source byte into lr.  Every
	 * output word is then built from the unused tail of the previous
	 * source word and the head of the next one.  The lsr/lsl pairing
	 * treats the lowest-addressed byte as the least significant one,
	 * i.e. it is written for little-endian data.
	 */
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_fsrcul3
	beq	.Lmemcpy_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul1loop4	/* fewer than 16 bytes: word loop only */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_fsrcul1l4

.Lmemcpy_fsrcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul1loop4

.Lmemcpy_fsrcul1l4:
	/* r1 is one word past the preloaded word; back up to its 2nd byte */
	sub	r1, r1, #3
	b	.Lmemcpy_fl4

.Lmemcpy_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul2loop4	/* fewer than 16 bytes: word loop only */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_fsrcul2l4

.Lmemcpy_fsrcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul2loop4

.Lmemcpy_fsrcul2l4:
	/* r1 is one word past the preloaded word; back up to its 3rd byte */
	sub	r1, r1, #2
	b	.Lmemcpy_fl4

.Lmemcpy_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul3loop4	/* fewer than 16 bytes: word loop only */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_fsrcul3l4

.Lmemcpy_fsrcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul3loop4

.Lmemcpy_fsrcul3l4:
	/* r1 is one word past the preloaded word; back up to its 4th byte */
	sub	r1, r1, #1
	b	.Lmemcpy_fl4

.Lmemcpy_backwards:
	/* Point both registers one past the end and work downwards */
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4		/* r2 = remaining - 4 */
	blt	.Lmemcpy_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_bsrcul		/* oh unaligned source addr */

.Lmemcpy_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8		/* r2 = remaining - 12 */
	blt	.Lmemcpy_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}		/* lr doubles as a data register below */
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemcpy_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_bloop32

.Lmemcpy_bl32:
	cmn	r2, #0x10		/* GE when at least 16 bytes remain */
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14		/* r2 = remaining - 12 */
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemcpy_bl12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemcpy_bl4
	/* 4..11 bytes left: move one word (LT) or two words (GE) */
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = remaining (0..3) */
	moveq	pc, lr			/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2			/* 1 byte always, 2nd if GE, 3rd if GT */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	mov	pc, lr

	/* erg - unaligned destination */
.Lmemcpy_bdestul:
	/* r12 = (dest end) & 3 = bytes down to the previous word boundary */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemcpy_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemcpy_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_bsrcul:
	/*
	 * r12 = (src end) & 3 (1..3).  Round r1 down to a word boundary and
	 * preload that word into r3; its low r12 bytes are the last source
	 * bytes.  Output words are assembled as in the forward case, with
	 * the shift directions mirrored.
	 */
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemcpy_bsrcul1
	beq	.Lmemcpy_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul3loop4	/* fewer than 16 bytes: word loop only */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul3loop16:
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_bsrcul3l4

.Lmemcpy_bsrcul3loop4:
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul3loop4

.Lmemcpy_bsrcul3l4:
	/* r1 is at the preloaded word; its low 3 bytes are still unread */
	add	r1, r1, #3
	b	.Lmemcpy_bl4

.Lmemcpy_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul2loop4	/* fewer than 16 bytes: word loop only */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul2loop16:
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_bsrcul2l4

.Lmemcpy_bsrcul2loop4:
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul2loop4

.Lmemcpy_bsrcul2l4:
	/* r1 is at the preloaded word; its low 2 bytes are still unread */
	add	r1, r1, #2
	b	.Lmemcpy_bl4

.Lmemcpy_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul1loop4	/* fewer than 16 bytes: word loop only */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5, lr}

	/* 16 bytes per iteration, named to match the sibling loops */
.Lmemcpy_bsrcul1loop16:
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul1loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_bsrcul1l4

.Lmemcpy_bsrcul1loop4:
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul1loop4

.Lmemcpy_bsrcul1l4:
	/* r1 is at the preloaded word; its low byte is still unread */
	add	r1, r1, #1
	b	.Lmemcpy_bl4
END(_memcpy)