/*	$NetBSD: memcpy_arm.S,v 1.5 2013/12/02 21:21:33 joerg Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it :
 *
 * The whole of memcpy() is implemented here.
 *
 * All local labels are prefixed with .Lmemcpy_
 * Only a forward (ascending address) copy is performed: every load and
 * store below post-increments its pointer, and the source and destination
 * addresses are never compared with each other.
 * Separate bits of code are used to deal with the following situations:
 *	unaligned source address	(.Lmemcpy_srcul, .Lmemcpy_srcul[123]*)
 *	unaligned destination address	(.Lmemcpy_destul)
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes per
 * loop iteration where possible.
 *
 * Register roles:
 *	r0	destination pointer (advanced as bytes are stored)
 *	r1	source pointer (advanced as bytes are loaded)
 *	r2	byte count, kept biased: on arrival at each label it holds
 *		"bytes remaining minus N" so that the sign of the last
 *		subs/adds directly answers "are there at least N left?"
 *	r3, r12 (aka ip), lr	scratch
 *	r4, r5	scratch, pushed before use and popped afterwards
 *
 * The original r0 and lr are pushed on entry; every exit pops them back
 * into r0 (the return value) and pc.
 *
 * NOTE(review): all length tests use signed conditions (blt/bge), so a
 * len of 2^31 or more is treated as "less than 4 bytes" by the first test.
 *
 * Apologies for the state of the comments ;-)
 */
/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
ENTRY(memcpy)
	/*
	 * Save dst so each exit can pop it straight back into r0, and save
	 * lr so that it can be used as a scratch register below.
	 */
	push	{r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3		/* r12 = dst & 3 */
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3		/* r12 = src & 3 */
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination; r2 = remaining - 4 */
	subs	r2, r2, #8		/* r2 = remaining - 12 */
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14		/* r2 = remaining - 32 */
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	push	{r4}			/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	/* r2 = remaining - 32 (negative); ge below means remaining >= 16 */
	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	pop	{r4}			/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14		/* r2 = remaining - 12 */

	/*
	 * blat 12 bytes at a time
	 * The flags from the adds above (or from the subsge of the previous
	 * pass) gate every instruction; once "lt" holds nothing executes
	 * and the bge falls through.
	 */
.Lmemcpy_loop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	/* r2 = remaining - 12, and remaining < 12 */
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemcpy_l4

	/* 4..11 bytes left: copy one word (lt) or two words (ge) */
	subs	r2, r2, #4		/* r2 = remaining - 8 */
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4		/* either way r2 = remaining - 4 again */

.Lmemcpy_l4:
	/* less than 4 bytes to go; r2 = remaining - 4 on arrival */
	adds	r2, r2, #4		/* r2 = remaining (0..3) */
	/*
	 * NOTE(review): the macro tested below ends in a single underscore;
	 * GCC's historical predefine is believed to be __APCS_26__, so the
	 * first branch is probably never selected.  Left as found - confirm
	 * before changing.
	 */
#ifdef __APCS_26_
	ldmiaeq	sp!, {r0, pc}^		/* done */
#else
	popeq	{r0, pc}		/* done */
#endif
	/*
	 * copy the crud byte at a time
	 * r2 is 1..3 here: always one byte, a second if r2 >= 2 and a third
	 * if r2 > 2.  ldrb/strb leave the flags from the cmp untouched.
	 */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	pop	{r0, pc}

	/* erg - unaligned destination; r12 = dst & 3 (1..3), len >= 4 */
.Lmemcpy_destul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dst */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12		/* r2 = remaining - 4 */
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3		/* r12 = src & 3 */
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * dst is word aligned, r12 = src & 3 (1..3), r2 = remaining - 4.
	 * r1 is rounded down to a word boundary and the source is read one
	 * aligned word at a time.  lr always holds the most recently loaded
	 * word, part of which has not been stored yet; each output word is
	 * built by shifting those leftover bytes down and OR-ing in the
	 * leading bytes of the next word.  Big-endian builds (__ARMEB__)
	 * use the mirrored shift directions.
	 */
.Lmemcpy_srcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3		/* src & 3 == 3 */
	beq	.Lmemcpy_srcul2		/* src & 3 == 2 */
	/* src & 3 == 1: three bytes of lr are still to be stored */
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	push	{r4, r5}

.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	/* r1 is one word ahead; step back over the 3 bytes of lr not stored */
	sub	r1, r1, #3
	b	.Lmemcpy_l4

.Lmemcpy_srcul2:
	/* src & 3 == 2: two bytes of lr are still to be stored */
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	push	{r4, r5}

.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	/* r1 is one word ahead; step back over the 2 bytes of lr not stored */
	sub	r1, r1, #2
	b	.Lmemcpy_l4

.Lmemcpy_srcul3:
	/* src & 3 == 3: one byte of lr is still to be stored */
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4	/* fewer than 16 bytes left */
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	push	{r4, r5}

.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	pop	{r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	/* r1 is one word ahead; step back over the 1 byte of lr not stored */
	sub	r1, r1, #1
	b	.Lmemcpy_l4
END(memcpy)