/*	$NetBSD: memset_arm.S,v 1.2 2013/01/14 19:15:13 matt Exp $	*/

/*-
 * Copyright (c) 2012 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include <machine/asm.h>

/*
 * STORE8 / STORE16 / STORE32 write 8, 16 or 32 bytes of fill at [ip] and
 * advance ip by the same amount.  Exactly one variant is selected at build
 * time:
 *   NEON                - vst1.32 from d0-d3 with a 64-bit alignment hint
 *   VFP                 - vstmia from d0-d3
 *   _ARM_ARCH_DWORD_OK  - strd of the r2/r3 pair
 *   otherwise           - stmia of r2 and r3
 * The two integer variants need the fill in BOTH r2 and r3; every variant
 * needs ip to be doubleword aligned.
 */
#if defined(NEON)
#define	STORE8		vst1.32 {d0}, [ip:64]!
#define	STORE16		vst1.32 {d0-d1}, [ip:64]!
#define	STORE32		vst1.32 {d0-d3}, [ip:64]!
#elif defined(VFP)
#define	STORE8		vstmia ip!, {d0}
#define	STORE16		vstmia ip!, {d0-d1}
#define	STORE32		vstmia ip!, {d0-d3}
#elif defined(_ARM_ARCH_DWORD_OK)
#define	STORE8		strd r2, [ip], #8
#define	STORE16		STORE8; STORE8
#define	STORE32		STORE16; STORE16
#else
#define	STORE8		stmia ip!, {r2,r3}
#define	STORE16		STORE8; STORE8
#define	STORE32		STORE16; STORE16
#endif

/*
 * memset: Sets a block of memory to the specified value, using whichever
 * of the NEON, VFP, strd or stmia store variants was selected above.
 *
 * On entry:
 *	r0 - dest address
 *	r1 - byte to write
 *	r2 - number of bytes to write
 *
 * On exit:
 *	r0 - dest address
 *
 * Register roles inside the routine:
 *	ip - write cursor (r0 is left untouched so it can be returned)
 *	r1 - bytes remaining; once .Lmemset_mainloop is entered it is
 *	     biased by -64
 *	r3 - fill byte replicated into all four byte lanes; never modified
 *	     after the set-up below
 *	r2 - scratch during alignment, otherwise a second copy of the fill
 *	     for the integer STOREn variants
 *	d0-d3 - fill pattern (NEON and VFP builds only)
 *
 * ENTRY, END, RET and RETc are not defined in this file; they are
 * expected to come from <machine/asm.h>.
 */
/* LINTSTUB: Func: void *memset(void *, int, size_t) */
ENTRY(memset)
	ands		r3, r1, #0xff	/* We deal with bytes */
	orrne		r3, r3, r3, lsl #8	/* replicate to low halfword */
	orrne		r3, r3, r3, lsl #16	/* replicate to all bytes */
	movs		r1, r2		/* length to r1; r2 & r3 are needed for fill */
	RETc(eq)			/* return if length is 0 */
	mov		ip, r0		/* r0 needs to stay the same */

	/*
	 * The length is a size_t, so every length test in this routine
	 * uses the unsigned conditions (lo/hs).
	 */
	cmp		r1, #12		/* is this a small memset? */
	blo		.Lbyte_by_byte	/* then do it byte by byte */

	/*
	 * First doubleword-align the address.  At least 12 bytes remain,
	 * so the (at most 7) alignment bytes cannot exceed the length.
	 */
	ands		r2, ip, #7	/* grab the bottom three bits */
	beq		.Lmemset_dwordaligned	/* already dword aligned */

	rsb		r2, r2, #8	/* how far until dword aligned? */
	sub		r1, r1, r2	/* subtract it from remaining length */
	mov		r2, r3		/* duplicate fill value */

	tst		ip, #1		/* halfword aligned? */
	strneb		r3, [ip], #1	/* no, write a byte */
	tst		ip, #2		/* word aligned? */
	strneh		r3, [ip], #2	/* no, write a halfword */
	tst		ip, #4		/* dword aligned? */
	strne		r3, [ip], #4	/* no, write a word */

	/* We are now doubleword aligned */
.Lmemset_dwordaligned:
#if defined(NEON)
	vdup.8		q0, r3		/* move fill to SIMD (d0-d1) */
	vmov		q1, q0		/* put fill in q1 (d2-d3) */
#elif defined(VFP)
	mov		r2, r3		/* duplicate fill value */
	vmov		d0, r2, r3	/* move to VFP */
	vmov		d1, r2, r3
	vmov		d2, r2, r3
	vmov		d3, r2, r3
#endif

#if 1
	cmp		r1, #128
	blo		.Lmemset_mainloop
	ands		r2, ip, #63	/* check for 64-byte alignment */
	beq		.Lmemset_mainloop
	/*
	 * Let's align to a 64-byte boundary so that stores don't cross
	 * cacheline boundaries.  We also know we have at least 128 bytes to
	 * set, so we don't have to worry about the length at the moment.
	 */
	rsb		r2, r2, #64	/* how many bytes until 64 bytes */
	sub		r1, r1, r2	/* subtract from remaining length */
#if !defined(NEON) && !defined(VFP)
	mov		r2, r3		/* put fill back in r2 */
#endif

	/*
	 * Each store below must run only when its address bit is set;
	 * otherwise more bytes are written than were deducted from r1.
	 */
	tst		ip, #8		/* 16-byte aligned? */
	beq		1f		/* yes */
	STORE8				/* no, store a dword */
1:	tst		ip, #16		/* 32-byte aligned? */
	beq		2f		/* yes */
	STORE16				/* no, store 16 bytes */
2:	tst		ip, #32		/* 64-byte aligned? */
	beq		.Lmemset_mainloop	/* yes */
	STORE32				/* no, make 64-byte aligned */
#endif

.Lmemset_mainloop:
#if !defined(NEON) && !defined(VFP)
	mov		r2, r3		/* put fill back in r2 */
#endif
	subs		r1, r1, #64	/* subtract an initial 64 */
	blo		.Lmemset_lessthan_64bytes

	/*
	 * 64 bytes per iteration.  The STOREn variants do not alter the
	 * condition flags, so RETc(eq) still sees the result of the
	 * preceding subs.
	 */
3:	STORE32				/* store first 32 bytes */
	STORE32				/* store second 32 bytes */
	RETc(eq)			/* return if done */
	subs		r1, r1, #64	/* subtract another 64 */
	bhs		3b		/* and loop while no borrow */

	/*
	 * Fewer than 64 bytes remain.  r1 still carries the -64 bias, which
	 * leaves its low six bits equal to the remaining count, so the tail
	 * is driven by testing those bits one at a time.
	 * NOTE: because of the bias the upper bits of r1 are all ones here,
	 * so the bics results below are never zero and the RETc(eq) early
	 * exits are not taken; they are kept from the original code.
	 */
.Lmemset_lessthan_64bytes:
	tst		r1, #32		/* do we have 32 bytes left? */
	beq		.Lmemset_lessthan_32bytes
	STORE32				/* yes, store 32 bytes */
	bics		r1, r1, #32	/* subtract 32 */
	RETc(eq)
.Lmemset_lessthan_32bytes:
	tst		r1, #16		/* do we have 16 bytes left? */
	beq		.Lmemset_lessthan_16bytes
	STORE16				/* yes, store 16 bytes */
	bics		r1, r1, #16	/* subtract 16 */
	RETc(eq)
.Lmemset_lessthan_16bytes:
	tst		r1, #8		/* do we have 8 bytes left? */
	beq		.Lmemset_lessthan_8bytes	/* no */
	STORE8				/* yes, store a dword */
	bics		r1, r1, #8	/* subtract 8 */
	RETc(eq)
	/*
	 * The last 0-7 bytes are stored from r3, not r2: in NEON and VFP
	 * builds r2 is used as alignment scratch above and is not
	 * guaranteed to hold the fill at this point.
	 */
.Lmemset_lessthan_8bytes:
	tst		r1, #4		/* do we have a word left? */
	strne		r3, [ip], #4	/* yes, so write one */
	tst		r1, #2		/* do we have a halfword left? */
	strneh		r3, [ip], #2	/* yes, so write one */
	tst		r1, #1		/* do we have a byte left? */
	strneb		r3, [ip], #1	/* yes, so write one */
	RET				/* return */

	/* Only reached with fewer than 12 bytes to write. */
.Lbyte_by_byte:
	subs		r1, r1, #1	/* can we write a byte? */
	RETc(lt)			/* no, we're done */
	strb		r3, [ip], #1	/* yes, so do it */
	b		.Lbyte_by_byte	/* try next byte */
END(memset)