/* $NetBSD: bcopy.S,v 1.1 2016/08/05 15:06:02 scole Exp $ */

/*-
 * Copyright (c) 2000 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
27*b83ebebaSscole * 28*b83ebebaSscole * __FBSDID("$FreeBSD: releng/10.1/lib/libc/ia64/string/bcopy.S 125669 2004-02-10 20:45:28Z cperciva $"); 29*b83ebebaSscole */ 30*b83ebebaSscole 31*b83ebebaSscole#include <machine/asm.h> 32*b83ebebaSscole 33*b83ebebaSscole/* 34*b83ebebaSscole * void 35*b83ebebaSscole * bcopy(const void *src, void *dst, size_t len); 36*b83ebebaSscole */ 37*b83ebebaSscole 38*b83ebebaSscole/* 39*b83ebebaSscole * Not the fastest bcopy in the world. 40*b83ebebaSscole */ 41*b83ebebaSscoleENTRY(bcopy, 3) 42*b83ebebaSscole 43*b83ebebaSscole cmp.le p6,p0=in2,r0 // bail if len <= 0 44*b83ebebaSscole(p6) br.ret.spnt.few rp 45*b83ebebaSscole 46*b83ebebaSscole sub r14=in1,in0 ;; // check for overlap 47*b83ebebaSscole cmp.ltu p6,p0=r14,in2 // dst-src < len 48*b83ebebaSscole(p6) br.cond.spnt.few 5f 49*b83ebebaSscole 50*b83ebebaSscole extr.u r14=in0,0,3 // src & 7 51*b83ebebaSscole extr.u r15=in1,0,3 ;; // dst & 7 52*b83ebebaSscole cmp.eq p6,p0=r14,r15 // different alignment? 53*b83ebebaSscole(p6) br.cond.spnt.few 2f // branch if same alignment 54*b83ebebaSscole 55*b83ebebaSscole1: ld1 r14=[in0],1 ;; // copy bytewise 56*b83ebebaSscole st1 [in1]=r14,1 57*b83ebebaSscole add in2=-1,in2 ;; // len-- 58*b83ebebaSscole cmp.ne p6,p0=r0,in2 59*b83ebebaSscole(p6) br.cond.dptk.few 1b // loop 60*b83ebebaSscole br.ret.sptk.few rp // done 61*b83ebebaSscole 62*b83ebebaSscole2: cmp.eq p6,p0=r14,r0 // aligned? 63*b83ebebaSscole(p6) br.cond.sptk.few 4f 64*b83ebebaSscole 65*b83ebebaSscole3: ld1 r14=[in0],1 ;; // copy bytewise 66*b83ebebaSscole st1 [in1]=r14,1 67*b83ebebaSscole extr.u r15=in0,0,3 // src & 7 68*b83ebebaSscole add in2=-1,in2 ;; // len-- 69*b83ebebaSscole cmp.eq p6,p0=r0,in2 // done? 70*b83ebebaSscole cmp.eq p7,p0=r0,r15 ;; // aligned now? 
71*b83ebebaSscole(p6) br.ret.spnt.few rp // return if done 72*b83ebebaSscole(p7) br.cond.spnt.few 4f // go to main copy 73*b83ebebaSscole br.cond.sptk.few 3b // more bytes to copy 74*b83ebebaSscole 75*b83ebebaSscole // At this point, in2 is non-zero 76*b83ebebaSscole 77*b83ebebaSscole4: mov r14=8 ;; 78*b83ebebaSscole cmp.ltu p6,p0=in2,r14 ;; // len < 8? 79*b83ebebaSscole(p6) br.cond.spnt.few 1b // byte copy the end 80*b83ebebaSscole ld8 r15=[in0],8 ;; // copy word 81*b83ebebaSscole st8 [in1]=r15,8 82*b83ebebaSscole add in2=-8,in2 ;; // len -= 8 83*b83ebebaSscole cmp.ne p6,p0=r0,in2 // done? 84*b83ebebaSscole(p6) br.cond.spnt.few 4b // again 85*b83ebebaSscole 86*b83ebebaSscole br.ret.sptk.few rp // return 87*b83ebebaSscole 88*b83ebebaSscole // Don't bother optimising overlap case 89*b83ebebaSscole 90*b83ebebaSscole5: add in0=in0,in2 91*b83ebebaSscole add in1=in1,in2 ;; 92*b83ebebaSscole add in0=-1,in0 93*b83ebebaSscole add in1=-1,in1 ;; 94*b83ebebaSscole 95*b83ebebaSscole6: ld1 r14=[in0],-1 ;; 96*b83ebebaSscole st1 [in1]=r14,-1 97*b83ebebaSscole add in2=-1,in2 ;; 98*b83ebebaSscole cmp.ne p6,p0=r0,in2 99*b83ebebaSscole(p6) br.cond.spnt.few 6b 100*b83ebebaSscole 101*b83ebebaSscole br.ret.sptk.few rp 102*b83ebebaSscoleEND(bcopy) 103