/* $NetBSD: bcopy.S,v 1.1 2005/12/20 19:28:49 christos Exp $ */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
 * added by Chris Demetriou.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *	Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *	School of Computer Science
 *	Carnegie Mellon University
 *	Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include <machine/asm.h>

#if defined(MEMCOPY) || defined(MEMMOVE)
#ifdef MEMCOPY
#define	FUNCTION	memcpy
#else
#define	FUNCTION	memmove
#endif
#define	SRCREG		a1
#define	DSTREG		a0
#else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */

#define	SIZEREG		a2

/*
 * Copy bytes.
 *
 *	void bcopy(char *from, char *to, size_t len);
 *	char *memcpy(void *to, const void *from, size_t len);
 *	char *memmove(void *to, const void *from, size_t len);
 *
 * No matter how invoked, the source and destination registers are used
 * directly for calculation.  There's no point in copying them to
 * "working" registers, since the code uses their values "in place," and
 * copying them would be slower.
 */
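
/*
 * For reference, a rough C-level sketch of what the code below does
 * (illustrative only; it omits the quadword-at-a-time and alignment
 * handling that makes up most of the assembly):
 *
 *	char *memmove(void *to, const void *from, size_t len)
 *	{
 *		char *d = to;
 *		const char *s = from;
 *
 *		if ((size_t)(d - s) >= len) {
 *			while (len-- > 0)
 *				*d++ = *s++;
 *		} else {
 *			while (len-- > 0)
 *				d[len] = s[len];
 *		}
 *		return to;
 *	}
 *
 * The first branch is the non-overlapping (forward) copy; the second is
 * the overlapping (backward) copy.  The same unsigned "dst - src < len"
 * test is what the cmpult below implements.  bcopy(from, to, len) behaves
 * like memmove(to, from, len); that argument-order difference is why
 * SRCREG and DSTREG are defined differently above.
 */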

LEAF(FUNCTION,3)

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	mov	DSTREG,v0
#endif

	/* Check for negative length */
	ble	SIZEREG,bcopy_done

	/*
	 * Check for overlap: if the destination starts within the source
	 * region (unsigned dst - src < len), a forward copy would clobber
	 * source bytes before reading them, so take the backward path.
	 */
	subq	DSTREG,SRCREG,t5
	cmpult	t5,SIZEREG,t5
	bne	t5,bcopy_overlap

	/* a3 = end address */
	addq	SRCREG,SIZEREG,a3

	/* Get the first word */
	ldq_u	t2,0(SRCREG)

	/* Do they have the same alignment? */
	xor	SRCREG,DSTREG,t0
	and	t0,7,t0
	and	DSTREG,7,t1
	bne	t0,bcopy_different_alignment

	/*
	 * src & dst have same alignment.  If they are not quadword-aligned,
	 * build the first quadword from the destination bytes below the
	 * start and the source bytes at and above it, and count from the
	 * aligned base.
	 */
	beq	t1,bcopy_all_aligned

	ldq_u	t3,0(DSTREG)
	addq	SIZEREG,t1,SIZEREG
	mskqh	t2,SRCREG,t2
	mskql	t3,SRCREG,t3
	or	t2,t3,t2

	/* Dst is 8-byte aligned */

bcopy_all_aligned:
	/* If less than 8 bytes, skip the loop */
	subq	SIZEREG,1,t0
	and	SIZEREG,7,SIZEREG
	bic	t0,7,t0
	beq	t0,bcopy_samealign_lp_end

bcopy_samealign_lp:
	stq_u	t2,0(DSTREG)
	addq	DSTREG,8,DSTREG
	ldq_u	t2,8(SRCREG)
	subq	t0,8,t0
	addq	SRCREG,8,SRCREG
	bne	t0,bcopy_samealign_lp

bcopy_samealign_lp_end:
	/* If we're done, exit */
	bne	SIZEREG,bcopy_small_left
	stq_u	t2,0(DSTREG)
	RET

bcopy_small_left:
	mskql	t2,SIZEREG,t4
	ldq_u	t3,0(DSTREG)
	mskqh	t3,SIZEREG,t3
	or	t4,t3,t4
	stq_u	t4,0(DSTREG)
	RET

bcopy_different_alignment:
	/*
	 * this is the fun part
	 */
	addq	SRCREG,SIZEREG,a3
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_da_finish

	beq	t1,bcopy_da_noentry
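
	/*
	 * Source and destination are misaligned with respect to each other.
	 * Each destination quadword has to be assembled from two adjacent
	 * source quadwords: ldq_u fetches the aligned quadwords covering the
	 * bytes we need, extql/extqh shift the two pieces into place, and or
	 * merges them.  First bring the destination up to a quadword
	 * boundary, then run the unrolled loop, then handle the tail.
	 */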
	/* Do the initial partial word */
	subq	zero,DSTREG,t0
	and	t0,7,t0
	ldq_u	t3,7(SRCREG)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t5
	insql	t5,DSTREG,t5
	ldq_u	t6,0(DSTREG)
	mskql	t6,DSTREG,t6
	or	t5,t6,t5
	stq_u	t5,0(DSTREG)
	addq	SRCREG,t0,SRCREG
	addq	DSTREG,t0,DSTREG
	subq	SIZEREG,t0,SIZEREG
	ldq_u	t2,0(SRCREG)

bcopy_da_noentry:
	subq	SIZEREG,1,t0
	bic	t0,7,t0
	and	SIZEREG,7,SIZEREG
	beq	t0,bcopy_da_finish2

bcopy_da_lp:
	/* Unrolled twice: t2 and t3 alternate as the carried source quadword */
	ldq_u	t3,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t2,SRCREG,t4
	extqh	t3,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	beq	t0,bcopy_da_finish1
	ldq_u	t2,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t3,SRCREG,t4
	extqh	t2,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	bne	t0,bcopy_da_lp

bcopy_da_finish2:
	/* Do the last new word */
	mov	t2,t3

bcopy_da_finish1:
	/* Do the last partial word */
	ldq_u	t2,-1(a3)
	extql	t3,SRCREG,t3
	extqh	t2,SRCREG,t2
	or	t2,t3,t2
	br	zero,bcopy_samealign_lp_end

bcopy_da_finish:
	/*
	 * Copy of eight bytes or less: the source may span two quadwords
	 * and the destination may be misaligned, so build the value and
	 * store it under a byte mask that preserves the surrounding
	 * destination bytes.
	 */
	ldq_u	t3,-1(a3)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t2
	insqh	t2,DSTREG,t3
	insql	t2,DSTREG,t2
	/* t4 = mask of the SIZEREG bytes to store (all ones if SIZEREG == 8) */
	lda	t4,-1(zero)
	mskql	t4,SIZEREG,t5
	cmovne	t5,t5,t4
	insqh	t4,DSTREG,t5
	insql	t4,DSTREG,t4
	addq	DSTREG,SIZEREG,a4
	ldq_u	t6,0(DSTREG)
	ldq_u	t7,-1(a4)
	bic	t6,t4,t6
	bic	t7,t5,t7
	and	t2,t4,t2
	and	t3,t5,t3
	or	t2,t6,t2
	or	t3,t7,t3
	stq_u	t3,-1(a4)
	stq_u	t2,0(DSTREG)
	RET
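
	/*
	 * The regions overlap with the destination starting inside the
	 * source (the unsigned "dst - src < len" test above caught this),
	 * so a forward copy would overwrite source bytes before they were
	 * read.  Copy from the end of the region toward the beginning
	 * instead.
	 */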
bcopy_overlap:
	/*
	 * Basically equivalent to previous case, only backwards.
	 * Not quite as highly optimized
	 */
	addq	SRCREG,SIZEREG,a3
	addq	DSTREG,SIZEREG,a4

	/* less than 8 bytes - don't worry about overlap */
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_ov_short

	/* Possibly do a partial first word */
	and	a4,7,t4
	beq	t4,bcopy_ov_nostart2
	subq	a3,t4,a3
	subq	a4,t4,a4
	ldq_u	t1,0(a3)
	subq	SIZEREG,t4,SIZEREG
	ldq_u	t2,7(a3)
	ldq	t3,0(a4)
	extql	t1,a3,t1
	extqh	t2,a3,t2
	or	t1,t2,t1
	mskqh	t3,t4,t3
	mskql	t1,t4,t1
	or	t1,t3,t1
	stq	t1,0(a4)

bcopy_ov_nostart2:
	bic	SIZEREG,7,t4
	and	SIZEREG,7,SIZEREG
	beq	t4,bcopy_ov_lp_end

bcopy_ov_lp:
	/* This could be more pipelined, but it doesn't seem worth it */
	ldq_u	t0,-8(a3)
	subq	a4,8,a4
	ldq_u	t1,-1(a3)
	subq	a3,8,a3
	extql	t0,a3,t0
	extqh	t1,a3,t1
	subq	t4,8,t4
	or	t0,t1,t0
	stq	t0,0(a4)
	bne	t4,bcopy_ov_lp

bcopy_ov_lp_end:
	beq	SIZEREG,bcopy_done

	ldq_u	t0,0(SRCREG)
	ldq_u	t1,7(SRCREG)
	ldq_u	t2,0(DSTREG)
	extql	t0,SRCREG,t0
	extqh	t1,SRCREG,t1
	or	t0,t1,t0
	insql	t0,DSTREG,t0
	mskql	t2,DSTREG,t2
	or	t2,t0,t2
	stq_u	t2,0(DSTREG)

bcopy_done:
	RET

bcopy_ov_short:
	ldq_u	t2,0(SRCREG)
	br	zero,bcopy_da_finish

	END(FUNCTION)