/* $NetBSD: bcopy.S,v 1.1 2005/12/20 19:28:49 christos Exp $ */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
 * added by Chris Demetriou.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include <machine/asm.h>

/*
 * One source file builds three functions.  bcopy() takes (src, dst, len);
 * memcpy()/memmove() take (dst, src, len) — hence the swapped a0/a1 macro
 * assignments below.  The copy logic itself is identical (this bcopy is
 * overlap-safe, so it serves as memmove too).
 */
#if defined(MEMCOPY) || defined(MEMMOVE)
#ifdef MEMCOPY
#define	FUNCTION	memcpy
#else
#define	FUNCTION	memmove
#endif
#define	SRCREG	a1
#define	DSTREG	a0
#else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
#define	FUNCTION	bcopy
#define	SRCREG	a0
#define	DSTREG	a1
#endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */

#define	SIZEREG	a2

/*
 * Copy bytes.
 *
 * void bcopy(char *from, char *to, size_t len);
 * char *memcpy(void *to, const void *from, size_t len);
 * char *memmove(void *to, const void *from, size_t len);
 *
 * No matter how invoked, the source and destination registers are used
 * directly for calculation.  There's no point in copying them to "working"
 * registers, since the code uses their values "in place," and copying them
 * would be slower.
 *
 * Implementation notes (Alpha unaligned-access idioms used throughout):
 *   - ldq_u/stq_u load/store the aligned quadword *containing* the given
 *     address (low three address bits ignored), so they never fault on
 *     unaligned pointers.
 *   - extql/extqh extract the low/high pieces of an unaligned quadword
 *     from two adjacent aligned quadwords; or-ing the pieces reassembles it.
 *   - insql/insqh and mskql/mskqh insert/mask byte lanes so partial words
 *     can be merged into a destination quadword without disturbing the
 *     bytes outside the copy range.
 */

LEAF(FUNCTION,3)

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value (the destination), while we still can */
	mov	DSTREG,v0
#endif

	/* Check for zero or negative length (len treated as signed here) */
	ble	SIZEREG,bcopy_done

	/*
	 * Check for overlap: a forward copy is unsafe exactly when
	 * 0 < (dst - src) < len, i.e. (unsigned)(dst - src) < len.
	 */
	subq	DSTREG,SRCREG,t5
	cmpult	t5,SIZEREG,t5
	bne	t5,bcopy_overlap

	/* a3 = end address */
	addq	SRCREG,SIZEREG,a3

	/* Get the first word */
	ldq_u	t2,0(SRCREG)

	/* Do src and dst have the same alignment within a quadword? */
	xor	SRCREG,DSTREG,t0
	and	t0,7,t0
	and	DSTREG,7,t1
	bne	t0,bcopy_different_alignment

	/* src & dst have same alignment; if both already 8-byte aligned, go */
	beq	t1,bcopy_all_aligned

	/*
	 * Merge the leading partial word: keep the low t1 bytes of the
	 * existing destination quadword, take the rest from the source.
	 * Count is biased up by t1 so the loop accounting below still works;
	 * SRCREG/DSTREG are left pointing into this first quadword.
	 */
	ldq_u	t3,0(DSTREG)
	addq	SIZEREG,t1,SIZEREG
	mskqh	t2,SRCREG,t2
	mskql	t3,SRCREG,t3
	or	t2,t3,t2

	/* Dst is 8-byte aligned */

bcopy_all_aligned:
	/*
	 * t0 = number of whole quadwords minus the final one (rounded via
	 * len-1), SIZEREG = 1..8 bytes left for the tail.  If less than
	 * 8 bytes remain, skip the loop.
	 */
	subq	SIZEREG,1,t0
	and	SIZEREG,7,SIZEREG
	bic	t0,7,t0
	beq	t0,bcopy_samealign_lp_end

bcopy_samealign_lp:
	/* t2 always holds the next quadword to store on loop entry */
	stq_u	t2,0(DSTREG)
	addq	DSTREG,8,DSTREG
	ldq_u	t2,8(SRCREG)
	subq	t0,8,t0
	addq	SRCREG,8,SRCREG
	bne	t0,bcopy_samealign_lp

bcopy_samealign_lp_end:
	/*
	 * If SIZEREG != 0 the tail is a partial word; otherwise t2 is a
	 * full final quadword — store it and exit.
	 * (Also reached from the different-alignment path with t2 holding
	 * the reassembled final word.)
	 */
	bne	SIZEREG,bcopy_small_left
	stq_u	t2,0(DSTREG)
	RET

bcopy_small_left:
	/* Merge the low SIZEREG bytes of t2 over the destination quadword */
	mskql	t2,SIZEREG,t4
	ldq_u	t3,0(DSTREG)
	mskqh	t3,SIZEREG,t3
	or	t4,t3,t4
	stq_u	t4,0(DSTREG)
	RET

bcopy_different_alignment:
	/*
	 * this is the fun part: src and dst are misaligned relative to each
	 * other, so every destination quadword is assembled from two
	 * adjacent source quadwords with extql/extqh.
	 */
	addq	SRCREG,SIZEREG,a3
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_da_finish

	/* dst already 8-byte aligned?  Then no leading partial word. */
	beq	t1,bcopy_da_noentry

	/*
	 * Do the initial partial word: t0 = bytes to reach dst alignment
	 * (= 8 - (dst & 7)).  Note 7(SRCREG) with ldq_u fetches the *next*
	 * aligned source quadword regardless of src's offset.
	 */
	subq	zero,DSTREG,t0
	and	t0,7,t0
	ldq_u	t3,7(SRCREG)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t5
	insql	t5,DSTREG,t5
	ldq_u	t6,0(DSTREG)
	mskql	t6,DSTREG,t6
	or	t5,t6,t5
	stq_u	t5,0(DSTREG)
	addq	SRCREG,t0,SRCREG
	addq	DSTREG,t0,DSTREG
	subq	SIZEREG,t0,SIZEREG
	ldq_u	t2,0(SRCREG)

bcopy_da_noentry:
	/* Same trip-count split as above: t0 = loop bytes, SIZEREG = 1..8 tail */
	subq	SIZEREG,1,t0
	bic	t0,7,t0
	and	SIZEREG,7,SIZEREG
	beq	t0,bcopy_da_finish2

bcopy_da_lp:
	/*
	 * Unrolled x2, software-pipelined: t2/t3 alternate as the
	 * carried "previous source quadword" so each aligned source
	 * quadword is loaded only once.  dst is aligned here, hence stq.
	 */
	ldq_u	t3,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t2,SRCREG,t4
	extqh	t3,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	beq	t0,bcopy_da_finish1
	ldq_u	t2,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t3,SRCREG,t4
	extqh	t2,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	bne	t0,bcopy_da_lp

bcopy_da_finish2:
	/* Exited after an even half-iteration: carried word is in t2, not t3 */
	mov	t2,t3

bcopy_da_finish1:
	/*
	 * Do the last partial word: reassemble it into t2 and reuse the
	 * same-alignment tail code to merge/store it.
	 */
	ldq_u	t2,-1(a3)
	extql	t3,SRCREG,t3
	extqh	t2,SRCREG,t2
	or	t2,t3,t2
	br	zero,bcopy_samealign_lp_end

bcopy_da_finish:
	/*
	 * Whole copy is <= 8 bytes (also the short-overlap path).
	 * Reassemble the source bytes, then split them across the (up to
	 * two) destination quadwords they may straddle:
	 *   t2/t3 = data for low/high dst quadwords (insql/insqh),
	 *   t4/t5 = byte masks for the copied range in each quadword,
	 * built from an all-ones pattern masked to SIZEREG bytes
	 * (cmovne keeps all-ones when SIZEREG is exactly 8, i.e. t5 == 0).
	 */
	ldq_u	t3,-1(a3)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t2
	insqh	t2,DSTREG,t3
	insql	t2,DSTREG,t2
	lda	t4,-1(zero)
	mskql	t4,SIZEREG,t5
	cmovne	t5,t5,t4
	insqh	t4,DSTREG,t5
	insql	t4,DSTREG,t4
	addq	DSTREG,SIZEREG,a4
	ldq_u	t6,0(DSTREG)
	ldq_u	t7,-1(a4)
	bic	t6,t4,t6
	bic	t7,t5,t7
	and	t2,t4,t2
	and	t3,t5,t3
	or	t2,t6,t2
	or	t3,t7,t3
	stq_u	t3,-1(a4)
	stq_u	t2,0(DSTREG)
	RET

bcopy_overlap:
	/*
	 * Basically equivalent to previous case, only backwards (copy from
	 * the high addresses down, so an overlapping forward region is not
	 * clobbered before it is read).  Not quite as highly optimized.
	 * a3/a4 walk down from the end of src/dst.
	 */
	addq	SRCREG,SIZEREG,a3
	addq	DSTREG,SIZEREG,a4

	/* less than 8 bytes - don't worry about overlap */
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_ov_short

	/* Possibly do a partial first (i.e. highest-addressed) word */
	and	a4,7,t4
	beq	t4,bcopy_ov_nostart2
	subq	a3,t4,a3
	subq	a4,t4,a4
	ldq_u	t1,0(a3)
	subq	SIZEREG,t4,SIZEREG
	ldq_u	t2,7(a3)
	ldq	t3,0(a4)
	extql	t1,a3,t1
	extqh	t2,a3,t2
	or	t1,t2,t1
	mskqh	t3,t4,t3
	mskql	t1,t4,t1
	or	t1,t3,t1
	stq	t1,0(a4)

bcopy_ov_nostart2:
	/* t4 = whole quadwords to copy, SIZEREG = leading partial bytes */
	bic	SIZEREG,7,t4
	and	SIZEREG,7,SIZEREG
	beq	t4,bcopy_ov_lp_end

bcopy_ov_lp:
	/* This could be more pipelined, but it doesn't seem worth it */
	ldq_u	t0,-8(a3)
	subq	a4,8,a4
	ldq_u	t1,-1(a3)
	subq	a3,8,a3
	extql	t0,a3,t0
	extqh	t1,a3,t1
	subq	t4,8,t4
	or	t0,t1,t0
	stq	t0,0(a4)
	bne	t4,bcopy_ov_lp

bcopy_ov_lp_end:
	beq	SIZEREG,bcopy_done

	/*
	 * Leading partial word at the original src/dst: reassemble the
	 * unaligned source bytes and merge the low SIZEREG bytes into the
	 * destination quadword.
	 */
	ldq_u	t0,0(SRCREG)
	ldq_u	t1,7(SRCREG)
	ldq_u	t2,0(DSTREG)
	extql	t0,SRCREG,t0
	extqh	t1,SRCREG,t1
	or	t0,t1,t0
	insql	t0,DSTREG,t0
	mskql	t2,DSTREG,t2
	or	t2,t0,t2
	stq_u	t2,0(DSTREG)

bcopy_done:
	RET

bcopy_ov_short:
	/* <= 8 overlapping bytes: safe to use the forward short-copy path */
	ldq_u	t2,0(SRCREG)
	br	zero,bcopy_da_finish

	END(FUNCTION)