/* $OpenBSD: bcopy.S,v 1.8 2015/08/31 02:53:56 guenther Exp $ */
/* $NetBSD: bcopy.S,v 1.3 1996/10/17 03:08:11 cgd Exp $ */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Trevor Blackwell. Support for use as memcpy() and memmove()
 * added by Chris Demetriou.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include "SYS.h"

/* Argument registers, in bcopy() parameter order: from, to, len. */
#define	SRCREG		a0
#define	DSTREG		a1
#define	SIZEREG		a2

/*
 * Copy bytes.
 *
 * void bcopy(char *from, char *to, size_t len);
 *
 * Copies len bytes from "from" to "to".  When the destination starts
 * inside the source range (0 is at most to - from, which is less than
 * len, unsigned) the copy runs backwards from the end so that source
 * bytes are read before they are overwritten; otherwise it runs
 * forwards.
 *
 * No matter how invoked, the source and destination registers are
 * used directly for calculation.  There's no point in copying them to
 * "working" registers, since the code uses their values "in place,"
 * and copying them would be slower.
 *
 * Registers written: a0-a2 (the arguments themselves), a3, a4 and
 * t0-t7.  No stack is used.
 *
 * All memory traffic is done a quadword (8 bytes) at a time.  Partial
 * quadwords at either end are handled by loading the destination
 * quadword, merging in the new bytes with the msk and ins byte
 * instructions, and storing the whole quadword back.
 *
 * NOTE(review): LEAF, RET and END_WEAK come from SYS.h, which is not
 * visible here; presumably they open a leaf function taking 3
 * arguments, return to the caller, and close the function while
 * emitting a weak symbol - confirm against that header.
 */

LEAF(bcopy,3)
	/* Check for zero length */
	beq	SIZEREG,bcopy_done

	/*
	 * Check for overlap: unsigned (dst - src) below len means dst
	 * lies inside [src, src + len), so copy backwards.
	 */
	subq	DSTREG,SRCREG,t5
	cmpult	t5,SIZEREG,t5
	bne	t5,bcopy_overlap

	/* a3 = end address (one past the last source byte) */
	addq	SRCREG,SIZEREG,a3

	/* Get the first word (the quadword containing the first source byte) */
	ldq_u	t2,0(SRCREG)

	/*
	 * Do they have the same alignment?
	 * t0 = (src ^ dst) & 7: non-zero when the byte offsets differ.
	 * t1 = dst & 7: destination offset within its quadword.
	 */
	xor	SRCREG,DSTREG,t0
	and	t0,7,t0
	and	DSTREG,7,t1
	bne	t0,bcopy_different_alignment

	/* src & dst have same alignment */
	beq	t1,bcopy_all_aligned

	/*
	 * Same offset, but not quadword aligned.  Build the first
	 * quadword to store: source bytes from the start offset upwards,
	 * existing destination bytes below it.  The offset is added to
	 * the length so that the count is now relative to the start of
	 * the quadword; ldq_u/stq_u ignore the low address bits, so the
	 * pointers themselves need no adjustment.
	 */
	ldq_u	t3,0(DSTREG)
	addq	SIZEREG,t1,SIZEREG
	mskqh	t2,SRCREG,t2
	mskql	t3,SRCREG,t3
	or	t2,t3,t2

	/* Dst is 8-byte aligned */

bcopy_all_aligned:
	/*
	 * If less than 8 bytes,skip loop.
	 * t0 = (len - 1) & ~7: bytes stored by the loop.
	 * SIZEREG = len & 7: bytes valid in the final quadword
	 * (0 means the final quadword is complete).
	 */
	subq	SIZEREG,1,t0
	and	SIZEREG,7,SIZEREG
	bic	t0,7,t0
	beq	t0,bcopy_samealign_lp_end

bcopy_samealign_lp:
	/* Store the pending quadword and fetch the next one. */
	stq_u	t2,0(DSTREG)
	addq	DSTREG,8,DSTREG
	ldq_u	t2,8(SRCREG)
	subq	t0,8,t0
	addq	SRCREG,8,SRCREG
	bne	t0,bcopy_samealign_lp

bcopy_samealign_lp_end:
	/*
	 * t2 holds the final quadword of data.  If it is complete,
	 * store it and exit; otherwise merge it with the destination.
	 * The different-alignment path also finishes here.
	 */
	bne	SIZEREG,bcopy_small_left
	stq_u	t2,0(DSTREG)
	RET

bcopy_small_left:
	/*
	 * Partial final quadword: keep the low SIZEREG bytes of the
	 * data and the remaining high bytes of the destination.
	 */
	mskql	t2,SIZEREG,t4
	ldq_u	t3,0(DSTREG)
	mskqh	t3,SIZEREG,t3
	or	t4,t3,t4
	stq_u	t4,0(DSTREG)
	RET

bcopy_different_alignment:
	/*
	 * this is the fun part
	 *
	 * Source and destination sit at different byte offsets, so every
	 * destination quadword is assembled from two source quadwords.
	 * On entry t2 is the first source quadword and t1 = dst & 7.
	 * a3 is set to the source end address again here; it already
	 * holds that value from the common entry code.
	 */
	addq	SRCREG,SIZEREG,a3
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_da_finish	/* 8 bytes or fewer: single merge */

	beq	t1,bcopy_da_noentry	/* dst already quadword aligned */

	/*
	 * Do the initial partial word.
	 * t0 = (-dst) & 7: bytes needed to bring dst up to alignment.
	 * The first 8 unaligned source bytes are gathered into t5,
	 * shifted up to the destination offset, and merged with the
	 * destination bytes below that offset.
	 */
	subq	zero,DSTREG,t0
	and	t0,7,t0
	ldq_u	t3,7(SRCREG)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t5
	insql	t5,DSTREG,t5
	ldq_u	t6,0(DSTREG)
	mskql	t6,DSTREG,t6
	or	t5,t6,t5
	stq_u	t5,0(DSTREG)
	/* Step past the bytes just written and reload the source word. */
	addq	SRCREG,t0,SRCREG
	addq	DSTREG,t0,DSTREG
	subq	SIZEREG,t0,SIZEREG
	ldq_u	t2,0(SRCREG)

bcopy_da_noentry:
	/*
	 * Same split as the aligned case: t0 = bytes for the loop,
	 * SIZEREG = bytes valid in the final quadword.
	 */
	subq	SIZEREG,1,t0
	bic	t0,7,t0
	and	SIZEREG,7,SIZEREG
	beq	t0,bcopy_da_finish2

bcopy_da_lp:
	/*
	 * Unrolled twice.  t2 and t3 alternate as "previous source
	 * quadword" so no register move is needed between iterations.
	 * First half: previous = t2, new = t3.
	 */
	ldq_u	t3,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t2,SRCREG,t4
	extqh	t3,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	beq	t0,bcopy_da_finish1	/* most recent source word is in t3 */
	/* Second half: previous = t3, new = t2. */
	ldq_u	t2,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t3,SRCREG,t4
	extqh	t2,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	bne	t0,bcopy_da_lp

bcopy_da_finish2:
	/* Do the last new word: most recent source word is in t2, move it to t3 */
	mov	t2,t3

bcopy_da_finish1:
	/*
	 * Do the last partial word: combine t3 with the quadword holding
	 * the last source byte, then let the common tail store it.
	 */
	ldq_u	t2,-1(a3)
	extql	t3,SRCREG,t3
	extqh	t2,SRCREG,t2
	or	t2,t3,t2
	br	zero,bcopy_samealign_lp_end

bcopy_da_finish:
	/*
	 * Short copy (SIZEREG is 1..8 on every path that reaches here)
	 * with arbitrary alignment.  t2 holds the first source quadword.
	 * All source bytes are read before anything is stored, which is
	 * why the overlapping short case can share this code.
	 *
	 * Gather the source bytes, using the quadword that holds the
	 * last source byte for the high part.
	 */
	ldq_u	t3,-1(a3)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t2
	/* Shift the data to the destination offset: t2 low word, t3 high word. */
	insqh	t2,DSTREG,t3
	insql	t2,DSTREG,t2
	/*
	 * Build a byte mask covering SIZEREG bytes.  mskql yields zero
	 * when the low three bits of SIZEREG are zero (length 8 here),
	 * in which case the all-ones value in t4 is kept.
	 */
	lda	t4,-1(zero)
	mskql	t4,SIZEREG,t5
	cmovne	t5,t5,t4
	/* Shift the mask the same way: t4 low word, t5 high word. */
	insqh	t4,DSTREG,t5
	insql	t4,DSTREG,t4
	/* a4 = one past the last destination byte. */
	addq	DSTREG,SIZEREG,a4
	ldq_u	t6,0(DSTREG)
	ldq_u	t7,-1(a4)
	/* Clear the bytes being replaced, keep only the new bytes, merge. */
	bic	t6,t4,t6
	bic	t7,t5,t7
	and	t2,t4,t2
	and	t3,t5,t3
	or	t2,t6,t2
	or	t3,t7,t3
	/*
	 * High word first: when both addresses fall in the same
	 * quadword the low-word store that follows is the one that
	 * carries the data.
	 */
	stq_u	t3,-1(a4)
	stq_u	t2,0(DSTREG)
	RET

bcopy_overlap:
	/*
	 * Basically equivalent to previous case, only backwards.
	 * Not quite as highly optimized
	 *
	 * a3 / a4 = one past the last source / destination byte; they
	 * walk downwards while SRCREG and DSTREG stay at the start.
	 */
	addq	SRCREG,SIZEREG,a3
	addq	DSTREG,SIZEREG,a4

	/* 8 bytes or fewer - don't worry about overlap */
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_ov_short

	/*
	 * Possibly do a partial first word.
	 * t4 = a4 & 7: bytes lying above the last aligned destination
	 * quadword boundary.  Back both end pointers up by that amount
	 * (a4 becomes aligned), gather 8 source bytes at a3, and merge
	 * the low t4 of them with the destination bytes above them.
	 */
	and	a4,7,t4
	beq	t4,bcopy_ov_nostart2
	subq	a3,t4,a3
	subq	a4,t4,a4
	ldq_u	t1,0(a3)
	subq	SIZEREG,t4,SIZEREG
	ldq_u	t2,7(a3)
	ldq	t3,0(a4)
	extql	t1,a3,t1
	extqh	t2,a3,t2
	or	t1,t2,t1
	mskqh	t3,t4,t3
	mskql	t1,t4,t1
	or	t1,t3,t1
	stq	t1,0(a4)

bcopy_ov_nostart2:
	/* t4 = bytes for the loop (multiple of 8), SIZEREG = leftover. */
	bic	SIZEREG,7,t4
	and	SIZEREG,7,SIZEREG
	beq	t4,bcopy_ov_lp_end

bcopy_ov_lp:
	/* This could be more pipelined, but it doesn't seem worth it */
	/* Gather the 8 source bytes below a3, store them below a4. */
	ldq_u	t0,-8(a3)
	subq	a4,8,a4
	ldq_u	t1,-1(a3)
	subq	a3,8,a3
	extql	t0,a3,t0
	extqh	t1,a3,t1
	subq	t4,8,t4
	or	t0,t1,t0
	stq	t0,0(a4)
	bne	t4,bcopy_ov_lp

bcopy_ov_lp_end:
	beq	SIZEREG,bcopy_done

	/*
	 * Leftover bytes at the very start of the region.  Gather the
	 * source bytes at SRCREG, shift them up to the destination
	 * offset, and merge with the destination bytes below that
	 * offset.
	 */
	ldq_u	t0,0(SRCREG)
	ldq_u	t1,7(SRCREG)
	ldq_u	t2,0(DSTREG)
	extql	t0,SRCREG,t0
	extqh	t1,SRCREG,t1
	or	t0,t1,t0
	insql	t0,DSTREG,t0
	mskql	t2,DSTREG,t2
	or	t2,t0,t2
	stq_u	t2,0(DSTREG)

bcopy_done:
	RET

bcopy_ov_short:
	/*
	 * Overlapping copy of 8 bytes or fewer: load the first source
	 * quadword into t2 as bcopy_da_finish expects and share its code.
	 */
	ldq_u	t2,0(SRCREG)
	br	zero,bcopy_da_finish

	END_WEAK(bcopy)