/*	$NetBSD: bcopy.S,v 1.1 2005/12/21 00:25:56 christos Exp $	*/

/*
 * Mach Operating System
 * Copyright (c) 1993 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 *	File:	mips_bcopy.s
 *	Author:	Chris Maeda
 *	Date:	June 1993
 *
 *	Fast copy routine.  Derived from aligned_block_copy.
 */


#include <mips/asm.h>
#define _LOCORE		/* XXX not really, just assembly-code source */
#include <machine/endian.h>


#if defined(LIBC_SCCS) && !defined(lint)
	ASMSTR("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
	ASMSTR("$NetBSD: bcopy.S,v 1.1 2005/12/21 00:25:56 christos Exp $")
#endif /* LIBC_SCCS and not lint */

#ifdef __ABICALLS__
	.abicalls
#endif

/*
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0 	src address
 *	a1	dst address
 *	a2	length
 *
 *	When MEMCOPY or MEMMOVE is defined the same body is assembled as
 *	memcpy or memmove instead; the pointer arguments then swap places
 *	(a0 = dst, a1 = src) and the original dst is returned in v0.
 *
 *	Direction: the copy runs backwards (from the end of both buffers)
 *	when src < dst, and forwards otherwise.
 *
 *	Registers written: a0-a3, t0-t3, v1, AT (and v0 for memcpy/memmove).
 *
 *	The body is assembled with ".set noreorder": branch delay slots are
 *	filled by hand, so the instruction that follows each branch is
 *	executed whether or not the branch is taken.
 *
 *	Numeric labels are reused: the backward half repeats labels 1-5, and
 *	each "Nf"/"Nb" reference binds to the nearest definition in that
 *	direction.
 */

#if defined(MEMCOPY) || defined(MEMMOVE)
#ifdef MEMCOPY
#define	FUNCTION	memcpy
#else
#define FUNCTION	memmove
#endif
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif

#define	SIZEREG		a2

LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards

	/*
	 * 	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 *
	 *	The andi below sits in the delay slot of the branch to 6f;
	 *	the backward path recomputes t1 after adjusting the pointers.
	 */
	andi	t1,DSTREG,3		# low two bits of dest (dest & 3)
	bne	t1,zero,3f		# dest unaligned: byte copy everything
	andi	t0,SRCREG,3		# (delay slot) low two bits of src
	bne	t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Forward aligned->aligned copy, 8*4 bytes at a time.
	 *
	 *	The li is in the delay slot of the branch to 5f; the 5: path
	 *	does not read AT, so executing it there is harmless.
	 */
	li	AT,-32
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	addu	a3,SRCREG,t0		# run fast loop up to this address
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 *	loop body: 32 bytes per pass, as two 16-byte halves.  Both
	 *	pointers are advanced between the halves, so the second half
	 *	is addressed with negative offsets.
	 */
1:	# cp
	lw	t3,0(SRCREG)
	lw	v1,4(SRCREG)
	lw	t0,8(SRCREG)
	lw	t1,12(SRCREG)
	addu	SRCREG,32
	sw	t3,0(DSTREG)
	sw	v1,4(DSTREG)
	sw	t0,8(DSTREG)
	sw	t1,12(DSTREG)
	lw	t1,-4(SRCREG)
	lw	t0,-8(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t3,-16(SRCREG)
	addu	DSTREG,32
	sw	t1,-4(DSTREG)
	sw	t0,-8(DSTREG)
	sw	v1,-12(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,-16(DSTREG)		# (delay slot) last store of the pass

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	addu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	lw	t3,0(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	addu	DSTREG,4		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	addu	SRCREG,1
	sb	t3,0(DSTREG)
	subu	SIZEREG,1
	bgtz	SIZEREG,1b		# signed test on the remaining count
	addu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 *
	 *	LWHI/LWLO are not defined in this file; presumably they come
	 *	from the included headers and expand to the unaligned-load
	 *	instruction pair ordered for the target byte order.
	 *
	 *	Pointers are advanced with addu like everywhere else in this
	 *	file: addi traps on signed overflow, which is never wanted
	 *	for address arithmetic.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of words to transfer
	beq	a3,zero,3b
	nop
	move	SIZEREG,t0		# this many to do after we are done
	addu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,0(SRCREG)
	LWLO	t3,3(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	addu	DSTREG,4		# (delay slot)

	j	3b			# finish the tail in the byte loop
	nop

6:	# backcopy -- based on above
	addu	SRCREG,SIZEREG		# point just past the end of src
	addu	DSTREG,SIZEREG		# point just past the end of dst
	andi	t1,DSTREG,3		# low two bits of dest end (dest & 3)
	bne	t1,zero,3f		# dest end unaligned: byte copy everything
	andi	t0,SRCREG,3		# (delay slot) low two bits of src end
	bne	t0,zero,5f		# src end unaligned, dest end aligned

	/*
	 *	Backward aligned->aligned copy, 8*4 bytes at a time.
	 *
	 *	The li is in the delay slot of the branch to 5f; the 5: path
	 *	does not read AT, so executing it there is harmless.
	 */
	li	AT,-32
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	beq	t0,zero,2f		# any work to do?
	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop
	subu	a3,SRCREG,t0		# run fast loop down to this address

	/*
	 *	loop body: 32 bytes per pass, as two 16-byte halves.  Both
	 *	pointers are moved down between the halves, so the second
	 *	half is addressed with non-negative offsets.
	 */
1:	# cp
	lw	t3,-16(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t0,-8(SRCREG)
	lw	t1,-4(SRCREG)
	subu	SRCREG,32
	sw	t3,-16(DSTREG)
	sw	v1,-12(DSTREG)
	sw	t0,-8(DSTREG)
	sw	t1,-4(DSTREG)
	lw	t1,12(SRCREG)
	lw	t0,8(SRCREG)
	lw	v1,4(SRCREG)
	lw	t3,0(SRCREG)
	subu	DSTREG,32
	sw	t1,12(DSTREG)
	sw	t0,8(DSTREG)
	sw	v1,4(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,0(DSTREG)		# (delay slot) last store of the pass

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	subu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	lw	t3,-4(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,t0,1b
	subu	DSTREG,4		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	subu	SRCREG,1
	sb	t3,-1(DSTREG)
	subu	SIZEREG,1
	bgtz	SIZEREG,1b		# signed test on the remaining count
	subu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of words to transfer
	beq	a3,zero,3b
	nop
	move	SIZEREG,t0		# this many to do after we are done
	subu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,-4(SRCREG)
	LWLO	t3,-1(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,a3,1b
	subu	DSTREG,4		# (delay slot)

	j	3b			# finish the tail in the byte loop
	nop

	.set	reorder
	.set	at
	END(FUNCTION)