/*	$NetBSD: bcopy.S,v 1.2 2005/12/27 11:23:53 tsutsui Exp $	*/

/*
 * Mach Operating System
 * Copyright (c) 1993 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 *	File:	mips_bcopy.s
 *	Author:	Chris Maeda
 *	Date:	June 1993
 *
 *	Fast copy routine.  Derived from aligned_block_copy.
 */


#include <mips/asm.h>
#ifndef _LOCORE
#define _LOCORE		/* XXX not really, just assembly-code source */
#endif
#include <machine/endian.h>


#if defined(LIBC_SCCS) && !defined(lint)
	ASMSTR("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
	ASMSTR("$NetBSD: bcopy.S,v 1.2 2005/12/27 11:23:53 tsutsui Exp $")
#endif /* LIBC_SCCS and not lint */

#ifdef __ABICALLS__
	.abicalls
#endif

/*
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0 	src address
 *	a1	dst address
 *	a2	length
 *
 *	When built with MEMCOPY or MEMMOVE defined, the same body is
 *	emitted as memcpy() or memmove() instead: the source and
 *	destination registers are swapped (a0 = dst, a1 = src) and the
 *	original destination pointer is returned in v0.
 *
 *	The copy runs backwards (from the end of both buffers) whenever
 *	src < dst, and forwards otherwise.
 *
 *	Scratch registers written: AT, v1, a3, t0, t1, t2, t3.  The three
 *	argument registers are modified as the copy proceeds.
 *
 *	The whole routine is assembled under ".set noreorder", so the
 *	instruction following every branch or jump is its delay slot and
 *	executes whether or not the branch is taken.
 */

#if defined(MEMCOPY) || defined(MEMMOVE)
#ifdef MEMCOPY
#define	FUNCTION	memcpy
#else
#define FUNCTION	memmove
#endif
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif

#define	SIZEREG		a2

LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards

	/*
	 * 	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 *
	 *	The first andi below sits in the delay slot of the
	 *	"copy backwards" branch; the backward path recomputes t1,
	 *	so executing it there is harmless.
	 */
	andi	t1,DSTREG,3		# low 2 bits of dest (delay slot)
	bne	t1,zero,3f		# dest unaligned: byte copy everything
	andi	t0,SRCREG,3		# low 2 bits of src (delay slot)
	bne	t0,zero,5f		# only src unaligned

	/*
	 *	Forward aligned->aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32			# (delay slot) mask for multiples of 32
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	addu	a3,SRCREG,t0		# run fast loop up to this address
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 *	loop body
	 *
	 *	Each pass moves 32 bytes as two groups of four words.  The
	 *	pointers are bumped in the middle of the pass, so the second
	 *	group is addressed with negative offsets.
	 */
1:					# cp
	lw	t3,0(SRCREG)
	lw	v1,4(SRCREG)
	lw	t0,8(SRCREG)
	lw	t1,12(SRCREG)
	addu	SRCREG,32
	sw	t3,0(DSTREG)
	sw	v1,4(DSTREG)
	sw	t0,8(DSTREG)
	sw	t1,12(DSTREG)
	lw	t1,-4(SRCREG)
	lw	t0,-8(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t3,-16(SRCREG)
	addu	DSTREG,32
	sw	t1,-4(DSTREG)
	sw	t0,-8(DSTREG)
	sw	v1,-12(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,-16(DSTREG)		# (delay slot) last store of the pass

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:					# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	addu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	lw	t3,0(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	addu	DSTREG,4		# (delay slot)

3:					# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	addu	SRCREG,1
	sb	t3,0(DSTREG)
	subu	SIZEREG,1
	/*
	 * The length is unsigned and is known to be non-zero on loop
	 * entry, so count down to exactly zero rather than using a
	 * signed "greater than zero" test.
	 */
	bne	SIZEREG,zero,1b
	addu	DSTREG,1		# (delay slot)

4:					# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 *
	 *	LWHI/LWLO are not defined in this file; they come from the
	 *	included headers (presumably the endian-appropriate lwl/lwr
	 *	pair -- confirm against <mips/asm.h>).
	 */
5:					# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of words to transfer
	beq	a3,zero,3b
	nop
	move	SIZEREG,t0		# this many to do after we are done
	addu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,0(SRCREG)
	LWLO	t3,3(SRCREG)
	/*
	 * addu, not addi: addi raises an integer overflow exception on
	 * signed overflow, which must never happen for address arithmetic.
	 */
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	addu	DSTREG,4		# (delay slot)

	j	3b			# finish the tail in the byte loop
	nop

	/*
	 *	Backward copy.  Both pointers are first advanced to one
	 *	past the end of their buffers; everything below mirrors the
	 *	forward code using negative offsets and subtraction.
	 */
6:					# backcopy -- based on above
	addu	SRCREG,SIZEREG
	addu	DSTREG,SIZEREG
	andi	t1,DSTREG,3		# low 2 bits of dest (end pointer)
	bne	t1,zero,3f		# dest unaligned: byte copy everything
	andi	t0,SRCREG,3		# low 2 bits of src (delay slot)
	bne	t0,zero,5f		# only src unaligned

	/*
	 *	Backward aligned->aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32			# (delay slot) mask for multiples of 32
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	beq	t0,zero,2f		# any work to do?
	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop
	subu	a3,SRCREG,t0		# run fast loop down to this address

	/*
	 *	loop body
	 */
1:					# cp
	lw	t3,-16(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t0,-8(SRCREG)
	lw	t1,-4(SRCREG)
	subu	SRCREG,32
	sw	t3,-16(DSTREG)
	sw	v1,-12(DSTREG)
	sw	t0,-8(DSTREG)
	sw	t1,-4(DSTREG)
	lw	t1,12(SRCREG)
	lw	t0,8(SRCREG)
	lw	v1,4(SRCREG)
	lw	t3,0(SRCREG)
	subu	DSTREG,32
	sw	t1,12(DSTREG)
	sw	t0,8(DSTREG)
	sw	v1,4(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,0(DSTREG)		# (delay slot) last store of the pass

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:					# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	subu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
1:
	lw	t3,-4(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,t0,1b
	subu	DSTREG,4		# (delay slot)

3:					# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	subu	SRCREG,1
	sb	t3,-1(DSTREG)
	subu	SIZEREG,1
	/* unsigned count-down to zero, as in the forward byte loop */
	bne	SIZEREG,zero,1b
	subu	DSTREG,1		# (delay slot)

4:					# copydone
	j	ra
	nop

	/*
	 *	Copy from unaligned source to aligned dest.
	 */
5:					# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of words to transfer
	beq	a3,zero,3b
	nop
	move	SIZEREG,t0		# this many to do after we are done
	subu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,-4(SRCREG)
	LWLO	t3,-1(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,a3,1b
	subu	DSTREG,4		# (delay slot)

	j	3b			# finish the tail in the byte loop
	nop

	.set	reorder
	.set	at
	END(FUNCTION)