/*	$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $	*/

/*
 * Mach Operating System
 * Copyright (c) 1993 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 *	File:	mips_bcopy.s
 *	Author:	Chris Maeda
 *	Date:	June 1993
 *
 *	Fast copy routine.  Derived from aligned_block_copy.
 */


#include <mips/asm.h>
#ifndef _LOCORE
#define _LOCORE		/* XXX not really, just assembly-code source */
#endif
#include <machine/endian.h>


#if defined(LIBC_SCCS) && !defined(lint)
#if 0
	RCSID("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
#else
	RCSID("$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $")
#endif
#endif /* LIBC_SCCS and not lint */

/*
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0 	src address
 *	a1	dst address
 *	a2	length
 *
 *	When MEMCOPY or MEMMOVE is defined the same body is assembled as
 *	memcpy or memmove instead; the pointer arguments are then swapped
 *	(a0 = dst, a1 = src) and the original dst is returned in v0.
 *
 *	Register use inside the routine:
 *	SRCREG/DSTREG	running source / destination pointers (modified)
 *	SIZEREG (a2)	bytes still to copy (modified)
 *	a3		stop address for the unrolled and unaligned-source loops
 *	t0		scratch; stop address for the single-word loop
 *	t1, t2, t3, v1	scratch / data
 *	AT		scratch (the file runs under .set noat)
 *
 *	The copy runs forwards when src >= dst and backwards (from the end
 *	of both buffers) when src < dst, so a destination that overlaps the
 *	source is written only after the overlapping source bytes were read.
 *
 *	A "word" below means SZREG bytes, the unit moved by REG_L/REG_S.
 *	SZREG, REG_*, PTR_*, LEAF and END come from <mips/asm.h>.
 *
 *	The whole body is assembled with .set noreorder: the instruction
 *	written after every branch is its delay slot and executes whether
 *	or not the branch is taken.  Do not reorder or insert instructions
 *	around branches without accounting for that.
 */

#if defined(MEMCOPY) || defined(MEMMOVE)
#ifdef MEMCOPY
#define	FUNCTION	memcpy
#else
#define FUNCTION	memmove
#endif
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif

#define	SIZEREG		a2

LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards

	/*
	 * 	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 *
	 *	The first andi below sits in the delay slot of the branch
	 *	to the backward path; that path recomputes t1 itself.
	 */
	andi	t1,DSTREG,(SZREG-1)	# get last bits of dest
	bne	t1,zero,3f		# dest unaligned
	andi	t0,SRCREG,(SZREG-1)	# get last bits of src
	bne	t0,zero,5f

	/*
	 *	Forward aligned->aligned copy, 8 words at a time.
	 *	(The li is the delay slot of the branch above.)
	 */
98:
	li	AT,-(SZREG*8)
	and	t0,SIZEREG,AT		# count truncated to multiples
	PTR_ADDU a3,SRCREG,t0		# run fast loop up to this addr
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left afterwards

	/*
	 *	loop body
	 *
	 *	Four words are loaded before any of them is stored, then
	 *	the next four are addressed with negative offsets from the
	 *	already advanced pointers.
	 */
1:	# cp
	REG_L	t3,(0*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	PTR_ADDU SRCREG,SZREG*8
	REG_S	t3,(0*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t3,(-4*SZREG)(SRCREG)
	PTR_ADDU DSTREG,SZREG*8
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(-4*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq	t2,zero,3f
	PTR_ADDU t0,SRCREG,t2		# stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# leave only the tail bytes
1:
	REG_L	t3,0(SRCREG)
	PTR_ADDU SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	PTR_ADDU DSTREG,SZREG

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
	/*
	 *	SIZEREG is nonzero here and drops by exactly one per pass,
	 *	so the loop ends on a test for zero.  The length is unsigned:
	 *	a signed test (bgtz) would leave the loop after one byte for
	 *	a count with the top bit set.
	 */
1:
	lb	t3,0(SRCREG)
	PTR_ADDU SRCREG,1
	sb	t3,0(DSTREG)
	PTR_SUBU SIZEREG,1
	bne	SIZEREG,zero,1b
	PTR_ADDU DSTREG,1

4:	# copydone
	.set at		#-mfix-loongson2f-btb
	j	ra
	nop
	.set noat

	/*
	 *	Copy from unaligned source to aligned dest.
	 *
	 *	REG_LHI/REG_LLO come from <mips/asm.h>; presumably the
	 *	endian-specific partial-load pair that assembles one word
	 *	from an unaligned address -- confirm against that header.
	 */
5:	# destaligned
	andi	t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# bytes that make up whole words
	beq	a3,zero,3b
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_ADDU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,0(SRCREG)
	REG_LLO	t3,SZREG-1(SRCREG)
	PTR_ADDI SRCREG,SZREG
	REG_S	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	PTR_ADDI DSTREG,SZREG

	b	3b
	nop

	/*
	 *	Backward copy: both pointers are moved to one past the end
	 *	of their buffers and every access uses negative offsets.
	 */
6:	# backcopy -- based on above
	PTR_ADDU SRCREG,SIZEREG
	PTR_ADDU DSTREG,SIZEREG
	andi	t1,DSTREG,SZREG-1	# get last bits of dest
	bne	t1,zero,3f		# dest unaligned
	andi	t0,SRCREG,SZREG-1	# get last bits of src
	bne	t0,zero,5f

	/*
	 *	Backward aligned->aligned copy, 8 words at a time.
	 *	(The li is the delay slot of the branch above.)
	 */
	li	AT,(-8*SZREG)
	and	t0,SIZEREG,AT		# count truncated to multiple of 8 words
	beq	t0,zero,2f		# any work to do?
	PTR_SUBU SIZEREG,t0		# (delay slot) bytes left afterwards
	PTR_SUBU a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 *	loop body
	 */
1:	# cp
	REG_L	t3,(-4*SZREG)(SRCREG)
	REG_L	v1,(-3*SZREG)(SRCREG)
	REG_L	t0,(-2*SZREG)(SRCREG)
	REG_L	t1,(-1*SZREG)(SRCREG)
	PTR_SUBU SRCREG,8*SZREG
	REG_S	t3,(-4*SZREG)(DSTREG)
	REG_S	v1,(-3*SZREG)(DSTREG)
	REG_S	t0,(-2*SZREG)(DSTREG)
	REG_S	t1,(-1*SZREG)(DSTREG)
	REG_L	t1,(3*SZREG)(SRCREG)
	REG_L	t0,(2*SZREG)(SRCREG)
	REG_L	v1,(1*SZREG)(SRCREG)
	REG_L	t3,(0*SZREG)(SRCREG)
	PTR_SUBU DSTREG,8*SZREG
	REG_S	t1,(3*SZREG)(DSTREG)
	REG_S	t0,(2*SZREG)(DSTREG)
	REG_S	v1,(1*SZREG)(DSTREG)
	bne	SRCREG,a3,1b
	REG_S	t3,(0*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
	PTR_SUBU t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq	t2,zero,3f
	PTR_SUBU t0,SRCREG,t2		# stop at t0
	PTR_SUBU SIZEREG,SIZEREG,t2	# leave only the tail bytes
1:
	REG_L	t3,-SZREG(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,t0,1b
	PTR_SUBU DSTREG,SZREG

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
	/*
	 *	Same termination rule as the forward byte loop: count down
	 *	to exactly zero, no signed test on the unsigned length.
	 */
1:
	lb	t3,-1(SRCREG)
	PTR_SUBU SRCREG,1
	sb	t3,-1(DSTREG)
	PTR_SUBU SIZEREG,1
	bne	SIZEREG,zero,1b
	PTR_SUBU DSTREG,1

4:	# copydone
	.set at		#-mfix-loongson2f-btb
	j	ra
	nop
	.set noat

	/*
	 *	Copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU a3,SIZEREG,t0		# bytes that make up whole words
	beq	a3,zero,3b
	nop
	move	SIZEREG,t0		# this many to do after we are done
	PTR_SUBU a3,SRCREG,a3		# stop point

1:
	REG_LHI	t3,-SZREG(SRCREG)
	REG_LLO	t3,-1(SRCREG)
	PTR_SUBU SRCREG,SZREG
	REG_S	t3,-SZREG(DSTREG)
	bne	SRCREG,a3,1b
	PTR_SUBU DSTREG,SZREG

	b	3b
	nop

	.set	reorder
	.set	at
	END(FUNCTION)