/* $OpenBSD: memmove.S,v 1.6 2015/08/31 02:53:56 guenther Exp $ */
/* $NetBSD: bcopy.S,v 1.3 1996/10/17 03:08:11 cgd Exp $ */

/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Trevor Blackwell. Support for use as memcpy() and memmove()
 * added by Chris Demetriou.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 * School of Computer Science
 * Carnegie Mellon University
 * Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include "SYS.h"

#define	SRCREG		a1
#define	DSTREG		a0

#define	SIZEREG		a2

/*
 * Copy bytes.
 *
 * void *memmove(void *to, const void *from, size_t len);
 *
 * No matter how invoked, the source and destination registers are
 * used directly for calculation. There's no point in copying them to
 * "working" registers, since the code uses their values "in place,"
 * and copying them would be slower.
 *
 * Register usage, as established by the code below:
 *	a0 (DSTREG)	destination pointer; advanced in place by the
 *			forward-copy paths
 *	a1 (SRCREG)	source pointer; advanced in place by the
 *			forward-copy paths
 *	a2 (SIZEREG)	byte count; masked down to its low three bits
 *			before each quadword loop runs
 *	a3		source end address (SRCREG + SIZEREG)
 *	a4		destination end address (backward and short paths)
 *	v0		return value: the destination pointer as passed in
 *
 * Paths:
 *	- len == 0: return immediately.
 *	- (unsigned)(to - from) < len: the destination starts inside the
 *	  source region, so the copy runs backwards (bcopy_overlap).
 *	- otherwise the copy runs forwards, either with both pointers at
 *	  the same offset within a quadword (bcopy_all_aligned) or at
 *	  different offsets (bcopy_different_alignment).
 */

LEAF(memmove,3)
	/* set up return value, while we still can */
	mov	DSTREG,v0

	/* Check for zero length */
	beq	SIZEREG,bcopy_done

	/*
	 * Check for overlap: t5 = dst - src, then t5 = (t5 < len) as an
	 * unsigned compare.  True means a forward copy would overwrite
	 * source bytes that have not been read yet.
	 */
	subq	DSTREG,SRCREG,t5
	cmpult	t5,SIZEREG,t5
	bne	t5,bcopy_overlap

	/* a3 = end address */
	addq	SRCREG,SIZEREG,a3

	/* Get the first word */
	ldq_u	t2,0(SRCREG)

	/* Do they have the same alignment? */
	xor	SRCREG,DSTREG,t0
	and	t0,7,t0
	and	DSTREG,7,t1		/* t1 = dst offset within its quadword */
	bne	t0,bcopy_different_alignment

	/* src & dst have same alignment */
	beq	t1,bcopy_all_aligned

	/*
	 * Shared non-zero offset: count from the start of the quadword
	 * (size += offset) and build the first word from the source
	 * bytes at and above the offset plus the destination's existing
	 * bytes below it.
	 */
	ldq_u	t3,0(DSTREG)
	addq	SIZEREG,t1,SIZEREG
	mskqh	t2,SRCREG,t2
	mskql	t3,SRCREG,t3
	or	t2,t3,t2

	/* Dst is 8-byte aligned */

bcopy_all_aligned:
	/* If less than 8 bytes,skip loop */
	/*
	 * t0 = (size - 1) & ~7 is the number of bytes the loop moves;
	 * the final word, full or partial, is left for the code after
	 * the loop.  SIZEREG keeps only size & 7.
	 */
	subq	SIZEREG,1,t0
	and	SIZEREG,7,SIZEREG
	bic	t0,7,t0
	beq	t0,bcopy_samealign_lp_end

bcopy_samealign_lp:
	/* Store the pending word, fetch the next one, count down by 8 */
	stq_u	t2,0(DSTREG)
	addq	DSTREG,8,DSTREG
	ldq_u	t2,8(SRCREG)
	subq	t0,8,t0
	addq	SRCREG,8,SRCREG
	bne	t0,bcopy_samealign_lp

bcopy_samealign_lp_end:
	/* If we're done, exit */
	/* SIZEREG == 0 here means the pending word in t2 is a full one */
	bne	SIZEREG,bcopy_small_left
	stq_u	t2,0(DSTREG)
	RET

bcopy_small_left:
	/*
	 * Partial last word: keep the low SIZEREG bytes of t2 and the
	 * destination's existing bytes above them.
	 */
	mskql	t2,SIZEREG,t4
	ldq_u	t3,0(DSTREG)
	mskqh	t3,SIZEREG,t3
	or	t4,t3,t4
	stq_u	t4,0(DSTREG)
	RET

bcopy_different_alignment:
	/*
	 * this is the fun part
	 */
	addq	SRCREG,SIZEREG,a3
	/* 8 bytes or fewer are handled in one go by bcopy_da_finish */
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_da_finish

	/* t1 == 0: destination already quadword aligned */
	beq	t1,bcopy_da_noentry

	/* Do the initial partial word */
	/*
	 * t0 = (-dst) & 7 = bytes needed to reach the next quadword
	 * boundary of the destination.  Assemble eight source bytes
	 * from the two quadwords they straddle, shift them to the
	 * destination offset, merge with the destination's bytes below
	 * that offset, then advance everything by t0.
	 */
	subq	zero,DSTREG,t0
	and	t0,7,t0
	ldq_u	t3,7(SRCREG)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t5
	insql	t5,DSTREG,t5
	ldq_u	t6,0(DSTREG)
	mskql	t6,DSTREG,t6
	or	t5,t6,t5
	stq_u	t5,0(DSTREG)
	addq	SRCREG,t0,SRCREG
	addq	DSTREG,t0,DSTREG
	subq	SIZEREG,t0,SIZEREG
	ldq_u	t2,0(SRCREG)

bcopy_da_noentry:
	/* Same split as above: t0 = loop bytes, SIZEREG = size & 7 */
	subq	SIZEREG,1,t0
	bic	t0,7,t0
	and	SIZEREG,7,SIZEREG
	beq	t0,bcopy_da_finish2

bcopy_da_lp:
	/*
	 * Unrolled twice.  t2 and t3 alternate as the older and newer
	 * source quadword; each half combines the two into one aligned
	 * destination quadword.
	 */
	ldq_u	t3,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t2,SRCREG,t4
	extqh	t3,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	beq	t0,bcopy_da_finish1
	ldq_u	t2,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t3,SRCREG,t4
	extqh	t2,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	bne	t0,bcopy_da_lp

bcopy_da_finish2:
	/* Do the last new word */
	/* The newest source word is in t2 here; the tail expects it in t3 */
	mov	t2,t3

bcopy_da_finish1:
	/* Do the last partial word */
	/*
	 * Combine t3 with the quadword holding the last source byte
	 * (a3 - 1), then reuse the same-alignment tail to store it.
	 */
	ldq_u	t2,-1(a3)
	extql	t3,SRCREG,t3
	extqh	t2,SRCREG,t2
	or	t2,t3,t2
	br	zero,bcopy_samealign_lp_end

bcopy_da_finish:
	/* Do the last word in the next source word */
	/*
	 * Copy of at most 8 bytes; expects the first source quadword in
	 * t2 and the source end address in a3.  All source bytes are
	 * read before anything is written.
	 */
	ldq_u	t3,-1(a3)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t2
	/* Split the data into its high (t3) and low (t2) quadword parts */
	insqh	t2,DSTREG,t3
	insql	t2,DSTREG,t2
	/*
	 * Build a byte mask covering SIZEREG bytes.  For a size of
	 * exactly 8 the masked value is zero, so the cmovne leaves the
	 * all-ones mask in t4.
	 */
	lda	t4,-1(zero)
	mskql	t4,SIZEREG,t5
	cmovne	t5,t5,t4
	/* Position the mask the same way as the data */
	insqh	t4,DSTREG,t5
	insql	t4,DSTREG,t4
	addq	DSTREG,SIZEREG,a4
	ldq_u	t6,0(DSTREG)
	ldq_u	t7,-1(a4)
	/* Clear the bytes being replaced, keep the rest, merge in data */
	bic	t6,t4,t6
	bic	t7,t5,t7
	and	t2,t4,t2
	and	t3,t5,t3
	or	t2,t6,t2
	or	t3,t7,t3
	/*
	 * High part first: when both addresses fall in the same
	 * quadword, the low-part store below is the one that remains.
	 */
	stq_u	t3,-1(a4)
	stq_u	t2,0(DSTREG)
	RET

bcopy_overlap:
	/*
	 * Basically equivalent to previous case, only backwards.
	 * Not quite as highly optimized
	 */
	/* a3 / a4 = one past the last source / destination byte */
	addq	SRCREG,SIZEREG,a3
	addq	DSTREG,SIZEREG,a4

	/* less than 8 bytes - don't worry about overlap */
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_ov_short

	/* Possibly do a partial first word */
	/*
	 * t4 = bytes by which the destination end overhangs a quadword
	 * boundary.  Step both end pointers back by t4 (a4 becomes
	 * aligned), then write those t4 bytes while keeping the
	 * destination's bytes above them.
	 */
	and	a4,7,t4
	beq	t4,bcopy_ov_nostart2
	subq	a3,t4,a3
	subq	a4,t4,a4
	ldq_u	t1,0(a3)
	subq	SIZEREG,t4,SIZEREG
	ldq_u	t2,7(a3)
	ldq	t3,0(a4)
	extql	t1,a3,t1
	extqh	t2,a3,t2
	or	t1,t2,t1
	mskqh	t3,t4,t3
	mskql	t1,t4,t1
	or	t1,t3,t1
	stq	t1,0(a4)

bcopy_ov_nostart2:
	/* t4 = bytes for the quadword loop, SIZEREG = leftover (size & 7) */
	bic	SIZEREG,7,t4
	and	SIZEREG,7,SIZEREG
	beq	t4,bcopy_ov_lp_end

bcopy_ov_lp:
	/* This could be more pipelined, but it doesn't seem worth it */
	ldq_u	t0,-8(a3)
	subq	a4,8,a4
	ldq_u	t1,-1(a3)
	subq	a3,8,a3
	extql	t0,a3,t0
	extqh	t1,a3,t1
	subq	t4,8,t4
	or	t0,t1,t0
	stq	t0,0(a4)
	bne	t4,bcopy_ov_lp

bcopy_ov_lp_end:
	beq	SIZEREG,bcopy_done

	/*
	 * Leftover bytes at the very start of the region.  SRCREG and
	 * DSTREG are still the original pointers on this path.  The
	 * data is shifted up to the destination offset and merged with
	 * the destination's bytes below that offset.
	 */
	ldq_u	t0,0(SRCREG)
	ldq_u	t1,7(SRCREG)
	ldq_u	t2,0(DSTREG)
	extql	t0,SRCREG,t0
	extqh	t1,SRCREG,t1
	or	t0,t1,t0
	insql	t0,DSTREG,t0
	mskql	t2,DSTREG,t2
	or	t2,t0,t2
	stq_u	t2,0(DSTREG)

bcopy_done:
	RET

bcopy_ov_short:
	/* Short overlapping copy: load the first word and share the tail */
	ldq_u	t2,0(SRCREG)
	br	zero,bcopy_da_finish

	END_STRONG(memmove)