xref: /openbsd-src/lib/libc/arch/alpha/string/memmove.S (revision 9b9d2a55a62c8e82206c25f94fcc7f4e2765250e)
1*9b9d2a55Sguenther/*	$OpenBSD: memmove.S,v 1.6 2015/08/31 02:53:56 guenther Exp $	*/
25b859c19Sderaadt/*	$NetBSD: bcopy.S,v 1.3 1996/10/17 03:08:11 cgd Exp $	*/
339bae441Sniklas
45b859c19Sderaadt/*
55b859c19Sderaadt * Copyright (c) 1995 Carnegie-Mellon University.
65b859c19Sderaadt * All rights reserved.
75b859c19Sderaadt *
85b859c19Sderaadt * Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
95b859c19Sderaadt *	   added by Chris Demetriou.
105b859c19Sderaadt *
115b859c19Sderaadt * Permission to use, copy, modify and distribute this software and
125b859c19Sderaadt * its documentation is hereby granted, provided that both the copyright
135b859c19Sderaadt * notice and this permission notice appear in all copies of the
145b859c19Sderaadt * software, derivative works or modified versions, and any portions
155b859c19Sderaadt * thereof, and that both notices appear in supporting documentation.
165b859c19Sderaadt *
175b859c19Sderaadt * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
185b859c19Sderaadt * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
195b859c19Sderaadt * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
205b859c19Sderaadt *
215b859c19Sderaadt * Carnegie Mellon requests users of this software to return to
225b859c19Sderaadt *
235b859c19Sderaadt *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
245b859c19Sderaadt *  School of Computer Science
255b859c19Sderaadt *  Carnegie Mellon University
265b859c19Sderaadt *  Pittsburgh PA 15213-3890
275b859c19Sderaadt *
285b859c19Sderaadt * any improvements or extensions that they make and grant Carnegie the
295b859c19Sderaadt * rights to redistribute these changes.
305b859c19Sderaadt */
315b859c19Sderaadt
32*9b9d2a55Sguenther#include "SYS.h"
335b859c19Sderaadt
/*
 * Register aliases for the three memmove(to, from, len) arguments:
 * DSTREG (a0) is "to", SRCREG (a1) is "from" and SIZEREG (a2) is "len".
 * The routine modifies all three in place while copying.
 */
345b859c19Sderaadt#define	SRCREG		a1
355b859c19Sderaadt#define	DSTREG		a0
365b859c19Sderaadt
375b859c19Sderaadt#define	SIZEREG		a2
385b859c19Sderaadt
395b859c19Sderaadt/*
405b859c19Sderaadt * Copy bytes.
415b859c19Sderaadt *
425b859c19Sderaadt * void *memmove(void *to, const void *from, size_t len);
435b859c19Sderaadt *
445b859c19Sderaadt * No matter how invoked, the source and destination registers are used
455b859c19Sderaadt * directly for calculation.  There's no point in copying them to "working"
465b859c19Sderaadt * registers, since the code uses their values "in place," and
475b859c19Sderaadt * copying them would be slower.
485b859c19Sderaadt */
495b859c19Sderaadt
505b859c19SderaadtLEAF(memmove,3)
	/*
	 * memmove: copy SIZEREG bytes from SRCREG to DSTREG, handling
	 * overlapping buffers, and return the original DSTREG in v0.
	 *
	 * Paths:
	 *   - length 0: return immediately.
	 *   - DSTREG inside [SRCREG, SRCREG+len): copy backwards
	 *     (bcopy_overlap).
	 *   - otherwise copy forwards, either with source and destination
	 *     sharing the same offset within a quadword, or with differing
	 *     offsets (bcopy_different_alignment).
	 *   - copies of 8 bytes or fewer with differing offsets, and all
	 *     short overlapping copies, go through bcopy_da_finish.
	 *
	 * Scratch registers used: t0-t7, a3, a4.
	 * LEAF/RET/END_STRONG come from SYS.h; presumably they emit the
	 * usual leaf-function prologue, return and symbol definitions.
	 */
515b859c19Sderaadt	/* v0 = original destination; DSTREG is advanced later, so save it now */
525b859c19Sderaadt	mov	DSTREG,v0
535b859c19Sderaadt
545b859c19Sderaadt	/* Check for zero length */
555b859c19Sderaadt	beq	SIZEREG,bcopy_done
565b859c19Sderaadt
575b859c19Sderaadt	/*
	 * Check for overlap: (dst - src), compared unsigned, is below len
	 * exactly when dst lies in [src, src+len).  A forward copy would
	 * then overwrite source bytes before reading them, so go backwards.
	 * dst == src also takes that path.
	 */
585b859c19Sderaadt	subq	DSTREG,SRCREG,t5
595b859c19Sderaadt	cmpult	t5,SIZEREG,t5
605b859c19Sderaadt	bne	t5,bcopy_overlap
615b859c19Sderaadt
625b859c19Sderaadt	/* a3 = end address (one past the last source byte) */
635b859c19Sderaadt	addq	SRCREG,SIZEREG,a3
645b859c19Sderaadt
655b859c19Sderaadt	/* Get the first word: t2 = quadword containing the first source byte */
665b859c19Sderaadt	ldq_u	t2,0(SRCREG)
675b859c19Sderaadt
685b859c19Sderaadt	/*
	 * Do they have the same alignment?
	 * t0 = (src ^ dst) & 7 is non-zero when the offsets differ;
	 * t1 = dst & 7 is the destination's offset within its quadword.
	 */
695b859c19Sderaadt	xor	SRCREG,DSTREG,t0
705b859c19Sderaadt	and	t0,7,t0
715b859c19Sderaadt	and	DSTREG,7,t1
725b859c19Sderaadt	bne	t0,bcopy_different_alignment
735b859c19Sderaadt
745b859c19Sderaadt	/* src & dst have same alignment; if that offset is 0, no head merge */
755b859c19Sderaadt	beq	t1,bcopy_all_aligned
765b859c19Sderaadt
	/*
	 * Same non-zero offset: build the first destination quadword from
	 * the existing destination bytes below the offset (t3) and the
	 * source bytes at and above it (t2).  SIZEREG is increased by the
	 * offset so that it counts from the start of the quadword.
	 */
775b859c19Sderaadt	ldq_u	t3,0(DSTREG)
785b859c19Sderaadt	addq	SIZEREG,t1,SIZEREG
795b859c19Sderaadt	mskqh	t2,SRCREG,t2
805b859c19Sderaadt	mskql	t3,SRCREG,t3
815b859c19Sderaadt	or	t2,t3,t2
825b859c19Sderaadt
835b859c19Sderaadt	/*
	 * From here the copy proceeds a whole quadword at a time.  The
	 * pointers may still carry low bits, but only ldq_u/stq_u are
	 * used on them in this path.
	 */
845b859c19Sderaadt
855b859c19Sderaadtbcopy_all_aligned:
865b859c19Sderaadt	/*
	 * If 8 bytes or fewer remain, skip the loop.
	 * t0 = (size - 1) & ~7 is the byte count handled by the loop;
	 * SIZEREG = size & 7 is the byte count of the final quadword
	 * (0 means the final quadword is full).
	 */
875b859c19Sderaadt	subq	SIZEREG,1,t0
885b859c19Sderaadt	and	SIZEREG,7,SIZEREG
895b859c19Sderaadt	bic	t0,7,t0
905b859c19Sderaadt	beq	t0,bcopy_samealign_lp_end
915b859c19Sderaadt
925b859c19Sderaadtbcopy_samealign_lp:
	/* Store the pending quadword in t2, then fetch the next source one */
935b859c19Sderaadt	stq_u	t2,0(DSTREG)
945b859c19Sderaadt	addq	DSTREG,8,DSTREG
955b859c19Sderaadt	ldq_u	t2,8(SRCREG)
965b859c19Sderaadt	subq	t0,8,t0
975b859c19Sderaadt	addq	SRCREG,8,SRCREG
985b859c19Sderaadt	bne	t0,bcopy_samealign_lp
995b859c19Sderaadt
1005b859c19Sderaadtbcopy_samealign_lp_end:
1015b859c19Sderaadt	/*
	 * t2 holds the last quadword of data.  If SIZEREG is 0 it is a
	 * full quadword: store it and exit.  Otherwise only its low
	 * SIZEREG bytes belong to the copy.
	 * (Also entered from bcopy_da_finish1.)
	 */
1025b859c19Sderaadt	bne	SIZEREG,bcopy_small_left
1035b859c19Sderaadt	stq_u	t2,0(DSTREG)
1045b859c19Sderaadt	RET
1055b859c19Sderaadt
1065b859c19Sderaadtbcopy_small_left:
	/*
	 * Partial final quadword: take the low SIZEREG bytes from t2 and
	 * keep the remaining high bytes already in the destination.
	 */
1075b859c19Sderaadt	mskql	t2,SIZEREG,t4
1085b859c19Sderaadt	ldq_u	t3,0(DSTREG)
1095b859c19Sderaadt	mskqh	t3,SIZEREG,t3
1105b859c19Sderaadt	or	t4,t3,t4
1115b859c19Sderaadt	stq_u	t4,0(DSTREG)
1125b859c19Sderaadt	RET
1135b859c19Sderaadt
1145b859c19Sderaadtbcopy_different_alignment:
1155b859c19Sderaadt	/*
1165b859c19Sderaadt	 * this is the fun part
1175b859c19Sderaadt	 */
	/*
	 * Forward copy where src and dst sit at different offsets within
	 * a quadword.  On entry t2 = first source quadword, t1 = dst & 7.
	 * a3 is recomputed here; SRCREG and SIZEREG are unchanged since it
	 * was first set, so the value is the same.
	 * Copies of 8 bytes or fewer are done entirely by bcopy_da_finish.
	 */
1185b859c19Sderaadt	addq	SRCREG,SIZEREG,a3
1195b859c19Sderaadt	cmpule	SIZEREG,8,t0
1205b859c19Sderaadt	bne	t0,bcopy_da_finish
1215b859c19Sderaadt
	/* Destination already 8-byte aligned: no initial partial word */
1225b859c19Sderaadt	beq	t1,bcopy_da_noentry
1235b859c19Sderaadt
1245b859c19Sderaadt	/*
	 * Do the initial partial word.
	 * t0 = (-dst) & 7 is the byte count up to the next 8-byte boundary
	 * of dst.  Assemble 8 unaligned source bytes from t2 and the
	 * quadword at src+7, shift them to dst's offset, merge with the
	 * destination bytes below that offset and store.  Then advance
	 * src and dst by t0, shrink the size, and reload t2 for the new
	 * src.
	 */
1255b859c19Sderaadt	subq	zero,DSTREG,t0
1265b859c19Sderaadt	and	t0,7,t0
1275b859c19Sderaadt	ldq_u	t3,7(SRCREG)
1285b859c19Sderaadt	extql	t2,SRCREG,t2
1295b859c19Sderaadt	extqh	t3,SRCREG,t3
1305b859c19Sderaadt	or	t2,t3,t5
1315b859c19Sderaadt	insql	t5,DSTREG,t5
1325b859c19Sderaadt	ldq_u	t6,0(DSTREG)
1335b859c19Sderaadt	mskql	t6,DSTREG,t6
1345b859c19Sderaadt	or	t5,t6,t5
1355b859c19Sderaadt	stq_u	t5,0(DSTREG)
1365b859c19Sderaadt	addq	SRCREG,t0,SRCREG
1375b859c19Sderaadt	addq	DSTREG,t0,DSTREG
1385b859c19Sderaadt	subq	SIZEREG,t0,SIZEREG
1395b859c19Sderaadt	ldq_u	t2,0(SRCREG)
1405b859c19Sderaadt
1415b859c19Sderaadtbcopy_da_noentry:
	/*
	 * DSTREG is now 8-byte aligned.  Same split as the same-alignment
	 * path: t0 = (size - 1) & ~7 bytes for the loop, SIZEREG = size & 7
	 * bytes in the final quadword (0 = full).
	 */
1425b859c19Sderaadt	subq	SIZEREG,1,t0
1435b859c19Sderaadt	bic	t0,7,t0
1445b859c19Sderaadt	and	SIZEREG,7,SIZEREG
1455b859c19Sderaadt	beq	t0,bcopy_da_finish2
1465b859c19Sderaadt
1475b859c19Sderaadtbcopy_da_lp:
	/*
	 * Unrolled twice.  Each half combines the previously loaded source
	 * quadword with a newly loaded one into one aligned destination
	 * quadword; t2 and t3 swap the "previous" and "new" roles between
	 * the halves.  SRCREG is bumped before the extracts, which only
	 * use its low three bits, so the +8 does not affect them.
	 */
1485b859c19Sderaadt	ldq_u	t3,7(SRCREG)
1495b859c19Sderaadt	addq	SRCREG,8,SRCREG
1505b859c19Sderaadt	extql	t2,SRCREG,t4
1515b859c19Sderaadt	extqh	t3,SRCREG,t5
1525b859c19Sderaadt	subq	t0,8,t0
1535b859c19Sderaadt	or	t4,t5,t5
1545b859c19Sderaadt	stq	t5,0(DSTREG)
1555b859c19Sderaadt	addq	DSTREG,8,DSTREG
	/* Exit after the first half: the newest source quadword is in t3 */
1565b859c19Sderaadt	beq	t0,bcopy_da_finish1
1575b859c19Sderaadt	ldq_u	t2,7(SRCREG)
1585b859c19Sderaadt	addq	SRCREG,8,SRCREG
1595b859c19Sderaadt	extql	t3,SRCREG,t4
1605b859c19Sderaadt	extqh	t2,SRCREG,t5
1615b859c19Sderaadt	subq	t0,8,t0
1625b859c19Sderaadt	or	t4,t5,t5
1635b859c19Sderaadt	stq	t5,0(DSTREG)
1645b859c19Sderaadt	addq	DSTREG,8,DSTREG
1655b859c19Sderaadt	bne	t0,bcopy_da_lp
1665b859c19Sderaadt
1675b859c19Sderaadtbcopy_da_finish2:
1685b859c19Sderaadt	/* Newest source quadword is in t2 here; move it to t3 for finish1 */
1695b859c19Sderaadt	mov	t2,t3
1705b859c19Sderaadt
1715b859c19Sderaadtbcopy_da_finish1:
1725b859c19Sderaadt	/*
	 * Do the last partial word: combine t3 with the quadword holding
	 * the last source byte (a3 - 1) into t2, then let the shared tail
	 * store it whole or partially according to SIZEREG.
	 */
1735b859c19Sderaadt	ldq_u	t2,-1(a3)
1745b859c19Sderaadt	extql	t3,SRCREG,t3
1755b859c19Sderaadt	extqh	t2,SRCREG,t2
1765b859c19Sderaadt	or	t2,t3,t2
1775b859c19Sderaadt	br	zero,bcopy_samealign_lp_end
1785b859c19Sderaadt
1795b859c19Sderaadtbcopy_da_finish:
1805b859c19Sderaadt	/*
	 * Copy of 1..8 bytes at arbitrary offsets; the destination range
	 * may span two quadwords.  Expects t2 = first source quadword and
	 * a3 = src + size.  Also reached from bcopy_ov_short.
	 *
	 * t2 = 8 unaligned source bytes starting at src (bytes past the
	 * length are masked off below).
	 */
1815b859c19Sderaadt	ldq_u	t3,-1(a3)
1825b859c19Sderaadt	extql	t2,SRCREG,t2
1835b859c19Sderaadt	extqh	t3,SRCREG,t3
1845b859c19Sderaadt	or	t2,t3,t2
	/* Split the data at dst's offset: t2 = first-quadword part, t3 = second */
1855b859c19Sderaadt	insqh	t2,DSTREG,t3
1865b859c19Sderaadt	insql	t2,DSTREG,t2
	/*
	 * Build a byte mask covering `size` bytes: start from all ones,
	 * keep the low (size & 7) bytes; if that is empty (size == 8) the
	 * all-ones value is kept.  Then split the mask the same way as
	 * the data: t4 for the first quadword, t5 for the second.
	 */
1875b859c19Sderaadt	lda	t4,-1(zero)
1885b859c19Sderaadt	mskql	t4,SIZEREG,t5
1895b859c19Sderaadt	cmovne	t5,t5,t4
1905b859c19Sderaadt	insqh	t4,DSTREG,t5
1915b859c19Sderaadt	insql	t4,DSTREG,t4
	/* a4 = dst + size; t6/t7 = quadwords holding first/last dest byte */
1925b859c19Sderaadt	addq	DSTREG,SIZEREG,a4
1935b859c19Sderaadt	ldq_u	t6,0(DSTREG)
1945b859c19Sderaadt	ldq_u	t7,-1(a4)
	/* Clear the bytes to be replaced, select the new bytes, and merge */
1955b859c19Sderaadt	bic	t6,t4,t6
1965b859c19Sderaadt	bic	t7,t5,t7
1975b859c19Sderaadt	and	t2,t4,t2
1985b859c19Sderaadt	and	t3,t5,t3
1995b859c19Sderaadt	or	t2,t6,t2
2005b859c19Sderaadt	or	t3,t7,t3
	/*
	 * Store the last-byte quadword first.  When the range does not
	 * span two quadwords both stores hit the same one, and the
	 * first-quadword store (t2) must be the one that lands last.
	 */
2015b859c19Sderaadt	stq_u	t3,-1(a4)
2025b859c19Sderaadt	stq_u	t2,0(DSTREG)
2035b859c19Sderaadt	RET
2045b859c19Sderaadt
2055b859c19Sderaadtbcopy_overlap:
2065b859c19Sderaadt	/*
2075b859c19Sderaadt	 * Basically equivalent to previous case, only backwards.
2085b859c19Sderaadt	 * Not quite as highly optimized
2095b859c19Sderaadt	 */
	/* a3 = one past the last source byte, a4 = one past the last dest byte */
2105b859c19Sderaadt	addq	SRCREG,SIZEREG,a3
2115b859c19Sderaadt	addq	DSTREG,SIZEREG,a4
2125b859c19Sderaadt
2135b859c19Sderaadt	/*
	 * 8 bytes or fewer - don't worry about overlap: the short path
	 * performs all of its loads before any store.
	 */
2145b859c19Sderaadt	cmpule	SIZEREG,8,t0
2155b859c19Sderaadt	bne	t0,bcopy_ov_short
2165b859c19Sderaadt
2175b859c19Sderaadt	/*
	 * Possibly do a partial first word (the tail of the buffer).
	 * t4 = dst_end & 7.  If non-zero, step a3 and a4 back by t4 so
	 * that a4 is 8-byte aligned, fetch 8 unaligned source bytes at
	 * a3, keep their low t4 bytes, keep the destination bytes above
	 * those, and store the merged quadword.
	 */
2185b859c19Sderaadt	and	a4,7,t4
2195b859c19Sderaadt	beq	t4,bcopy_ov_nostart2
2205b859c19Sderaadt	subq	a3,t4,a3
2215b859c19Sderaadt	subq	a4,t4,a4
2225b859c19Sderaadt	ldq_u	t1,0(a3)
2235b859c19Sderaadt	subq	SIZEREG,t4,SIZEREG
2245b859c19Sderaadt	ldq_u	t2,7(a3)
2255b859c19Sderaadt	ldq	t3,0(a4)
2265b859c19Sderaadt	extql	t1,a3,t1
2275b859c19Sderaadt	extqh	t2,a3,t2
2285b859c19Sderaadt	or	t1,t2,t1
2295b859c19Sderaadt	mskqh	t3,t4,t3
2305b859c19Sderaadt	mskql	t1,t4,t1
2315b859c19Sderaadt	or	t1,t3,t1
2325b859c19Sderaadt	stq	t1,0(a4)
2335b859c19Sderaadt
2345b859c19Sderaadtbcopy_ov_nostart2:
	/*
	 * a4 is 8-byte aligned here.  t4 = size & ~7 bytes for the loop;
	 * SIZEREG = size & 7 bytes left at the start of the buffer.
	 */
2355b859c19Sderaadt	bic	SIZEREG,7,t4
2365b859c19Sderaadt	and	SIZEREG,7,SIZEREG
2375b859c19Sderaadt	beq	t4,bcopy_ov_lp_end
2385b859c19Sderaadt
2395b859c19Sderaadtbcopy_ov_lp:
2405b859c19Sderaadt	/* This could be more pipelined, but it doesn't seem worth it */
	/*
	 * Working downwards: load the two source quadwords covering
	 * [a3-8, a3), combine them, and store to the aligned quadword
	 * just below a4.
	 */
2415b859c19Sderaadt	ldq_u	t0,-8(a3)
2425b859c19Sderaadt	subq	a4,8,a4
2435b859c19Sderaadt	ldq_u	t1,-1(a3)
2445b859c19Sderaadt	subq	a3,8,a3
2455b859c19Sderaadt	extql	t0,a3,t0
2465b859c19Sderaadt	extqh	t1,a3,t1
2475b859c19Sderaadt	subq	t4,8,t4
2485b859c19Sderaadt	or	t0,t1,t0
2495b859c19Sderaadt	stq	t0,0(a4)
2505b859c19Sderaadt	bne	t4,bcopy_ov_lp
2515b859c19Sderaadt
2525b859c19Sderaadtbcopy_ov_lp_end:
	/* Nothing left at the start of the buffer: done */
2535b859c19Sderaadt	beq	SIZEREG,bcopy_done
2545b859c19Sderaadt
	/*
	 * Copy the remaining bytes at the very start, using the original
	 * SRCREG/DSTREG (this path never modifies them).  The source
	 * bytes are shifted to dst's offset and merged with the
	 * destination bytes below that offset, so the store covers dst
	 * up to the end of its quadword.  That is the SIZEREG bytes still
	 * outstanding, since the copy worked down from an aligned a4.
	 */
2555b859c19Sderaadt	ldq_u	t0,0(SRCREG)
2565b859c19Sderaadt	ldq_u	t1,7(SRCREG)
2575b859c19Sderaadt	ldq_u	t2,0(DSTREG)
2585b859c19Sderaadt	extql	t0,SRCREG,t0
2595b859c19Sderaadt	extqh	t1,SRCREG,t1
2605b859c19Sderaadt	or	t0,t1,t0
2615b859c19Sderaadt	insql	t0,DSTREG,t0
2625b859c19Sderaadt	mskql	t2,DSTREG,t2
2635b859c19Sderaadt	or	t2,t0,t2
2645b859c19Sderaadt	stq_u	t2,0(DSTREG)
2655b859c19Sderaadt
2665b859c19Sderaadtbcopy_done:
	/* Common exit; v0 already holds the original destination */
2675b859c19Sderaadt	RET
2685b859c19Sderaadt
2695b859c19Sderaadtbcopy_ov_short:
	/*
	 * Overlapping copy of 8 bytes or fewer: load the first source
	 * quadword into t2 as bcopy_da_finish expects (a3 is already
	 * set), then reuse that path.
	 */
2705b859c19Sderaadt	ldq_u	t2,0(SRCREG)
2715b859c19Sderaadt	br	zero,bcopy_da_finish
2725b859c19Sderaadt
273*9b9d2a55Sguenther	END_STRONG(memmove)
274