xref: /openbsd-src/lib/libc/arch/alpha/string/memcpy.S (revision 9b9d2a55a62c8e82206c25f94fcc7f4e2765250e)
1*9b9d2a55Sguenther/*	$OpenBSD: memcpy.S,v 1.6 2015/08/31 02:53:56 guenther Exp $	*/
25b859c19Sderaadt/*	$NetBSD: bcopy.S,v 1.3 1996/10/17 03:08:11 cgd Exp $	*/
339bae441Sniklas
45b859c19Sderaadt/*
55b859c19Sderaadt * Copyright (c) 1995 Carnegie-Mellon University.
65b859c19Sderaadt * All rights reserved.
75b859c19Sderaadt *
85b859c19Sderaadt * Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
95b859c19Sderaadt *	   added by Chris Demetriou.
105b859c19Sderaadt *
115b859c19Sderaadt * Permission to use, copy, modify and distribute this software and
125b859c19Sderaadt * its documentation is hereby granted, provided that both the copyright
135b859c19Sderaadt * notice and this permission notice appear in all copies of the
145b859c19Sderaadt * software, derivative works or modified versions, and any portions
155b859c19Sderaadt * thereof, and that both notices appear in supporting documentation.
165b859c19Sderaadt *
175b859c19Sderaadt * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
185b859c19Sderaadt * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
195b859c19Sderaadt * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
205b859c19Sderaadt *
215b859c19Sderaadt * Carnegie Mellon requests users of this software to return to
225b859c19Sderaadt *
235b859c19Sderaadt *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
245b859c19Sderaadt *  School of Computer Science
255b859c19Sderaadt *  Carnegie Mellon University
265b859c19Sderaadt *  Pittsburgh PA 15213-3890
275b859c19Sderaadt *
285b859c19Sderaadt * any improvements or extensions that they make and grant Carnegie the
295b859c19Sderaadt * rights to redistribute these changes.
305b859c19Sderaadt */
315b859c19Sderaadt
32*9b9d2a55Sguenther#include "SYS.h"
335b859c19Sderaadt
345b859c19Sderaadt#define	SRCREG		a1
355b859c19Sderaadt#define	DSTREG		a0
365b859c19Sderaadt#define	SIZEREG		a2
375b859c19Sderaadt
385b859c19Sderaadt/*
395b859c19Sderaadt * Copy bytes.
405b859c19Sderaadt *
415b859c19Sderaadt * void *memcpy(void *to, const void *from, size_t len);
425b859c19Sderaadt *
435b859c19Sderaadt * No matter how invoked, the source and destination registers are used
445b859c19Sderaadt * directly for calculation.  There's no point in copying them to "working"
455b859c19Sderaadt * registers, since the code uses their values "in place," and
465b859c19Sderaadt * copying them would be slower.
475b859c19Sderaadt */
485b859c19Sderaadt
495b859c19SderaadtLEAF(memcpy,3)
	/*
	 * Copy SIZEREG (a2) bytes from SRCREG (a1) to DSTREG (a0) and
	 * return the original destination pointer in v0.
	 *
	 * Data moves one quadword (8 bytes) at a time.  ldq_u/stq_u access
	 * the aligned quadword containing the given address (the low three
	 * address bits are ignored), so partial quadwords at either end are
	 * merged with the bytes already in the destination using the
	 * msk/ext/ins byte-manipulation instructions.
	 *
	 * Three paths:
	 *   - src and dst share the same offset within a quadword;
	 *   - offsets differ and len > 8 (each dst quadword is assembled
	 *     from two source quadwords);
	 *   - offsets differ and len <= 8 (bcopy_da_finish).
	 *
	 * Registers written: v0, t0-t7, a3, a4, plus a0-a2 themselves.
	 */
505b859c19Sderaadt	/* set up return value, while we still can */
515b859c19Sderaadt	mov	DSTREG,v0
525b859c19Sderaadt
535b859c19Sderaadt	/* Check for zero length */
545b859c19Sderaadt	beq	SIZEREG,bcopy_done
555b859c19Sderaadt
565b859c19Sderaadt	/* a3 = source end address (SRCREG + SIZEREG) */
575b859c19Sderaadt	addq	SRCREG,SIZEREG,a3
585b859c19Sderaadt
595b859c19Sderaadt	/* Get the first word: t2 = aligned quadword holding the first source byte */
605b859c19Sderaadt	ldq_u	t2,0(SRCREG)
615b859c19Sderaadt
625b859c19Sderaadt	/* Do they have the same alignment? */
635b859c19Sderaadt	xor	SRCREG,DSTREG,t0
645b859c19Sderaadt	and	t0,7,t0			/* t0 != 0: offsets within a quadword differ */
655b859c19Sderaadt	and	DSTREG,7,t1		/* t1 = dst offset within its quadword */
665b859c19Sderaadt	bne	t0,bcopy_different_alignment
675b859c19Sderaadt
685b859c19Sderaadt	/* src & dst have same alignment */
695b859c19Sderaadt	beq	t1,bcopy_all_aligned	/* offset 0: no leading partial quadword */
705b859c19Sderaadt
	/*
	 * Leading partial quadword: build a full first quadword from the
	 * dst bytes below the offset and the src bytes at/above it.
	 */
715b859c19Sderaadt	ldq_u	t3,0(DSTREG)		/* t3 = first dst quadword */
725b859c19Sderaadt	addq	SIZEREG,t1,SIZEREG	/* count from the start of that quadword */
735b859c19Sderaadt	mskqh	t2,SRCREG,t2		/* keep src bytes at/above the offset */
745b859c19Sderaadt	mskql	t3,SRCREG,t3		/* keep dst bytes below the offset */
755b859c19Sderaadt	or	t2,t3,t2		/* t2 = merged first quadword */
765b859c19Sderaadt
775b859c19Sderaadt	/*
	 * From here on the copy is in whole-quadword terms.  On the
	 * fall-through path DSTREG/SRCREG are not rounded down; the
	 * ldq_u/stq_u accesses below ignore their low three bits.
	 */
785b859c19Sderaadt
795b859c19Sderaadtbcopy_all_aligned:
805b859c19Sderaadt	/*
	 * t0 = bytes to store as whole quadwords before the last one
	 * (a multiple of 8); SIZEREG = byte count of the last quadword,
	 * where 0 means a full 8.  Skip the loop when only the last
	 * quadword remains (1..8 bytes).
	 */
815b859c19Sderaadt	subq	SIZEREG,1,t0
825b859c19Sderaadt	and	SIZEREG,7,SIZEREG
835b859c19Sderaadt	bic	t0,7,t0
845b859c19Sderaadt	beq	t0,bcopy_samealign_lp_end
855b859c19Sderaadt
865b859c19Sderaadtbcopy_samealign_lp:
875b859c19Sderaadt	stq_u	t2,0(DSTREG)		/* store the quadword currently held in t2 */
885b859c19Sderaadt	addq	DSTREG,8,DSTREG
895b859c19Sderaadt	ldq_u	t2,8(SRCREG)		/* fetch the next source quadword */
905b859c19Sderaadt	subq	t0,8,t0
915b859c19Sderaadt	addq	SRCREG,8,SRCREG
925b859c19Sderaadt	bne	t0,bcopy_samealign_lp
935b859c19Sderaadt
945b859c19Sderaadtbcopy_samealign_lp_end:
955b859c19Sderaadt	/*
	 * t2 holds the last quadword of data.  SIZEREG == 0: it is a
	 * full quadword, store it whole and return.  Otherwise only the
	 * low SIZEREG bytes are valid and must be merged.
	 * (Also entered from bcopy_da_finish1.)
	 */
965b859c19Sderaadt	bne	SIZEREG,bcopy_small_left
975b859c19Sderaadt	stq_u	t2,0(DSTREG)
985b859c19Sderaadt	RET
995b859c19Sderaadt
1005b859c19Sderaadtbcopy_small_left:
	/* Trailing partial quadword: replace only the low SIZEREG (1..7) bytes */
1015b859c19Sderaadt	mskql	t2,SIZEREG,t4		/* t4 = low SIZEREG bytes of the data */
1025b859c19Sderaadt	ldq_u	t3,0(DSTREG)
1035b859c19Sderaadt	mskqh	t3,SIZEREG,t3		/* t3 = dst bytes above them, preserved */
1045b859c19Sderaadt	or	t4,t3,t4
1055b859c19Sderaadt	stq_u	t4,0(DSTREG)
1065b859c19Sderaadt	RET
1075b859c19Sderaadt
1085b859c19Sderaadtbcopy_different_alignment:
1095b859c19Sderaadt	/*
1105b859c19Sderaadt	 * this is the fun part
1115b859c19Sderaadt	 */
	/*
	 * src and dst offsets within a quadword differ, so every dst
	 * quadword has to be assembled from two source quadwords with
	 * extql/extqh.  On entry: t1 = dst offset, t2 = first src quadword.
	 */
1125b859c19Sderaadt	addq	SRCREG,SIZEREG,a3	/* a3 = source end address (same value as set at entry) */
1135b859c19Sderaadt	cmpule	SIZEREG,8,t0		/* t0 = (len <= 8), unsigned compare */
1145b859c19Sderaadt	bne	t0,bcopy_da_finish	/* short copy is handled separately */
1155b859c19Sderaadt
1165b859c19Sderaadt	beq	t1,bcopy_da_noentry	/* dst already 8-byte aligned */
1175b859c19Sderaadt
1185b859c19Sderaadt	/*
	 * Do the initial partial word: fill dst up to its next 8-byte
	 * boundary, then advance src, dst and the count past those bytes.
	 */
1195b859c19Sderaadt	subq	zero,DSTREG,t0
1205b859c19Sderaadt	and	t0,7,t0			/* t0 = bytes up to the next dst boundary (1..7) */
1215b859c19Sderaadt	ldq_u	t3,7(SRCREG)
1225b859c19Sderaadt	extql	t2,SRCREG,t2
1235b859c19Sderaadt	extqh	t3,SRCREG,t3
1245b859c19Sderaadt	or	t2,t3,t5		/* t5 = 8 source bytes starting at SRCREG */
1255b859c19Sderaadt	insql	t5,DSTREG,t5		/* shift them up to the dst byte offset */
1265b859c19Sderaadt	ldq_u	t6,0(DSTREG)
1275b859c19Sderaadt	mskql	t6,DSTREG,t6		/* keep dst bytes below the offset */
1285b859c19Sderaadt	or	t5,t6,t5
1295b859c19Sderaadt	stq_u	t5,0(DSTREG)
1305b859c19Sderaadt	addq	SRCREG,t0,SRCREG
1315b859c19Sderaadt	addq	DSTREG,t0,DSTREG	/* DSTREG is now 8-byte aligned */
1325b859c19Sderaadt	subq	SIZEREG,t0,SIZEREG
1335b859c19Sderaadt	ldq_u	t2,0(SRCREG)		/* t2 = quadword holding the new first source byte */
1345b859c19Sderaadt
1355b859c19Sderaadtbcopy_da_noentry:
	/*
	 * Same split as in bcopy_all_aligned: t0 = bytes to store as whole
	 * quadwords before the last one, SIZEREG = byte count of the last
	 * quadword (0 means a full 8).
	 */
1365b859c19Sderaadt	subq	SIZEREG,1,t0
1375b859c19Sderaadt	bic	t0,7,t0
1385b859c19Sderaadt	and	SIZEREG,7,SIZEREG
1395b859c19Sderaadt	beq	t0,bcopy_da_finish2
1405b859c19Sderaadt
1415b859c19Sderaadtbcopy_da_lp:
	/*
	 * Unrolled twice.  t2 and t3 alternate as the older and the newer
	 * source quadword, so no register move is needed between steps.
	 * SRCREG only ever advances by 8 here, so its low three bits (the
	 * byte shift used by extql/extqh) stay the same throughout.
	 */
1425b859c19Sderaadt	ldq_u	t3,7(SRCREG)		/* t3 = newer source quadword */
1435b859c19Sderaadt	addq	SRCREG,8,SRCREG
1445b859c19Sderaadt	extql	t2,SRCREG,t4		/* bytes taken from the older quadword */
1455b859c19Sderaadt	extqh	t3,SRCREG,t5		/* bytes taken from the newer quadword */
1465b859c19Sderaadt	subq	t0,8,t0
1475b859c19Sderaadt	or	t4,t5,t5
1485b859c19Sderaadt	stq	t5,0(DSTREG)		/* DSTREG is 8-byte aligned here: plain stq */
1495b859c19Sderaadt	addq	DSTREG,8,DSTREG
1505b859c19Sderaadt	beq	t0,bcopy_da_finish1	/* done; newest quadword is in t3 */
1515b859c19Sderaadt	ldq_u	t2,7(SRCREG)		/* second step: roles of t2/t3 swapped */
1525b859c19Sderaadt	addq	SRCREG,8,SRCREG
1535b859c19Sderaadt	extql	t3,SRCREG,t4
1545b859c19Sderaadt	extqh	t2,SRCREG,t5
1555b859c19Sderaadt	subq	t0,8,t0
1565b859c19Sderaadt	or	t4,t5,t5
1575b859c19Sderaadt	stq	t5,0(DSTREG)
1585b859c19Sderaadt	addq	DSTREG,8,DSTREG
1595b859c19Sderaadt	bne	t0,bcopy_da_lp
1605b859c19Sderaadt
1615b859c19Sderaadtbcopy_da_finish2:
1625b859c19Sderaadt	/*
	 * Do the last new word: on this path the newest source quadword
	 * is in t2; move it to t3, where bcopy_da_finish1 expects it.
	 */
1635b859c19Sderaadt	mov	t2,t3
1645b859c19Sderaadt
1655b859c19Sderaadtbcopy_da_finish1:
1665b859c19Sderaadt	/*
	 * Do the last partial word: assemble the final quadword of data in
	 * t2 and reuse the same-alignment tail to store it (whole or
	 * partial, depending on SIZEREG).
	 * NOTE(review): the load uses -1(a3), the quadword holding the last
	 * source byte, presumably so nothing beyond it is read -- confirm.
	 */
1675b859c19Sderaadt	ldq_u	t2,-1(a3)
1685b859c19Sderaadt	extql	t3,SRCREG,t3
1695b859c19Sderaadt	extqh	t2,SRCREG,t2
1705b859c19Sderaadt	or	t2,t3,t2		/* t2 = final quadword of data */
1715b859c19Sderaadt	br	zero,bcopy_samealign_lp_end
1725b859c19Sderaadt
1735b859c19Sderaadtbcopy_da_finish:
1745b859c19Sderaadt	/*
	 * Short copy (1 <= len <= 8) with differing alignment.  Build the
	 * source bytes and a byte mask of len bytes, position both for the
	 * first dst quadword (insql) and for the quadword holding the last
	 * dst byte (insqh), then merge them into the destination.
	 */
1755b859c19Sderaadt	ldq_u	t3,-1(a3)		/* quadword holding the last source byte */
1765b859c19Sderaadt	extql	t2,SRCREG,t2
1775b859c19Sderaadt	extqh	t3,SRCREG,t3
1785b859c19Sderaadt	or	t2,t3,t2		/* t2 = source bytes, first byte in byte 0 */
1795b859c19Sderaadt	insqh	t2,DSTREG,t3		/* data spilling into the following quadword */
1805b859c19Sderaadt	insql	t2,DSTREG,t2		/* data for the first dst quadword */
1815b859c19Sderaadt	lda	t4,-1(zero)		/* t4 = all ones */
1825b859c19Sderaadt	mskql	t4,SIZEREG,t5		/* t5 = ones in the low (len & 7) bytes */
1835b859c19Sderaadt	cmovne	t5,t5,t4		/* len == 8 gives t5 == 0: keep all ones */
1845b859c19Sderaadt	insqh	t4,DSTREG,t5		/* mask for the following quadword */
1855b859c19Sderaadt	insql	t4,DSTREG,t4		/* mask for the first dst quadword */
1865b859c19Sderaadt	addq	DSTREG,SIZEREG,a4	/* a4 = dst end address */
1875b859c19Sderaadt	ldq_u	t6,0(DSTREG)		/* t6 = first dst quadword */
1885b859c19Sderaadt	ldq_u	t7,-1(a4)		/* t7 = quadword holding the last dst byte */
1895b859c19Sderaadt	bic	t6,t4,t6		/* clear the dst bytes being replaced */
1905b859c19Sderaadt	bic	t7,t5,t7
1915b859c19Sderaadt	and	t2,t4,t2		/* drop data bytes outside the mask */
1925b859c19Sderaadt	and	t3,t5,t3
1935b859c19Sderaadt	or	t2,t6,t2
1945b859c19Sderaadt	or	t3,t7,t3
	/*
	 * NOTE(review): when the copy stays within one dst quadword both
	 * stores address the same quadword; the store of t2 is issued
	 * last, so it appears to be the one that decides the result.
	 * Keep this order.
	 */
1955b859c19Sderaadt	stq_u	t3,-1(a4)
1965b859c19Sderaadt	stq_u	t2,0(DSTREG)
1975b859c19Sderaadtbcopy_done:
	/* Reached directly for len == 0, and by fall-through after a short copy */
1985b859c19Sderaadt	RET
1995b859c19Sderaadt
200*9b9d2a55Sguenther	END_STRONG(memcpy)
201