/* xref: /plan9/sys/src/ape/lib/ap/power/memmove.s (revision 7dd7cddf99dd7472612f1413b4da293630e6b1bc) */
/*
 * memmove(s1, s2, n) and memcpy(s1, s2, n) for PowerPC (Plan 9 assembler).
 *
 * Both entry points share one body: memmove just branches to the label
 * `move' at the top of memcpy, so memcpy handles overlapping buffers
 * exactly as memmove does.
 *
 * On entry R3 holds the first argument (the to-pointer).  It is saved
 * in its argument slot s1+0(FP) and reloaded into R3 before RETURN, so
 * the value returned is the original to-pointer.  The from-pointer and
 * the count are read from s2+4(FP) and n+8(FP).
 *
 * A zero count returns at once.  A negative count goes to `trap', which
 * deliberately loads from address 0 and then falls through to `ret'.
 *
 * Register use:
 *	R7	scratch for the alignment tests
 *	R9	bytes still to copy
 *	R10	to-pointer		R12	end of to   (backward copy cursor)
 *	R11	from-pointer		R13	end of from (backward copy cursor)
 *	R14	loop trip count, copied into CTR
 *	R16-R19	data in flight
 */

/*
 * BDNZ label: BC with BO=16, BI=0, i.e. decrement CTR and branch while
 * CTR is non-zero.  The trailing comma is intentional: the branch target
 * written after the macro name becomes the third operand.
 */
#define	BDNZ	BC	16,0,
	TEXT	memmove(SB), $0
	BR	move

	TEXT	memcpy(SB), $0
move:

/*
 * performance:
 * (tba)
 */

	MOVW	R3, s1+0(FP)		/* save to-pointer; it is the return value */
	MOVW	n+8(FP), R9		/* R9 is count */
	MOVW	R3, R10			/* R10 is to-pointer */
	CMP	R9, $0
	BEQ	ret			/* nothing to copy */
	BLT	trap			/* negative count */
	MOVW	s2+4(FP), R11		/* R11 is from-pointer */

/*
 * if no more than 16 bytes, just use one lsw/stsw
 * (all bytes are loaded into registers before any is stored,
 * so the direction of any overlap does not matter here)
 */
	CMP	R9, $16
	BLE	fout

	ADD	R9,R11, R13		/* R13 is end from-pointer */
	ADD	R9,R10, R12		/* R12 is end to-pointer */

/*
 * easiest test is copy backwards if
 * destination string has higher mem address
 */
	CMPU	R10, R11
	BGT	back

/*
 * test if both pointers
 * are similarly word aligned
 * (same low two address bits); if not, take the string-instruction path
 */
	XOR	R10,R11, R7
	ANDCC	$3,R7
	BNE	fbad

/*
 * move a few bytes to align pointers:
 * R7 = 4 - (to & 3) bytes are copied with one lsw/stsw pair
 * whose byte count is taken from XER
 */
	ANDCC	$3,R10,R7
	BEQ	f2			/* already word aligned */
	SUBC	R7, $4, R7		/* R7 = 4 - R7 */
	SUB	R7, R9			/* those bytes come off the count */
	MOVW	R7, XER
	LSW	(R11), R16
	ADD	R7, R11
	STSW	R16, (R10)
	ADD	R7, R10

/*
 * turn R14 into a count of 16-byte blocks (R9 >> 4)
 * copy 16 bytes at a time while there's room.
 * MOVWU is the update form used with displacement 4, so both pointers
 * are biased by -4 before the loop and the bias is removed afterwards.
 */
f2:
	SRAWCC	$4, R9, R14
	BLE	fout			/* fewer than 16 bytes left */
	MOVW	R14, CTR
	SUB	$4, R11
	SUB	$4, R10
f3:
	MOVWU	4(R11), R16
	MOVWU	R16, 4(R10)
	MOVWU	4(R11), R17
	MOVWU	R17, 4(R10)
	MOVWU	4(R11), R16
	MOVWU	R16, 4(R10)
	MOVWU	4(R11), R17
	MOVWU	R17, 4(R10)
	BDNZ	f3
	RLWNMCC	$0, R9, $15, R9	/* residue */
	BEQ	ret
	ADD	$4, R11			/* undo the -4 bias before the tail copy */
	ADD	$4, R10

/*
 * move up to 16 bytes through R16 .. R19; aligned and unaligned
 * (byte count in R9 is passed to lsw/stsw through XER)
 */
fout:
	MOVW	R9, XER
	LSW	(R11), R16
	STSW	R16, (R10)
	BR	ret

/*
 * loop for unaligned copy, then copy up to 15 remaining bytes
 * (each iteration moves 16 bytes with an immediate-count lsw/stsw)
 */
fbad:
	SRAWCC	$4, R9, R14
	BLE	f6
	MOVW	R14, CTR
f5:
	LSW	(R11), $16, R16
	ADD	$16, R11
	STSW	R16, $16, (R10)
	ADD	$16, R10
	BDNZ	f5
	RLWNMCC	$0, R9, $15, R9	/* residue */
	BEQ	ret
f6:
	MOVW	R9, XER
	LSW	(R11), R16
	STSW	R16, (R10)
	BR	ret

/*
 * whole thing repeated for backwards
 * R13/R12 (end pointers) are the cursors and move down; R11 keeps the
 * start of the source so that bout knows when to stop.
 * The count test below cannot succeed on this path, because counts of
 * 16 or less were already sent to fout.
 */
back:
	CMP	R9, $4
	BLT	bout

	XOR	R12,R13, R7
	ANDCC	$3,R7
	BNE	bout			/* ends not mutually aligned: byte copy */
b1:
	/* copy single bytes until the source end pointer is word aligned */
	ANDCC	$3,R13, R7
	BEQ	b2
	MOVBZU	-1(R13), R16
	MOVBZU	R16, -1(R12)
	SUB	$1, R9
	BR	b1
b2:
	/* 16 bytes per iteration, R14 = R9 >> 4 */
	SRAWCC	$4, R9, R14
	BLE	b4
	MOVW	R14, CTR
b3:
	MOVWU	-4(R13), R16
	MOVWU	R16, -4(R12)
	MOVWU	-4(R13), R17
	MOVWU	R17, -4(R12)
	MOVWU	-4(R13), R16
	MOVWU	R16, -4(R12)
	MOVWU	-4(R13), R17
	MOVWU	R17, -4(R12)
	BDNZ	b3
	RLWNMCC	$0, R9, $15, R9	/* residue */
	BEQ	ret
b4:
	/* then whole words, R14 = R9 >> 2 */
	SRAWCC	$2, R9, R14
	BLE	bout
	MOVW	R14, CTR
b5:
	MOVWU	-4(R13), R16
	MOVWU	R16, -4(R12)
	BDNZ	b5
	RLWNMCC	$0, R9, $3, R9	/* residue */
	BEQ	ret

/*
 * byte-at-a-time backwards copy until the source cursor R13
 * has come down to the start of the source, R11
 */
bout:
	CMPU	R13, R11
	BLE	ret
	MOVBZU	-1(R13), R16
	MOVBZU	R16, -1(R12)
	BR	bout

/*
 * negative count: load from address 0, then fall through to ret
 */
trap:
	MOVW	$0, R0
	MOVW	0(R0), R0

ret:
	MOVW	s1+0(FP), R3		/* return the original to-pointer */
	RETURN