xref: /inferno-os/libkern/memmove-power.s (revision 37da2899f40661e3e9631e497da8dc59b971cbd0)
/*
 * BDNZ label: spelled as a raw conditional branch, BC with BO=16, BI=0.
 * In the PowerPC encoding BO=16 means "decrement CTR, then branch if
 * CTR is non-zero"; the condition bit named by BI is ignored.
 * The macro ends in a comma so the branch target written after BDNZ
 * becomes the third operand of BC.
 */
1*37da2899SCharles.Forsyth#define	BDNZ	BC	16,0,
/*
 * memcpy: no implementation of its own; it branches straight to the
 * `move' label at the top of memmove, so it runs exactly the same code
 * (including memmove's handling of overlapping buffers).
 * Both entry points declare a $0 frame, so the FP-relative argument
 * references used after `move' refer to this function's arguments too.
 */
2*37da2899SCharles.Forsyth	TEXT	memcpy(SB), $0
3*37da2899SCharles.Forsyth	BR	move
4*37da2899SCharles.Forsyth
5*37da2899SCharles.Forsyth	TEXT	memmove(SB), $0
6*37da2899SCharles.Forsythmove:
7*37da2899SCharles.Forsyth

/*
 * memmove(s1, s2, n): copy n bytes from s2 to s1 and return s1.
 * memcpy enters at `move' as well.
 *
 * The destination pointer is taken from R3 on entry (presumably the
 * first-argument register of the calling convention -- confirm against
 * the compiler's ABI); s2 and n are read from the frame.
 *
 * Register use:
 *	R9	remaining byte count
 *	R10	to-pointer		R12	end to-pointer (to + n)
 *	R11	from-pointer		R13	end from-pointer (from + n)
 *	R7	scratch for alignment tests
 *	R14	loop trip count, moved into CTR
 *	R16-R19	data in flight
 *
 * Strategy:
 *	n == 0		return immediately
 *	n < 0		store to address 0 (see trap), then return
 *	n <= 16		one string load followed by one string store
 *	to <= from	copy forwards  (f2/f3 aligned, fbad unaligned)
 *	to >  from	copy backwards (b1..b5 aligned, bout bytewise)
 */

8*37da2899SCharles.Forsyth/*
9*37da2899SCharles.Forsyth * performance:
10*37da2899SCharles.Forsyth * (tba)
11*37da2899SCharles.Forsyth */
12*37da2899SCharles.Forsyth
13*37da2899SCharles.Forsyth	MOVW	R3, s1+0(FP)		/* save destination; reloaded at ret as the return value */
14*37da2899SCharles.Forsyth	MOVW	n+8(FP), R9		/* R9 is count */
15*37da2899SCharles.Forsyth	MOVW	R3, R10			/* R10 is to-pointer */
16*37da2899SCharles.Forsyth	CMP	R9, $0
17*37da2899SCharles.Forsyth	BEQ	ret			/* nothing to copy */
18*37da2899SCharles.Forsyth	BLT	trap			/* negative count */
19*37da2899SCharles.Forsyth	MOVW	s2+4(FP), R11		/* R11 is from-pointer */
20*37da2899SCharles.Forsyth
21*37da2899SCharles.Forsyth/*
22*37da2899SCharles.Forsyth * if no more than 16 bytes, just use one lsw/stsw
23*37da2899SCharles.Forsyth * (all bytes are loaded before any is stored, so the copy
24*37da2899SCharles.Forsyth * direction does not matter and no overlap test is needed)
23*37da2899SCharles.Forsyth */
24*37da2899SCharles.Forsyth	CMP	R9, $16
25*37da2899SCharles.Forsyth	BLE	fout
26*37da2899SCharles.Forsyth
27*37da2899SCharles.Forsyth	ADD	R9,R11, R13		/* R13 is end from-pointer */
28*37da2899SCharles.Forsyth	ADD	R9,R10, R12		/* R12 is end to-pointer */
29*37da2899SCharles.Forsyth
30*37da2899SCharles.Forsyth/*
31*37da2899SCharles.Forsyth * easiest test is copy backwards if
32*37da2899SCharles.Forsyth * destination string has higher mem address
33*37da2899SCharles.Forsyth */
34*37da2899SCharles.Forsyth	CMPU	R10, R11		/* unsigned: these are addresses */
35*37da2899SCharles.Forsyth	BGT	back
36*37da2899SCharles.Forsyth
37*37da2899SCharles.Forsyth/*
38*37da2899SCharles.Forsyth * test if both pointers
39*37da2899SCharles.Forsyth * are similarly word aligned
40*37da2899SCharles.Forsyth */
41*37da2899SCharles.Forsyth	XOR	R10,R11, R7
42*37da2899SCharles.Forsyth	ANDCC	$3,R7			/* low two bits differ => can never both be word aligned */
43*37da2899SCharles.Forsyth	BNE	fbad
44*37da2899SCharles.Forsyth
45*37da2899SCharles.Forsyth/*
46*37da2899SCharles.Forsyth * move a few bytes to align pointers
47*37da2899SCharles.Forsyth */
48*37da2899SCharles.Forsyth	ANDCC	$3,R10,R7		/* R7 = to & 3 */
49*37da2899SCharles.Forsyth	BEQ	f2			/* already word aligned */
50*37da2899SCharles.Forsyth	SUBC	R7, $4, R7		/* R7 = 4 - R7: bytes up to the next word boundary */
51*37da2899SCharles.Forsyth	SUB	R7, R9			/* take them off the count */
52*37da2899SCharles.Forsyth	MOVW	R7, XER			/* XER supplies the byte count for LSW/STSW below */
53*37da2899SCharles.Forsyth	LSW	(R11), R16
54*37da2899SCharles.Forsyth	ADD	R7, R11
55*37da2899SCharles.Forsyth	STSW	R16, (R10)
56*37da2899SCharles.Forsyth	ADD	R7, R10
57*37da2899SCharles.Forsyth
58*37da2899SCharles.Forsyth/*
59*37da2899SCharles.Forsyth * R14 = number of 16-byte blocks in the remaining count;
60*37da2899SCharles.Forsyth * copy 16 bytes at a time while there's room.
60*37da2899SCharles.Forsyth * the pointers are biased by -4 because the update-form moves
60*37da2899SCharles.Forsyth * (MOVWU) add their displacement to the base register on each access.
61*37da2899SCharles.Forsyth */
62*37da2899SCharles.Forsythf2:
63*37da2899SCharles.Forsyth	SRAWCC	$4, R9, R14		/* R14 = R9 / 16, condition set for the BLE */
64*37da2899SCharles.Forsyth	BLE	fout			/* fewer than 16 bytes left */
65*37da2899SCharles.Forsyth	MOVW	R14, CTR
66*37da2899SCharles.Forsyth	SUB	$4, R11
67*37da2899SCharles.Forsyth	SUB	$4, R10
68*37da2899SCharles.Forsythf3:
69*37da2899SCharles.Forsyth	MOVWU	4(R11), R16
70*37da2899SCharles.Forsyth	MOVWU	R16, 4(R10)
71*37da2899SCharles.Forsyth	MOVWU	4(R11), R17
72*37da2899SCharles.Forsyth	MOVWU	R17, 4(R10)
73*37da2899SCharles.Forsyth	MOVWU	4(R11), R16
74*37da2899SCharles.Forsyth	MOVWU	R16, 4(R10)
75*37da2899SCharles.Forsyth	MOVWU	4(R11), R17
76*37da2899SCharles.Forsyth	MOVWU	R17, 4(R10)
77*37da2899SCharles.Forsyth	BDNZ	f3			/* decrement CTR, loop while non-zero */
78*37da2899SCharles.Forsyth	RLWNMCC	$0, R9, $15, R9	/* residue: R9 &= 15 */
79*37da2899SCharles.Forsyth	BEQ	ret
80*37da2899SCharles.Forsyth	ADD	$4, R11			/* undo the -4 bias before the tail copy */
81*37da2899SCharles.Forsyth	ADD	$4, R10
82*37da2899SCharles.Forsyth
83*37da2899SCharles.Forsyth/*
84*37da2899SCharles.Forsyth * move up to 16 bytes through R16 .. R19; aligned and unaligned
85*37da2899SCharles.Forsyth */
86*37da2899SCharles.Forsythfout:
87*37da2899SCharles.Forsyth	MOVW	R9, XER			/* byte count for the string load/store */
88*37da2899SCharles.Forsyth	LSW	(R11), R16
89*37da2899SCharles.Forsyth	STSW	R16, (R10)
90*37da2899SCharles.Forsyth	BR	ret
91*37da2899SCharles.Forsyth
92*37da2899SCharles.Forsyth/*
93*37da2899SCharles.Forsyth * loop for unaligned copy, then copy up to 15 remaining bytes
94*37da2899SCharles.Forsyth */
95*37da2899SCharles.Forsythfbad:
96*37da2899SCharles.Forsyth	SRAWCC	$4, R9, R14		/* R14 = number of 16-byte chunks */
97*37da2899SCharles.Forsyth	BLE	f6			/* none (R9 > 16 on this path, given the earlier BLE fout) */
98*37da2899SCharles.Forsyth	MOVW	R14, CTR
99*37da2899SCharles.Forsythf5:
100*37da2899SCharles.Forsyth	LSW	(R11), $16, R16		/* immediate-count form: 16 bytes per iteration */
101*37da2899SCharles.Forsyth	ADD	$16, R11
102*37da2899SCharles.Forsyth	STSW	R16, $16, (R10)
103*37da2899SCharles.Forsyth	ADD	$16, R10
104*37da2899SCharles.Forsyth	BDNZ	f5
105*37da2899SCharles.Forsyth	RLWNMCC	$0, R9, $15, R9	/* residue: R9 &= 15 */
106*37da2899SCharles.Forsyth	BEQ	ret
107*37da2899SCharles.Forsythf6:
108*37da2899SCharles.Forsyth	MOVW	R9, XER			/* tail: remaining bytes via XER-counted string move */
109*37da2899SCharles.Forsyth	LSW	(R11), R16
110*37da2899SCharles.Forsyth	STSW	R16, (R10)
111*37da2899SCharles.Forsyth	BR	ret
112*37da2899SCharles.Forsyth
113*37da2899SCharles.Forsyth/*
114*37da2899SCharles.Forsyth * whole thing repeated for backwards
114*37da2899SCharles.Forsyth * (works down from the end pointers R13/R12 using
114*37da2899SCharles.Forsyth * pre-decrementing update-form moves)
115*37da2899SCharles.Forsyth */
116*37da2899SCharles.Forsythback:
117*37da2899SCharles.Forsyth	CMP	R9, $4
118*37da2899SCharles.Forsyth	BLT	bout			/* too short for word copies: bytes only */
119*37da2899SCharles.Forsyth
120*37da2899SCharles.Forsyth	XOR	R12,R13, R7
121*37da2899SCharles.Forsyth	ANDCC	$3,R7			/* end pointers differently aligned: bytes only */
122*37da2899SCharles.Forsyth	BNE	bout
123*37da2899SCharles.Forsythb1:
124*37da2899SCharles.Forsyth	ANDCC	$3,R13, R7		/* copy single bytes until the end from-pointer is word aligned */
125*37da2899SCharles.Forsyth	BEQ	b2
126*37da2899SCharles.Forsyth	MOVBZU	-1(R13), R16
127*37da2899SCharles.Forsyth	MOVBZU	R16, -1(R12)
128*37da2899SCharles.Forsyth	SUB	$1, R9
129*37da2899SCharles.Forsyth	BR	b1
130*37da2899SCharles.Forsythb2:
131*37da2899SCharles.Forsyth	SRAWCC	$4, R9, R14		/* R14 = number of 16-byte blocks */
132*37da2899SCharles.Forsyth	BLE	b4
133*37da2899SCharles.Forsyth	MOVW	R14, CTR
134*37da2899SCharles.Forsythb3:
135*37da2899SCharles.Forsyth	MOVWU	-4(R13), R16
136*37da2899SCharles.Forsyth	MOVWU	R16, -4(R12)
137*37da2899SCharles.Forsyth	MOVWU	-4(R13), R17
138*37da2899SCharles.Forsyth	MOVWU	R17, -4(R12)
139*37da2899SCharles.Forsyth	MOVWU	-4(R13), R16
140*37da2899SCharles.Forsyth	MOVWU	R16, -4(R12)
141*37da2899SCharles.Forsyth	MOVWU	-4(R13), R17
142*37da2899SCharles.Forsyth	MOVWU	R17, -4(R12)
143*37da2899SCharles.Forsyth	BDNZ	b3
144*37da2899SCharles.Forsyth	RLWNMCC	$0, R9, $15, R9	/* residue: R9 &= 15 */
145*37da2899SCharles.Forsyth	BEQ	ret
146*37da2899SCharles.Forsythb4:
147*37da2899SCharles.Forsyth	SRAWCC	$2, R9, R14		/* R14 = number of whole words left */
148*37da2899SCharles.Forsyth	BLE	bout
149*37da2899SCharles.Forsyth	MOVW	R14, CTR
150*37da2899SCharles.Forsythb5:
151*37da2899SCharles.Forsyth	MOVWU	-4(R13), R16
152*37da2899SCharles.Forsyth	MOVWU	R16, -4(R12)
153*37da2899SCharles.Forsyth	BDNZ	b5
154*37da2899SCharles.Forsyth	RLWNMCC	$0, R9, $3, R9	/* residue: R9 &= 3 */
155*37da2899SCharles.Forsyth	BEQ	ret
156*37da2899SCharles.Forsyth

/*
 * bytewise backwards copy; R9 is not consulted here, the loop
 * stops when the end from-pointer has come down to the start
 * from-pointer (R11).
 */
157*37da2899SCharles.Forsythbout:
158*37da2899SCharles.Forsyth	CMPU	R13, R11
159*37da2899SCharles.Forsyth	BLE	ret
160*37da2899SCharles.Forsyth	MOVBZU	-1(R13), R16
161*37da2899SCharles.Forsyth	MOVBZU	R16, -1(R12)
162*37da2899SCharles.Forsyth	BR	bout
163*37da2899SCharles.Forsyth

/*
 * negative count: store through R0 with displacement 0.
 * NOTE(review): presumably R0 holds 0 here (the load of $0 is
 * commented out), so this is a deliberate faulting store to
 * address 0 -- confirm.  If the store does not fault, control
 * falls through to ret.
 */
164*37da2899SCharles.Forsythtrap:
165*37da2899SCharles.Forsyth/*	MOVW	$0, R0	*/
166*37da2899SCharles.Forsyth	MOVW	R0, 0(R0)
167*37da2899SCharles.Forsyth
168*37da2899SCharles.Forsythret:
169*37da2899SCharles.Forsyth	MOVW	s1+0(FP), R3		/* return value: the destination pointer saved on entry */
170*37da2899SCharles.Forsyth	RETURN
171