/* xref: /inferno-os/libkern/memmove-mips.s (revision 37da2899f40661e3e9631e497da8dc59b971cbd0) */
/*
 * memmove(s1, s2, n)
 * memcpy(s1, s2, n)
 *
 * Copy n bytes from s2 to s1.  Both entry points share one body, so
 * memcpy copes with overlapping buffers exactly as memmove does:
 * the copy runs backwards when the destination is at the higher
 * address and forwards otherwise.
 *
 * The first argument is taken from R1 (it is saved into s1+0(FP) on
 * entry); s2 and n are read from the frame.  R1 is reloaded with s1
 * just before RET.
 *
 * A negative n is fatal: the routine deliberately loads from
 * address 0 to force a fault.
 *
 * The unaligned word paths pair MOVWL on the lowest byte address with
 * MOVWR on the highest.  NOTE(review): that looks like the big-endian
 * pairing -- confirm before reusing this file on a little-endian target.
 *
 * Register use:
 *	R1	scratch for tests; s1 on return
 *	R2	non-zero when n < 4 (byte loop only)
 *	R3	count, then reused as the loop-limit pointer
 *	R4	to-pointer (forward cursor)
 *	R5	from-pointer (forward cursor; fixed source start when going backwards)
 *	R6	end to-pointer (backward cursor)
 *	R7	end from-pointer (backward cursor; fixed source end when going forwards)
 *	R8, R9	data in flight
 */
	TEXT	memmove(SB), $0

	JMP	move			/* share the body that follows memcpy's entry */

	TEXT	memcpy(SB), $0
move:
	MOVW	R1, s1+0(FP)		/* save s1 so it can be reloaded at ret */

	MOVW	n+8(FP), R3		/* R3 is count */
	MOVW	R1, R4			/* R4 is to-pointer */
	SGT	R0, R3, R5		/* R5 = (0 > count) */
	BEQ	R5, ok
	MOVW	(R0), R0		/* abort if negative count */
ok:
	MOVW	s2+4(FP), R5		/* R5 is from-pointer */
	ADDU	R3,R5, R7		/* R7 is end from-pointer */
	ADDU	R3,R4, R6		/* R6 is end to-pointer */

/*
 * easiest test is copy backwards if
 * destination string has higher mem address
 */
	SGT	$4,R3, R2		/* R2 = (count < 4), used by both directions */
	SGTU	R4,R5, R1		/* R1 = (to > from), unsigned */
	BNE	R1, back

/*
 * if not at least 4 chars,
 * don't even mess around.
 * 3 chars to guarantee any
 * rounding up to a word
 * boundary and 4 characters
 * to get at least maybe one
 * full word store.
 */
	BNE	R2, fout


/*
 * byte at a time to word align destination
 */
f1:
	AND	$3,R4, R1
	BEQ	R1, f2
	MOVB	0(R5), R8
	ADDU	$1, R5
	MOVB	R8, 0(R4)
	ADDU	$1, R4
	JMP	f1

/*
 * test if source is now word aligned
 */
f2:
	AND	$3, R5, R1
	BNE	R1, fun2
/*
 * turn R3 into to-end pointer-15
 * copy 16 at a time while there's room.
 * R6 is smaller than R7 --
 * there are problems if R7 is 0.
 */
	ADDU	$-15,R6, R3
f3:
	SGTU	R3,R4, R1		/* at least 16 bytes left? */
	BEQ	R1, f4
	MOVW	0(R5), R8
	MOVW	4(R5), R9
	MOVW	R8, 0(R4)
	MOVW	8(R5), R8
	MOVW	R9, 4(R4)
	MOVW	12(R5), R9
	ADDU	$16, R5
	MOVW	R8, 8(R4)
	MOVW	R9, 12(R4)
	ADDU	$16, R4
	JMP	f3

/*
 * turn R3 into to-end pointer-3
 * copy 4 at a time while there's room
 */
f4:
	ADDU	$-3,R6, R3
f5:
	SGTU	R3,R4, R1		/* at least 4 bytes left? */
	BEQ	R1, fout
	MOVW	0(R5), R8
	ADDU	$4, R5
	MOVW	R8, 0(R4)
	ADDU	$4, R4
	JMP	f5

/*
 * forward copy, unaligned
 * (destination is word aligned, source is not:
 * each word is gathered with a MOVWL/MOVWR pair)
 * turn R3 into to-end pointer-15
 * copy 16 at a time while there's room.
 * R6 is smaller than R7 --
 * there are problems if R7 is 0.
 */
fun2:
	ADDU	$-15,R6, R3
fun3:
	SGTU	R3,R4, R1		/* at least 16 bytes left? */
	BEQ	R1, fun4
	MOVWL	0(R5), R8
	MOVWR	3(R5), R8
	MOVWL	4(R5), R9
	MOVWR	7(R5), R9
	MOVW	R8, 0(R4)
	MOVWL	8(R5), R8
	MOVWR	11(R5), R8
	MOVW	R9, 4(R4)
	MOVWL	12(R5), R9
	MOVWR	15(R5), R9
	ADDU	$16, R5
	MOVW	R8, 8(R4)
	MOVW	R9, 12(R4)
	ADDU	$16, R4
	JMP	fun3

/*
 * turn R3 into to-end pointer-3
 * copy 4 at a time while there's room
 */
fun4:
	ADDU	$-3,R6, R3
fun5:
	SGTU	R3,R4, R1		/* at least 4 bytes left? */
	BEQ	R1, fout
	MOVWL	0(R5), R8
	MOVWR	3(R5), R8
	ADDU	$4, R5
	MOVW	R8, 0(R4)
	ADDU	$4, R4
	JMP	fun5

/*
 * last loop, copy byte at a time
 * until the from-pointer reaches the end of the source
 */
fout:
	BEQ	R7,R5, ret
	MOVB	0(R5), R8
	ADDU	$1, R5
	MOVB	R8, 0(R4)
	ADDU	$1, R4
	JMP	fout

/*
 * whole thing repeated for backwards:
 * R7 and R6 walk down from the ends of the source and
 * destination; R5 stays at the start of the source.
 */
back:
	BNE	R2, bout		/* fewer than 4 bytes: byte loop only */

/*
 * byte at a time to word align the destination end
 */
b1:
	AND	$3,R6, R1
	BEQ	R1, b2
	MOVB	-1(R7), R8
	ADDU	$-1, R7
	MOVB	R8, -1(R6)
	ADDU	$-1, R6
	JMP	b1

/*
 * test if the source end is now word aligned
 */
b2:
	AND	$3, R7, R1
	BNE	R1, bun2

/*
 * turn R3 into from-start pointer+15
 * copy 16 at a time while there's room.
 */
	ADDU	$15,R5, R3
b3:
	SGTU	R7,R3, R1		/* at least 16 bytes left? */
	BEQ	R1, b4
	MOVW	-4(R7), R8
	MOVW	-8(R7), R9
	MOVW	R8, -4(R6)
	MOVW	-12(R7), R8
	MOVW	R9, -8(R6)
	MOVW	-16(R7), R9
	ADDU	$-16, R7
	MOVW	R8, -12(R6)
	MOVW	R9, -16(R6)
	ADDU	$-16, R6
	JMP	b3

/*
 * turn R3 into from-start pointer+3
 * copy 4 at a time while there's room
 */
b4:
	ADDU	$3,R5, R3
b5:
	SGTU	R7,R3, R1		/* at least 4 bytes left? */
	BEQ	R1, bout
	MOVW	-4(R7), R8
	ADDU	$-4, R7
	MOVW	R8, -4(R6)
	ADDU	$-4, R6
	JMP	b5

/*
 * backward copy, unaligned
 * (destination end is word aligned, source end is not)
 * turn R3 into from-start pointer+15
 * copy 16 at a time while there's room.
 */
bun2:
	ADDU	$15,R5, R3
bun3:
	SGTU	R7,R3, R1		/* at least 16 bytes left? */
	BEQ	R1, bun4
	MOVWL	-4(R7), R8
	MOVWR	-1(R7), R8
	MOVWL	-8(R7), R9
	MOVWR	-5(R7), R9
	MOVW	R8, -4(R6)
	MOVWL	-12(R7), R8
	MOVWR	-9(R7), R8
	MOVW	R9, -8(R6)
	MOVWL	-16(R7), R9
	MOVWR	-13(R7), R9
	ADDU	$-16, R7
	MOVW	R8, -12(R6)
	MOVW	R9, -16(R6)
	ADDU	$-16, R6
	JMP	bun3

/*
 * turn R3 into from-start pointer+3
 * copy 4 at a time while there's room
 */
bun4:
	ADDU	$3,R5, R3
bun5:
	SGTU	R7,R3, R1		/* at least 4 bytes left? */
	BEQ	R1, bout
	MOVWL	-4(R7), R8
	MOVWR	-1(R7), R8
	ADDU	$-4, R7
	MOVW	R8, -4(R6)
	ADDU	$-4, R6
	JMP	bun5

/*
 * last loop, copy byte at a time
 * until the end from-pointer reaches the start of the source
 */
bout:
	BEQ	R7,R5, ret
	MOVB	-1(R7), R8
	ADDU	$-1, R7
	MOVB	R8, -1(R6)
	ADDU	$-1, R6
	JMP	bout

ret:
	MOVW	s1+0(FP), R1		/* reload the saved destination pointer */
	RET
	END
238