/*
 * memset: store the low byte of `data` into each of the `n` bytes that
 * start at the destination pointer.
 *
 * Source: /inferno-os/libkern/memset-thumb.s
 * (revision 37da2899f40661e3e9631e497da8dc59b971cbd0).
 * Plan 9 assembler syntax: operands read source -> destination.
 *
 * Arguments, as read by the code below:
 *	R0		destination pointer
 *	data+4(FP)	fill value; only its low byte is stored
 *	n+8(FP)		number of bytes to fill
 *
 * R0 is never written, so it still holds the destination pointer at RET
 * (presumably that is the return value -- confirm against the C prototype).
 *
 * Register roles:
 *	R(TO)	R1	write cursor, advanced by every store
 *	R(TOE)	R2	end pointer, TO + n (one past the last byte)
 *	R(N)	R3	byte count; dead once the n < 4 test has been made
 *	R(TMP)	R3	scratch, used only after R(N) is dead
 *	R4		fill byte, replicated into all four byte lanes
 *	R5-R11		copies of R4 for the 8-register block store
 *
 * NOTE(review): R5-R11 are overwritten and not restored here; confirm the
 * calling convention treats them as scratch in every build that links this.
 *
 * Phases:
 *	1. n < 4: skip straight to the byte loop.
 *	2. Byte stores until TO is 4-byte aligned.
 *	3. 32-byte block stores while at least 32 bytes remain.
 *	4. Word stores while at least 4 bytes remain.
 *	5. Byte stores until TO reaches TOE.
 */
TO = 1
TOE = 2
N = 3
TMP = 3					/* N and TMP don't overlap */

TEXT memset(SB), $0
	MOVW	R0, R(TO)		/* TO = destination */
	MOVW	data+4(FP), R(4)	/* R4 = fill value */
	MOVW	n+8(FP), R(N)		/* N = byte count */

	ADD	R(N), R(TO), R(TOE)	/* TOE = TO + N, the end pointer */

	/*
	 * Fewer than 4 bytes: byte loop only.  BLT is a signed test, so a
	 * count with the top bit set also takes the byte loop.
	 */
	CMP	$4, R(N)
	BLT	_1tail

	/* N is dead from here on; R3 is reused as TMP */
	AND	$0xFF, R(4)		/* keep only the fill byte */
	SLL	$8, R(4), R(TMP)	/* copy byte 0 into byte 1 */
	ORR	R(TMP), R(4)
	SLL	$16, R(4), R(TMP)	/* copy the low half into the high half */
	ORR	R(TMP), R(4)

	/*
	 * Store single bytes until TO is a multiple of 4.  At most three
	 * bytes are written, and at least four remain, so TOE is not passed.
	 */
_4align:
	AND.S	$3, R(TO), R(TMP)	/* sets flags: zero when TO is aligned */
	BEQ	_4aligned

	MOVBU.P	R(4), 1(R(TO))		/* store one byte, TO += 1 */
	B	_4align

	/*
	 * TMP = TOE - 31, so TO < TMP (unsigned) means TO + 32 <= TOE.
	 * Skip the register replication if no full 32-byte block fits.
	 */
_4aligned:
	SUB	$31, R(TOE), R(TMP)
	CMP	R(TMP), R(TO)
	BHS	_4tail

	MOVW	R4, R5			/* fill pattern into R5-R11 */
	MOVW	R4, R6
	MOVW	R4, R7
	MOVW	R4, R8
	MOVW	R4, R9
	MOVW	R4, R10
	MOVW	R4, R11

	/* each pass stores 8 words (32 bytes) and advances TO by 32 */
_f32loop:
	CMP	R(TMP), R(TO)
	BHS	_4tail

	MOVM.IA.W [R4-R11], (R(TO))
	B	_f32loop

	/* TMP = TOE - 3, so TO < TMP (unsigned) means TO + 4 <= TOE */
_4tail:
	SUB	$3, R(TOE), R(TMP)
_4loop:
	CMP	R(TMP), R(TO)
	BHS	_1tail

	MOVW.P	R(4), 4(R(TO))		/* store one word, TO += 4 */
	B	_4loop

	/* byte stores until TO reaches TOE; the store writes only the low byte of R4 */
_1tail:
	CMP	R(TO), R(TOE)
	BEQ	_return

	MOVBU.P	R(4), 1(R(TO))		/* store one byte, TO += 1 */
	B	_1tail

_return:
	RET