xref: /inferno-os/libkern/memset-arm.s (revision 37da2899f40661e3e9631e497da8dc59b971cbd0)
/* Register numbers used through the R(x) notation by memset below. */
1 *37da2899SCharles.ForsythTO = 1	/* current store pointer, advanced by every store */
2 *37da2899SCharles.ForsythTOE = 2	/* end pointer: start address + byte count */
3 *37da2899SCharles.ForsythN = 3	/* byte count; only read while setting up TOE and the size check */
4 *37da2899SCharles.ForsythTMP = 3					/* scratch; same register as N, which is no longer read once TMP is first written */
5 *37da2899SCharles.Forsyth
/*
 * memset: store the low byte of data into n consecutive bytes,
 * starting at the address held in R0.
 *
 * Inputs, as read by the code:
 *	R0		start address of the region
 *	data+4(FP)	fill value; only its low 8 bits end up in memory
 *	n+8(FP)		byte count
 * R0 is never written here, so it still holds the start address at RET.
 *
 * Strategy: counts below 4 are stored one byte at a time.  Otherwise
 * the fill byte is replicated across a word in R4, single bytes are
 * stored until TO is 4-byte aligned, then 32-byte multi-register
 * stores, then single words, and finally any remaining bytes.
 *
 * Registers written: R1 (TO), R2 (TOE), R3 (N, later TMP), R4-R11.
 * NOTE(review): R9-R11 are used as plain scratch registers; confirm
 * that none of them is reserved by the toolchain or kernel this file
 * is built for.
 */
6 *37da2899SCharles.ForsythTEXT memset(SB), $0
7 *37da2899SCharles.Forsyth	MOVW	R0, R(TO)	/* TO = start address */
8 *37da2899SCharles.Forsyth	MOVW	data+4(FP), R(4)	/* R4 = fill value */
9 *37da2899SCharles.Forsyth	MOVW	n+8(FP), R(N)	/* N = byte count */
10 *37da2899SCharles.Forsyth
11 *37da2899SCharles.Forsyth	ADD	R(N), R(TO), R(TOE)	/* TOE = TO + N, the address just past the region */
12 *37da2899SCharles.Forsyth
13 *37da2899SCharles.Forsyth	CMP	$4, R(N)		/* fewer than 4 bytes (signed comparison): use the byte loop only */
14 *37da2899SCharles.Forsyth	BLT	_1tail
15 *37da2899SCharles.Forsyth
16 *37da2899SCharles.Forsyth	AND	$0xFF, R(4)		/* keep only the low byte of the fill value */
17 *37da2899SCharles.Forsyth	SLL	$8, R(4), R(TMP)	/* copy the byte into bits 8-15 ... */
18 *37da2899SCharles.Forsyth	ORR	R(TMP), R(4)
19 *37da2899SCharles.Forsyth	SLL	$16, R(4), R(TMP)	/* ... then the halfword into bits 16-31 */
20 *37da2899SCharles.Forsyth	ORR	R(TMP), R(4)	/* R4 now holds the fill byte in all four byte lanes */
21 *37da2899SCharles.Forsyth
22 *37da2899SCharles.Forsyth_4align:				/* store single bytes until TO is a multiple of 4 */
23 *37da2899SCharles.Forsyth	AND.S	$3, R(TO), R(TMP)	/* TMP = TO & 3; .S sets the flags tested by BEQ */
24 *37da2899SCharles.Forsyth	BEQ	_4aligned
25 *37da2899SCharles.Forsyth
26 *37da2899SCharles.Forsyth	MOVBU.P	R(4), 1(R(TO))		/* store one byte, then TO += 1 (post-increment write back) */
27 *37da2899SCharles.Forsyth	B	_4align
28 *37da2899SCharles.Forsyth
29 *37da2899SCharles.Forsyth_4aligned:
30 *37da2899SCharles.Forsyth	SUB	$31, R(TOE), R(TMP)	/* TMP = TOE - 31: a 32-byte store fits while TO < TMP */
31 *37da2899SCharles.Forsyth	CMP	R(TMP), R(TO)
32 *37da2899SCharles.Forsyth	BHS	_4tail	/* unsigned TO >= TMP: fewer than 32 bytes left, skip register setup */
33 *37da2899SCharles.Forsyth
34 *37da2899SCharles.Forsyth	MOVW	R4, R5			/* copy the fill word into R5-R11 for the multi-register store */
35 *37da2899SCharles.Forsyth	MOVW	R4, R6
36 *37da2899SCharles.Forsyth	MOVW	R4, R7
37 *37da2899SCharles.Forsyth	MOVW	R4, R8
38 *37da2899SCharles.Forsyth	MOVW	R4, R9
39 *37da2899SCharles.Forsyth	MOVW	R4, R10
40 *37da2899SCharles.Forsyth	MOVW	R4, R11
41 *37da2899SCharles.Forsyth
42 *37da2899SCharles.Forsyth_f32loop:
43 *37da2899SCharles.Forsyth	CMP	R(TMP), R(TO)
44 *37da2899SCharles.Forsyth	BHS	_4tail
45 *37da2899SCharles.Forsyth
	/* store the eight registers R4-R11 (32 bytes) at TO and advance TO past them */
46 *37da2899SCharles.Forsyth	MOVM.IA.W [R4-R11], (R(TO))
47 *37da2899SCharles.Forsyth	B	_f32loop
48 *37da2899SCharles.Forsyth
	/* TO is still 4-byte aligned here: only multiples of 4 were added since _4aligned */
49 *37da2899SCharles.Forsyth_4tail:
50 *37da2899SCharles.Forsyth	SUB	$3, R(TOE), R(TMP)	/* TMP = TOE - 3: a word store fits while TO < TMP */
51 *37da2899SCharles.Forsyth_4loop:
52 *37da2899SCharles.Forsyth	CMP	R(TMP), R(TO)
53 *37da2899SCharles.Forsyth	BHS	_1tail
54 *37da2899SCharles.Forsyth
55 *37da2899SCharles.Forsyth	MOVW.P	R(4), 4(R(TO))		/* store one word, then TO += 4 (post-increment write back) */
56 *37da2899SCharles.Forsyth	B	_4loop
57 *37da2899SCharles.Forsyth
	/*
	 * Byte loop: runs until TO reaches TOE.  On the short path R4 was
	 * not masked, but the byte store writes only its low 8 bits.
	 */
58 *37da2899SCharles.Forsyth_1tail:
59 *37da2899SCharles.Forsyth	CMP	R(TO), R(TOE)
60 *37da2899SCharles.Forsyth	BEQ	_return
61 *37da2899SCharles.Forsyth
62 *37da2899SCharles.Forsyth	MOVBU.P	R(4), 1(R(TO))		/* store one byte, then TO += 1 (post-increment write back) */
63 *37da2899SCharles.Forsyth	B	_1tail
64 *37da2899SCharles.Forsyth
65 *37da2899SCharles.Forsyth_return:
66 *37da2899SCharles.Forsyth	RET
67