/*
 * xref: /plan9-contrib/sys/src/libc/spim/memset.s
 * (revision 12b1df16f1a9233fb47d4260d51ef450cc19c368)
 */

/*
 * memset(p, c, n)
 *	store the low byte of c into the n bytes starting at p
 *	and return p.
 *
 * arguments, as read below:
 *	p+0(FP)	destination pointer.  the entry code spills R1
 *		to 0(FP) and then reads the pointer from that
 *		slot, so the first argument is taken to arrive
 *		in R1.
 *	c+4(FP)	fill value; only its low 8 bits reach memory.
 *	n+8(FP)	byte count.
 * result:
 *	R1	the pointer, reloaded from 0(FP).
 *
 * register roles:
 *	R1	scratch for compares and shifts; result on exit
 *	R3	count on entry, later the loop limit (end-15, end-3)
 *	R4	running store pointer
 *	R5	fill char, widened to four copies on the word path
 *	R6	end pointer, p+n
 */
	TEXT	memset(SB),$12
	MOVW	R1, 0(FP)		/* spill R1; read back as p and on return */

/*
 * performance:
 *	about 1us/call and 28mb/sec
 */

	MOVW	n+8(FP), R3		/* R3 is count */
	MOVW	p+0(FP), R4		/* R4 is pointer */
	MOVW	c+4(FP), R5		/* R5 is char */
	ADDU	R3,R4, R6		/* R6 is end pointer */

/*
 * if not at least 4 chars,
 * don't even mess around.
 * 3 chars to guarantee any
 * rounding up to a word
 * boundary and 4 characters
 * to get at least maybe one
 * full word store.
 *
 * with n < 4 (0 included) only the byte loop at out runs.
 * NOTE(review): this is SGT while every other compare here
 * is SGTU; presumably a signed compare, so a count with the
 * top bit set would also take the byte loop -- confirm.
 */
	SGT	$4,R3, R1		/* R1 = 4 > n */
	BNE	R1, out

/*
 * turn R5 into a word of characters
 */
	AND	$0xff, R5		/* keep only the low byte */
	SLL	$8,R5, R1
	OR	R1, R5			/* two copies */
	SLL	$16,R5, R1
	OR	R1, R5			/* four copies */

/*
 * store one byte at a time until pointer
 * is aligned on a word boundary
 */
l1:
	AND	$3,R4, R1		/* R1 = low two bits of pointer */
	BEQ	R1, l2			/* zero: pointer is word aligned */
	MOVB	R5, 0(R4)
	ADDU	$1, R4
	JMP	l1

/*
 * turn R3 into end pointer-15
 * store 16 at a time while there's room
 * (loop runs while R3 > R4, i.e. at least 16 bytes remain)
 */
l2:
	ADDU	$-15,R6, R3
l3:
	SGTU	R3,R4, R1		/* R1 = end-15 > pointer */
	BEQ	R1, l4
	MOVW	R5, 0(R4)
	MOVW	R5, 4(R4)
	ADDU	$16, R4			/* advance before the last two stores */
	MOVW	R5, -8(R4)		/* old pointer + 8 */
	MOVW	R5, -4(R4)		/* old pointer + 12 */
	JMP	l3

/*
 * turn R3 into end pointer-3
 * store 4 at a time while there's room
 * (loop runs while R3 > R4, i.e. at least 4 bytes remain)
 */
l4:
	ADDU	$-3,R6, R3
l5:
	SGTU	R3,R4, R1		/* R1 = end-3 > pointer */
	BEQ	R1, out
	MOVW	R5, 0(R4)
	ADDU	$4, R4
	JMP	l5

/*
 * last loop, store byte at a time
 * until the pointer reaches the end pointer
 */
out:
	SGTU	R6,R4, R1		/* R1 = end > pointer */
	BEQ	R1, ret
	MOVB	R5, 0(R4)
	ADDU	$1, R4
	JMP	out

ret:
	MOVW	p+0(FP), R1		/* return the pointer spilled at entry */
	RET
	END