/*
 * memset for PowerPC, Plan 9 assembler syntax.
 * origin: /plan9/sys/src/libc/power/memset.s
 *	(revision 7dd7cddf99dd7472612f1413b4da293630e6b1bc)
 *
 * arguments, as read by the code below:
 *	p	in R3 on entry, saved at p+0(FP)	destination pointer
 *	c	c+4(FP)		fill value; only the low byte is used
 *	n	n+8(FP)		byte count; compared as a signed word,
 *				so a count <= 0 stores nothing
 * returns p in R3.
 * uses R4-R10, XER, CTR and the condition register.
 *
 * performance:
 *	about 100mbytes/sec (8k blocks) on a 603/105 without L2 cache
 *	drops to 40mbytes/sec (10k blocks) and 28mbytes/sec with 32k blocks
 */

/* decrement CTR and branch while it is non-zero; the label follows the comma */
#define	BDNZ	BC	16,0,

	TEXT	memset(SB),$0
	MOVW	R3, p+0(FP)		/* R3 is pointer; saved for the return value */

	MOVW	n+8(FP), R4		/* R4 is count */
	CMP	R4, $0
	BLE	ret			/* nothing to store */
	MOVW	c+4(FP), R5		/* R5 is char */

/*
 * create 16 copies of c in R5 .. R8
 */
	RLWNM	$0, R5, $0xff, R5		/* keep only the low byte */
	RLWMI	$8, R5, $0xff00, R5		/* insert it into the next byte */
	RLWMI	$16, R5, $0xffff0000, R5	/* insert low half into high half */
	MOVW	R5, R6
	MOVW	R5, R7
	MOVW	R5, R8

/*
 * let STSW do the work for 16 characters or less; aligned and unaligned
 */
	CMP	R4, $16
	BLE	out

/*
 * store enough bytes to align pointer to a multiple of 8.
 * count is at least 17 here, so at least 10 bytes remain afterwards.
 */
	ANDCC	$7,R3, R9		/* R9 = p & 7 */
	BEQ	l2			/* already aligned */
	SUBC	R9, $8, R9		/* R9 = 8 - (p & 7): 1..7 bytes */
	MOVW	R9, XER			/* XER holds the STSW byte count */
	STSW	R5, (R3)
	ADD	R9, R3
	SUB	R9, R4

/*
 * store 16 at a time while there's room
 * STSW was used here originally, but it's `completion serialised'
 */
l2:
	SRAWCC	$4, R4, R9		/* R9 = number of whole 16-byte blocks */
	BLE	out			/* none: fewer than 16 bytes are left */
	MOVW	R9, CTR
l3:
	MOVW	R5, 0(R3)
	ADD	$8, R3, R10		/* R10 is base for the second pair of words */
	MOVW	R6, 4(R3)
	MOVW	R7, 0(R10)
	ADD	$8, R10, R3		/* R3 advances 16 bytes per iteration */
	MOVW	R8, 4(R10)
	BDNZ	l3
	RLWNMCC	$0, R4, $15, R4	/* residue */
	BEQ	ret

/*
 * store up to 16 bytes from R5 .. R8; aligned and unaligned
 */

out:
	MOVW	R4, XER			/* 1..16 bytes remain on every path here */
	STSW	R5, (R3)

ret:
	MOVW	p+0(FP), R3		/* return the pointer saved on entry */
	RETURN
	END