/* xref: /inferno-os/libkern/memcmp-power.s (revision 37da2899f40661e3e9631e497da8dc59b971cbd0) */
/*
 * memcmp(s1, s2, n) for PowerPC, Plan 9 assembler syntax.
 *
 * Compares n bytes at s1 and s2.  Returns 0 in R3 when n <= 0 (the count
 * is tested with a signed CMP) or when no difference is found; otherwise
 * returns 1 or -1 according to an unsigned compare (CMPU) of the first
 * pair of loaded words that differ, s1's word against s2's.
 *
 * NOTE(review): ordering the result by a whole-word CMPU matches
 * byte-by-byte order only if the lowest-addressed byte lands in the most
 * significant end of the register (big-endian operation) -- confirm for
 * the target.
 *
 * Arguments: s1 arrives in R3 and is also stored to s1+0(FP);
 * s2 and n are loaded from s2+4(FP) and n+8(FP).
 *
 * Registers:
 *	R3	pointer1, then the return value
 *	R4	bytes left to compare
 *	R5	pointer2
 *	R9	scratch: byte count or iteration count
 *	R10-R14	data being compared
 *	CTR	loop counter for l3 and l5
 *	XER	byte count for the LSW forms written without an immediate count
 *
 * performance (figures from the original comment):
 *	67mb/sec aligned; 16mb/sec unaligned
 */
	TEXT	memcmp(SB), $0
#define	BDNZ	BC	16,0,		/* decrement CTR, branch while it is non-zero */
	MOVW	R3, s1+0(FP)		/* R3 is pointer1 */

	MOVW	n+8(FP), R4		/* R4 is count */
	MOVW	s2+4(FP), R5		/* R5 is pointer2 */

/*
 * let LSW do the work for 4 characters or less; aligned and unaligned
 */
	CMP	R4, $0			/* signed test: counts <= 0 compare equal */
	BLE	eq
	CMP	R4, $4
	BLE	out			/* 1..4 bytes: a single LSW pair suffices */

	XOR	R3, R5, R9
	ANDCC	$3, R9			/* do the low two address bits differ? */
	BNE	l4	/* pointers misaligned; use LSW loop */

/*
 * do enough bytes to align pointers
 * (both pointers share the same low two bits here, and R4 > 4,
 * so taking at most 3 bytes leaves R4 positive)
 */
	ANDCC	$3,R3, R9		/* R9 = offset of R3 within its word */
	BEQ	l2			/* already word aligned */
	SUBC	R9, $4, R9		/* R9 = 4 - R9: bytes up to the next word boundary */
	MOVW	R9, XER			/* byte count for the two LSWs below */
	LSW	(R3), R10
	ADD	R9, R3
	LSW	(R5), R14
	ADD	R9, R5
	SUB	R9, R4
	CMPU	R10, R14
	BNE	ne

/*
 * compare 16 at a time
 */
l2:
	SRAWCC	$4, R4, R9		/* R9 = number of whole 16-byte chunks */
	BLE	l4			/* none: go straight to the word loop */
	MOVW	R9, CTR
	SUB	$4, R3			/* bias both pointers: MOVWU 4(Rx) steps the pointer by 4 as it loads */
	SUB	$4, R5
l3:
	/* four word pairs per pass; loads are issued ahead of the compares that use them */
	MOVWU	4(R3), R10
	MOVWU	4(R5), R12
	MOVWU	4(R3), R11
	MOVWU	4(R5), R13
	CMPU	R10, R12		/* word 0 */
	BNE	ne
	MOVWU	4(R3), R10
	MOVWU	4(R5), R12
	CMPU	R11, R13		/* word 1 */
	BNE	ne
	MOVWU	4(R3), R11
	MOVWU	4(R5), R13
	CMPU	R10, R12		/* word 2 */
	BNE	ne
	CMPU	R11, R13		/* word 3 */
	BNE	ne
	BDNZ	l3
	ADD	$4, R3			/* undo the bias applied before the loop */
	ADD	$4, R5
	RLWNMCC	$0, R4, $15, R4	/* residue: R4 &= 15 */
	BEQ	eq

/*
 * do remaining words with LSW; also does unaligned case
 * (R4 >= 1 on every path that reaches l4)
 */
l4:
	SRAWCC	$2, R4, R9		/* R9 = number of whole words left */
	BLE	out			/* fewer than 4 bytes left */
	MOVW	R9, CTR
l5:
	LSW	(R3), $4, R10		/* immediate count: 4 bytes from each side */
	ADD	$4, R3
	LSW	(R5), $4, R11
	ADD	$4, R5
	CMPU	R10, R11
	BNE	ne
	BDNZ	l5
	RLWNMCC	$0, R4, $3, R4	/* residue: R4 &= 3 */
	BEQ	eq

/*
 * do remaining bytes with final LSW
 * (R4 is 1..4 on every path that reaches out)
 */
out:
	MOVW	R4, XER			/* byte count for the two LSWs below */
	LSW	(R3), R10
	LSW	(R5), R11
	CMPU	R10, R11
	BNE	ne

eq:
	MOVW	$0, R3
	RETURN

/*
 * reached only by BNE straight after a CMPU of (s1 word, s2 word);
 * the BGE below relies on that compare result still being in place
 * after the MOVW.
 */
ne:
	MOVW	$1, R3			/* assume s1 word > s2 word */
	BGE	ret
	MOVW	$-1,R3			/* s1 word < s2 word */
ret:
	RETURN
	END