/* xref: /plan9-contrib/sys/src/libc/mips/memcmp.s (revision 3e12c5d1bb89fc02707907988834ef147769ddaf) */
/*
 * memcmp(s1, s2, n)
 *
 * Compare the n bytes starting at s1 with the n bytes starting at s2,
 * treating every byte as unsigned (MOVBU loads, SGTU compares).
 *
 * result, left in R1:
 *	 0	no difference found in the first n bytes (also when n is 0)
 *	 1	at the first difference the s1 value is the larger one
 *	-1	at the first difference the s2 value is the larger one
 *
 * register use:
 *	R1	scratch for compare results; holds the result at RET
 *	R3	count on entry, then the limit of the current word loop
 *	R4	running pointer into s1
 *	R5	running pointer into s2
 *	R6	end pointer, s1+n
 *	R8,R9	byte or word pair under comparison (s1 side, s2 side)
 *	R10,R11	second word pair used by the 16-byte loop
 *
 * strategy:
 *	fewer than 4 bytes, or s1 and s2 differing in their low two
 *	address bits: compare byte at a time.  otherwise step byte at a
 *	time up to a word boundary, compare 16 bytes per iteration, then
 *	4 bytes per iteration, then finish byte at a time.
 *
 * a mismatching word pair is ordered by one unsigned word compare.
 * that agrees with byte order only when the lowest addressed byte is
 * the most significant one, i.e. on a big endian machine.
 */
	TEXT	memcmp(SB), $0
	MOVW	R1, 0(FP)		/* put R1 in the s1 slot; it is reloaded from s1+0(FP) below */

/*
 * performance:
 *	aligned about 1.0us/call and 17.4mb/sec
 *	unaligned is about 3.1mb/sec
 */

	MOVW	n+8(FP), R3		/* R3 is count */
	MOVW	s1+0(FP), R4		/* R4 is pointer1 */
	MOVW	s2+4(FP), R5		/* R5 is pointer2 */
	ADDU	R3,R4, R6		/* R6 is end pointer1 */

/*
 * if not at least 4 chars,
 * don't even mess around.
 * 3 chars to guarantee any
 * rounding up to a word
 * boundary and 4 characters
 * to get at least maybe one
 * full word cmp.
 */
	SGT	$4,R3, R1		/* R1 = 4 > count */
	BNE	R1, out

/*
 * test if both pointers
 * are similarly word aligned
 */
	XOR	R4,R5, R1
	AND	$3, R1			/* nonzero if the low two address bits differ */
	BNE	R1, out

/*
 * byte at a time to word align.
 * no end check is made here: count is at least 4
 * and at most 3 bytes are consumed.
 */
l1:
	AND	$3,R4, R1
	BEQ	R1, l2
	MOVBU	0(R4), R8
	MOVBU	0(R5), R9
	ADDU	$1, R4
	BNE	R8,R9, ne
	ADDU	$1, R5
	JMP	l1

/*
 * turn R3 into end pointer1-15
 * cmp 16 at a time while there's room
 */
l2:
	ADDU	$-15,R6, R3
l3:
	SGTU	R3,R4, R1		/* R1 = at least 16 bytes left */
	BEQ	R1, l4
	MOVW	0(R4), R8
	MOVW	0(R5), R9
	MOVW	4(R4), R10
	BNE	R8,R9, ne		/* word 0 differs */
	MOVW	4(R5), R11
	MOVW	8(R4), R8
	BNE	R10,R11, ne1		/* word 1 differs */
	MOVW	8(R5), R9
	MOVW	12(R4), R10
	BNE	R8,R9, ne		/* word 2 differs */
	MOVW	12(R5), R11
	ADDU	$16, R4
	BNE	R10,R11, ne1		/* word 3 differs */
	ADDU	$16, R5
	JMP	l3

/*
 * turn R3 into end pointer1-3
 * cmp 4 at a time while there's room
 */
l4:
	ADDU	$-3,R6, R3
l5:
	SGTU	R3,R4, R1		/* R1 = at least 4 bytes left */
	BEQ	R1, out
	MOVW	0(R4), R8
	MOVW	0(R5), R9
	ADDU	$4, R4
	BNE	R8,R9, ne	/* only works because big endian */
	ADDU	$4, R5
	JMP	l5

/*
 * last loop, cmp byte at a time
 */
out:
	SGTU	R6,R4, R1		/* R1 = bytes left; 0 here is also the equal result */
	BEQ	R1, ret
	MOVBU	0(R4), R8
	MOVBU	0(R5), R9
	ADDU	$1, R4
	BNE	R8,R9, ne
	ADDU	$1, R5
	JMP	out

/*
 * mismatch in R10,R11: result 1 if R10 is larger, else -1
 */
ne1:
	SGTU	R10,R11, R1
	BNE	R1, ret
	MOVW	$-1,R1
	RET

/*
 * mismatch in R8,R9: result 1 if R8 is larger, else -1
 */
ne:
	SGTU	R8,R9, R1
	BNE	R1, ret
	MOVW	$-1,R1
ret:
	RET
	END
115