xref: /plan9/sys/src/libc/mips/memcmp.s (revision 3e12c5d1bb89fc02707907988834ef147769ddaf)
/*
 * memcmp(s1, s2, n): compare n bytes at s1 and s2 as unsigned bytes.
 * (Presumably the libc "int memcmp(void*, void*, ulong)"; the C
 * prototype is not visible from this file.)
 *
 * in:	s1 arrives in R1 and is spilled to its frame slot 0(FP);
 *	s2 is read from 4(FP) and n from 8(FP).
 * out:	R1 = 0 if all n bytes match, 1 if the first differing byte
 *	of s1 is the larger one, -1 if it is the smaller one.
 *
 * registers:
 *	R1	scratch for compare results, and the return value
 *	R3	n on entry, later the limit pointer of the word loops
 *	R4	cursor into s1
 *	R5	cursor into s2
 *	R6	s1+n, one past the last byte of s1
 *	R8,R9	data pair (s1 word/byte, s2 word/byte), resolved at ne
 *	R10,R11	second data pair of the unrolled loop, resolved at ne1
 *
 * performance (figures from the original comment):
 *	aligned is about 1.0us/call and 17.4mb/sec
 *	unaligned is about 3.1mb/sec
 */
	TEXT	memcmp(SB), $0
	MOVW	R1, 0(FP)		/* spill s1 so it can be reloaded by name */

	MOVW	n+8(FP), R3		/* R3 is count */
	MOVW	s1+0(FP), R4		/* R4 is pointer1 */
	MOVW	s2+4(FP), R5		/* R5 is pointer2 */
	ADDU	R3,R4, R6		/* R6 is end pointer1 */

/*
 * fewer than 4 bytes: use the byte loop only.
 * with n >= 4 the alignment loop below, which
 * consumes at most 3 bytes and does not test
 * against the end pointer, cannot run past
 * the end of the buffers.
 */
	SGT	$4,R3, R1		/* R1 = (4 > n), signed */
	BNE	R1, out

/*
 * word compares need both pointers to share
 * the same offset within a word; if the low
 * two bits differ, fall back to the byte loop.
 */
	XOR	R4,R5, R1
	AND	$3, R1
	BNE	R1, out

/*
 * byte at a time until pointer1 (and therefore
 * pointer2) is word aligned
 */
l1:
	AND	$3,R4, R1
	BEQ	R1, l2
	MOVBU	0(R4), R8
	MOVBU	0(R5), R9
	ADDU	$1, R4
	BNE	R8,R9, ne
	ADDU	$1, R5
	JMP	l1

/*
 * turn R3 into end pointer1-15
 * cmp 16 at a time while there is room.
 * the loads of the next pair are issued ahead
 * of the branch on the previous pair, so the
 * two register pairs alternate:
 *	R8,R9	hold words 0 and 8	-> ne
 *	R10,R11	hold words 4 and 12	-> ne1
 */
l2:
	ADDU	$-15,R6, R3
l3:
	SGTU	R3,R4, R1		/* R1 = (pointer1 < end-15) */
	BEQ	R1, l4
	MOVW	0(R4), R8
	MOVW	0(R5), R9
	MOVW	4(R4), R10
	BNE	R8,R9, ne		/* word 0 */
	MOVW	4(R5), R11
	MOVW	8(R4), R8
	BNE	R10,R11, ne1		/* word 4 */
	MOVW	8(R5), R9
	MOVW	12(R4), R10
	BNE	R8,R9, ne		/* word 8 */
	MOVW	12(R5), R11
	ADDU	$16, R4
	BNE	R10,R11, ne1		/* word 12 */
	ADDU	$16, R5
	JMP	l3

/*
 * turn R3 into end pointer1-3
 * cmp 4 at a time while there is room
 */
l4:
	ADDU	$-3,R6, R3
l5:
	SGTU	R3,R4, R1		/* R1 = (pointer1 < end-3) */
	BEQ	R1, out
	MOVW	0(R4), R8
	MOVW	0(R5), R9
	ADDU	$4, R4
	/*
	 * ne orders the two words with an unsigned compare.
	 * that agrees with byte order only on a big-endian
	 * machine, where the lowest-addressed byte is the
	 * most significant one.
	 */
	BNE	R8,R9, ne
	ADDU	$4, R5
	JMP	l5

/*
 * last loop, cmp byte at a time.
 * on exit through ret, R1 is the 0 left by
 * the SGTU, which is the "equal" result.
 */
out:
	SGTU	R6,R4, R1		/* R1 = (pointer1 < end) */
	BEQ	R1, ret
	MOVBU	0(R4), R8
	MOVBU	0(R5), R9
	ADDU	$1, R4
	BNE	R8,R9, ne
	ADDU	$1, R5
	JMP	out

/*
 * mismatch in R10,R11: return 1 if the s1 value
 * is larger (unsigned), otherwise -1
 */
ne1:
	SGTU	R10,R11, R1
	BNE	R1, ret
	MOVW	$-1,R1
	RET

/*
 * mismatch in R8,R9: return 1 if the s1 value
 * is larger (unsigned), otherwise -1
 */
ne:
	SGTU	R8,R9, R1
	BNE	R1, ret
	MOVW	$-1,R1
ret:
	RET
	END
115