/* xref: /plan9-contrib/sys/src/ape/lib/ap/spim/memcmp.s (revision 12b1df16f1a9233fb47d4260d51ef450cc19c368) */
/*
 * memcmp(s1, s2, n)
 *
 * Compares the n bytes starting at s1 with the n bytes starting
 * at s2, lowest address first.  The result is left in R1:
 *	 0	no differing byte was found before s1+n
 *	 1	at the first difference the s1 value is above the
 *		s2 value under SGTU
 *	-1	at the first difference it is not
 *
 * Register use:
 *	R1	scratch for tests, and the result
 *	R3	count, later reused as a loop limit
 *	R4	pointer1 (walks s1)
 *	R5	pointer2 (walks s2)
 *	R6	end pointer1 (s1 + n)
 *	R8-R11	values being compared
 *
 * In this version only the byte loop at `out' ever runs: the
 * unconditional JMP after the argument loads skips the word-wise
 * code (the size test, the alignment test and l1 through l5), so
 * that code and ne1 are unreachable.  Comparing whole words with
 * SGTU gives the byte-order answer only on a big-endian machine
 * (see the note in l5), which is presumably why the JMP is there.
 */
	TEXT	memcmp(SB), $0
	/*
	 * spill R1 into the slot s1 is read back from below
	 * (presumably the first argument arrives in R1 -- confirm
	 * against the Plan 9 assembler manual)
	 */
	MOVW	R1, 0(FP)

/*
 * performance:
 *	aligned about 1.0us/call and 17.4mb/sec
 *	unaligned is about 3.1mb/sec
 * NOTE(review): the aligned figure describes the word-wise path,
 * which the JMP below bypasses; presumably these numbers were
 * measured on the big-endian version -- confirm.
 */

	MOVW	n+8(FP), R3		/* R3 is count */
	MOVW	s1+0(FP), R4		/* R4 is pointer1 */
	MOVW	s2+4(FP), R5		/* R5 is pointer2 */
	ADDU	R3,R4, R6		/* R6 is end pointer1 */

	JMP	out		/* XXX little endian: always use the byte loop */

/*
 * everything from here down to `out' is unreachable
 * while the JMP above is in place.
 */

/*
 * if not at least 4 chars,
 * don't even mess around.
 * 3 chars to guarantee any
 * rounding up to a word
 * boundary and 4 characters
 * to get at least maybe one
 * full word cmp.
 */
	SGT	$4,R3, R1
	BNE	R1, out

/*
 * test if both pointers
 * are similarly word aligned
 */
	XOR	R4,R5, R1
	AND	$3, R1
	BNE	R1, out

/*
 * byte at a time to word align
 */
l1:
	AND	$3,R4, R1
	BEQ	R1, l2
	MOVB	0(R4), R8
	MOVB	0(R5), R9
	ADDU	$1, R4
	BNE	R8,R9, ne
	ADDU	$1, R5
	JMP	l1

/*
 * turn R3 into end pointer1-15
 * cmp 16 at a time while there's room
 */
l2:
	ADDU	$-15,R6, R3
l3:
	SGTU	R3,R4, R1
	BEQ	R1, l4
	MOVW	0(R4), R8
	MOVW	0(R5), R9
	MOVW	4(R4), R10
	BNE	R8,R9, ne
	MOVW	4(R5), R11
	MOVW	8(R4), R8
	BNE	R10,R11, ne1
	MOVW	8(R5), R9
	MOVW	12(R4), R10
	BNE	R8,R9, ne
	MOVW	12(R5), R11
	ADDU	$16, R4
	BNE	R10,R11, ne1
	BNE	R8,R9, ne		/* repeats the test three instructions up; R8/R9 unchanged since */
	ADDU	$16, R5
	JMP	l3

/*
 * turn R3 into end pointer1-3
 * cmp 4 at a time while there's room
 */
l4:
	ADDU	$-3,R6, R3
l5:
	SGTU	R3,R4, R1
	BEQ	R1, out
	MOVW	0(R4), R8
	MOVW	0(R5), R9
	ADDU	$4, R4
	BNE	R8,R9, ne	/* only works because big endian */
	ADDU	$4, R5
	JMP	l5

/*
 * last loop, cmp byte at a time.
 * this is the only loop that runs while the JMP near
 * the top is in place.  R1 is 0 when the loop falls out
 * to ret, which is the "equal" result.
 * NOTE(review): MOVB presumably sign-extends; SGTU on two
 * sign-extended bytes still orders them as unsigned
 * chars -- confirm.
 */
out:
	SGTU	R6,R4, R1		/* R1 = bytes remain (pointer1 below end) */
	BEQ	R1, ret
	MOVB	0(R4), R8
	MOVB	0(R5), R9
	ADDU	$1, R4
	BNE	R8,R9, ne
	ADDU	$1, R5
	JMP	out

/*
 * mismatch in R10/R11 (reached only from the 16-at-a-time loop):
 * result is 1 if R10 is above R11, else -1
 */
ne1:
	SGTU	R10,R11, R1
	BNE	R1, ret
	MOVW	$-1,R1
	RET

/*
 * mismatch in R8/R9:
 * result is 1 if R8 is above R9, else -1
 */
ne:
	SGTU	R8,R9, R1
	BNE	R1, ret
	MOVW	$-1,R1
ret:
	RET
	END