xref: /minix3/sys/external/bsd/compiler_rt/dist/lib/builtins/arm/umodsi3.S (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1*0a6a1f1dSLionel Sambuc/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
2*0a6a1f1dSLionel Sambuc *
3*0a6a1f1dSLionel Sambuc *                     The LLVM Compiler Infrastructure
4*0a6a1f1dSLionel Sambuc *
5*0a6a1f1dSLionel Sambuc * This file is dual licensed under the MIT and the University of Illinois Open
6*0a6a1f1dSLionel Sambuc * Source Licenses. See LICENSE.TXT for details.
7*0a6a1f1dSLionel Sambuc *
8*0a6a1f1dSLionel Sambuc *===----------------------------------------------------------------------===//
9*0a6a1f1dSLionel Sambuc *
10*0a6a1f1dSLionel Sambuc * This file implements the __umodsi3 (32-bit unsigned integer modulus)
11*0a6a1f1dSLionel Sambuc * function for the ARM 32-bit architecture.
12*0a6a1f1dSLionel Sambuc *
13*0a6a1f1dSLionel Sambuc *===----------------------------------------------------------------------===*/
14*0a6a1f1dSLionel Sambuc
15*0a6a1f1dSLionel Sambuc#include "../assembly.h"
16*0a6a1f1dSLionel Sambuc
17*0a6a1f1dSLionel Sambuc	.syntax unified
18*0a6a1f1dSLionel Sambuc	.text
19*0a6a1f1dSLionel Sambuc#if __ARM_ARCH_ISA_THUMB == 2
20*0a6a1f1dSLionel Sambuc	.thumb
21*0a6a1f1dSLionel Sambuc#endif
22*0a6a1f1dSLionel Sambuc
23*0a6a1f1dSLionel Sambuc@ unsigned int __umodsi3(unsigned int dividend, unsigned int divisor)
24*0a6a1f1dSLionel Sambuc@   Calculate and return the remainder of the (unsigned) division.
25*0a6a1f1dSLionel Sambuc
26*0a6a1f1dSLionel Sambuc	.p2align 2
27*0a6a1f1dSLionel SambucDEFINE_COMPILERRT_FUNCTION(__umodsi3)
	/*
	 * 32-bit unsigned modulus. On entry r0 holds the numerator and r1 the
	 * denominator; the remainder is left in r0. A zero denominator is
	 * routed to LOCAL_LABEL(divby0) at the end of the function.
	 */
28*0a6a1f1dSLionel Sambuc#if __ARM_ARCH_EXT_IDIV__
	/*
	 * Hardware divide path: r2 = r0 / r1, then r0 = r0 - r2 * r1.
	 * The zero test comes first so that r1 == 0 goes to divby0.
	 */
29*0a6a1f1dSLionel Sambuc	tst     r1, r1
30*0a6a1f1dSLionel Sambuc	beq     LOCAL_LABEL(divby0)
31*0a6a1f1dSLionel Sambuc	udiv	r2, r0, r1
32*0a6a1f1dSLionel Sambuc	mls 	r0, r2, r1, r0
33*0a6a1f1dSLionel Sambuc	bx  	lr
34*0a6a1f1dSLionel Sambuc#else
	/*
	 * Software path. One compare against 1 separates two special cases:
	 * r1 < 1 (unsigned, i.e. r1 == 0) branches to divby0, and r1 == 1
	 * returns a remainder of 0.
	 * NOTE(review): IT/ITT/JMPc come from assembly.h; presumably IT/ITT
	 * emit Thumb-2 IT blocks and JMPc is a conditional branch to the given
	 * register - confirm against that header.
	 */
35*0a6a1f1dSLionel Sambuc	cmp	r1, #1
36*0a6a1f1dSLionel Sambuc	bcc	LOCAL_LABEL(divby0)
37*0a6a1f1dSLionel Sambuc	ITT(eq)
38*0a6a1f1dSLionel Sambuc	moveq	r0, #0
39*0a6a1f1dSLionel Sambuc	JMPc(lr, eq)
	/* r0 < r1: the numerator already is the remainder, return it as is. */
40*0a6a1f1dSLionel Sambuc	cmp	r0, r1
41*0a6a1f1dSLionel Sambuc	IT(cc)
42*0a6a1f1dSLionel Sambuc	JMPc(lr, cc)
43*0a6a1f1dSLionel Sambuc	/*
44*0a6a1f1dSLionel Sambuc	 * Implement division using binary long division algorithm.
45*0a6a1f1dSLionel Sambuc	 *
46*0a6a1f1dSLionel Sambuc	 * r0 is the numerator, r1 the denominator.
47*0a6a1f1dSLionel Sambuc	 *
48*0a6a1f1dSLionel Sambuc	 * The code before JMP computes the correct shift I, so that
49*0a6a1f1dSLionel Sambuc	 * r0 and (r1 << I) have the highest bit set in the same position.
50*0a6a1f1dSLionel Sambuc	 * At the time of JMP, ip := .Ldiv0block - 8 * I (ARM) or - 10 * I (THUMB).
51*0a6a1f1dSLionel Sambuc	 * This depends on the fixed instruction size of block.
52*0a6a1f1dSLionel Sambuc	 * For ARM mode, this is 8 Bytes, for THUMB mode 10 Bytes.
53*0a6a1f1dSLionel Sambuc	 *
54*0a6a1f1dSLionel Sambuc	 * block(shift) implements the test-and-subtract core: r0 is reduced by
55*0a6a1f1dSLionel Sambuc	 * (r1 << shift) when r0 >= (r1 << shift). It assumes (r1 << shift) does
56*0a6a1f1dSLionel Sambuc	 * not overflow and r0 < 2 * (r1 << shift). The remainder stays in r0.
57*0a6a1f1dSLionel Sambuc	 */
58*0a6a1f1dSLionel Sambuc
59*0a6a1f1dSLionel Sambuc#  ifdef __ARM_FEATURE_CLZ
	/* CLZ path: the shift is I = clz(r1) - clz(r0), computed into r3. */
60*0a6a1f1dSLionel Sambuc	clz	ip, r0
61*0a6a1f1dSLionel Sambuc	clz	r3, r1
62*0a6a1f1dSLionel Sambuc	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
63*0a6a1f1dSLionel Sambuc	sub	r3, r3, ip
64*0a6a1f1dSLionel Sambuc#    if __ARM_ARCH_ISA_THUMB == 2
	/*
	 * Thumb-2 blocks are 10 bytes each: 2 * I is subtracted here and the
	 * shared instruction after the endif subtracts the remaining 8 * I.
	 * NOTE(review): the + 1 presumably keeps bit 0 of ip set so that
	 * bx stays in Thumb state - confirm.
	 */
65*0a6a1f1dSLionel Sambuc	adr	ip, LOCAL_LABEL(div0block) + 1
66*0a6a1f1dSLionel Sambuc	sub	ip, ip, r3, lsl #1
67*0a6a1f1dSLionel Sambuc#    else
68*0a6a1f1dSLionel Sambuc	adr	ip, LOCAL_LABEL(div0block)
69*0a6a1f1dSLionel Sambuc#    endif
	/* Step back 8 * I bytes from div0block and enter the block chain. */
70*0a6a1f1dSLionel Sambuc	sub	ip, ip, r3, lsl #3
71*0a6a1f1dSLionel Sambuc	bx	ip
72*0a6a1f1dSLionel Sambuc#  else
73*0a6a1f1dSLionel Sambuc#    if __ARM_ARCH_ISA_THUMB == 2
74*0a6a1f1dSLionel Sambuc#    error THUMB mode requires CLZ or UDIV
75*0a6a1f1dSLionel Sambuc#    endif
	/*
	 * No CLZ: find the shift by binary search over 16, 8, 4, 2 and 1.
	 * r2 holds r0 shifted right by the amount accepted so far, r3 is the
	 * trial value. Whenever the trial is still >= r1 it is accepted and ip
	 * moves back by one 8-byte block per bit of shift. The result is the
	 * largest I with (r0 >> I) >= r1.
	 */
76*0a6a1f1dSLionel Sambuc	mov	r2, r0
77*0a6a1f1dSLionel Sambuc	adr	ip, LOCAL_LABEL(div0block)
78*0a6a1f1dSLionel Sambuc
79*0a6a1f1dSLionel Sambuc	lsr	r3, r2, #16
80*0a6a1f1dSLionel Sambuc	cmp	r3, r1
81*0a6a1f1dSLionel Sambuc	movhs	r2, r3
82*0a6a1f1dSLionel Sambuc	subhs	ip, ip, #(16 * 8)
83*0a6a1f1dSLionel Sambuc
84*0a6a1f1dSLionel Sambuc	lsr	r3, r2, #8
85*0a6a1f1dSLionel Sambuc	cmp	r3, r1
86*0a6a1f1dSLionel Sambuc	movhs	r2, r3
87*0a6a1f1dSLionel Sambuc	subhs	ip, ip, #(8 * 8)
88*0a6a1f1dSLionel Sambuc
89*0a6a1f1dSLionel Sambuc	lsr	r3, r2, #4
90*0a6a1f1dSLionel Sambuc	cmp	r3, r1
91*0a6a1f1dSLionel Sambuc	movhs	r2, r3
	/* NOTE(review): two-operand spelling, unlike the sibling steps; it
	 * should be equivalent to "subhs ip, ip, ..." in unified syntax. */
92*0a6a1f1dSLionel Sambuc	subhs	ip, #(4 * 8)
93*0a6a1f1dSLionel Sambuc
94*0a6a1f1dSLionel Sambuc	lsr	r3, r2, #2
95*0a6a1f1dSLionel Sambuc	cmp	r3, r1
96*0a6a1f1dSLionel Sambuc	movhs	r2, r3
97*0a6a1f1dSLionel Sambuc	subhs	ip, ip, #(2 * 8)
98*0a6a1f1dSLionel Sambuc
99*0a6a1f1dSLionel Sambuc	/* Last block, no need to update r2 or r3. */
100*0a6a1f1dSLionel Sambuc	cmp	r1, r2, lsr #1
101*0a6a1f1dSLionel Sambuc	subls	ip, ip, #(1 * 8)
102*0a6a1f1dSLionel Sambuc
103*0a6a1f1dSLionel Sambuc	JMP(ip)
104*0a6a1f1dSLionel Sambuc#  endif
105*0a6a1f1dSLionel Sambuc
	/*
	 * IMM stands in for the immediate marker inside block(); presumably it
	 * exists because that character is the stringize operator within a
	 * function-like macro body.
	 * NOTE(review): WIDE() comes from assembly.h; presumably it selects the
	 * 32-bit Thumb encoding so every block keeps the fixed size that the
	 * computed jump relies on - confirm.
	 */
106*0a6a1f1dSLionel Sambuc#define	IMM	#
107*0a6a1f1dSLionel Sambuc
108*0a6a1f1dSLionel Sambuc#define block(shift)                                                           \
109*0a6a1f1dSLionel Sambuc	cmp	r0, r1, lsl IMM shift;                                         \
110*0a6a1f1dSLionel Sambuc	IT(hs);                                                                \
111*0a6a1f1dSLionel Sambuc	WIDE(subhs)	r0, r0, r1, lsl IMM shift
112*0a6a1f1dSLionel Sambuc
	/*
	 * Unrolled long division, one block per shift from 31 down to 0.
	 * Execution enters at the block selected through ip and falls through
	 * every lower shift, leaving the remainder in r0. Do not insert or
	 * resize instructions here without updating the jump computation.
	 */
113*0a6a1f1dSLionel Sambuc	block(31)
114*0a6a1f1dSLionel Sambuc	block(30)
115*0a6a1f1dSLionel Sambuc	block(29)
116*0a6a1f1dSLionel Sambuc	block(28)
117*0a6a1f1dSLionel Sambuc	block(27)
118*0a6a1f1dSLionel Sambuc	block(26)
119*0a6a1f1dSLionel Sambuc	block(25)
120*0a6a1f1dSLionel Sambuc	block(24)
121*0a6a1f1dSLionel Sambuc	block(23)
122*0a6a1f1dSLionel Sambuc	block(22)
123*0a6a1f1dSLionel Sambuc	block(21)
124*0a6a1f1dSLionel Sambuc	block(20)
125*0a6a1f1dSLionel Sambuc	block(19)
126*0a6a1f1dSLionel Sambuc	block(18)
127*0a6a1f1dSLionel Sambuc	block(17)
128*0a6a1f1dSLionel Sambuc	block(16)
129*0a6a1f1dSLionel Sambuc	block(15)
130*0a6a1f1dSLionel Sambuc	block(14)
131*0a6a1f1dSLionel Sambuc	block(13)
132*0a6a1f1dSLionel Sambuc	block(12)
133*0a6a1f1dSLionel Sambuc	block(11)
134*0a6a1f1dSLionel Sambuc	block(10)
135*0a6a1f1dSLionel Sambuc	block(9)
136*0a6a1f1dSLionel Sambuc	block(8)
137*0a6a1f1dSLionel Sambuc	block(7)
138*0a6a1f1dSLionel Sambuc	block(6)
139*0a6a1f1dSLionel Sambuc	block(5)
140*0a6a1f1dSLionel Sambuc	block(4)
141*0a6a1f1dSLionel Sambuc	block(3)
142*0a6a1f1dSLionel Sambuc	block(2)
143*0a6a1f1dSLionel Sambuc	block(1)
144*0a6a1f1dSLionel SambucLOCAL_LABEL(div0block):
145*0a6a1f1dSLionel Sambuc	block(0)
146*0a6a1f1dSLionel Sambuc	JMP(lr)
147*0a6a1f1dSLionel Sambuc#endif /* __ARM_ARCH_EXT_IDIV__ */
148*0a6a1f1dSLionel Sambuc
	/*
	 * Division by zero, reached from both the hardware and the software
	 * path. r0 is set to 0; EABI builds then branch to __aeabi_idiv0
	 * without linking, other builds return to the caller directly.
	 */
149*0a6a1f1dSLionel SambucLOCAL_LABEL(divby0):
150*0a6a1f1dSLionel Sambuc	mov	r0, #0
151*0a6a1f1dSLionel Sambuc#ifdef __ARM_EABI__
152*0a6a1f1dSLionel Sambuc	b	__aeabi_idiv0
153*0a6a1f1dSLionel Sambuc#else
154*0a6a1f1dSLionel Sambuc	JMP(lr)
155*0a6a1f1dSLionel Sambuc#endif
156*0a6a1f1dSLionel Sambuc
157*0a6a1f1dSLionel SambucEND_COMPILERRT_FUNCTION(__umodsi3)
158