/* xref: /minix3/sys/external/bsd/compiler_rt/dist/lib/builtins/arm/udivsi3.S (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7) */
/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivsi3 (32-bit unsigned integer divide)
 * function for the ARM 32-bit architecture.
 *
 *===----------------------------------------------------------------------===*/

/*
 * Shared helper macros for the builtins. The function-definition, label and
 * branch macros used in this file are presumably declared there (the header
 * is not visible from this file).
 */
#include "../assembly.h"

	/* Unified ARM/Thumb assembler syntax; code goes into the text section. */
	.syntax unified
	.text

	/* Assemble as Thumb code when the target ISA is Thumb-2. */
#if __ARM_ARCH_ISA_THUMB == 2
	.thumb
#endif

24*0a6a1f1dSLionel Sambuc	.p2align 2
25*0a6a1f1dSLionel SambucDEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
26*0a6a1f1dSLionel Sambuc
27*0a6a1f1dSLionel Sambuc@ unsigned int __udivsi3(unsigned int divident, unsigned int divisor)
28*0a6a1f1dSLionel Sambuc@   Calculate and return the quotient of the (unsigned) division.
29*0a6a1f1dSLionel Sambuc
30*0a6a1f1dSLionel SambucDEFINE_COMPILERRT_FUNCTION(__udivsi3)
31*0a6a1f1dSLionel Sambuc#if __ARM_ARCH_EXT_IDIV__
32*0a6a1f1dSLionel Sambuc	tst     r1, r1
33*0a6a1f1dSLionel Sambuc	beq     LOCAL_LABEL(divby0)
34*0a6a1f1dSLionel Sambuc	udiv	r0, r0, r1
35*0a6a1f1dSLionel Sambuc	bx  	lr
36*0a6a1f1dSLionel Sambuc#else
37*0a6a1f1dSLionel Sambuc	cmp	r1, #1
38*0a6a1f1dSLionel Sambuc	bcc	LOCAL_LABEL(divby0)
39*0a6a1f1dSLionel Sambuc	IT(eq)
40*0a6a1f1dSLionel Sambuc	JMPc(lr, eq)
41*0a6a1f1dSLionel Sambuc	cmp	r0, r1
42*0a6a1f1dSLionel Sambuc	ITT(cc)
43*0a6a1f1dSLionel Sambuc	movcc	r0, #0
44*0a6a1f1dSLionel Sambuc	JMPc(lr, cc)
45*0a6a1f1dSLionel Sambuc	/*
46*0a6a1f1dSLionel Sambuc	 * Implement division using binary long division algorithm.
47*0a6a1f1dSLionel Sambuc	 *
48*0a6a1f1dSLionel Sambuc	 * r0 is the numerator, r1 the denominator.
49*0a6a1f1dSLionel Sambuc	 *
50*0a6a1f1dSLionel Sambuc	 * The code before JMP computes the correct shift I, so that
51*0a6a1f1dSLionel Sambuc	 * r0 and (r1 << I) have the highest bit set in the same position.
52*0a6a1f1dSLionel Sambuc	 * At the time of JMP, ip := .Ldiv0block - 12 * I.
53*0a6a1f1dSLionel Sambuc	 * This depends on the fixed instruction size of block.
54*0a6a1f1dSLionel Sambuc	 * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
55*0a6a1f1dSLionel Sambuc	 *
56*0a6a1f1dSLionel Sambuc	 * block(shift) implements the test-and-update-quotient core.
57*0a6a1f1dSLionel Sambuc	 * It assumes (r0 << shift) can be computed without overflow and
58*0a6a1f1dSLionel Sambuc	 * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
59*0a6a1f1dSLionel Sambuc	 */
60*0a6a1f1dSLionel Sambuc
61*0a6a1f1dSLionel Sambuc#  ifdef __ARM_FEATURE_CLZ
62*0a6a1f1dSLionel Sambuc	clz	ip, r0
63*0a6a1f1dSLionel Sambuc	clz	r3, r1
64*0a6a1f1dSLionel Sambuc	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
65*0a6a1f1dSLionel Sambuc	sub	r3, r3, ip
66*0a6a1f1dSLionel Sambuc#    if __ARM_ARCH_ISA_THUMB == 2
67*0a6a1f1dSLionel Sambuc	adr	ip, LOCAL_LABEL(div0block) + 1
68*0a6a1f1dSLionel Sambuc	sub	ip, ip, r3, lsl #1
69*0a6a1f1dSLionel Sambuc#    else
70*0a6a1f1dSLionel Sambuc	adr	ip, LOCAL_LABEL(div0block)
71*0a6a1f1dSLionel Sambuc#    endif
72*0a6a1f1dSLionel Sambuc	sub	ip, ip, r3, lsl #2
73*0a6a1f1dSLionel Sambuc	sub	ip, ip, r3, lsl #3
74*0a6a1f1dSLionel Sambuc	mov	r3, #0
75*0a6a1f1dSLionel Sambuc	bx	ip
76*0a6a1f1dSLionel Sambuc#  else
77*0a6a1f1dSLionel Sambuc#    if __ARM_ARCH_ISA_THUMB == 2
78*0a6a1f1dSLionel Sambuc#    error THUMB mode requires CLZ or UDIV
79*0a6a1f1dSLionel Sambuc#    endif
80*0a6a1f1dSLionel Sambuc	mov	r2, r0
81*0a6a1f1dSLionel Sambuc	adr	ip, LOCAL_LABEL(div0block)
82*0a6a1f1dSLionel Sambuc
83*0a6a1f1dSLionel Sambuc	lsr	r3, r2, #16
84*0a6a1f1dSLionel Sambuc	cmp	r3, r1
85*0a6a1f1dSLionel Sambuc	movhs	r2, r3
86*0a6a1f1dSLionel Sambuc	subhs	ip, ip, #(16 * 12)
87*0a6a1f1dSLionel Sambuc
88*0a6a1f1dSLionel Sambuc	lsr	r3, r2, #8
89*0a6a1f1dSLionel Sambuc	cmp	r3, r1
90*0a6a1f1dSLionel Sambuc	movhs	r2, r3
91*0a6a1f1dSLionel Sambuc	subhs	ip, ip, #(8 * 12)
92*0a6a1f1dSLionel Sambuc
93*0a6a1f1dSLionel Sambuc	lsr	r3, r2, #4
94*0a6a1f1dSLionel Sambuc	cmp	r3, r1
95*0a6a1f1dSLionel Sambuc	movhs	r2, r3
96*0a6a1f1dSLionel Sambuc	subhs	ip, #(4 * 12)
97*0a6a1f1dSLionel Sambuc
98*0a6a1f1dSLionel Sambuc	lsr	r3, r2, #2
99*0a6a1f1dSLionel Sambuc	cmp	r3, r1
100*0a6a1f1dSLionel Sambuc	movhs	r2, r3
101*0a6a1f1dSLionel Sambuc	subhs	ip, ip, #(2 * 12)
102*0a6a1f1dSLionel Sambuc
103*0a6a1f1dSLionel Sambuc	/* Last block, no need to update r2 or r3. */
104*0a6a1f1dSLionel Sambuc	cmp	r1, r2, lsr #1
105*0a6a1f1dSLionel Sambuc	subls	ip, ip, #(1 * 12)
106*0a6a1f1dSLionel Sambuc
107*0a6a1f1dSLionel Sambuc	mov	r3, #0
108*0a6a1f1dSLionel Sambuc
109*0a6a1f1dSLionel Sambuc	JMP(ip)
110*0a6a1f1dSLionel Sambuc#  endif
111*0a6a1f1dSLionel Sambuc
112*0a6a1f1dSLionel Sambuc#define	IMM	#
113*0a6a1f1dSLionel Sambuc
114*0a6a1f1dSLionel Sambuc#define block(shift)                                                           \
115*0a6a1f1dSLionel Sambuc	cmp	r0, r1, lsl IMM shift;                                         \
116*0a6a1f1dSLionel Sambuc	ITT(hs);                                                               \
117*0a6a1f1dSLionel Sambuc	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
118*0a6a1f1dSLionel Sambuc	WIDE(subhs)	r0, r0, r1, lsl IMM shift
119*0a6a1f1dSLionel Sambuc
120*0a6a1f1dSLionel Sambuc	block(31)
121*0a6a1f1dSLionel Sambuc	block(30)
122*0a6a1f1dSLionel Sambuc	block(29)
123*0a6a1f1dSLionel Sambuc	block(28)
124*0a6a1f1dSLionel Sambuc	block(27)
125*0a6a1f1dSLionel Sambuc	block(26)
126*0a6a1f1dSLionel Sambuc	block(25)
127*0a6a1f1dSLionel Sambuc	block(24)
128*0a6a1f1dSLionel Sambuc	block(23)
129*0a6a1f1dSLionel Sambuc	block(22)
130*0a6a1f1dSLionel Sambuc	block(21)
131*0a6a1f1dSLionel Sambuc	block(20)
132*0a6a1f1dSLionel Sambuc	block(19)
133*0a6a1f1dSLionel Sambuc	block(18)
134*0a6a1f1dSLionel Sambuc	block(17)
135*0a6a1f1dSLionel Sambuc	block(16)
136*0a6a1f1dSLionel Sambuc	block(15)
137*0a6a1f1dSLionel Sambuc	block(14)
138*0a6a1f1dSLionel Sambuc	block(13)
139*0a6a1f1dSLionel Sambuc	block(12)
140*0a6a1f1dSLionel Sambuc	block(11)
141*0a6a1f1dSLionel Sambuc	block(10)
142*0a6a1f1dSLionel Sambuc	block(9)
143*0a6a1f1dSLionel Sambuc	block(8)
144*0a6a1f1dSLionel Sambuc	block(7)
145*0a6a1f1dSLionel Sambuc	block(6)
146*0a6a1f1dSLionel Sambuc	block(5)
147*0a6a1f1dSLionel Sambuc	block(4)
148*0a6a1f1dSLionel Sambuc	block(3)
149*0a6a1f1dSLionel Sambuc	block(2)
150*0a6a1f1dSLionel Sambuc	block(1)
151*0a6a1f1dSLionel SambucLOCAL_LABEL(div0block):
152*0a6a1f1dSLionel Sambuc	block(0)
153*0a6a1f1dSLionel Sambuc
154*0a6a1f1dSLionel Sambuc	mov	r0, r3
155*0a6a1f1dSLionel Sambuc	JMP(lr)
156*0a6a1f1dSLionel Sambuc#endif /* __ARM_ARCH_EXT_IDIV__ */
157*0a6a1f1dSLionel Sambuc
158*0a6a1f1dSLionel SambucLOCAL_LABEL(divby0):
159*0a6a1f1dSLionel Sambuc	mov	r0, #0
160*0a6a1f1dSLionel Sambuc#ifdef __ARM_EABI__
161*0a6a1f1dSLionel Sambuc	b	__aeabi_idiv0
162*0a6a1f1dSLionel Sambuc#else
163*0a6a1f1dSLionel Sambuc	JMP(lr)
164*0a6a1f1dSLionel Sambuc#endif
165*0a6a1f1dSLionel Sambuc
166*0a6a1f1dSLionel SambucEND_COMPILERRT_FUNCTION(__udivsi3)
167