//===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the __udivmodsi4 (32-bit unsigned integer divide and
// modulus) function for the ARM 32-bit architecture.
//
//===----------------------------------------------------------------------===//

#include "../assembly.h"

	.syntax unified
	.text
	DEFINE_CODE_STATE

@ unsigned int __udivmodsi4(unsigned int dividend, unsigned int divisor,
@                           unsigned int *remainder)
@   Calculate the quotient and remainder of the (unsigned) division.  The return
@   value is the quotient, the remainder is placed in the variable.
@
@   In:      r0 = dividend, r1 = divisor, r2 = address that receives the
@            remainder.
@   Out:     r0 = quotient, [r2] = remainder.
@   Scratch: r3, ip and the condition flags.  r1 is overwritten on the hardware
@            divide path.  r4 is used on the path without CLZ, but it is saved
@            on the stack first and reloaded before the jump.
@   Divisor of zero: r0 is set to 0 and nothing is stored through r2.  With
@            __ARM_EABI__ defined the function then branches to __aeabi_idiv0,
@            otherwise it returns to the caller.

	.p2align 2
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
#if __ARM_ARCH_EXT_IDIV__
	// Hardware divide is available: one udiv, then rebuild the remainder.
	tst	r1, r1
	beq	LOCAL_LABEL(divby0)
	mov	r3, r0			// keep the dividend, r0 becomes the quotient
	udiv	r0, r3, r1
	mls	r1, r0, r1, r3		// r1 = dividend - quotient * divisor
	str	r1, [r2]
	bx	lr
#else
	// Peel off the trivial cases before entering the long division.
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)	// divisor < 1 (unsigned), i.e. divisor == 0
	beq	LOCAL_LABEL(divby1)	// divisor == 1
	cmp	r0, r1
	bcc	LOCAL_LABEL(quotient0)	// dividend < divisor

	// Implement division using binary long division algorithm.
	//
	// r0 is the numerator, r1 the denominator.  From here on r0 >= r1 >= 2.
	//
	// The code before the jump computes a shift I such that (r1 << I) does
	// not overflow and r0 < 2 * (r1 << I):
	//   - with CLZ, I = clz(r1) - clz(r0), so r0 and (r1 << I) have the
	//     highest bit set in the same position;
	//   - without CLZ, a binary search finds the largest I for which
	//     (r0 >> I) >= r1.
	//
	// At the time of the jump, ip := div0block - BLOCK_SIZE * I, so that
	// execution enters the unrolled ladder at block(I) and falls through
	// block(I - 1) ... block(0).
	// This depends on the fixed instruction size of block.
	// For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
	// Only block(1) ... block(31) have to honour that size: the div0block
	// label sits in front of block(0), so block(0) is never counted.
	//
	// block(shift) implements the test-and-update-quotient core.
	// It assumes (r1 << shift) can be computed without overflow and
	// that r0 < 2 * (r1 << shift). The quotient is accumulated in r3, the
	// running remainder stays in r0.

#  ifdef __ARM_FEATURE_CLZ
	clz	ip, r0
	clz	r3, r1
	// r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
	sub	r3, r3, ip		// r3 = I
#    if defined(USE_THUMB_2)
	// + 1 sets bit 0 of the target so that bx stays in Thumb state.
	adr	ip, LOCAL_LABEL(div0block) + 1
	// Thumb stride is 14 = 2 + 4 + 8; this takes care of the extra 2 * I.
	sub	ip, ip, r3, lsl #1
#    else
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	// Common part of the stride: 12 * I = 4 * I + 8 * I.
	sub	ip, ip, r3, lsl #2
	sub	ip, ip, r3, lsl #3
	mov	r3, #0			// quotient accumulator starts at zero
	bx	ip
#  else
#    if defined(USE_THUMB_2)
#    error THUMB mode requires CLZ or UDIV
#    endif
	// ARM mode only from here on (see the #error above), hence the
	// hard-coded block size of 12 bytes in the offsets below.
	//
	// r4 is needed as an extra scratch register.  sp moves by 8 rather
	// than 4, so an sp that was a multiple of 8 on entry stays one.
	str	r4, [sp, #-8]!

	mov	r4, r0			// r4 = working copy of the numerator
	adr	ip, LOCAL_LABEL(div0block)

	// Binary search for I.  Each step asks whether the working value can
	// be shifted right by another 16/8/4/2 bits and still be >= r1.  If it
	// can, the shift is committed to r4 and ip moves back by that many
	// blocks.
	lsr	r3, r4, #16
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(16 * 12)

	lsr	r3, r4, #8
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(8 * 12)

	lsr	r3, r4, #4
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(4 * 12)

	lsr	r3, r4, #2
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(2 * 12)

	// Last block, no need to update r3 or r4.
	cmp	r1, r4, lsr #1		// r1 <= (r4 >> 1) ?
	subls	ip, ip, #(1 * 12)

	ldr	r4, [sp], #8	// restore r4, we are done with it.
	mov	r3, #0			// quotient accumulator starts at zero

	JMP(ip)
#  endif

#define	IMM	#

#define block(shift)                                                           \
	cmp	r0, r1, lsl IMM shift;                                         \
	ITT(hs);                                                               \
	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
	WIDE(subhs)	r0, r0, r1, lsl IMM shift

	// Unrolled long division ladder.  The computed jump above lands on one
	// of these blocks; nothing may be inserted between block(31) and the
	// div0block label without breaking the offset arithmetic.
	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)

	// r0 now holds the remainder and r3 the quotient.
	str	r0, [r2]
	mov	r0, r3
	JMP(lr)

LOCAL_LABEL(quotient0):
	// dividend < divisor: the quotient is 0, the remainder is the dividend.
	str	r0, [r2]
	mov	r0, #0
	JMP(lr)

LOCAL_LABEL(divby1):
	// divisor == 1: the quotient is the dividend (already in r0), the
	// remainder is 0.
	mov	r3, #0
	str	r3, [r2]
	JMP(lr)
#endif // __ARM_ARCH_EXT_IDIV__

LOCAL_LABEL(divby0):
	// Shared by both implementations.  The remainder is not stored.
	mov	r0, #0
#ifdef __ARM_EABI__
	b	__aeabi_idiv0
#else
	JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__udivmodsi4)

NO_EXEC_STACK_DIRECTIVE
