xref: /openbsd-src/gnu/llvm/compiler-rt/lib/builtins/arm/umodsi3.S (revision 3cab2bb3f667058bece8e38b12449a63a9d73c4b)
1*3cab2bb3Spatrick//===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
2*3cab2bb3Spatrick//
3*3cab2bb3Spatrick// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*3cab2bb3Spatrick// See https://llvm.org/LICENSE.txt for license information.
5*3cab2bb3Spatrick// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*3cab2bb3Spatrick//
7*3cab2bb3Spatrick//===----------------------------------------------------------------------===//
8*3cab2bb3Spatrick//
9*3cab2bb3Spatrick// This file implements the __umodsi3 (32-bit unsigned integer modulus)
10*3cab2bb3Spatrick// function for the ARM 32-bit architecture.
11*3cab2bb3Spatrick//
12*3cab2bb3Spatrick//===----------------------------------------------------------------------===//
13*3cab2bb3Spatrick
14*3cab2bb3Spatrick#include "../assembly.h"
15*3cab2bb3Spatrick
16*3cab2bb3Spatrick	.syntax unified
17*3cab2bb3Spatrick	.text
18*3cab2bb3Spatrick	DEFINE_CODE_STATE
19*3cab2bb3Spatrick
@ unsigned int __umodsi3(unsigned int dividend, unsigned int divisor)
@   Calculate and return the remainder of the (unsigned) division.
@
@   In:       r0 = dividend, r1 = divisor
@   Out:      r0 = dividend % divisor
@   Scratch:  r2, r3, ip and the condition flags
@   divisor == 0: r0 is set to 0, then control passes to __aeabi_idiv0 on
@   EABI targets, or returns to the caller otherwise.

	.p2align 2
DEFINE_COMPILERRT_FUNCTION(__umodsi3)
#if __ARM_ARCH_EXT_IDIV__
	// Hardware divide is available:
	//   r2 = r0 / r1
	//   r0 = r0 - r2 * r1
	tst	r1, r1
	beq	LOCAL_LABEL(divby0)
	udiv	r2, r0, r1
	mls	r0, r2, r1, r0
	bx	lr
#else
	// Software fallback. Dispose of the trivial cases first:
	//   r1 == 0  -> divby0
	//   r1 == 1  -> remainder is 0
	//   r0 <  r1 -> remainder is r0 itself, already in place
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)
	ITT(eq)
	moveq	r0, #0
	JMPc(lr, eq)
	cmp	r0, r1
	IT(cc)
	JMPc(lr, cc)

	// Compute the remainder using the binary long division algorithm.
	//
	// r0 is the numerator, r1 the denominator. From here on r0 >= r1 > 1.
	//
	// The code before the computed jump picks a starting shift I and
	// enters the unrolled sequence below at block(I):
	//   - with CLZ:    I = clz(r1) - clz(r0), so that r0 and (r1 << I)
	//                  have the highest bit set in the same position;
	//   - without CLZ: a binary search (steps 16, 8, 4, 2, 1) for the
	//                  largest I with (r0 >> I) >= r1.
	//
	// At the time of the jump:
	//   ARM mode:   ip := div0block - 8 * I
	//   THUMB mode: ip := div0block + 1 - 10 * I
	//               (the + 1 sets the THUMB bit of the address for bx)
	// This depends on the fixed instruction size of block.
	// For ARM mode, this is 8 Bytes, for THUMB mode 10 Bytes.
	//
	// block(shift) implements the test-and-subtract core:
	//   if (r0 >= (r1 << shift)) r0 -= (r1 << shift);
	// It assumes (r1 << shift) can be computed without overflow, which
	// the choice of I guarantees for every block that is executed.
	// No quotient is accumulated; after block(0) r0 holds the remainder.

#  ifdef __ARM_FEATURE_CLZ
	clz	ip, r0
	clz	r3, r1
	// r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
	// r3 := I
	sub	r3, r3, ip
#    if defined(USE_THUMB_2)
	// 10 * I is subtracted as 2 * I here plus 8 * I below.
	adr	ip, LOCAL_LABEL(div0block) + 1
	sub	ip, ip, r3, lsl #1
#    else
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	sub	ip, ip, r3, lsl #3
	bx	ip
#  else
#    if defined(USE_THUMB_2)
#    error THUMB mode requires CLZ or UDIV
#    endif
	// r2 is a working copy of the numerator that is shifted right as the
	// search narrows; ip walks backwards from div0block by one 8-byte
	// block per unit of shift.
	mov	r2, r0
	adr	ip, LOCAL_LABEL(div0block)

	lsr	r3, r2, #16
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(16 * 8)

	lsr	r3, r2, #8
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(8 * 8)

	lsr	r3, r2, #4
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(4 * 8)

	lsr	r3, r2, #2
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(2 * 8)

	// Last block, no need to update r2 or r3.
	cmp	r1, r2, lsr #1
	subls	ip, ip, #(1 * 8)

	JMP(ip)
#  endif

// The immediate prefix cannot be written directly inside block(): in a
// function-like macro, # followed by a parameter is the preprocessor
// stringification operator. IMM supplies it through an extra expansion.
#define	IMM	#

#define block(shift)                                                           \
	cmp	r0, r1, lsl IMM shift;                                         \
	IT(hs);                                                                \
	WIDE(subhs)	r0, r0, r1, lsl IMM shift

	// Unrolled long division. Execution enters at block(I) and falls
	// through every smaller shift down to block(0). The blocks above
	// div0block must all have the same size, because the entry address is
	// computed from it.
	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)
	// r0 now holds the remainder.
	JMP(lr)
#endif // __ARM_ARCH_EXT_IDIV__

	// Division by zero: the result register is cleared, then the EABI
	// division-by-zero handler is tail-called where it exists.
LOCAL_LABEL(divby0):
	mov	r0, #0
#ifdef __ARM_EABI__
	b	__aeabi_idiv0
#else
	JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__umodsi3)
154*3cab2bb3Spatrick
155*3cab2bb3SpatrickNO_EXEC_STACK_DIRECTIVE
156*3cab2bb3Spatrick
157