//===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the __umodsi3 (32-bit unsigned integer modulus)
// function for the ARM 32-bit architecture.
//
//===----------------------------------------------------------------------===//

#include "../assembly.h"

	.syntax unified
	.text
	DEFINE_CODE_STATE

@ unsigned int __umodsi3(unsigned int dividend, unsigned int divisor)
@   Calculate and return the remainder of the (unsigned) division.
@
@   In:  r0 = dividend, r1 = divisor.
@   Out: r0 = remainder.
@   A zero divisor is routed to the divby0 label at the end of the function.
@   Registers written besides r0: r2, r3 and ip (which ones depends on the
@   path selected by the preprocessor conditions below).

	.p2align 2
DEFINE_COMPILERRT_FUNCTION(__umodsi3)
#if __ARM_ARCH_EXT_IDIV__
	// Hardware divide path: remainder = r0 - (r0 / r1) * r1.
	tst	r1, r1
	beq	LOCAL_LABEL(divby0)
	udiv	r2, r0, r1
	mls	r0, r2, r1, r0
	bx	lr
#else
	// Early exits that need no division at all:
	//   r1 <  1 (i.e. r1 == 0)  -> divby0
	//   r1 == 1                 -> remainder is 0
	//   r0 <  r1                -> remainder is r0, returned unchanged
	// IT/ITT and JMPc are macros taken from ../assembly.h; JMPc(lr, cond)
	// is used here as a conditional return.
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)
	ITT(eq)
	moveq	r0, #0
	JMPc(lr, eq)
	cmp	r0, r1
	IT(cc)
	JMPc(lr, cc)

	// Implement the modulus using the binary long division algorithm.
	//
	// r0 is the numerator, r1 the denominator.
	//
	// The code before the jump computes the shift I, so that r0 and
	// (r1 << I) have the highest bit set in the same position (CLZ path),
	// or so that I is the largest shift with (r0 >> I) >= r1 (fallback
	// path). Control then enters the unrolled chain below at block(I) and
	// falls through block(I-1) ... block(0).
	//
	// At the time of the jump, ip := div0block - BLOCK_SIZE * I.
	// This depends on the fixed instruction size of block:
	// for ARM mode BLOCK_SIZE is 8 bytes, for THUMB mode 10 bytes.
	//
	// block(shift) implements the test-and-update-remainder core:
	// if r0 >= (r1 << shift) then r0 -= (r1 << shift).
	// It assumes (r1 << shift) can be computed without overflow and
	// that r0 < 2 * (r1 << shift). Only the remainder in r0 is updated;
	// no quotient is accumulated.

#  ifdef __ARM_FEATURE_CLZ
	clz	ip, r0
	clz	r3, r1
	// r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
	sub	r3, r3, ip		// r3 = I
#    if defined(USE_THUMB_2)
	// The + 1 sets bit 0 of the target so that bx stays in Thumb state.
	// Subtracting I * 2 here and I * 8 below gives the 10-byte stride.
	adr	ip, LOCAL_LABEL(div0block) + 1
	sub	ip, ip, r3, lsl #1
#    else
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	sub	ip, ip, r3, lsl #3
	bx	ip
#  else
#    if defined(USE_THUMB_2)
#    error THUMB mode requires CLZ or UDIV
#    endif
	// No CLZ: find I by binary search. r2 holds the numerator shifted
	// right by the amount accepted so far; each accepted step of N bits
	// moves ip back by N blocks of 8 bytes.
	mov	r2, r0
	adr	ip, LOCAL_LABEL(div0block)

	lsr	r3, r2, #16
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(16 * 8)

	lsr	r3, r2, #8
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(8 * 8)

	lsr	r3, r2, #4
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(4 * 8)

	lsr	r3, r2, #2
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(2 * 8)

	// Last block, no need to update r2 or r3.
	cmp	r1, r2, lsr #1
	subls	ip, ip, #(1 * 8)

	JMP(ip)
#  endif

// IMM lets the block macro emit the immediate marker before its argument.
#define	IMM	#

#define block(shift)                                                           \
	cmp	r0, r1, lsl IMM shift;                                         \
	IT(hs);                                                                \
	WIDE(subhs)	r0, r0, r1, lsl IMM shift

	// Unrolled chain. Every block must keep the same size, because the
	// entry point above is computed as an offset from div0block.
	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)
	// r0 now holds the remainder.
	JMP(lr)
#endif // __ARM_ARCH_EXT_IDIV__

	// Division by zero: set the result register to 0, then either
	// tail-branch to __aeabi_idiv0 (EABI targets) or return to the caller.
LOCAL_LABEL(divby0):
	mov	r0, #0
#ifdef __ARM_EABI__
	b	__aeabi_idiv0
#else
	JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__umodsi3)

NO_EXEC_STACK_DIRECTIVE
