/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __umodsi3 (32-bit unsigned integer modulus)
 * function for the ARM 32-bit architecture.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

	.syntax unified
	.text
#if __ARM_ARCH_ISA_THUMB == 2
	.thumb
#endif

@ unsigned int __umodsi3(unsigned int dividend, unsigned int divisor)
@   Calculate and return the remainder of the (unsigned) division.
/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 *   In:   r0 = numerator (dividend), r1 = denominator (divisor)
 *   Out:  r0 = r0 % r1
 *
 * Division by zero sets r0 to 0 and then either branches to __aeabi_idiv0
 * (when __ARM_EABI__ is defined) or returns that 0 to the caller.
 *
 * Scratch registers written by the code below: r2 (hardware-divide path and
 * the non-CLZ shift search), r3 and ip (software path). Condition flags are
 * modified.
 *
 * IT, ITT, JMP, JMPc, WIDE and LOCAL_LABEL are provided by ../assembly.h.
 */

	.p2align 2
DEFINE_COMPILERRT_FUNCTION(__umodsi3)
#if __ARM_ARCH_EXT_IDIV__
	/* Hardware divide available: remainder = r0 - (r0 / r1) * r1. */
	tst	r1, r1
	beq	LOCAL_LABEL(divby0)
	udiv	r2, r0, r1
	mls	r0, r2, r1, r0
	bx	lr
#else
	/* r1 == 0 (unsigned r1 < 1): division by zero. */
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)
	/* r1 == 1: the remainder is always 0. */
	ITT(eq)
	moveq	r0, #0
	JMPc(lr, eq)
	/* r0 < r1: r0 already is the remainder. */
	cmp	r0, r1
	IT(cc)
	JMPc(lr, cc)
	/*
	 * Implement the modulus using the binary long division algorithm.
	 *
	 * r0 is the numerator, r1 the denominator.
	 *
	 * The code before the jump computes a shift I such that (r1 << I)
	 * does not overflow and does not exceed r0 (with CLZ: r0 and
	 * (r1 << I) have their highest set bit in the same position).
	 * At the time of the jump, ip := div0block - BLOCKSIZE * I, i.e. the
	 * entry point of block(I). This depends on the fixed instruction size
	 * of block: 8 bytes in ARM mode, 10 bytes in Thumb-2 mode.
	 *
	 * block(shift) implements the test-and-subtract core. It assumes
	 * (r1 << shift) can be computed without overflow and that
	 * r0 < 2 * (r1 << shift). No quotient is accumulated in this
	 * function; r0 is reduced in place and holds the remainder once
	 * block(0) has run.
	 */

#  ifdef __ARM_FEATURE_CLZ
	clz	ip, r0
	clz	r3, r1
	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
	sub	r3, r3, ip
#    if __ARM_ARCH_ISA_THUMB == 2
	/*
	 * "+ 1" sets bit 0 so that bx stays in Thumb state. Subtracting
	 * 2 * I here plus 8 * I below gives the 10-byte Thumb-2 block stride.
	 */
	adr	ip, LOCAL_LABEL(div0block) + 1
	sub	ip, ip, r3, lsl #1
#    else
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	sub	ip, ip, r3, lsl #3
	bx	ip
#  else
#    if __ARM_ARCH_ISA_THUMB == 2
#    error THUMB mode requires CLZ or UDIV
#    endif
	/*
	 * No CLZ: find the shift in steps of 16, 8, 4, 2 and 1. r2 is a
	 * working copy of the numerator; whenever (r2 >> k) >= r1 the copy is
	 * shifted down by k and the entry point moves k blocks (8 bytes each,
	 * ARM mode only) earlier.
	 */
	mov	r2, r0
	adr	ip, LOCAL_LABEL(div0block)

	lsr	r3, r2, #16
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(16 * 8)

	lsr	r3, r2, #8
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(8 * 8)

	lsr	r3, r2, #4
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(4 * 8)

	lsr	r3, r2, #2
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(2 * 8)

	/* Last step, no need to update r2 or r3. */
	cmp	r1, r2, lsr #1
	subls	ip, ip, #(1 * 8)

	JMP(ip)
#  endif

/*
 * A bare '#' inside a function-like macro body is the preprocessor's
 * stringification operator, so the immediate prefix is spelled via IMM.
 */
#define	IMM	#

/*
 * One long-division step: if r0 >= (r1 << shift), subtract (r1 << shift)
 * from r0. Every expansion must have the same size because the code above
 * jumps into the middle of the sequence below.
 */
#define block(shift)                                                           \
	cmp	r0, r1, lsl IMM shift;                                         \
	IT(hs);                                                                \
	WIDE(subhs)	r0, r0, r1, lsl IMM shift

	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)
	/* r0 now holds the remainder. */
	JMP(lr)
#endif /* __ARM_ARCH_EXT_IDIV__ */

LOCAL_LABEL(divby0):
	/* Division by zero: result is 0; EABI targets hand off to the handler. */
	mov	r0, #0
#ifdef __ARM_EABI__
	b	__aeabi_idiv0
#else
	JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__umodsi3)