/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
 * modulus) function for the ARM 32-bit architecture.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

	.syntax unified
	.text

#if __ARM_ARCH_ISA_THUMB == 2
	.thumb
#endif

@ unsigned int __udivmodsi4(unsigned int dividend, unsigned int divisor,
@                           unsigned int *remainder)
@   Calculate the quotient and remainder of the (unsigned) division.  The return
@   value is the quotient, the remainder is placed in the variable.
@
@   Register use as seen in this file:
@     in:   r0 = dividend, r1 = divisor, r2 = pointer to the remainder
@     out:  r0 = quotient, [r2] = remainder
@     scratch: r3, ip (r4 is saved and restored on the path without CLZ)
@   Division by zero: r0 is set to 0 and [r2] is NOT written; on EABI targets
@   control is then transferred to __aeabi_idiv0, otherwise the function returns.

	.p2align 2
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
#if __ARM_ARCH_EXT_IDIV__
	/* Hardware divide available: quotient via udiv, remainder via mls. */
	tst     r1, r1                  /* Z set when the divisor is zero */
	beq     LOCAL_LABEL(divby0)
	mov 	r3, r0                  /* keep the dividend, r0 becomes the quotient */
	udiv	r0, r3, r1
	mls 	r1, r0, r1, r3          /* r1 = dividend - quotient * divisor */
	str 	r1, [r2]
	bx  	lr
#else
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)     /* divisor <  1 (unsigned), i.e. zero */
	beq	LOCAL_LABEL(divby1)     /* divisor == 1 */
	cmp	r0, r1
	bcc	LOCAL_LABEL(quotient0)  /* dividend < divisor */
	/*
	 * Implement division using binary long division algorithm.
	 *
	 * r0 is the numerator, r1 the denominator.
	 *
	 * The code before JMP computes the correct shift I, so that
	 * r0 and (r1 << I) have the highest bit set in the same position.
	 * At the time of JMP, ip points at the first instruction of block(I):
	 *   ARM mode:     ip := div0block - 12 * I
	 *   Thumb-2 mode: ip := div0block + 1 - 14 * I
	 *                 (bit 0 is set so that bx stays in Thumb state)
	 * This depends on the fixed instruction size of block.
	 * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
	 *
	 * block(shift) implements the test-and-update-quotient core.
	 * It assumes (r1 << shift) can be computed without overflow and
	 * that r0 < 2 * (r1 << shift). The quotient is stored in r3.
	 */

#  ifdef __ARM_FEATURE_CLZ
	clz	ip, r0
	clz	r3, r1
	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
	sub	r3, r3, ip              /* r3 = I */
#    if __ARM_ARCH_ISA_THUMB == 2
	adr	ip, LOCAL_LABEL(div0block) + 1
	sub	ip, ip, r3, lsl #1      /* extra 2 * I: Thumb blocks are 14 bytes */
#    else
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	sub	ip, ip, r3, lsl #2      /* - 4 * I */
	sub	ip, ip, r3, lsl #3      /* - 8 * I */
	mov	r3, #0                  /* quotient accumulator starts at zero */
	bx	ip
#  else
#    if __ARM_ARCH_ISA_THUMB == 2
#    error THUMB mode requires CLZ or UDIV
#    endif
	/*
	 * No CLZ: find I by binary search.  r4 holds the numerator shifted
	 * right by the amount accepted so far; each step accepts a further
	 * shift of 16, 8, 4, 2 when the shifted value is still >= r1, and
	 * moves ip back by the matching number of 12-byte blocks.
	 */
	str	r4, [sp, #-8]!          /* save r4; sp moves by 8, presumably to keep it 8-byte aligned */

	mov	r4, r0
	adr	ip, LOCAL_LABEL(div0block)

	lsr	r3, r4, #16
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(16 * 12)

	lsr	r3, r4, #8
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(8 * 12)

	lsr	r3, r4, #4
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(4 * 12)

	lsr	r3, r4, #2
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(2 * 12)

	/* Last block, no need to update r3 or r4. */
	cmp	r1, r4, lsr #1
	subls	ip, ip, #(1 * 12)

	ldr	r4, [sp], #8	/* restore r4, we are done with it. */
	mov	r3, #0

	JMP(ip)
#  endif

/*
 * A literal '#' inside a function-like macro body is the preprocessor
 * stringizing operator, so the immediate prefix is spelled through IMM.
 */
#define	IMM	#

/*
 * One long-division step: when r0 >= (r1 << shift), set quotient bit 'shift'
 * in r3 and subtract (r1 << shift) from r0.  Every expansion must keep the
 * same size, because the entry point is computed from the block size.
 */
#define block(shift)                                                           \
	cmp	r0, r1, lsl IMM shift;                                         \
	ITT(hs);                                                               \
	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
	WIDE(subhs)	r0, r0, r1, lsl IMM shift

	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)

	/* r0 now holds the remainder, r3 the quotient. */
	str	r0, [r2]
	mov	r0, r3
	JMP(lr)

LOCAL_LABEL(quotient0):
	/* dividend < divisor: the remainder is the dividend, the quotient is 0. */
	str	r0, [r2]
	mov	r0, #0
	JMP(lr)

LOCAL_LABEL(divby1):
	/* divisor == 1: r0 already holds the quotient, the remainder is 0. */
	mov	r3, #0
	str	r3, [r2]
	JMP(lr)
#endif /* __ARM_ARCH_EXT_IDIV__ */

LOCAL_LABEL(divby0):
	/* Quotient is forced to 0; the remainder slot is left untouched. */
	mov	r0, #0
#ifdef __ARM_EABI__
	b	__aeabi_idiv0
#else
	JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__udivmodsi4)