1*3cab2bb3Spatrick//===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===// 2*3cab2bb3Spatrick// 3*3cab2bb3Spatrick// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*3cab2bb3Spatrick// See https://llvm.org/LICENSE.txt for license information. 5*3cab2bb3Spatrick// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*3cab2bb3Spatrick// 7*3cab2bb3Spatrick//===----------------------------------------------------------------------===// 8*3cab2bb3Spatrick// 9*3cab2bb3Spatrick// This file implements the __udivmodsi4 (32-bit unsigned integer divide and 10*3cab2bb3Spatrick// modulus) function for the ARM 32-bit architecture. 11*3cab2bb3Spatrick// 12*3cab2bb3Spatrick//===----------------------------------------------------------------------===// 13*3cab2bb3Spatrick 14*3cab2bb3Spatrick#include "../assembly.h" 15*3cab2bb3Spatrick 16*3cab2bb3Spatrick .syntax unified 17*3cab2bb3Spatrick .text 18*3cab2bb3Spatrick DEFINE_CODE_STATE 19*3cab2bb3Spatrick 20*3cab2bb3Spatrick@ unsigned int __udivmodsi4(unsigned int dividend, unsigned int divisor, 21*3cab2bb3Spatrick@ unsigned int *remainder) 22*3cab2bb3Spatrick@ Calculate the quotient and remainder of the (unsigned) division. The return 23*3cab2bb3Spatrick@ value is the quotient, the remainder is stored in *remainder. 
// __udivmodsi4
//
// Register roles on entry:
//   r0 = dividend
//   r1 = divisor
//   r2 = address that receives the remainder
// On return r0 holds the quotient. r3 and ip are used as scratch; r4 is
// saved and restored around its use in the path without CLZ.
//
// NOTE(review): the divide-by-zero path below never stores through r2, so
// the remainder location is left untouched in that case.

        .p2align 2
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
#if __ARM_ARCH_EXT_IDIV__
        // Hardware divide: one udiv, then rebuild the remainder with a
        // multiply-and-subtract.
        tst     r1, r1
        beq     LOCAL_LABEL(divby0)
        mov     r3, r0                  // keep the dividend, r0 is overwritten next
        udiv    r0, r3, r1              // r0 = dividend / divisor
        mls     r1, r0, r1, r3          // r1 = dividend - quotient * divisor
        str     r1, [r2]                // *remainder = r1
        bx      lr
#else
        cmp     r1, #1
        bcc     LOCAL_LABEL(divby0)     // divisor == 0
        beq     LOCAL_LABEL(divby1)     // divisor == 1
        cmp     r0, r1
        bcc     LOCAL_LABEL(quotient0)  // dividend < divisor

        // Implement division using binary long division algorithm.
        //
        // r0 is the numerator, r1 the denominator.
        //
        // The code before the indirect jump computes the shift I, so that
        // r0 and (r1 << I) have the highest bit set in the same position.
        // At the time of the jump, ip := div0block - B * I, where B is the
        // size in bytes of one expansion of block(). This depends on the
        // fixed instruction size of block:
        // for ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
        // Execution therefore enters the unrolled chain at block(I) and
        // falls through down to block(0).
        //
        // block(shift) implements the test-and-update-quotient core.
        // It assumes (r1 << shift) can be computed without overflow and
        // that r0 < 2 * (r1 << shift), so that a single conditional
        // subtraction is enough. The quotient is accumulated in r3.

#  ifdef __ARM_FEATURE_CLZ
        clz     ip, r0
        clz     r3, r1
        // r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
        sub     r3, r3, ip              // r3 = I
#    if defined(USE_THUMB_2)
        // The + 1 sets bit 0 of the target so that bx stays in Thumb state.
        adr     ip, LOCAL_LABEL(div0block) + 1
        // Extra 2 * I for Thumb: 2 + 4 + 8 = 14 bytes per block.
        sub     ip, ip, r3, lsl #1
#    else
        adr     ip, LOCAL_LABEL(div0block)
#    endif
        // 4 * I + 8 * I = 12 * I, common to both instruction sets.
        sub     ip, ip, r3, lsl #2
        sub     ip, ip, r3, lsl #3
        mov     r3, #0                  // quotient accumulator starts at 0
        bx      ip
#  else
#    if defined(USE_THUMB_2)
#    error THUMB mode requires CLZ or UDIV
#    endif
// Size in bytes of one block() expansion in ARM mode (see the note above).
#    define ARM_BLOCK_BYTES 12

        // r4 is needed as an extra scratch register; the 8-byte adjustment
        // keeps sp a multiple of 8.
        str     r4, [sp, #-8]!

        // Binary search for I without CLZ: r4 is a shrinking copy of the
        // numerator, and every step that still leaves (r4 >> k) >= r1
        // moves the entry point k blocks further back.
        mov     r4, r0
        adr     ip, LOCAL_LABEL(div0block)

        lsr     r3, r4, #16
        cmp     r3, r1
        movhs   r4, r3
        subhs   ip, ip, #(16 * ARM_BLOCK_BYTES)

        lsr     r3, r4, #8
        cmp     r3, r1
        movhs   r4, r3
        subhs   ip, ip, #(8 * ARM_BLOCK_BYTES)

        lsr     r3, r4, #4
        cmp     r3, r1
        movhs   r4, r3
        subhs   ip, ip, #(4 * ARM_BLOCK_BYTES)

        lsr     r3, r4, #2
        cmp     r3, r1
        movhs   r4, r3
        subhs   ip, ip, #(2 * ARM_BLOCK_BYTES)

        // Last block, no need to update r3 or r4.
        cmp     r1, r4, lsr #1
        subls   ip, ip, #(1 * ARM_BLOCK_BYTES)

        ldr     r4, [sp], #8            // restore r4, we are done with it.
        mov     r3, #0                  // quotient accumulator starts at 0

        JMP(ip)
#  endif

// A literal '#' cannot appear in the body of a function-like macro, so the
// immediate prefix is routed through IMM.
#define IMM #

// One step of the long division: if r0 >= (r1 << shift), set quotient bit
// 'shift' in r3 and subtract (r1 << shift) from r0.
#define block(shift)                                                           \
        cmp     r0, r1, lsl IMM shift;                                         \
        ITT(hs);                                                               \
        WIDE(addhs)     r3, r3, IMM (1 << shift);                              \
        WIDE(subhs)     r0, r0, r1, lsl IMM shift

        block(31)
        block(30)
        block(29)
        block(28)
        block(27)
        block(26)
        block(25)
        block(24)
        block(23)
        block(22)
        block(21)
        block(20)
        block(19)
        block(18)
        block(17)
        block(16)
        block(15)
        block(14)
        block(13)
        block(12)
        block(11)
        block(10)
        block(9)
        block(8)
        block(7)
        block(6)
        block(5)
        block(4)
        block(3)
        block(2)
        block(1)
LOCAL_LABEL(div0block):
        block(0)

        // r0 now holds the remainder and r3 the quotient.
        str     r0, [r2]
        mov     r0, r3
        JMP(lr)

LOCAL_LABEL(quotient0):
        // dividend < divisor: the remainder is the dividend, the quotient 0.
        str     r0, [r2]
        mov     r0, #0
        JMP(lr)

LOCAL_LABEL(divby1):
        // divisor == 1: the quotient is the dividend already in r0 and the
        // remainder is 0.
        mov     r3, #0
        str     r3, [r2]
        JMP(lr)
#endif // __ARM_ARCH_EXT_IDIV__

LOCAL_LABEL(divby0):
        // divisor == 0: the quotient is forced to 0. On EABI targets control
        // is handed to __aeabi_idiv0 as a tail call, otherwise return.
        mov     r0, #0
#ifdef __ARM_EABI__
        b       __aeabi_idiv0
#else
        JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__udivmodsi4)

NO_EXEC_STACK_DIRECTIVE
