dnl  ARM v6t2 mpn_divrem_1 and mpn_preinv_divrem_1.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C               norm    unorm   frac
C StrongARM      ?
C XScale         ?
C Cortex-A8      ?
C Cortex-A9     13      14      13
C Cortex-A15     ?

C TODO
C  * Optimise inner-loops better, they could likely run a cycle or two faster.
C  * Decrease register usage, streamline non-loop code.
C Register-passed arguments, and byte offsets of the stack-passed arguments
C relative to the stack pointer on entry.
define(`qp_arg',  `r0')
define(`fn',      `r1')
define(`up_arg',  `r2')
define(`n_arg',   `r3')
define(`d_arg',   `0')
define(`dinv_arg',`4')
define(`cnt_arg', `8')

C Registers holding the working state.  All but dinv are callee-saved under
C the AAPCS, so they are expected to survive the call to mpn_invert_limb.
C dinv is r0, which is where the code below picks up the result of that call.
define(`n',       `r9')
define(`qp',      `r5')
define(`up',      `r6')
define(`cnt',     `r7')
define(`tnc',     `r10')
define(`dinv',    `r0')
define(`d',       `r4')

C Stack frame shared by both entry points: ten words are pushed, r3 plus
C r4-r11 and lr.  r3 carries no value that is needed later.  It is included
C only as padding so that sp stays a multiple of 8 at the calls to
C mpn_invert_limb, as the AAPCS requires at a public interface.  The stack
C arguments are therefore found at sp + 10*4 + offset.  The d slot is
C overwritten with fn, which is read back at L(frac).

ASM_START()

C mpn_preinv_divrem_1 (qp, fn, up, n, d, dinv, cnt)
C
C Entry point for callers that supply the inverse and the shift count: dinv
C and cnt are loaded from the stack instead of being computed.  After the
C common setup it joins mpn_divrem_1 at L(nent) when d has its top bit set,
C and at L(uent) otherwise.
PROLOGUE(mpn_preinv_divrem_1)
        stmfd   sp!, {r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
        ldr     d,   [sp, #10*4+d_arg]
        ldr     cnt, [sp, #10*4+cnt_arg]
        str     r1, [sp, #10*4+d_arg]   C reuse d stack slot for fn
        sub     n, r3, #1               C index of the top limb of U
        add     r3, r1, n               C fn + n - 1
        cmp     d, #0                   C flags are consumed by blt below
        add     qp, qp_arg, r3, lsl #2  C put qp at Q[] end
        add     up, up_arg, n, lsl #2   C put up at U[] end
        ldr     dinv, [sp, #10*4+dinv_arg]      C overwrites qp_arg, now unused
        blt     L(nent)                 C top bit of d set
        b       L(uent)
EPILOGUE()

C mpn_divrem_1 (qp, fn, up, n, d)
C
C Divides the n limbs at up by the single limb d and then develops fn more
C quotient limbs from the remainder.  Quotient limbs are stored from
C qp[fn+n-1] downwards, one per step.  The value returned in r0 is the final
C remainder shifted right by cnt.  The inverse of d is obtained from
C mpn_invert_limb.
PROLOGUE(mpn_divrem_1)
        stmfd   sp!, {r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
        sub     n, r3, #1               C index of the top limb of U
        ldr     d, [sp, #10*4+d_arg]    C d
        str     r1, [sp, #10*4+d_arg]   C reuse d stack slot for fn
        add     r3, r1, n               C fn + n - 1
        cmp     d, #0                   C flags are consumed by blt below
        add     qp, qp_arg, r3, lsl #2  C put qp at Q[] end
        add     up, up_arg, n, lsl #2   C put up at U[] end
        blt     L(normalised)           C top bit of d set

C Divisor with its top bit clear: shift it left by its leading zero count and
C shift the dividend by the same amount on the fly.
L(unnorm):
        clz     cnt, d
        mov     r0, d, lsl cnt          C pass d << cnt
        bl      mpn_invert_limb
L(uent):
        mov     d, d, lsl cnt           C d <<= cnt
        cmp     n, #0                   C flags used by blt and by beq below
        mov     r1, #0                  C r
        blt     L(frac)                 C no limbs in U

        ldr     r11, [up, #0]           C top limb of U

        rsb     tnc, cnt, #32           C tnc = 32 - cnt
        mov     r1, r11, lsr tnc        C bits shifted out form the first r
        mov     r11, r11, lsl cnt
        beq     L(uend)                 C U has a single limb

        ldr     r3, [up, #-4]!
        orr     r2, r11, r3, lsr tnc    C next shifted dividend limb
        b       L(mid)

C Main loop of the shifted case.  r1 is the running remainder, r8 the
C quotient limb being formed, r11 the dividend limb of the current step and
C r2 and r3 the data being prepared for the next one.
L(utop):
        mls     r1, d, r8, r11          C r = limb - q * d
        mov     r11, r3, lsl cnt
        ldr     r3, [up, #-4]!
        cmp     r1, r2
        addhi   r1, r1, d               C first adjustment
        subhi   r8, r8, #1
        orr     r2, r11, r3, lsr tnc
        cmp     r1, d
        bcs     L(ufx)                  C r >= d, second adjustment
L(uok): str     r8, [qp], #-4
L(mid): add     r8, r1, #1
        mov     r11, r2
        umlal   r2, r8, r1, dinv        C r8:r2 += r * dinv
        subs    n, n, #1
        bne     L(utop)

C Wind-down: complete the step started at L(mid) without loading more limbs.
        mls     r1, d, r8, r11
        mov     r11, r3, lsl cnt        C low bits of the last limb
        cmp     r1, r2
        addhi   r1, r1, d
        subhi   r8, r8, #1
        cmp     r1, d
        rsbcs   r1, d, r1
        addcs   r8, r8, #1
        str     r8, [qp], #-4

C Last step of the shifted case, with zeros shifted in from below.
L(uend):
        add     r8, r1, #1
        mov     r2, r11
        umlal   r2, r8, r1, dinv
        mls     r1, d, r8, r11
        cmp     r1, r2
        addhi   r1, r1, d
        subhi   r8, r8, #1
        cmp     r1, d
        rsbcs   r1, d, r1
        addcs   r8, r8, #1
        str     r8, [qp], #-4

C Fraction limbs, shared by both cases: fn further steps with a zero dividend
C limb.  On entry r1 holds the remainder, and cnt the shift to undo at the end.
L(frac):
        ldr     r2, [sp, #10*4+d_arg]   C fn
        cmp     r2, #0
        beq     L(fend)

L(ftop):
        mov     r6, #0                  C up is no longer needed, r6 is scratch
        add     r3, r1, #1
        umlal   r6, r3, r1, dinv        C r3:r6 = r * dinv, plus r + 1 on top
        mov     r8, #0
        mls     r1, d, r3, r8           C r = 0 - q * d
        cmp     r1, r6
        addhi   r1, r1, d
        subhi   r3, r3, #1
        subs    r2, r2, #1
        str     r3, [qp], #-4
        bne     L(ftop)

L(fend):
        mov     r11, r1, lsr cnt        C undo the shift applied to d
L(rtn): mov     r0, r11
        ldmfd   sp!, {r3, r4, r5, r6, r7, r8, r9, r10, r11, pc}

C Divisor with its top bit set: no shifting is needed.
L(normalised):
        mov     r0, d
        bl      mpn_invert_limb
L(nent):
        cmp     n, #0
        mov     r11, #0                 C r
        blt     L(nend)                 C no limbs in U

        ldr     r11, [up, #0]           C top limb of U
        cmp     r11, d
        movlo   r2, #0                  C hi q limb
        movhs   r2, #1                  C hi q limb
        subhs   r11, r11, d

        str     r2, [qp], #-4
        cmp     n, #0                   C flags were overwritten by cmp above
        beq     L(nend)                 C U has a single limb

C Main loop of the unshifted case.  r11 is the running remainder and r12 the
C quotient limb being formed.
L(ntop):
        ldr     r1, [up, #-4]!
        add     r12, r11, #1
        umlal   r1, r12, r11, dinv      C r12:r1 += r * dinv
        ldr     r3, [up, #0]            C same limb again, r1 was overwritten
        mls     r11, d, r12, r3         C r = limb - q * d
        cmp     r11, r1
        addhi   r11, r11, d             C first adjustment
        subhi   r12, r12, #1
        cmp     d, r11
        bls     L(nfx)                  C r >= d, second adjustment
L(nok): str     r12, [qp], #-4
        subs    n, n, #1
        bne     L(ntop)

L(nend):
        mov     r1, r11                 C r
        mov     cnt, #0                 C shift cnt
        b       L(frac)

C Out-of-line second adjustments, kept away from the loop bodies.
L(nfx): add     r12, r12, #1
        rsb     r11, d, r11
        b       L(nok)
L(ufx): rsb     r1, d, r1
        add     r8, r8, #1
        b       L(uok)
EPILOGUE()