dnl  PPC-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.

dnl  Copyright 2007, 2008 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C		cycles/limb
C		norm	frac
C POWER3/PPC630
C POWER4/PPC970	?	?
C POWER5	37	?
C POWER6	62	?
C POWER6	30.5	?
C NOTE(review): POWER6 is listed twice with different figures; one of the two
C rows presumably refers to another CPU or configuration -- confirm.

C INPUT PARAMETERS
C qp = r3
C fn = r4
C up = r5
C un = r6
C dp = r7

C OUTPUT
C r3 = 0 or 1 (the value accumulated in r23 by the initial compare/subtract)
C Quotient limbs are stored through qp, one per loop iteration, from the
C highest index (fn + un - 3) downwards.  The final two-limb remainder is
C stored back through the up pointer (see L(18)).

C REGISTER USAGE (values that live across the call and the main loop)
C r23 = return value (0 or 1)
C r24 = quotient store pointer, moves downwards
C r25 = fn
C r26 = dividend load pointer, starts at up + 8*(un-3), moves downwards
C r27 = index of the quotient limb being produced, starts at fn + un - 3
C r28 = dp[0], low divisor limb
C r29 = high limb of the running two-limb remainder
C r30 = dp[1], high divisor limb
C r31 = low limb of the running two-limb remainder
C r3  = reciprocal used by the loop (after the call and adjustment below)
C
C Scratch registers r0, r4-r11 are reused freely.  r1 is only ever used as
C both source operands of subfe, which yields CA - 1 regardless of its value.

C On non-Darwin builds m4 rewrites the token r2 to r31 from here on.
C NOTE(review): no literal r2 appears in the code below; the reason for this
C define is not visible here -- the original FIXME is retained.
ifdef(`DARWIN',,`
define(`r2',`r31')')		C FIXME!

ASM_START()

EXTERN_FUNC(mpn_invert_limb)

PROLOGUE(mpn_divrem_2)
C Save r23-r31 below the incoming stack pointer and LR at 16(r1), then
C allocate a 192-byte frame.
	mflr	r0
	std	r23, -72(r1)
	std	r24, -64(r1)
	std	r25, -56(r1)
	std	r26, -48(r1)
	std	r27, -40(r1)
	std	r28, -32(r1)
	std	r29, -24(r1)
	std	r30, -16(r1)
	std	r31, -8(r1)
	std	r0, 16(r1)
	stdu	r1, -192(r1)
	mr	r24, r3			C r24 = qp
	mr	r25, r4			C r25 = fn
	sldi	r0, r6, 3		C r0 = 8 * un
	add	r26, r5, r0
	addi	r26, r26, -24		C r26 = up + 8*(un-3)
	ld	r30, 8(r7)		C r30 = dp[1]
	ld	r28, 0(r7)		C r28 = dp[0]
	ld	r29, 16(r26)		C r29 = up[un-1]
	ld	r31, 8(r26)		C r31 = up[un-2]

C Initial step: if the two-limb value r29:r31 is >= the divisor r30:r28,
C subtract the divisor once and set r23 = 1; otherwise r23 = 0.
C ifelse(0,1,...) compares the strings 0 and 1, which differ, so the FIRST
C (branchy) variant is discarded and the SECOND variant is the one assembled.
ifelse(0,1,`
	li	r23, 0
	cmpld	cr7, r29, r30
	blt	cr7, L(8)
	bgt	cr7, L(9)
	cmpld	cr0, r31, r28
	blt	cr0, L(8)
L(9):	subfc	r31, r28, r31
	subfe	r29, r30, r29
	li	r23, 1
',`
	li	r23, 0
	cmpld	cr7, r29, r30		C unsigned compare of the high limbs
	blt	cr7, L(8)		C high limb below dp[1]: nothing to subtract
	mfcr	r0
	rlwinm	r0, r0, 30, 1		C r0 = GT bit of cr7 (1 if r29 > r30, else 0)
	subfc	r9, r28, r31		C result unused; CA = 1 iff r31 >= r28
	addze.	r0, r0			C r0 += CA; zero only if high equal and low smaller
	nop
	beq	cr0, L(8)
	subfc	r31, r28, r31		C r29:r31 -= r30:r28
	subfe	r29, r30, r29
	li	r23, 1
')

L(8):
	add	r27, r25, r6
	addic.	r27, r27, -3		C r27 = fn + un - 3, cr0 set from the result
	blt	cr0, L(18)		C negative: no quotient limbs to produce
	mr	r3, r30			C argument = dp[1]
C mpn_invert_limb is defined elsewhere; presumably it returns in r3 the
C reciprocal of the normalized limb passed in r3 -- confirm against its
C definition.  The nop after the call is presumably the slot reserved for a
C TOC restore.
	CALL(	mpn_invert_limb)
	nop
C Adjust the returned value using both divisor limbs.  r11:r10 forms a
C two-word quantity built from low(r3*dp[1]) + dp[0] + high(r3*dp[0]);
C r3 is decremented for as long as r11 stays non-negative.
C NOTE(review): this appears to convert the single-limb reciprocal into the
C one needed for dividing by the full two-limb divisor -- confirm against the
C algorithm reference.
	mulld	r10, r3, r30		C r10 = low 64 bits of r3 * dp[1]
	mulhdu	r0, r3, r28		C r0 = high 64 bits of r3 * dp[0]
	addc	r8, r10, r28		C r8 = r10 + dp[0], CA = carry out
	subfe	r11, r1, r1		C r11 = CA - 1 (0 on carry, else -1)
	addc	r10, r8, r0		C r10 = r8 + r0, CA = carry out
	addze.	r11, r11		C r11 += CA
	blt	cr0, L(91)		C r11 < 0: r3 is used unchanged
L(40):
	subfc	r10, r30, r10		C r10 -= dp[1], CA = no borrow
	addme.	r11, r11		C r11 = r11 + CA - 1
	addi	r3, r3, -1
	bge	cr0, L(40)
L(91):
	addi	r5, r27, 1
	mtctr	r5			C loop count = fn + un - 2
	sldi	r0, r27, 3
	add	r24, r24, r0		C r24 = qp + 8*(fn + un - 3)
	ALIGN(16)
C Main loop: one quotient limb per iteration.  On entry r29:r31 is the
C current remainder; the next dividend limb is read through r26 while
C r27 >= fn and taken as zero afterwards.
L(loop):
	mulhdu	r8, r29, r3
	mulld	r6, r29, r3
	addc	r6, r6, r31
	adde	r8, r8, r29		C r8:r6 = r29*r3 + r29:r31; r8 = quotient candidate
	cmpd	cr7, r27, r25		C signed compare of limb index with fn
	mulld	r0, r30, r8		C r0 = low(dp[1] * r8)
	mulhdu	r11, r28, r8
	mulld	r10, r28, r8		C r11:r10 = dp[0] * r8
	subf	r31, r0, r31		C r31 -= low(dp[1] * r8)
	li	r7, 0			C next limb defaults to 0
	blt	cr7, L(60)		C index < fn: keep the zero limb
	ld	r7, 0(r26)		C otherwise fetch the next dividend limb
	addi	r26, r26, -8
	nop
L(60):	subfc	r7, r28, r7
	subfe	r31, r30, r31		C r31:r7 -= r30:r28 (one extra divisor)
	subfc	r7, r10, r7
	subfe	r4, r11, r31		C r4:r7 = r31:r7 - r11:r10
	subfc	r9, r6, r4		C result overwritten next; CA = 1 iff r4 >= r6
	subfe	r9, r1, r1		C r9 = CA - 1: 0 if r4 >= r6, else all ones
	andc	r6, r28, r9		C r6 = dp[0] when r9 == 0, else 0
	andc	r0, r30, r9		C r0 = dp[1] when r9 == 0, else 0
	addc	r31, r7, r6
	adde	r29, r4, r0		C new remainder r29:r31, divisor added back if r9 == 0
	subf	r8, r9, r8		C r8 += 1 when r9 is all ones
	cmpld	cr7, r29, r30
	bge-	cr7, L(fix)		C hinted not-taken: remainder may still be >= divisor
L(bck):	std	r8, 0(r24)		C store the quotient limb
	addi	r24, r24, -8
	addi	r27, r27, -1
	bdnz	L(loop)
C Exit: store the two remainder limbs relative to r26, return r23 in r3,
C release the frame and restore LR and r23-r31.
L(18):
	std	r31, 8(r26)
	std	r29, 16(r26)
	mr	r3, r23
	addi	r1, r1, 192
	ld	r0, 16(r1)
	mtlr	r0
	ld	r23, -72(r1)
	ld	r24, -64(r1)
	ld	r25, -56(r1)
	ld	r26, -48(r1)
	ld	r27, -40(r1)
	ld	r28, -32(r1)
	ld	r29, -24(r1)
	ld	r30, -16(r1)
	ld	r31, -8(r1)
	blr
C Out-of-line fixup, reached with cr7 holding the compare of r29 against
C r30 (r29 >= r30).  Same test as the initial step: if r29:r31 >= r30:r28,
C subtract the divisor once more and increment the quotient limb.
L(fix):
	mfcr	r0
	rlwinm	r0, r0, 30, 1		C r0 = GT bit of cr7 (1 if r29 > r30, else 0)
	subfc	r9, r28, r31		C result unused; CA = 1 iff r31 >= r28
	addze.	r0, r0			C zero only if high limbs equal and r31 < r28
	beq	cr0, L(bck)		C remainder already below the divisor
	subfc	r31, r28, r31		C r29:r31 -= r30:r28
	subfe	r29, r30, r29
	addi	r8, r8, 1
	b	L(bck)
EPILOGUE()