dnl  PPC-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.

dnl  Copyright 2007, 2008 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C                       cycles/limb
C                       norm    frac
C POWER3/PPC630
C POWER4/PPC970         39*     39*
C POWER5                39*     39*

C STATUS
C  * Performance fluctuates like crazy

C INPUT PARAMETERS
C qp  = r3
C fn  = r4
C up  = r5
C un  = r6
C dp  = r7


ifdef(`DARWIN',,`
define(`r2',`r31')')		C FIXME!
ASM_START()

EXTERN_FUNC(mpn_invert_limb)

C mpn_divrem_2 -- divide the limbs at up by the two limbs at dp, storing
C quotient limbs through qp and leaving two remainder limbs in the up area.
C
C Incoming arguments: qp = r3, fn = r4, up = r5, un = r6, dp = r7.
C
C Register roles, as established by the code below:
C   r23      value handed back in r3: 0, or 1 when the divisor was
C            subtracted once from the top two dividend limbs up front
C   r24      quotient store pointer, starts at qp + 8*(fn+un-3), walks down
C   r25      copy of fn (r4 is reused as scratch inside the loop)
C   r26      dividend pointer, starts at up + 8*(un-3), walks down
C   r27      fn+un-3, decremented once per stored quotient limb
C   r28      limb at dp+0
C   r30      limb at dp+8
C   r29:r31  running two-limb partial remainder, r29 the more significant
C   r3       inside the loop: result of mpn_invert_limb(r30) after the
C            adjustment loop at L(40)
C   r0, r4-r11  scratch
C
C r23-r31 and LR are saved on entry and restored before returning.

PROLOGUE(mpn_divrem_2)
	mflr	r0			C fetch return address
	std	r23, -72(r1)		C save r23-r31 below the incoming stack pointer
	std	r24, -64(r1)
	std	r25, -56(r1)
	std	r26, -48(r1)
	std	r27, -40(r1)
	std	r28, -32(r1)
	std	r29, -24(r1)
	std	r30, -16(r1)
	std	r31, -8(r1)
	std	r0, 16(r1)		C stash return address at 16(r1)
	stdu	r1, -192(r1)		C allocate a 192-byte frame
	mr	r24, r3			C r24 = qp
	mr	r25, r4			C r25 = fn
	sldi	r0, r6, 3		C r0 = 8*un
	add	r26, r5, r0
	addi	r26, r26, -24		C r26 = up + 8*(un-3)
	ld	r30, 8(r7)		C r30 = limb at dp+8
	ld	r28, 0(r7)		C r28 = limb at dp+0
	ld	r29, 16(r26)		C r29 = limb at up + 8*(un-1)
	ld	r31, 8(r26)		C r31 = limb at up + 8*(un-2)

C Initial step: if r29:r31 >= r30:r28 (unsigned, two limbs), subtract
C r30:r28 from r29:r31 and set r23 to 1; otherwise leave r23 at 0.
C ifelse(0,1,...) compares the strings 0 and 1, which differ, so m4 emits
C the SECOND body; the first body is a disabled, branchier form of the same
C test.  In the active body r0 ends up as (bit extracted from cr7) plus the
C carry of r31 - r28, and the subtraction is skipped when that sum is zero.
C NOTE(review): rlwinm is written with four operands; read as rotate-left-30
C with mask 1 it extracts the GT bit of cr7, which matches the disabled
C body -- confirm against the assembler documentation.
ifelse(0,1,`
	li	r23, 0
	cmpld	cr7, r29, r30
	blt	cr7, L(8)
	bgt	cr7, L(9)
	cmpld	cr0, r31, r28
	blt	cr0, L(8)
L(9):	subfc	r31, r28, r31
	subfe	r29, r30, r29
	li	r23, 1
',`
	li	r23, 0
	cmpld	cr7, r29, r30
	blt	cr7, L(8)
	mfcr	r0
	rlwinm	r0, r0, 30, 1
	subfc	r9, r28, r31
	addze.	r0, r0
	nop
	beq	cr0, L(8)
	subfc	r31, r28, r31
	subfe	r29, r30, r29
	li	r23, 1
')

L(8):
	add	r27, r25, r6		C r27 = fn + un
	addic.	r27, r27, -3		C r27 = fn + un - 3, cr0 set from result
	blt	cr0, L(18)		C negative: no quotient limbs to produce
	mr	r3, r30			C argument for the call: r30
	CALL(	mpn_invert_limb)
	nop				C NOTE(review): presumably the post-call slot for a TOC restore -- confirm
C Adjust the value returned in r3 using both divisor limbs.  r11:r10 is a
C two-word accumulator; r3 is decremented while r11 stays non-negative.
	mulld	r10, r3, r30		C r10 = low limb of r3 * r30
	mulhdu	r0, r3, r28		C r0 = high limb of r3 * r28
	addc	r8, r10, r28		C r8 = r10 + r28, carry out recorded
	subfe	r11, r1, r1		C r11 = carry - 1, i.e. 0 or -1
	addc	r10, r8, r0		C r10 = r8 + r0, carry out recorded
	addze.	r11, r11		C r11 += carry, cr0 set from r11
	blt	cr0, L(91)		C r11 negative: r3 is used as is
L(40):
	subfc	r10, r30, r10		C r11:r10 -= r30 (this insn and the next)
	addme.	r11, r11
	addi	r3, r3, -1		C r3 -= 1
	bge	cr0, L(40)		C repeat while r11 >= 0
L(91):
	addi	r5, r27, 1
	mtctr	r5			C loop count = fn + un - 2
	sldi	r0, r27, 3
	add	r24, r24, r0		C r24 = qp + 8*(fn+un-3)
	ALIGN(16)
C Main loop: one quotient limb per iteration, stored from high to low.
L(loop):
	mulhdu	r8, r29, r3		C r8 = high limb of r29 * r3
	mulld	r6, r29, r3		C r6 = low limb of r29 * r3
	addc	r6, r6, r31		C r8:r6 += r29:r31 (this insn and the next);
	adde	r8, r8, r29		C r8 is the candidate quotient limb
	mulld	r0, r30, r8		C r0 = low limb of r30 * r8
	subf	r31, r0, r31		C r31 -= r0
	mulhdu	r11, r28, r8		C r11:r10 = r28 * r8
	mulld	r10, r28, r8
	li	r7, 0			C next low limb defaults to zero
	cmpd	cr7, r27, r25
	blt	cr7, L(60)		C r27 < fn: keep the zero limb, no load
	ld	r7, 0(r26)		C otherwise fetch the next limb from up
	addi	r26, r26, -8		C and step the dividend pointer down
	nop
L(60):	subfc	r7, r28, r7		C r31:r7 -= r30:r28 (this insn and the next)
	subfe	r31, r30, r31
	subfc	r7, r10, r7		C r4:r7 = r31:r7 - r11:r10
	subfe	r4, r11, r31
	subfc	r9, r6, r4		C carry = (r4 >= r6), unsigned
	subfe	r9, r1, r1		C r9 = 0 if r4 >= r6, else -1
	andc	r6, r28, r9		C r0:r6 = r30:r28 when r9 is 0, else 0
	andc	r0, r30, r9
	addc	r31, r7, r6		C r29:r31 = r4:r7 + r0:r6
	adde	r29, r4, r0
	subf	r8, r9, r8		C r8 -= r9, i.e. r8 += 1 when r9 is -1
	cmpld	cr7, r29, r30
	bge-	cr7, L(fix)		C r29 >= r30 (hinted not-taken): go check for one more correction
L(bck):	std	r8, 0(r24)		C store quotient limb
	addi	r24, r24, -8		C step quotient pointer down
	addi	r27, r27, -1
	bdnz	L(loop)
L(18):
	std	r31, 8(r26)		C write the two remainder limbs at 8 and 16
	std	r29, 16(r26)		C bytes above the current dividend pointer
	mr	r3, r23			C return value
	addi	r1, r1, 192		C release the frame
	ld	r0, 16(r1)
	mtlr	r0			C restore return address
	ld	r23, -72(r1)		C restore r23-r31
	ld	r24, -64(r1)
	ld	r25, -56(r1)
	ld	r26, -48(r1)
	ld	r27, -40(r1)
	ld	r28, -32(r1)
	ld	r29, -24(r1)
	ld	r30, -16(r1)
	ld	r31, -8(r1)
	blr
C Out-of-line correction, entered with cr7 holding the r29 vs r30 compare.
C Uses the same mfcr/rlwinm/addze. sequence as the initial step: when the
C combined flag is non-zero, subtract r30:r28 from r29:r31 and bump r8.
L(fix):
	mfcr	r0
	rlwinm	r0, r0, 30, 1
	subfc	r9, r28, r31		C carry = (r31 >= r28), r9 itself unused
	addze.	r0, r0			C r0 = extracted CR bit + carry
	beq	cr0, L(bck)		C zero: no correction needed
	subfc	r31, r28, r31		C r29:r31 -= r30:r28
	subfe	r29, r30, r29
	addi	r8, r8, 1		C quotient limb += 1
	b	L(bck)
EPILOGUE()