dnl  PowerPC-64 mpn_divexact_1 -- mpn by limb exact division.

dnl  Copyright 2006, 2010 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                       cycles/limb
C                       norm    unorm
C POWER3/PPC630        13-19
C POWER4/PPC970         16
C POWER5                16      16
C POWER6                37      46
C POWER7                12      12
C POWER8                12      12

C TODO
C  * Check if n=1 code is really an improvement.  It probably isn't.
C  * Make more similar to mode1o.asm.

C INPUT PARAMETERS
define(`rp', `r3')
define(`up', `r4')
define(`n',  `r5')
define(`d',  `r6')

C mpn_divexact_1 (rp, up, n, d)
C
C Loads n 64-bit limbs starting at up and stores n limbs starting at rp.
C For n = 1 a single divdu is used.  For n > 1 every stored limb is formed
C by a multiplication with r7, the inverse modulo 2^64 of d with its low
C zero bits shifted out, so no divide instruction is executed.
C
C Assumptions this code does not check (NOTE(review): confirm with callers):
C  * n >= 1.  n = 0 is not tested for; it would reach mtctr with n - 1.
C  * d != 0.
C  * d divides the operand exactly, as the function name implies.  The
C    n > 1 paths presumably yield the true quotient only in that case.
C
C Register roles on the n > 1 paths:
C  r12  most recently loaded source limb (up[0] on entry to either loop)
C  r10  number of low zero bits shifted out of d (0 when d is odd)
C  r8   64 - r10 (path for even d only)
C  r7   inverse of the odd d modulo 2^64
C  r5   holds n on entry; once n - 1 is in CTR it is reused as the table
C       address, as scratch, and finally as the high product limb that
C       is subtracted from the following source limb
C  r0, r9, r11  scratch
C  CA   borrow chain linking consecutive subfe instructions; subfe
C       computes (not RA) + RB + CA, so CA = 1 means no pending borrow

ASM_START()

EXTERN(binvert_limb_table)

PROLOGUE(mpn_divexact_1,toc)
	addic.	n, n, -1		C n = n - 1; records cr0 and writes CA
	ld	r12, 0(up)		C r12 = up[0]
	bne	cr0, L(2)		C more than one limb: general code
C Single limb: one unsigned hardware divide.
	divdu	r0, r12, d
	std	r0, 0(rp)
	blr
L(2):
	rldicl.	r0, d, 0, 63		C r0 = d & 1; cr0 is "ne" when d is odd
	li	r10, 0			C shift count 0 for odd d
	bne	cr0, L(7)		C d odd: nothing to shift out
C d is even: count its low zero bits and shift them out.
	neg	r0, d
	and	r0, d, r0		C r0 = lowest set bit of d, isolated
	cntlzd	r0, r0
	subfic	r0, r0, 63		C r0 = bit index of that lowest set bit
	rldicl	r10, r0, 0, 32		C r10 = r0 with the upper 32 bits cleared
	srd	d, d, r0		C d is now odd
L(7):
	mtctr	n			C CTR = n - 1 loop iterations
C r5 (n) is dead from here on and is reused.  LEA is a macro defined
C outside this file; presumably it loads the address of the table.
	LEA(	r5, binvert_limb_table)
	rldicl	r11, d, 63, 57		C r11 = (d >> 1) & 0x7f, the table index
	lbzx	r0, r5, r11		C r0 = table byte; presumably 1/d mod 2^8
C Three refinement steps, each computing x = 2*x - d*x*x.  Assuming the
C table byte is an 8-bit inverse, each step doubles the number of correct
C low bits (16, 32, 64).
	mulld	r9, r0, r0
	sldi	r0, r0, 1
	mulld	r9, d, r9
	subf	r0, r9, r0
	mulld	r5, r0, r0
	sldi	r0, r0, 1
	mulld	r5, d, r5
	subf	r0, r5, r0
	mulld	r9, r0, r0
	sldi	r0, r0, 1
	mulld	r9, d, r9
	subf	r7, r9, r0		C r7 = 1/d mod 2^64

C cr0 is still the result of the rldicl. at L(2): none of the visible
C instructions since then is a record form.  NOTE(review): confirm that
C the LEA expansion leaves cr0 and CA untouched.
	bne	cr0, L(norm)		C original d was odd: no shifting needed
C Original d was even: every source limb must be shifted right by r10 bits
C before use, pulling in low bits of the next limb.
	subfic	r8, r10, 64		C set carry as side effect
	li	r5, 0			C nothing to subtract from the first limb
	srd	r11, r12, r10		C low part of the first shifted limb

	ALIGN(16)
L(loop0):
	ld	r12, 8(up)		C next source limb
	nop				C NOTE(review): presumably scheduling padding
	addi	up, up, 8
	sld	r0, r12, r8		C low r10 bits of next limb, moved to the top
	or	r11, r11, r0		C complete shifted limb
	subfe	r9, r5, r11		C r9 = r11 - r5 - borrow; updates CA
	srd	r11, r12, r10		C begin the following shifted limb
	mulld	r0, r7, r9		C quotient limb = r9 * inverse mod 2^64
	mulhdu	r5, r0, d		C high limb of quotient * d, for next round
	std	r0, 0(rp)
	addi	rp, rp, 8
	bdnz	L(loop0)

C Last limb: r11 holds the final source limb shifted right; there is no
C further limb to merge in.
	subfe	r0, r5, r11
	mulld	r0, r7, r0
	std	r0, 0(rp)
	blr

C Odd d.  NOTE(review): the subfe chain below appears to rely on CA still
C being 1 from the addic. at entry (n was nonzero); no visible instruction
C on this path writes CA in between.
	ALIGN(16)
L(norm):
	mulld	r11, r12, r7		C first quotient limb = up[0] * inverse
	mulhdu	r5, r11, d		C high limb of quotient * d
	std	r11, 0(rp)
	ALIGN(16)
L(loop1):
	ld	r9, 8(up)		C next source limb
	addi	up, up, 8
	subfe	r5, r5, r9		C r5 = r9 - r5 - borrow; updates CA
	mulld	r11, r7, r5		C quotient limb
	mulhdu	r5, r11, d	C result not used in last iteration
	std	r11, 8(rp)		C rp is advanced after the store, hence 8
	addi	rp, rp, 8
	bdnz	L(loop1)

	blr
EPILOGUE()
ASM_END()