dnl  PowerPC-64 mpn_gcd_11.

dnl  Copyright 2000-2002, 2005, 2009, 2011-2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		    cycles/bit (approx)
C POWER3/PPC630		 ?
C POWER4/PPC970		 8.5	obsolete
C POWER5		 ?
C POWER6		 ?
C POWER7		 9.4	obsolete
C POWER8		 ?
C POWER9		 ?
C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1

C mpn_gcd_11 -- branch-free subtract-and-shift gcd of two single limbs.
C
C In:     u0 (r3), v0 (r4)
C Out:    r3 holds the final value of a when the routine returns
C Writes: r0, r3-r12, CR0 (dot forms of subf) and CA (subfc)
C Memory: none; there are no loads or stores and the stack is untouched
C
C Each pass of the loop replaces the pair (a, d) by
C     a = |a - d| with its trailing zero bits shifted out
C     d = min(a, d)
C and the loop stops as soon as a == d.  Both candidates for every result
C are computed and one is selected with an all-ones/all-zeros mask, so the
C only branch is the loop branch itself.
C
C NOTE(review): factors of two shifted out of the difference are never put
C back and v0 is not normalised before the first pass, so this presumably
C expects both u0 and v0 to be odd (hence nonzero) -- confirm against the
C callers.

define(`u0',    `r3')
define(`v0',    `r4')

define(`mask', `r0')dnl
define(`a1',   `r4')dnl
define(`a2',   `r5')dnl
define(`d1',   `r6')dnl
define(`d2',   `r7')dnl
define(`cnt',  `r9')dnl

ASM_START()
PROLOGUE(mpn_gcd_11)
	li	r12, 63			C constant used to turn clz into ctz
	mr	r8, v0			C d = v0, which frees r4 for use as a1
	subf.	r10, u0, v0		C r10 = d - a
	beq	L(end)			C a == d: r3 already holds the result

	ALIGN(16)
L(top):	subfc	r11, r8, r3		C r11 = a - d, CA = 1 iff a >= d
	and	d2, r11, r10		C x & -x: lowest set bit of the difference
	subfe	mask, mask, mask	C mask = CA - 1: 0 if a >= d, else all ones
	cntlzd	cnt, d2			C cnt = 63 - index of that lowest set bit
	and	a1, r10, mask		C d - a  if a <  d, else 0
	andc	a2, r11, mask		C a - d  if a >= d, else 0
	and	d1, r3, mask		C a      if a <  d, else 0
	andc	d2, r8, mask		C d      if a >= d, else 0
	or	r3, a1, a2		C new a = |a - d|
	subf	cnt, cnt, r12		C cnt = 63 - cnt = trailing zeros of new a
	or	r8, d1, d2		C new d = min(a, d)
	srd	r3, r3, cnt		C shift the trailing zeros out of new a
	subf.	r10, r3, r8		C r10 = d - a
	bne	L(top)			C repeat until a == d

L(end):	blr
EPILOGUE()