dnl  AMD64 mpn_gcd_11 -- 1 x 1 gcd.

dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for AMD64 by Torbjorn
dnl  Granlund.

dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012, 2017 Free Software
dnl  Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C		cycles/bit
C AMD K8,K9	 5.5
C AMD K10	 ?
C AMD bd1	 ?
C AMD bd2	 ?
C AMD bd3	 ?
C AMD bd4	 ?
C AMD bt1	 7.1
C AMD bt2	 ?
C AMD zn1	 ?
C AMD zn2	 ?
C Intel P4	 ?
C Intel CNR	 ?
C Intel PNR	 ?
C Intel NHM	 ?
C Intel WSM	 ?
C Intel SBR	 ?
C Intel IBR	 ?
C Intel HWL	 ?
C Intel BWL	 ?
C Intel SKL	 ?
C Intel atom	 9.1
C Intel SLM	 6.9
C Intel GLM	 6.0
C Intel GLM+	 5.8
C VIA nano	 ?


C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
67 68deflit(MAXSHIFT, 7) 69deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1)) 70 71DEF_OBJECT(ctz_table,64) 72 .byte MAXSHIFT 73forloop(i,1,MASK, 74` .byte m4_count_trailing_zeros(i) 75') 76END_OBJECT(ctz_table) 77 78define(`u0', `%rdi') 79define(`v0', `%rsi') 80 81ABI_SUPPORT(DOS64) 82ABI_SUPPORT(STD64) 83 84ASM_START() 85 TEXT 86 ALIGN(64) 87PROLOGUE(mpn_gcd_11) 88 FUNC_ENTRY(2) 89 LEA( ctz_table, %r8) 90 jmp L(ent) 91 92 ALIGN(16) 93L(top): cmovc %rdx, u0 C u = |u - v| 94 cmovc %rax, v0 C v = min(u,v) 95L(mid): and $MASK, R32(%rdx) 96 movzbl (%r8,%rdx), R32(%rcx) 97 jz L(shift_alot) 98 shr R8(%rcx), u0 99L(ent): mov u0, %rax 100 mov v0, %rdx 101 sub u0, %rdx 102 sub v0, u0 103 jnz L(top) 104 105L(end): C rax = result 106 C rdx = 0 for the benefit of internal gcd_22 call 107 FUNC_EXIT() 108 ret 109 110L(shift_alot): 111 shr $MAXSHIFT, u0 112 mov u0, %rdx 113 jmp L(mid) 114EPILOGUE() 115