dnl  Itanium-2 mpn_gcd_11

dnl  Copyright 2002-2005, 2012, 2013, 2015, 2019 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C           cycles/bitpair (1x1 gcd)
C Itanium:       ?
C Itanium 2:     4.5


ASM_START()

C ctz_table[n], for 0 < n <= MASK, is one less than the number of trailing
C zeros of n.  The loop below always shifts right by one bit before it applies
C the table value, so the two shifts together remove every trailing zero.
C ctz_table[0] is MAXSHIFT; when the low MAXSHIFT bits of the difference are
C all zero the loop also branches to L(count_better), which replaces the
C loaded value with an exact count.

deflit(MAXSHIFT, 7)
deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))

        .rodata
        ALIGN(m4_lshift(1,MAXSHIFT))    C align table to allow using dep
ctz_table:
        data1   MAXSHIFT
forloop(i,1,MASK,
`       data1   m4_count_trailing_zeros(i)-1
')

define(`x0', r32)
define(`y0', r33)

C mpn_gcd_11 -- single-limb binary gcd.
C
C Register roles, as used by the code below:
C   x0 (r32), y0 (r33)  the two operands; each iteration leaves the smaller
C                       one in y0 and the shifted difference in x0
C   r8                  return value, copied from y0 once x0 == y0
C   r22                 address of ctz_table, loaded via @ltoff off r1
C   r19                 y0 - x0, computed at L(ent) for use at L(top)
C   r21                 r22 with its low MAXSHIFT bits replaced by those of r19
C   r20                 r19 & MASK at L(top); x0 - 1 in L(count_better)
C   r23                 r19 & 6 at L(top); trailing-zero mask in L(count_better)
C   r16                 variable shift count for the ...000 case
C   p6, p7              at L(ent): x0 > y0 and its complement
C                       after the tests in L(top): r23 == 4 and r23 == 0
C   p8                  x0 != y0, keeps the loop running
C   p10                 low MAXSHIFT bits of r19 are all zero
C
C NOTE(review): the difference is shifted right by one unconditionally, which
C looks like it relies on both operands being odd on entry -- confirm against
C the callers.

PROLOGUE(mpn_gcd_11)
        .prologue
        .body
        addl            r22 = @ltoff(ctz_table), r1
        ;;
        ld8             r22 = [r22]
        br              L(ent)
        ;;

C First instruction group of L(top): consume p6/p7 and r19 from L(ent) to put
C the smaller operand in y0 and the absolute difference in x0.  The bit tests
C run on r19, which is the negation of the new x0 when p6 is set; negation does
C not change the position of the lowest set bit, so the tests remain valid.
C Second group: shift once, classify bits 2..1 of the difference, fetch the
C table count, and leave the loop body for L(count_better) when the table
C cannot resolve the count.

        ALIGN(32)
L(top):
        .pred.rel "mutex", p6,p7
 {.mmi; (p7)    mov             y0 = x0
        (p6)    sub             x0 = x0, y0
                dep             r21 = r19, r22, 0, MAXSHIFT     C concat(table,lowbits)
}{.mmi;         and             r20 = MASK, r19
        (p7)    mov             x0 = r19
                and             r23 = 6, r19
        ;;
}{.mmi;         cmp.eq          p6,p0 = 4, r23
                cmp.eq          p7,p0 = 0, r23
                shr.u           x0 = x0, 1              C shift-by-1, always OK
}{.mmb;         ld1             r16 = [r21]
                cmp.eq          p10,p0 = 0, r20
        (p10)   br.spnt.few.clr  L(count_better)
        ;;
}
L(bck):
        .pred.rel "mutex", p6,p7
 {.mii;         nop             0
        (p6)    shr.u           x0 = x0, 1              C difference was ...100 before shift-by-1 above
        (p7)    shr.u           x0 = x0, r16            C difference was ...000 before shift-by-1 above
        ;;
}
L(ent):
 {.mmi;         sub             r19 = y0, x0
                cmp.gtu         p6,p7 = x0, y0
                cmp.ne          p8,p0 = x0, y0
}{.mmb;         nop             0
                nop             0
        (p8)    br.sptk.few.clr L(top)
}

L(end): mov             r8 = y0
        br.ret.sptk.many b0

C Reached when the low MAXSHIFT bits of the difference are all zero.  x0 has
C already been shifted right once here.  (x0 - 1) & ~x0 has a one in exactly
C the trailing-zero positions of x0, so its population count is the remaining
C shift; L(bck) then applies it through the p7 path.

L(count_better):
        add             r20 = -1, x0
        ;;
        andcm           r23 = r20, x0
        ;;
        popcnt          r16 = r23
        br              L(bck)
EPILOGUE()