dnl  ARM v8a mpn_gcd_22.

dnl  Copyright 2019 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

changecom(blah)

C	     cycles/bit (approx)
C Cortex-A35	 ?
C Cortex-A53	 7.26
C Cortex-A55	 ?
C Cortex-A57	 ?
C Cortex-A72	 5.72
C Cortex-A73	 6.43
C Cortex-A75	 ?
C Cortex-A76	 ?
C Cortex-A77	 ?
C ---------------------------------------------------------------------------
C mpn_gcd_22 (u1, u0, v1, v0)
C
C Greatest common divisor of the two-limb values u = {u1,u0} and v = {v1,v0}.
C
C In:    x0 = u1 (high limb of u)      x1 = u0 (low limb of u)
C        x2 = v1 (high limb of v)      x3 = v0 (low limb of v)
C Out:   x0 = low limb of the result   x1 = high limb of the result
C Uses:  x4-x8, x11, x12, x14 and the condition flags.  Leaf routine: no
C        stack access and no calls.
C
C NOTE(review): the two-limb shift step merges the limbs correctly only when
C cnt != 0, i.e. when the low limb of u - v is even.  That holds if u0 and v0
C are both odd -- presumably a precondition of this routine; confirm against
C the C prototype and its callers.
C
C The bare numbers in the trailing comments are kept from the original source;
C they appear to be scheduling (cycle) annotations.
C ---------------------------------------------------------------------------

define(`u1',	`x0')
define(`u0',	`x1')
define(`v1',	`x2')
define(`v0',	`x3')

define(`t0',	`x5')
define(`t1',	`x6')
define(`cnt',	`x7')
define(`tnc',	`x8')

ASM_START()
PROLOGUE(mpn_gcd_22)

C Two-limb loop, repeated while u1 or v1 is nonzero.  Each pass replaces
C   u <- |u - v| >> cnt     (cnt = trailing zero bits of the low limb of u - v)
C   v <- min(u, v)
	ALIGN(16)
L(top):
	subs	t0, u0, v0		C 0 6
C cbz does not touch the flags, so sbcs below still consumes the borrow
C produced by subs.
	cbz	t0, L(lowz)
C After sbcs: carry set means u >= v, carry clear means u < v.
	sbcs	t1, u1, v1		C 1 7

C Bit-reverse the low difference so that clz (below) counts its trailing
C zeros.  Taken before the negation; negating does not change that count.
	rbit	cnt, t0			C 1

C If u < v, negate {t1,t0}.  t0 is nonzero on this path, so the two-limb
C negation is exactly {~t1, -t0} with no carry between the limbs.
	cneg	t0, t0, cc		C 2
	cinv	t1, t1, cc		C 2 u = |u - v|

C Also entered from L(lowz) with t1 = 0 and the flags of the high-limb compare.
L(bck):
	csel	v0, v0, u0, cs		C 2
	csel	v1, v1, u1, cs		C 2 v = min(u,v)

	clz	cnt, cnt		C 2
C tnc = -cnt; register-specified shifts use the amount modulo 64, so this
C acts as 64 - cnt for cnt in 1..63.
	sub	tnc, xzr, cnt		C 3

C {u1,u0} = {t1,t0} >> cnt; x14 carries the bits of t1 that move into u0.
	lsr	u0, t0, cnt		C 3
	lsl	x14, t1, tnc		C 4
	lsr	u1, t1, cnt		C 3
	orr	u0, u0, x14		C 5

C Stay in the two-limb loop while either high limb is nonzero.
	orr	x11, u1, v1
	cbnz	x11, L(top)


C Both high limbs are zero: finish with a one-limb loop on u0 and v0.
	subs	x4, u0, v0		C 0
	b.eq	L(end1)			C

	ALIGN(16)
L(top1):
	rbit	x12, x4			C 1,5
	clz	x12, x12		C 2
	csneg	x4, x4, x4, cs		C v = abs(u-v), even	1
	csel	u0, v0, u0, cs		C u = min(u,v)		1
	lsr	v0, x4, x12		C 3
	subs	x4, u0, v0		C 4
	b.ne	L(top1)			C

C One-limb result.  u0 lives in x1, so it must be copied to x0 before x1 is
C cleared for the (zero) high limb.
L(end1):
	mov	x0, u0
	mov	x1, #0
	ret

L(lowz):
C We come here when v0 - u0 = 0
C 1. If v1 - u1 = 0, then gcd is u = v.
C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
	subs	t0, u1, v1
	b.eq	L(end)
C Here u - v = (u1 - v1) * 2^64.  Passing the high-limb difference to L(bck)
C as the low limb, with t1 = 0, drops the all-zero low limb (a 64-bit right
C shift); L(bck) then strips the remaining trailing zeros.
	mov	t1, #0
	rbit	cnt, t0			C 1
	cneg	t0, t0, cc		C 2
C NOTE(review): mov, rbit and cneg leave the flags from subs intact, so the
C b.eq above and this b could presumably be merged into a single
C b.ne L(bck) that falls through to L(end).  Left unchanged here.
	b	L(bck)			C FIXME: make conditional

C u = v: return {v1,v0}.  v0 is x3 and v1 is x2, so neither source register
C is overwritten before it is read.
L(end):
	mov	x0, v0
	mov	x1, v1
	ret
EPILOGUE()