dnl  ARM64 mpn_rshift.

dnl  Copyright 2013, 2014 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C Cortex-A53	 ?
C Cortex-A57	 ?

changecom(@&*$)

C mp_limb_t mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned cnt)
C
C Shift {up,n} right by cnt bits and store the n result limbs at rp.  The
C return value in x0 holds the bits shifted out, placed at the top of the
C limb, i.e. up[0] << (64-cnt), per the mpn_rshift contract.
C
C The destination pointer is moved out of x0 at entry since x0 doubles as
C the return-value accumulator.
C
C NOTE(review): this code previously used x18 as the carry-around scratch
C register.  x18 is the AAPCS64 platform register and is reserved on
C Darwin and Windows, so it must not be clobbered; the otherwise unused
C caller-saved x12 is used instead.  This is a pure register renaming.

define(`rp_arg', `x0')
define(`up',     `x1')
define(`n',      `x2')
define(`cnt',    `x3')

define(`rp',     `x16')

C tnc = -cnt.  AArch64 variable shifts use only the low 6 bits of the
C amount register, so shifting by tnc shifts by 64-cnt for 0 < cnt < 64.
define(`tnc',`x8')

ASM_START()
PROLOGUE(mpn_rshift)
	mov	rp, rp_arg		C free x0 for the return value
	sub	tnc, xzr, cnt
	tbz	n, #0, L(bx0)		C dispatch on n mod 2

L(bx1):	ldr	x4, [up,#0]		C n odd
	tbnz	n, #1, L(b11)		C dispatch on n mod 4

L(b01):	lsl	x0, x4, tnc		C return limb = up[0] << (64-cnt)
	lsr	x12, x4, cnt
	sub	n, n, #1
	cbnz	n, L(gt1)
	str	x12, [rp,#0]		C n = 1: single limb, done
	ret
L(gt1):	ldp	x5, x4, [up,#8]
	sub	up, up, #8
	sub	rp, rp, #32
	b	L(lo2)

L(b11):	lsl	x0, x4, tnc		C return limb = up[0] << (64-cnt)
	lsr	x9, x4, cnt
	ldp	x7, x6, [up,#8]
	add	n, n, #1
	sub	up, up, #24
	sub	rp, rp, #48
	b	L(lo0)

L(bx0):	ldp	x5, x4, [up,#0]		C n even
	tbz	n, #1, L(b00)

L(b10):	lsl	x0, x5, tnc		C return limb = up[0] << (64-cnt)
	lsr	x13, x5, cnt
	lsl	x10, x4, tnc
	lsr	x12, x4, cnt
	sub	n, n, #2
	cbnz	n, L(gt2)
	orr	x10, x10, x13
	stp	x10, x12, [rp,#0]	C n = 2: two limbs, done
	ret
L(gt2):	ldp	x5, x4, [up,#16]
	orr	x10, x10, x13
	str	x10, [rp,#0]
	sub	rp, rp, #24
	b	L(lo2)

L(b00):	lsl	x0, x5, tnc		C return limb = up[0] << (64-cnt)
	lsr	x13, x5, cnt
	lsl	x10, x4, tnc
	lsr	x9, x4, cnt
	ldp	x7, x6, [up,#16]
	orr	x10, x10, x13
	str	x10, [rp,#0]
	sub	up, up, #16
	sub	rp, rp, #40
	b	L(lo0)

C Main loop: 4 limbs per iteration, software pipelined so the loads for
C the next group overlap the shift/combine work of the current group.
	ALIGN(16)
L(top):	ldp	x5, x4, [up,#48]
	add	rp, rp, #32		C integrate with stp?
	add	up, up, #32		C integrate with ldp?
	orr	x11, x11, x9
	orr	x10, x10, x13
	stp	x11, x10, [rp,#16]
L(lo2):	lsl	x11, x5, tnc
	lsr	x13, x5, cnt
	lsl	x10, x4, tnc
	lsr	x9, x4, cnt
	ldp	x7, x6, [up,#32]
	orr	x11, x11, x12		C combine with carried-in high bits
	orr	x10, x10, x13
	stp	x11, x10, [rp,#32]
L(lo0):	sub	n, n, #4
	lsl	x11, x7, tnc
	lsr	x13, x7, cnt
	lsl	x10, x6, tnc
	lsr	x12, x6, cnt		C carry high bits to next iteration
	cbnz	n, L(top)

L(end):	orr	x11, x11, x9
	orr	x10, x10, x13
	stp	x11, x10, [rp,#48]
	str	x12, [rp,#64]		C most significant result limb
	ret
EPILOGUE()