dnl  ARM mpn_addlsh1_n and mpn_sublsh1_n

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     addlsh1_n       sublsh1_n
C	    cycles/limb     cycles/limb
C StrongARM	 ?		 ?
C XScale	 ?		 ?
C Cortex-A7	 ?		 ?
C Cortex-A8	 ?		 ?
C Cortex-A9	3.12		3.7
C Cortex-A15	 ?		 ?

C TODO
C  * The addlsh1_n code runs well, but is only barely faster than mpn_addmul_1.
C    The sublsh1_n code could surely be tweaked, its REVCY slows down things
C    very much.  If two insns are really needed, it might help to separate them
C    for better micro-parallelism.
C Argument registers, in the order the four arguments arrive in r0-r3.
define(`rp', `r0')
define(`up', `r1')
define(`vp', `r2')
define(`n',  `r3')

C Operation-specific macros.  The build is presumably expected to define
C exactly one of OPERATION_addlsh1_n and OPERATION_sublsh1_n.
C
C   ADDSUBC       Flag-setting, carry-propagating op that combines a limb of
C                 up with a doubled limb of vp: adcs (add) or sbcs (sub).
C   SAVECY(d,s)   Copy the C flag into register d without touching the flags.
C                 add: d = s - 1 + C; s is always r11, which holds all ones,
C                      so d = -1 if C was set and -2 if C was clear.
C                 sub: d = C - 1, i.e. 0 if C was set and -1 if C was clear
C                      (the second argument is ignored).
C   RESTCY(r)     cmn r, #1 -- sets C exactly when r is all ones.
C                 add: C comes back as it was saved.
C                 sub: C comes back inverted, which turns a carry out of the
C                      doubling chain into a borrow (C clear) for sbcs.
C   REVCY(r)      sub only: invert C, clobbering r.  Converts the not-borrow
C                 left by sbcs into a plain carry for the adcs that follow.
C                 Expands to nothing for add.
C   INICYR(r)     Initial saved-carry value for which RESTCY yields no
C                 incoming carry (add: 0) or no incoming borrow (sub: -1).
C   RETVAL(r)     r0 = r + bias + C, where the bias (2 for add, 1 for sub)
C                 undoes the SAVECY encoding, so r0 becomes the saved carry
C                 bit plus the current C flag.
C   r10r11        Last register of the push/pop range: r11 for add, which
C                 needs r11 for the all-ones constant, r10 for sub.
C   func          Name of the function being generated.
C
C ADDSUB, SETCY and func_nc are defined but not referenced by the code below.

ifdef(`OPERATION_addlsh1_n', `
  define(`ADDSUB',	adds)
  define(`ADDSUBC',	adcs)
  define(`SETCY',	`cmp	$1, #1')
  define(`RETVAL',	`adc	r0, $1, #2')
  define(`SAVECY',	`sbc	$1, $2, #0')
  define(`RESTCY',	`cmn	$1, #1')
  define(`REVCY',	`')
  define(`INICYR',	`mov	$1, #0')
  define(`r10r11',	`r11')
  define(`func',	mpn_addlsh1_n)
  define(`func_nc',	mpn_addlsh1_nc)')
ifdef(`OPERATION_sublsh1_n', `
  define(`ADDSUB',	subs)
  define(`ADDSUBC',	sbcs)
  define(`SETCY',	`rsbs	$1, $1, #0')
  define(`RETVAL',	`adc	r0, $1, #1')
  define(`SAVECY',	`sbc	$1, $1, $1')
  define(`RESTCY',	`cmn	$1, #1')
  define(`REVCY',	`sbc	$1, $1, $1
			cmn	$1, #1')
  define(`INICYR',	`mvn	$1, #0')
  define(`r10r11',	`r10')
  define(`func',	mpn_sublsh1_n)
  define(`func_nc',	mpn_sublsh1_nc)')

MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)

C func (rp, up, vp, n)
C
C Computes {up,n} + 2*{vp,n} (add variant) or {up,n} - 2*{vp,n} (sub
C variant) limb by limb and stores the n result limbs at rp.
C
C In:   r0 = rp  destination limb pointer
C       r1 = up  first source limb pointer
C       r2 = vp  second source limb pointer, the operand that is doubled
C       r3 = n   limb count
C Out:  r0 = carry (add) or borrow (sub) out of the top limb; it is the sum
C            of the bit shifted out of vp and the carry or borrow of the
C            final add or subtract, so it can be 0, 1 or 2.
C Uses: r4-r10 (plus r11 for add), r12 and r14.  r4-r10/r11 and r14 are
C       pushed and popped here; r12 is used as scratch without being saved.
C
C Carry scheme.  Two carry chains run through the operands: the doubling
C chain (adcs rX, rX, rX) and the add/sub chain (ADDSUBC).  Per block of
C limbs, the carry out of the add/sub chain stays in the C flag and becomes
C the carry in of the NEXT doubling, while the carry out of a doubling is
C parked in a register by SAVECY and fed into the add/sub of the SAME block
C by RESTCY.  Both carries have the same weight, so swapping the chain each
C one enters does not change the result.  r12 and r14 alternate as the
C parking register from one block to the next.
C
C NOTE(review): n is presumably at least 1.  With n = 0 control would reach
C L(rt0) with r14 still holding its INICYR value -- confirm against callers.

ASM_START()
PROLOGUE(func)
	push	{r4-r10r11, r14}

C The add variant keeps all ones in r11 as the second operand of SAVECY.
ifdef(`OPERATION_addlsh1_n', `
	mvn	r11, #0
')
	INICYR(	r14)			C no carry/borrow into the first add/sub
	subs	n, n, #3
	blt	L(le2)			C carry clear on branch path

	cmn	r0, #0			C clear carry
	ldmia	vp!, {r8, r9, r10}
	b	L(mid)			C enter at the doubling of the first block

C Main loop, unrolled twice with three limbs per half.  The two halves are
C identical except that the roles of r12 and r14 are exchanged.
L(top):	RESTCY(	r14)			C C = parked doubling carry of this block
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	ADDSUBC	r6, r6, r10
	ldmia	vp!, {r8, r9, r10}
	stmia	rp!, {r4, r5, r6}
	REVCY(r14)
	adcs	r8, r8, r8		C double next block, carry in from add/sub
	adcs	r9, r9, r9
	adcs	r10, r10, r10
	ldmia	up!, {r4, r5, r6}
	SAVECY(	r14, r11)		C park doubling carry before subs changes C
	subs	n, n, #3
	blt	L(exi)
	RESTCY(	r12)
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	ADDSUBC	r6, r6, r10
	ldmia	vp!, {r8, r9, r10}
	stmia	rp!, {r4, r5, r6}
	REVCY(r12)
L(mid):	adcs	r8, r8, r8
	adcs	r9, r9, r9
	adcs	r10, r10, r10
	ldmia	up!, {r4, r5, r6}
	SAVECY(	r12, r11)
	subs	n, n, #3
	bge	L(top)

C Leaving from the second half: exchange r12 and r14 so that L(exi) sees the
C same register roles as when it is entered from the first half.
	mov	r7, r12			C swap alternating...
	mov	r12, r14		C ...carry-save...
	mov	r14, r7			C ...registers

C Finish the block whose doubled limbs are still pending in r8-r10.  After
C this, r14 holds the parked carry of the most recent doubling.
L(exi):	RESTCY(	r12)
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	ADDSUBC	r6, r6, r10
	stmia	rp!, {r4, r5, r6}

	REVCY(r12)
C Tail: 0, 1 or 2 limbs remain.  The adcs below consume the C flag, so the
C tst instructions are relied on to leave C unchanged.
L(le2):	tst	n, #1			C n = {-1,-2,-3} map to [2], [1], [0]
	beq	L(e1)

L(e02):	tst	n, #2
	beq	L(rt0)			C nothing left, r14 and C are final
	ldm	vp, {r8, r9}		C two limbs left
	adcs	r8, r8, r8
	adcs	r9, r9, r9
	ldm	up, {r4, r5}
	SAVECY(	r12, r11)
	RESTCY(	r14)
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	stm	rp, {r4, r5}
	b	L(rt1)

L(e1):	ldr	r8, [vp]		C one limb left
	adcs	r8, r8, r8
	ldr	r4, [up]
	SAVECY(	r12, r11)
	RESTCY(	r14)
	ADDSUBC	r4, r4, r8
	str	r4, [rp]

C Return: r14 = parked carry of the last doubling, C = carry of the last
C add/sub (turned into a plain carry by REVCY for the sub variant).
L(rt1):	mov	r14, r12
	REVCY(r12)
L(rt0):	RETVAL(	r14)
	pop	{r4-r10r11, r14}
	return	r14
EPILOGUE()