dnl  ARM mpn_addlsh1_n and mpn_sublsh1_n

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C	     addlsh1_n       sublsh1_n
C	    cycles/limb     cycles/limb
C StrongARM	 ?		 ?
C XScale	 ?		 ?
C Cortex-A8	 ?		 ?
C Cortex-A9	 3.12		 3.7
C Cortex-A15	 ?		 ?

C TODO
C  * The addlsh1_n code runs well, but is only barely faster than mpn_addmul_1.
C    The sublsh1_n code could surely be tweaked, its REVCY slows down things
C    very much.  If two insns are really needed, it might help to separate them
C    for better micro-parallelism.

C Register arguments: rp (destination), up and vp (sources) point to arrays of
C 32-bit limbs; n is the number of limbs to process.
define(`rp', `r0')
define(`up', `r1')
define(`vp', `r2')
define(`n', `r3')

C Operation-specific macros, selected by OPERATION_addlsh1_n or
C OPERATION_sublsh1_n:
C
C   ADDSUBC      carry-propagating add (adcs) or subtract (sbcs) of one limb.
C   SAVECY(d,m)  park the C flag in register d without changing the flags.
C                add: d = m - 1 + C; m is r11, which holds -1, so d is -2 or -1.
C                sub: d = C - 1, so d is -1 or 0 (m is ignored).
C   RESTCY(s)    cmn s, #1: sets C exactly when s is -1.  For add this gives
C                back the parked flag; for sub it gives back its complement,
C                so a bit shifted out of the doubling becomes a borrow for sbcs.
C   REVCY(t)     add: expands to nothing.  sub: complements C, overwriting t,
C                so that a borrow out of sbcs becomes a set carry.
C   INICYR(d)    initial parked value: RESTCY of it yields C clear for add and
C                C set (no borrow) for sub.
C   RETVAL(s)    r0 = s + 2 + C (add) or s + 1 + C (sub); this undoes the
C                SAVECY encoding and adds the final carry or borrow.
C   r10r11       last register of the push/pop range; the add variant also
C                saves r11 because it keeps the constant -1 there.
C
C ADDSUB, SETCY and func_nc are defined but not referenced by the code below.
ifdef(`OPERATION_addlsh1_n', `
  define(`ADDSUB',	adds)
  define(`ADDSUBC',	adcs)
  define(`SETCY',	`cmp	$1, #1')
  define(`RETVAL',	`adc	r0, $1, #2')
  define(`SAVECY',	`sbc	$1, $2, #0')
  define(`RESTCY',	`cmn	$1, #1')
  define(`REVCY',	`')
  define(`INICYR',	`mov	$1, #0')
  define(`r10r11',	`r11')
  define(`func',	mpn_addlsh1_n)
  define(`func_nc',	mpn_addlsh1_nc)')
ifdef(`OPERATION_sublsh1_n', `
  define(`ADDSUB',	subs)
  define(`ADDSUBC',	sbcs)
  define(`SETCY',	`rsbs	$1, $1, #0')
  define(`RETVAL',	`adc	r0, $1, #1')
  define(`SAVECY',	`sbc	$1, $1, $1')
  define(`RESTCY',	`cmn	$1, #1')
  define(`REVCY',	`sbc	$1, $1, $1
	cmn	$1, #1')
  define(`INICYR',	`mvn	$1, #0')
  define(`r10r11',	`r10')
  define(`func',	mpn_sublsh1_n)
  define(`func_nc',	mpn_sublsh1_nc)')

MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)

C func (r0 = rp, r1 = up, r2 = vp, r3 = n)
C
C Stores up[i] + 2*vp[i] (addlsh1_n) or up[i] - 2*vp[i] (sublsh1_n) to rp[i]
C for i = 0 .. n-1, propagating carries from limb to limb.
C
C Returns in r0 the bit shifted out of the top limb of 2*vp plus the final
C carry (add) or borrow (sub), i.e. a value in 0..2.
C
C Clobbers r1-r3, r12 and the flags; r4-r10 (and r11 in the add variant) and
C r14 are saved on the stack and restored.
C
C NOTE(review): n = 0 is not handled specially; presumably callers guarantee
C n >= 1 -- confirm.
C
C Two carry chains are interleaved.  Doubling a group of vp limbs is done with
C adcs x, x, x; the carry out of the preceding add/sub group (after REVCY for
C sub) enters that doubling as its low bit.  The bit shifted out of a doubling
C is parked with SAVECY, alternately in r12 and r14, and becomes the carry-in
C (RESTCY) of the add/sub of the following group.
ASM_START()
PROLOGUE(func)
	push	{r4-r10r11, r14}

C The add variant keeps -1 in r11 as the second operand of SAVECY.
ifdef(`OPERATION_addlsh1_n', `
	mvn	r11, #0
')
	INICYR(	r14)			C r14 = nothing parked yet
	subs	n, n, #3
	blt	L(le2)			C carry clear on branch path

	cmn	r0, #0			C clear carry
	ldmia	vp!, {r8, r9, r10}
	b	L(mid)			C first group has nothing to add yet

C Main loop, unrolled twice, three limbs per half.  Each half finishes the
C add/sub of the group doubled by the other half, then doubles the next group.
L(top):	RESTCY(	r14)			C carry-in = bit parked in r14
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	ADDSUBC	r6, r6, r10
	ldmia	vp!, {r8, r9, r10}
	stmia	rp!, {r4, r5, r6}
	REVCY(r14)			C sub only: borrow -> carry (r14 is free)
	adcs	r8, r8, r8		C 2*vp group, low bit = carry/borrow
	adcs	r9, r9, r9
	adcs	r10, r10, r10
	ldmia	up!, {r4, r5, r6}
	SAVECY(	r14, r11)		C park shifted-out bit in r14
	subs	n, n, #3
	blt	L(exi)
	RESTCY(	r12)			C carry-in = bit parked in r12
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	ADDSUBC	r6, r6, r10
	ldmia	vp!, {r8, r9, r10}
	stmia	rp!, {r4, r5, r6}
	REVCY(r12)			C sub only: borrow -> carry (r12 is free)
L(mid):	adcs	r8, r8, r8
	adcs	r9, r9, r9
	adcs	r10, r10, r10
	ldmia	up!, {r4, r5, r6}
	SAVECY(	r12, r11)		C park shifted-out bit in r12
	subs	n, n, #3
	bge	L(top)

C Fell out of the second half: exchange r12 and r14 so that L(exi) sees the
C same register roles as when it is entered from the first half.
	mov	r7, r12			C swap alternating...
	mov	r12, r14		C ...carry-save...
	mov	r14, r7			C ...registers

C Finish the add/sub of the last full group of three limbs.
L(exi):	RESTCY(	r12)
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	ADDSUBC	r6, r6, r10
	stmia	rp!, {r4, r5, r6}

	REVCY(r12)
C Tail: 0, 1 or 2 limbs remain.  The code relies on C surviving the tst
C instructions; r14 holds the latest parked bit (or the INICYR value).
L(le2):	tst	n, #1			C n = {-1,-2,-3} map to [2], [1], [0]
	beq	L(e1)			C n = -2: one limb left

L(e02):	tst	n, #2
	beq	L(rt0)			C n = -3: nothing left
	ldm	vp, {r8, r9}		C n = -1: two limbs left
	adcs	r8, r8, r8
	adcs	r9, r9, r9
	ldm	up, {r4, r5}
	SAVECY(	r12, r11)
	RESTCY(	r14)
	ADDSUBC	r4, r4, r8
	ADDSUBC	r5, r5, r9
	stm	rp, {r4, r5}
	b	L(rt1)

L(e1):	ldr	r8, [vp]
	adcs	r8, r8, r8
	ldr	r4, [up]
	SAVECY(	r12, r11)
	RESTCY(	r14)
	ADDSUBC	r4, r4, r8
	str	r4, [rp]

L(rt1):	mov	r14, r12		C r14 = bit shifted out of the top limb
	REVCY(r12)			C sub only: C = final borrow
L(rt0):	RETVAL(	r14)			C r0 = shifted-out bit + carry/borrow
	pop	{r4-r10r11, r14}
ifdef(`ARM_THUMB_MODE',
`	bx	r14
',`	mov	pc, r14
')
EPILOGUE()