dnl  ARM mpn_sec_tabselect

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library. If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C StrongARM	 ?
C XScale	 ?
C Cortex-A7	 ?
C Cortex-A8	 ?
C Cortex-A9	 2.33
C Cortex-A15	 2.2

C TODO
C  * Consider using special code for small nents, either swapping the inner and
C    outer loops, or providing a few variants that completely unroll the inner
C    loops.
define(`rp',	`r0')
define(`tp',	`r1')
define(`n',	`r2')
define(`nents',	`r3')
C      which	on stack

define(`i',	`r11')
define(`j',	`r12')
define(`c',	`r14')
define(`mask',	`r7')

C mpn_sec_tabselect (rp, tp, n, nents, which)
C
C Copy entry number which of the table at tp to rp.  The table is read as
C nents entries of n limbs each, stored back to back (the entry stride is
C 4*n bytes).  Every entry is loaded and ANDed with a mask that is all ones
C for the selected entry and zero for all others, and the masked values are
C ORed together.  The instruction sequence and the addresses accessed are
C therefore the same for every value of which.
C
C Register use:
C   rp    r0   destination, advanced as limbs are stored
C   tp    r1   pointer into the table
C   n     r2   limbs per entry
C   nents r3   number of entries
C   which      fifth argument, read from the stack
C   i     r11  entries left to visit in the current pass over the table
C   j     r12  limbs per entry still to produce, minus 3
C   c     r14  countdown from which, borrows exactly when the selected
C              entry is being visited
C   mask  r7   0 or all ones, made from the borrow of the c countdown
C
C Limbs are produced three at a time by the main loop, then a tail handles
C the remaining two or one limbs.  Each loop body runs at least once, so
C nents is expected to be at least 1.

ASM_START()
PROLOGUE(mpn_sec_tabselect)
C Nine words are pushed, so the stack argument which is now 36 bytes above sp.
	push	{r4-r11, r14}

	subs	j, n, #3
	bmi	L(outer_end)

C Main loop: one pass over the whole table for each group of three limbs.
L(outer_top):
	ldr	c, [sp, #36]
	mov	i, nents
C Remember the start of this limb column; the inner loop walks tp down the
C table.
	push	{tp}

	mov	r8, #0
	mov	r9, #0
	mov	r10, #0

C The subs on c clears the carry flag only when c was zero, that is only on
C the pass visiting entry number which.  The sbc then turns that borrow into
C mask = all ones, and into mask = 0 on every other pass.
L(top):	subs	c, c, #1
	ldm	tp, {r4,r5,r6}
	sbc	mask, mask, mask
	subs	i, i, #1
	add	tp, tp, n, lsl #2
	and	r4, r4, mask
	and	r5, r5, mask
	and	r6, r6, mask
	orr	r8, r8, r4
	orr	r9, r9, r5
	orr	r10, r10, r6
C The flags still come from the subs on i.  bgt loops while the old i was
C above 1, which gives exactly nents passes.  Using bge here would make one
C extra pass and load a full entry beyond the end of the table.
	bgt	L(top)

	stmia	rp!, {r8,r9,r10}
	pop	{tp}
C Step to the next group of three limbs within the first entry.
	add	tp, tp, #12
	subs	j, j, #3
	bpl	L(outer_top)
L(outer_end):

C Here j is -1 when two limbs remain, -2 when one remains, -3 when none.
	cmp	j, #-1
	bne	L(n2)

C Tail: two limbs per entry remain.
	ldr	c, [sp, #36]
	mov	i, nents
	mov	r8, #0
	mov	r9, #0
L(tp2):	subs	c, c, #1
	sbc	mask, mask, mask
	ldm	tp, {r4,r5}
	subs	i, i, #1
	add	tp, tp, n, lsl #2
	and	r4, r4, mask
	and	r5, r5, mask
	orr	r8, r8, r4
	orr	r9, r9, r5
C Exactly nents passes, see the comment at the end of the main inner loop.
	bgt	L(tp2)
	stmia	rp, {r8,r9}
	pop	{r4-r11, r14}
	return	lr

L(n2):	cmp	j, #-2
	bne	L(n1)

C Tail: one limb per entry remains.
	ldr	c, [sp, #36]
	mov	i, nents
	mov	r8, #0
L(tp1):	subs	c, c, #1
	sbc	mask, mask, mask
	ldr	r4, [tp]
	subs	i, i, #1
	add	tp, tp, n, lsl #2
	and	r4, r4, mask
	orr	r8, r8, r4
C Exactly nents passes, see the comment at the end of the main inner loop.
	bgt	L(tp1)
	str	r8, [rp]
L(n1):	pop	{r4-r11, r14}
	return	lr
EPILOGUE()