dnl  ARM mpn_mod_34lsub1 -- remainder modulo 2^24-1.

dnl  Copyright 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C StrongARM	 ?
C XScale	 ?
C Cortex-A8	 ?
C Cortex-A9	 1.33
C Cortex-A15	 ?

define(`ap',	r0)
define(`n',	r1)

C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
C
C Returns a value congruent to {up,n} modulo 2^24-1.  The result is a plain
C sum of 24-bit (or narrower) fields plus a small carry count; no final
C reduction is performed here, so it may exceed 2^24-1.
C
C Method: limbs are 32 bits and 2^96 = 1 (mod 2^24-1), so limb i only matters
C through i mod 3.  Limbs are summed into three column accumulators with the
C carry out of the third column fed back into the first (end-around carry).
C The three 32-bit sums are then folded at bit offsets 0, 8 and 16, which are
C 0, 32 and 64 reduced mod 24.
C
C Register use:
C   r0       ap on entry (post-incremented source pointer), result on exit
C   r1       n on entry (biased limb counter), scratch in the final fold
C   r2       sum of limbs 0, 3, 6, ...   weight 2^0
C   r3       sum of limbs 1, 4, 7, ...   weight 2^32 = 2^8  (mod 2^24-1)
C   r12      sum of limbs 2, 5, 8, ...   weight 2^64 = 2^16 (mod 2^24-1)
C   r4-r6    limbs just loaded
C   r7       carries out of r12 not yet re-added (weight 2^96 = 1), later scratch
C
C r4-r7 are saved and restored on the stack; r12 is used without saving.
C
C n = 0 gets no special treatment: it takes the one-limb path and reads ap[0].

C TODO
C  * Write cleverer summation code.
C  * Consider loading 6 64-bit aligned registers at a time, to approach 1 c/l.

ASM_START()
	TEXT
	ALIGN(32)
PROLOGUE(mpn_mod_34lsub1)
	push	{ r4, r5, r6, r7 }

	subs	n, n, #3
	mov	r7, #0			C no pending carries yet
	blt	L(le2)			C n <= 2

	ldmia	ap!, { r2, r3, r12 }	C seed the accumulators with limbs 0..2
	subs	n, n, #3
	blt	L(sum)			C n <= 5
	adds	r0, r0, #0		C clear carry
	sub	n, n, #3		C counter is now the original n minus 9
	b	L(mid)

C Main loop, three limbs per pass.  The carry flag is live across the whole
C loop: it links r2 -> r3 -> r12 and then wraps from r12 back into r2 on the
C next pass.  Only tst (N and Z updated, C untouched) and the non-flag-setting
C sub sit between the adcs groups.
L(top):	adcs	r2, r2, r4
	adcs	r3, r3, r5
	adcs	r12, r12, r6
L(mid):	ldmia	ap!, { r4, r5, r6 }
	tst	n, n			C sign of counter before the decrement
	sub	n, n, #3
	bpl	L(top)

	add	n, n, #3		C undo the extra decrement: n is -3, -2 or -1

	adcs	r2, r2, r4		C add the final triple loaded at mid
	adcs	r3, r3, r5
	adcs	r12, r12, r6
	movcs	r7, #1			C r7 <= 1

C Here n is -3, -2 or -1, meaning 0, 1 or 2 limbs remain.  cmn computes n + 2:
C C is set when n >= -2 and Z is set when n = -2.  The predicated instructions
C below load each remaining limb or substitute zero for it.
L(sum):	cmn	n, #2
	movlo	r4, #0			C n = -3: no limbs left
	ldrhs	r4, [ap], #4		C n >= -2: at least one limb left
	movls	r5, #0			C n <= -2: no second limb
	ldrhi	r5, [ap], #4		C n = -1: second limb present

	adds	r2, r2, r4
	adcs	r3, r3, r5
	adcs	r12, r12, #0
	adc	r7, r7, #0		C r7 <= 2

C Fold the three 32-bit column sums and the carry count into one value.
L(sum2):
	bic	r0, r2, #0xff000000	C r2 bits 0..23
	add	r0, r0, r2, lsr #24	C r2 bits 24..31 wrap round to bits 0..7
	add	r0, r0, r7		C pending carries have weight 1

	lsl	r7, r3, #8
	bic	r1, r7, #0xff000000	C r3 bits 0..15 placed at bits 8..23
	add	r0, r0, r1
	add	r0, r0, r3, lsr #16	C r3 bits 16..31 wrap round to bits 0..15

	lsl	r7, r12, #16
	bic	r1, r7, #0xff000000	C r12 bits 0..7 placed at bits 16..23
	add	r0, r0, r1
	add	r0, r0, r12, lsr #8	C r12 bits 8..31 wrap round to bits 0..23

	pop	{ r4, r5, r6, r7 }
ifdef(`ARM_THUMB_MODE',
`	bx	lr
',`	mov	pc, lr
')

C Short operands.  Here n holds the original n minus 3, and r7 is still zero.
L(le2):	cmn	n, #1			C Z set exactly when the original n was 2
	bne	L(1)
	ldmia	ap!, { r2, r3 }
	mov	r12, #0			C no third column
	b	L(sum2)
L(1):	ldr	r2, [ap]		C single limb: fold it directly
	bic	r0, r2, #0xff000000
	add	r0, r0, r2, lsr #24
	pop	{ r4, r5, r6, r7 }
ifdef(`ARM_THUMB_MODE',
`	bx	lr
',`	mov	pc, lr
')
EPILOGUE()