dnl  HP-PA 2.0 mpn_add_n, mpn_sub_n

dnl  Copyright 1997, 2000, 2002, 2003, 2009, 2010 Free Software Foundation,
dnl  Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


dnl  This runs at 2 cycles/limb on PA8000 and 1.6875 cycles/limb on PA8500.  It
dnl  should be possible to reach the cache bandwidth 1.5 cycles/limb at least
dnl  with PA8500.  The problem now is stalling of the first ADD,DC after LDO,
dnl  where the processor gets confused about where carry comes from.
include(`../config.m4')

dnl  Register roles.
dnl
dnl    rp   %r26          limbs are stored here
dnl    up   %r25          first source, loaded limb by limb
dnl    vp   %r24          second source, loaded limb by limb
dnl    n    %r23          limb count, counted down by 8 per loop pass
dnl    %r22               carry-in; zeroed by the plain entry point
dnl    %r19-%r21, %r31    limb temporaries
dnl    %r28               dispatch index on entry, part of the result on exit
dnl    %r29               carry or borrow out

define(`rp',`%r26')
define(`up',`%r25')
define(`vp',`%r24')
define(`n',`%r23')

dnl  Per-operation selections.
dnl
dnl    ADCSBC           limb operation that consumes and produces the carry
dnl    INITCY           arithmetic on %r22 whose result is thrown away into
dnl                     %r0; only its effect on the carry/borrow bit matters
dnl    func, func_nc    names of the two entry points

ifdef(`OPERATION_add_n', `
  define(ADCSBC,        `add,dc')
  define(INITCY,        `addi -1,%r22,%r0')
  define(func,          mpn_add_n)
  define(func_nc,       mpn_add_nc)')
ifdef(`OPERATION_sub_n', `
  define(ADCSBC,        `sub,db')
  define(INITCY,        `subi 0,%r22,%r0')
  define(func,          mpn_sub_n)
  define(func_nc,       mpn_sub_nc)')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

ifdef(`HAVE_ABI_2_0w',
`       .level  2.0w
',`     .level  2.0
')

C Entry point taking an explicit carry-in.  It only has to get the carry
C into %r22 and then joins the common code.  The instruction chosen below
C sits in the delay slot of the branch: the 2.0n variant loads the carry
C word from the stack at -52(%r30), the 2.0w variant loads nothing
C (presumably the carry argument already arrives in %r22 -- confirm
C against the ABI).
PROLOGUE(func_nc)
        b               L(com)
ifdef(`HAVE_ABI_2_0w',
`       nop
',`     ldw             -52(%r30), %r22
')
EPILOGUE()

C Entry point without carry-in.
PROLOGUE(func)
        ldi             0, %r22                 C carry-in = 0
LDEF(com)
C The loop below is unrolled 8 ways, four instructions per limb.  With
C k = -n & 7, the first pass must skip k limb groups.  The pointers are
C biased down by 8*k bytes so that the fixed displacements of the first
C executed group address limb 0.
        sub             %r0, n, %r21            C r21 = -n
        depw,z          %r21, 30, 3, %r28       C r28 = 2 * (-n & 7)
        depw,z          %r21, 28, 3, %r21       C r21 = 8 * (-n & 7)
        sub             up, %r21, up            C bias up
        sub             vp, %r21, vp            C bias vp
        sub             rp, %r21, rp            C bias rp
C blr scales its index by 8 bytes, i.e. two instructions, so an index of
C 2*k skips 4*k instructions = k limb groups past the loop head.
        blr             %r28, %r0               C jump into the unrolled loop
        INITCY                                  C delay slot: seed carry from r22

C The numeric labels give the count of limbs handled when the first pass
C enters at that point.  Nothing branches to them by name; they are only
C reached through the computed blr above.
LDEF(loop)
        ldd             0(up), %r20
        ldd             0(vp), %r31
        ADCSBC          %r20, %r31, %r20
        std             %r20, 0(rp)
LDEF(7)
        ldd             8(up), %r21
        ldd             8(vp), %r19
        ADCSBC          %r21, %r19, %r21
        std             %r21, 8(rp)
LDEF(6)
        ldd             16(up), %r20
        ldd             16(vp), %r31
        ADCSBC          %r20, %r31, %r20
        std             %r20, 16(rp)
LDEF(5)
        ldd             24(up), %r21
        ldd             24(vp), %r19
        ADCSBC          %r21, %r19, %r21
        std             %r21, 24(rp)
LDEF(4)
        ldd             32(up), %r20
        ldd             32(vp), %r31
        ADCSBC          %r20, %r31, %r20
        std             %r20, 32(rp)
LDEF(3)
        ldd             40(up), %r21
        ldd             40(vp), %r19
        ADCSBC          %r21, %r19, %r21
        std             %r21, 40(rp)
LDEF(2)
        ldd             48(up), %r20
        ldd             48(vp), %r31
        ADCSBC          %r20, %r31, %r20
        std             %r20, 48(rp)
LDEF(1)
        ldd             56(up), %r21
        ldd             56(vp), %r19
        ADCSBC          %r21, %r19, %r21
C Pointer bumps are interleaved with the last store.  The carry produced
C above has to survive the ldo and addib instructions until the next
C ADCSBC at the loop head, or until the final add,dc after the loop.
        ldo             64(up), up
        std             %r21, 56(rp)
        ldo             64(vp), vp
        addib,>         -8, n, L(loop)          C n -= 8, loop while n > 0
        ldo             64(rp), rp              C delay slot

C Materialise the carry bit as 0 or 1.  For subtraction the value is
C turned into 1 - value so that a borrow is reported as 1.
        add,dc          %r0, %r0, %r29
ifdef(`OPERATION_sub_n',`
        subi            1, %r29, %r29
')
C Return.  The delay slot fills %r28: a copy of the result under 2.0w,
C zero otherwise (presumably the upper half of a %r28:%r29 return pair
C under the 2.0n ABI -- confirm against the calling convention).
        bve             (%r2)
ifdef(`HAVE_ABI_2_0w',
`       copy            %r29, %r28
',`     ldi             0, %r28
')
EPILOGUE()