dnl  HP-PA 1.1 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
dnl  result to a second limb vector.

dnl  Copyright 1992, 1993, 1994, 2000, 2001, 2002 Free Software Foundation,
dnl  Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C INPUT PARAMETERS
C res_ptr       r26
C s1_ptr        r25
C size          r24
C s2_limb       r23

C This runs at 11 cycles/limb on a PA7000.  With the used instructions, it can
C not become faster due to data cache contention after a store.  On the PA7100
C it runs at 10 cycles/limb.

C There are some ideas described in mul_1.asm that apply to this code too.

C OVERVIEW
C For each of the size limbs at s1_ptr, form the 64-bit product with s2_limb,
C add it (together with the running carry) to the limb at res_ptr, and store
C the low word of the sum back through res_ptr.  The final high word plus
C carries is left in r28 when the routine returns.
C
C The size is not checked against zero: the first decrement only tests for
C "exactly one limb", so the code below relies on size being at least 1.
C
C REGISTER AND SCRATCH USAGE
C r30           stack pointer; raised by 64 on entry, lowered by 64 on return
C -16(r30)      8-byte scratch slot.  s2_limb goes through it into fr4; after
C               that every product is stored here with fstds and read back as
C               two words: -16(r30) is the high word, -12(r30) the low word.
C fr4           s2_limb
C fr5, fr7      limbs loaded from s1_ptr (post-incremented by 4 each load)
C fr6, fr8      64-bit products from xmpyu
C r29           limb loaded from res_ptr
C r19           sum that is about to be stored through res_ptr
C r1            low word of the most recently stored product
C r28           high word of a product, plus carries; final result
C r24           limbs of s1 not yet loaded (after the set-up code)
C r2            return address, used by bv 0(%r2)
C
C BRANCH DELAY SLOTS
C The instruction that follows each addib and bv is in the branch delay slot
C and is executed whether or not the branch is taken (the code depends on
C this: fr4 is only ever loaded in the delay slot of the first addib, yet
C just_one_limb uses it).  Delay-slot instructions are indented by one extra
C space.

ASM_START()
PROLOGUE(mpn_addmul_1)
C	.callinfo	frame=64,no_calls

C Set-up: allocate the frame, fetch s1[0], and move s2_limb into fr4 via the
C scratch slot.
        ldo             64(%r30),%r30
        fldws,ma        4(%r25),%fr5            C fr5 = s1[0], s1_ptr += 4
        stw             %r23,-16(%r30)          C move s2_limb ...
        addib,=         -1,%r24,L(just_one_limb) C size -= 1; branch if now zero
         fldws          -16(%r30),%fr4          C ... into fr4
        add             %r0,%r0,%r0             C clear carry

C At least two limbs.  Compute the first two products up front so that the
C loop can always work on a product that is already sitting in memory.
        xmpyu           %fr4,%fr5,%fr6          C product 0
        fldws,ma        4(%r25),%fr7            C fr7 = s1[1], s1_ptr += 4
        fstds           %fr6,-16(%r30)          C spill product 0
        xmpyu           %fr4,%fr7,%fr8          C product 1
        ldw             -12(%r30),%r19          C least significant limb in product
        ldw             -16(%r30),%r28          C high word of product 0

        fstds           %fr8,-16(%r30)          C spill product 1
        addib,=         -1,%r24,L(end)          C size -= 1; exactly two limbs?
         ldw            -12(%r30),%r1           C low word of product 1

C Main loop
C On entry to every iteration:
C   r19 = value still to be added to the limb at res_ptr
C   r28 = high word (plus carry) belonging to the same position
C   r1  = low word of the next product, which is also in the scratch slot
C   r24 = number of s1 limbs not yet loaded (non-zero)
C Each iteration finishes one result limb and starts the product two limbs
C ahead of it.
LDEF(loop)
        ldws            0(%r26),%r29            C r29 = current result limb
        fldws,ma        4(%r25),%fr5            C next s1 limb, s1_ptr += 4
        add             %r29,%r19,%r19          C add into result, sets carry
        stws,ma         %r19,4(%r26)            C store it, res_ptr += 4
        addc            %r28,%r1,%r19           C high + next low + carry
        xmpyu           %fr4,%fr5,%fr6          C start the following product
        ldw             -16(%r30),%r28          C high word of next product
        fstds           %fr6,-16(%r30)          C spill the new product
        addc            %r0,%r28,%r28           C fold carry into that high word
        addib,<>        -1,%r24,L(loop)         C count down; loop while non-zero
         ldw            -12(%r30),%r1           C low word of the new product

C Wind-down: no more s1 limbs to load.  Two result limbs remain: the one
C pending in r19, and the one for the product still held in r1 and the
C scratch slot.
LDEF(end)
        ldw             0(%r26),%r29            C r29 = current result limb
        add             %r29,%r19,%r19          C add into result, sets carry
        stws,ma         %r19,4(%r26)            C store it, res_ptr += 4
        addc            %r28,%r1,%r19           C high + last low + carry
        ldw             -16(%r30),%r28          C high word of the last product
        ldws            0(%r26),%r29            C r29 = last result limb
        addc            %r0,%r28,%r28           C fold carry into high word
        add             %r29,%r19,%r19          C add into last result limb
        stws,ma         %r19,4(%r26)            C store it, res_ptr += 4
        addc            %r0,%r28,%r28           C r28 = final high word + carry
        bv              0(%r2)                  C return
         ldo            -64(%r30),%r30          C release the 64-byte frame

C size was 1: a single multiply-and-add with no pipelining.  fr5 already holds
C s1[0] and fr4 holds s2_limb (loaded in the delay slot of the branch here).
LDEF(just_one_limb)
        xmpyu           %fr4,%fr5,%fr6          C the only product
        ldw             0(%r26),%r29            C r29 = result limb
        fstds           %fr6,-16(%r30)          C spill the product
        ldw             -12(%r30),%r1           C low word
        ldw             -16(%r30),%r28          C high word
        add             %r29,%r1,%r19           C result + low, sets carry
        stw             %r19,0(%r26)            C store the result limb
        addc            %r0,%r28,%r28           C r28 = high word + carry
        bv              0(%r2)                  C return
         ldo            -64(%r30),%r30          C release the 64-byte frame
EPILOGUE()