dnl  x86 mpn_mul_1 (for 386, 486, and Pentium Pro) -- Multiply a limb vector
dnl  with a limb and store the result in a second limb vector.

dnl  Copyright 1992, 1994, 1997-2002, 2005 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C                            cycles/limb
C P5                            12.5
C P6 model 0-8,10-12             5.5
C P6 model 9  (Banias)
C P6 model 13 (Dothan)           5.25
C P4 model 0  (Willamette)      19.0
C P4 model 1  (?)               19.0
C P4 model 2  (Northwood)       19.0
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C AMD K6                        10.5
C AMD K7                         4.5
C AMD K8


C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                      mp_limb_t multiplier);
C
C Multiply each limb read from src by multiplier, add the carry limb left
C over from the previous limb, store the low 32 bits at dst and keep the
C high 32 bits as the next carry limb.  The last carry limb is returned
C in %eax.
C
C Layout of the work:
C   1. size mod 4 limbs are handled one at a time in L(tail_loop);
C   2. the remaining size/4 groups of four limbs go through the unrolled
C      L(quad_loop).
C
C Register roles:
C   %esi       source pointer, advanced as limbs are consumed
C   %edi       destination pointer, advanced as limbs are stored
C   %ecx       loop counter
C   %edx:%eax  64-bit product written by mull
C   %ebx       carry limb on entry to and exit from every loop iteration
C   %ebp       second accumulator, used only inside L(quad_loop)
C
C %edi, %esi, %ebx and %ebp are pushed on entry and popped before ret.

defframe(PARAM_DST,        4)
defframe(PARAM_SRC,        8)
defframe(PARAM_SIZE,      12)
defframe(PARAM_MULTIPLIER,16)

	TEXT
	ALIGN(8)
PROLOGUE(mpn_mul_1)
deflit(`FRAME',0)

	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp
C Four pushes moved %esp down by 16 bytes.
C NOTE(review): FRAME presumably feeds the PARAM_ offsets -- confirm in
C the defframe definition.
deflit(`FRAME',16)

	xorl	%ebx,%ebx		C carry limb starts at zero
	movl	PARAM_SRC,%esi
	movl	PARAM_DST,%edi
	movl	PARAM_SIZE,%ecx

	andl	$3,%ecx			C ecx = size mod 4
	jz	L(tail_done)		C nothing left over, go to the groups

C One limb per iteration, ecx = leftover limbs still to do, always > 0 here.
L(tail_loop):
	movl	(%esi),%eax
	leal	4(%esi),%esi
	mull	PARAM_MULTIPLIER	C edx:eax = limb * multiplier
	addl	%ebx,%eax		C low half + incoming carry limb
	adcl	$0,%edx			C fold the carry flag into the high half
	movl	%eax,(%edi)
	leal	4(%edi),%edi
	movl	%edx,%ebx		C high half becomes the next carry limb

	decl	%ecx
	jnz	L(tail_loop)

L(tail_done):
	movl	PARAM_SIZE,%ecx
	shrl	$2,%ecx			C ecx = number of four-limb groups
	jz	L(done)


C Four limbs per iteration.  The running carry alternates between %ebx and
C %ebp: each limb adds its low half into the current accumulator and starts
C the other accumulator with its high half plus the carry flag.  The zeroing
C between addl and adcl is done with movl because movl leaves the carry flag
C untouched.  Each finished limb is stored after the following mull.
	ALIGN(8)
L(quad_loop):
	C limb 0: result accumulates in ebx, next carry in ebp
	movl	(%esi),%eax
	mull	PARAM_MULTIPLIER
	addl	%eax,%ebx
	movl	$0,%ebp
	adcl	%edx,%ebp

	C limb 1: result accumulates in ebp, next carry in ebx
	movl	4(%esi),%eax
	mull	PARAM_MULTIPLIER
	movl	%ebx,(%edi)		C store limb 0
	addl	%eax,%ebp
	movl	$0,%ebx
	adcl	%edx,%ebx

	C limb 2: result accumulates in ebx, next carry in ebp
	movl	8(%esi),%eax
	mull	PARAM_MULTIPLIER
	movl	%ebp,4(%edi)		C store limb 1
	addl	%eax,%ebx
	movl	$0,%ebp
	adcl	%edx,%ebp

	C limb 3: result accumulates in ebp, next carry in ebx
	movl	12(%esi),%eax
	mull	PARAM_MULTIPLIER
	movl	%ebx,8(%edi)		C store limb 2
	addl	%eax,%ebp
	movl	$0,%ebx
	adcl	%edx,%ebx

	movl	%ebp,12(%edi)		C store limb 3

	leal	16(%esi),%esi
	leal	16(%edi),%edi
	decl	%ecx
	jnz	L(quad_loop)

L(done):
	movl	%ebx,%eax		C return the final carry limb

	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret

EPILOGUE()