dnl  Intel Atom mpn_add_n/mpn_sub_n -- rp[] = up[] +- vp[].

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  Contributed to the GNU project by Marco Bodrato.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C			    cycles/limb
C P5
C P6 model 0-8,10-12
C P6 model 9  (Banias)
C P6 model 13 (Dothan)
C P4 model 0  (Willamette)
C P4 model 1  (?)
C P4 model 2  (Northwood)
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C Intel Atom		     3
C AMD K6
C AMD K7
C AMD K8
C AMD K10

dnl  Select the operation this build of the file implements.  Exactly one of
dnl  OPERATION_add_n or OPERATION_sub_n is expected to be defined:
dnl
dnl    M4_inst         carry-propagating instruction applied to every limb
dnl                    (adcl for addition, sbbl for subtraction)
dnl    M4_function_n   name of the entry point without a carry-in argument
dnl    M4_function_nc  name of the entry point taking an initial carry
dnl    M4_description  verb substituted into the comment text below
dnl
dnl  If neither OPERATION macro is defined, m4_error is invoked with a
dnl  diagnostic message.

ifdef(`OPERATION_add_n', `
	define(M4_inst, adcl)
	define(M4_function_n, mpn_add_n)
	define(M4_function_nc, mpn_add_nc)
	define(M4_description, add)
',`ifdef(`OPERATION_sub_n', `
	define(M4_inst, sbbl)
	define(M4_function_n, mpn_sub_n)
	define(M4_function_nc, mpn_sub_nc)
	define(M4_description, subtract)
',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
')')')

dnl  NOTE(review): MULFUNC_PROLOGUE is defined outside this file; presumably
dnl  it lists every entry point this source can be built into -- confirm
dnl  against the shared m4 macro definitions.
MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                          mp_size_t size);
C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                           mp_size_t size, mp_limb_t carry);
C
C Calculate src1,size M4_description src2,size, and store the result in
C dst,size.  The return value is the carry bit from the top of the result (1
C or 0).
C
C The _nc version accepts 1 or 0 for an initial carry into the low limb of
C the calculation.  Note values other than 1 or 0 here will lead to garbage
C results.
C
C size must be at least 1.  A size of zero is not detected: it takes the
C single-limb path and still reads and writes one limb.
defframe(PARAM_CARRY,20)
defframe(PARAM_SIZE, 16)
defframe(PARAM_SRC2, 12)
defframe(PARAM_SRC1, 8)
defframe(PARAM_DST,  4)

dnl  The argument slots double as the save area for the three registers
dnl  that are saved on entry and restored before returning, so the stack
dnl  pointer is never adjusted.  Every slot is read before it is overwritten.
define(SAVE_RP,`PARAM_SIZE')
define(SAVE_VP,`PARAM_SRC1')
define(SAVE_UP,`PARAM_DST')

dnl  Register roles.  cy and r1 share %ecx: the incoming carry is moved into
dnl  CF before %ecx is reused as a limb register.
define(`rp', `%edi')
define(`up', `%esi')
define(`vp', `%ebx')
define(`cy', `%ecx')
define(`r1', `%ecx')
define(`r2', `%edx')

ASM_START()
	TEXT
	ALIGN(16)
deflit(`FRAME',0)

C Entry without carry-in: clear cy and fall into the shared body.
PROLOGUE(M4_function_n)
	xorl	cy, cy			C no incoming carry
L(start):
	movl	PARAM_SIZE, %eax	C eax = size
	movl	rp, SAVE_RP		C size slot now holds saved rp
	movl	PARAM_DST, rp
	movl	up, SAVE_UP		C dst slot now holds saved up
	movl	PARAM_SRC1, up
	shrl	%eax			C eax = size / 2, CF = size & 1
	movl	vp, SAVE_VP		C src1 slot now holds saved vp
	movl	PARAM_SRC2, vp		C the moves leave ZF and CF untouched
	jz	L(single)		C size / 2 == 0
	jc	L(odd)			C odd size of 3 or more

C Even size: enter the loop at its second half with the pointers biased so
C that the loop body addressing lines up.
	shrl	cy			C CF = incoming carry
	movl	(up), r2
	leal	4(up), up		C lea keeps CF intact
	leal	4(vp), vp
	leal	-4(rp), rp
	jmp	L(mid)

C Exactly one limb: load it and go straight to the tail.
L(single):
	shrl	cy			C CF = incoming carry
	movl	(up), r1
	jmp	L(last)

C Odd size: preload the first limb and fall into the top of the loop.
L(odd):
	shrl	cy			C CF = incoming carry
	movl	(up), r1

C Main loop, two limbs per pass.  CF carries between the two M4_inst
C instructions and across iterations; lea, mov and dec do not modify it.
	ALIGN(16)
L(top):
	M4_inst	(vp), r1
	leal	8(up), up
	movl	-4(up), r2
	leal	8(vp), vp
	movl	r1, (rp)
L(mid):
	M4_inst	-4(vp), r2
	leal	8(rp), rp
	decl	%eax			C pair counter, CF unaffected
	movl	(up), r1		C preload the limb for the next step
	movl	r2, -4(rp)
	jnz	L(top)

C Tail: one limb is still pending in r1.  eax is zero on arrival, so the
C final adcl turns CF into the 0/1 return value.
L(last):
	movl	SAVE_UP, up
	M4_inst	(vp), r1
	movl	SAVE_VP, vp
	movl	r1, (rp)
	adcl	%eax, %eax		C eax = carry or borrow out
	movl	SAVE_RP, rp
	ret
EPILOGUE()

C Entry with carry-in: load the caller-supplied carry and share the body.
PROLOGUE(M4_function_nc)
	movl	PARAM_CARRY, cy		C cy = initial carry argument
	jmp	L(start)
EPILOGUE()
ASM_END()