dnl  Intel Pentium-4 mpn_add_n -- mpn addition.

dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C					cycles/limb
C			     dst!=src1,2  dst==src1  dst==src2
C P6 model 0-8,10-12		-
C P6 model 9   (Banias)		?
C P6 model 13  (Dothan)		?
C P4 model 0-1 (Willamette)	?
C P4 model 2   (Northwood)	4	     6		6
C P4 model 3-4 (Prescott)	4.25	     7.5	7.5


C Entry points in this file:
C
C   mpn_add_n   adds the limb vectors at src1 and src2, writes the sum
C               limbs to dst, and returns the carry out of the top limb
C               in eax.  The carry-in is zero.
C   mpn_add_nc  same, but the carry-in is taken from an extra stack
C               parameter.
C
C Stack parameters, as byte offsets from esp on entry:
C   4 dst, 8 src1, 12 src2, 16 size in limbs, 20 carry-in (mpn_add_nc only).
C
C Limbs are 32 bits wide.  Each limb pair is summed in a 64-bit MMX
C register, so the carry appears in bit 32 and is shifted down to become
C the carry-in of the next limb.
C
C The loop tests its counter after the body, so a size of zero is not
C handled here.
C
C ebx is the only general register that is saved and restored.  It is
C parked in the src1 parameter slot once src1 has been read, so no extra
C stack space is used.

defframe(PARAM_DST,  4)
defframe(PARAM_SRC1, 8)
defframe(PARAM_SRC2, 12)
defframe(PARAM_SIZE, 16)
defframe(PARAM_CARRY,20)

dnl  The src1 slot doubles as the save area for ebx.
define(SAVE_EBX,`PARAM_SRC1')

	TEXT
	ALIGN(8)

PROLOGUE(mpn_add_nc)
deflit(`FRAME',0)
	movd	PARAM_CARRY, %mm0	C mm0 = carry-in supplied by the caller
	jmp	L(entry)		C continue in the shared body below
EPILOGUE()

	ALIGN(8)
PROLOGUE(mpn_add_n)
deflit(`FRAME',0)
	pxor	%mm0, %mm0		C mm0 = 0, no carry-in

L(entry):
	C mm0 holds the carry-in on both paths into this label.

	mov	PARAM_SRC1, %eax	C must be read before the slot is reused
	mov	%ebx, SAVE_EBX		C park ebx in the src1 slot
	mov	PARAM_SIZE, %ecx
	mov	PARAM_DST, %edx
	mov	PARAM_SRC2, %ebx

	C Point each register one past the last limb of its vector and
	C count ecx from -size up to zero, so that a single index serves
	C all three vectors and the loop ends on the zero flag.

	lea	(%edx,%ecx,4), %edx	C dst end
	lea	(%ebx,%ecx,4), %ebx	C src2 end
	lea	(%eax,%ecx,4), %eax	C src1 end
	neg	%ecx			C -size

L(limb_loop):
	C eax	src1 end
	C ebx	src2 end
	C ecx	negative limb index, rising towards zero
	C edx	dst end
	C mm0	carry from the previous limb

	movd	(%eax,%ecx,4), %mm1	C src1 limb
	movd	(%ebx,%ecx,4), %mm2	C src2 limb
	paddq	%mm2, %mm1		C 64-bit sum of the two limbs

	paddq	%mm1, %mm0		C add the incoming carry
	movd	%mm0, (%edx,%ecx,4)	C store the low 32 bits as the dst limb

	psrlq	$32, %mm0		C keep only the carry for the next limb

	add	$1, %ecx
	jnz	L(limb_loop)

	movd	%mm0, %eax		C return value is the final carry
	mov	SAVE_EBX, %ebx		C restore the caller ebx
	emms				C leave MMX state before returning
	ret

EPILOGUE()