dnl  Intel Pentium-4 mpn_sub_n -- mpn subtraction.

dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C					cycles/limb
C			     dst!=src1,2  dst==src1  dst==src2
C P6 model 0-8,10-12		-
C P6 model 9   (Banias)		?
C P6 model 13  (Dothan)		?
C P4 model 0-1 (Willamette)	?
C P4 model 2   (Northwood)	4	     6		6
C P4 model 3-4 (Prescott)	4.25	     7.5	7.5


C mpn_sub_n  (dst, src1, src2, size)
C mpn_sub_nc (dst, src1, src2, size, carry)
C
C Limb-wise subtraction: for each of the size 32-bit limbs, lowest first,
C store src1[i] - src2[i] - borrow at dst[i].  The borrow out of the last
C limb is returned in eax (0 or 1).  mpn_sub_n starts with a zero borrow,
C mpn_sub_nc starts with the carry argument (presumably 0 or 1 -- confirm
C against callers).
C
C No check is made for size==0; the loop always processes at least one
C limb, so callers are assumed to pass size >= 1.
C
C Method: each limb is loaded zero-extended into a 64-bit MMX register and
C the subtraction is done at 64 bits.  A limb difference that goes negative
C has all of its upper bits set, so bit 63 of the result is exactly the
C borrow for the next limb, and a right shift by 63 isolates it.
C
C ebx is the only callee-saved register used.  It is kept in the stack slot
C of the src1 parameter, which is dead once src1 has been loaded.

dnl  Stack parameter slots, listed in ascending offset order.
defframe(PARAM_DST,   4)
defframe(PARAM_SRC1,  8)
defframe(PARAM_SRC2,  12)
defframe(PARAM_SIZE,  16)
defframe(PARAM_CARRY, 20)

dnl  The src1 slot doubles as the save area for ebx.
define(SAVE_EBX,`PARAM_SRC1')

	TEXT
	ALIGN(8)

PROLOGUE(mpn_sub_nc)
deflit(`FRAME',0)
	C Seed the borrow from the caller and join the common code.
	movd	PARAM_CARRY, %mm0
	jmp	L(entry)
EPILOGUE()

	ALIGN(8)
PROLOGUE(mpn_sub_n)
deflit(`FRAME',0)
	pxor	%mm0, %mm0		C initial borrow = 0
L(entry):
	mov	PARAM_SIZE, %ecx
	mov	PARAM_SRC1, %eax	C read src1 before its slot is reused
	mov	%ebx, SAVE_EBX		C ebx now lives in the src1 slot
	mov	PARAM_DST, %edx
	mov	PARAM_SRC2, %ebx

	C Point each register just past its last limb, then index backwards
	C from there with a negative counter that runs up to zero.
	lea	(%edx,%ecx,4), %edx	C dst end
	lea	(%eax,%ecx,4), %eax	C src1 end
	lea	(%ebx,%ecx,4), %ebx	C src2 end
	neg	%ecx			C -size

	C The loop is unrolled two limbs per pass.  The borrow alternates
	C between mm0 and mm1, so no register copy is needed between limbs.

L(loop):
	C eax	src1 end
	C ebx	src2 end
	C ecx	counter, limbs, negative
	C edx	dst end
	C mm0	borrow in (0 or 1)

	C first limb of the pair: borrow in mm0, borrow out mm1
	movd	(%eax,%ecx,4), %mm1
	movd	(%ebx,%ecx,4), %mm2
	psubq	%mm2, %mm1		C src1 limb - src2 limb

	psubq	%mm0, %mm1		C ... - borrow
	movd	%mm1, (%edx,%ecx,4)	C store low 32 bits

	psrlq	$63, %mm1		C top bit = borrow out

	add	$1, %ecx
	jz	L(odd)			C finished, borrow is in mm1

	C second limb of the pair: borrow in mm1, borrow out mm0
	movd	(%eax,%ecx,4), %mm0
	movd	(%ebx,%ecx,4), %mm2
	psubq	%mm2, %mm0

	psubq	%mm1, %mm0
	movd	%mm0, (%edx,%ecx,4)

	psrlq	$63, %mm0

	add	$1, %ecx
	jnz	L(loop)

	C Finished after the second limb of a pair: borrow is in mm0.
	movd	%mm0, %eax		C return value
	mov	SAVE_EBX, %ebx
	emms				C leave MMX state clean for the caller
	ret

L(odd):
	C Finished after the first limb of a pair: borrow is in mm1.
	movd	%mm1, %eax		C return value
	mov	SAVE_EBX, %ebx
	emms				C leave MMX state clean for the caller
	ret

EPILOGUE()