1dnl AMD K6-2 mpn_com -- mpn bitwise one's complement. 2 3dnl Copyright 1999-2002 Free Software Foundation, Inc. 4 5dnl This file is part of the GNU MP Library. 6dnl 7dnl The GNU MP Library is free software; you can redistribute it and/or modify 8dnl it under the terms of either: 9dnl 10dnl * the GNU Lesser General Public License as published by the Free 11dnl Software Foundation; either version 3 of the License, or (at your 12dnl option) any later version. 13dnl 14dnl or 15dnl 16dnl * the GNU General Public License as published by the Free Software 17dnl Foundation; either version 2 of the License, or (at your option) any 18dnl later version. 19dnl 20dnl or both in parallel, as here. 21dnl 22dnl The GNU MP Library is distributed in the hope that it will be useful, but 23dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25dnl for more details. 26dnl 27dnl You should have received copies of the GNU General Public License and the 28dnl GNU Lesser General Public License along with the GNU MP Library. If not, 29dnl see https://www.gnu.org/licenses/. 30 31include(`../config.m4') 32 33NAILS_SUPPORT(0-31) 34 35 36C alignment dst/src, A=0mod8 N=4mod8 37C A/A A/N N/A N/N 38C K6-2 1.0 1.18 1.18 1.18 cycles/limb 39C K6 1.5 1.85 1.75 1.85 40 41 42C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size); 43C 44C Take the bitwise ones-complement of src,size and write it to dst,size. 
C Frame offsets of the three arguments.

defframe(PARAM_DST, 4)
defframe(PARAM_SRC, 8)
defframe(PARAM_SIZE,12)

	TEXT
	ALIGN(16)
PROLOGUE(mpn_com)
deflit(`FRAME',0)

	C Strategy: halve size, which leaves its low bit in the carry flag.
	C Limb pairs are complemented 8 bytes at a time through MMX, and
	C afterwards the carry flag tells whether one unpaired limb remains.

	movl	PARAM_SIZE, %ecx
	movl	PARAM_DST, %edx
	movl	PARAM_SRC, %eax
	shrl	%ecx			C ecx = size/2, carry = size mod 2
	jnz	L(pairs)		C taken when size >= 2


	C No complete pair, so process one limb with integer code.  A size
	C of 0 would also fall through to here and still touch one limb, so
	C callers presumably have to pass size >= 1.

	movl	(%eax), %eax
	notl_or_xorl_GMP_NUMB_MASK(	%eax)
	movl	%eax, (%edx)
	ret


L(pairs):
	C ebx is saved here and restored before returning.

	pushl	%ebx	FRAME_pushl()
	pcmpeqd	%mm7, %mm7		C mm7 = all ones

	movl	%ecx, %ebx		C keep size/2 to find the odd limb later
ifelse(GMP_NAIL_BITS,0,,
`	psrld	$GMP_NAIL_BITS, %mm7')		C zero the nail bits in the mask


	ALIGN(8)
L(pair_loop):
	C eax	src base
	C ebx	size/2, not modified in the loop
	C ecx	pairs still to do, counts down to 0
	C edx	dst base
	C
	C mm0	limb pair being processed
	C mm7	xor mask
	C
	C The pairs are walked from the highest address downwards.

	movq	-8(%eax,%ecx,8), %mm0
	pxor	%mm7, %mm0
	movq	%mm0, -8(%edx,%ecx,8)
	loop	L(pair_loop)


	C The carry flag still holds the bit shifted out by shrl, since no
	C instruction executed after it modifies the flags.  Carry set means
	C size is odd and the limb above the last pair is still to be done.

	jnc	L(done)
	movl	(%eax,%ebx,8), %eax
	notl_or_xorl_GMP_NUMB_MASK(	%eax)
	movl	%eax, (%edx,%ebx,8)
L(done):

	popl	%ebx
	emms_or_femms
	ret

EPILOGUE()