dnl  X86 MMX mpn_sec_tabselect.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2011-2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                                cycles/limb     cycles/limb
C                                 ali,evn n       unal,evn n
C P5
C P6 model 0-8,10-12
C P6 model 9  (Banias)
C P6 model 13 (Dothan)           1.33            1.87
C P4 model 0  (Willamette)
C P4 model 1  (?)
C P4 model 2  (Northwood)        2.1             2.63
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)           1.7             2.57
C Intel Atom                     1.85            2.7
C AMD K6
C AMD K7                         1.33            1.33
C AMD K8
C AMD K10

C Register aliases.  The first four stack arguments are loaded into the
C integer registers named here.  The fifth argument, which, is read from the
C stack straight into an MMX register, so its alias expands to nothing.
define(`rp',     `%edi')
define(`tp',     `%esi')
define(`n',      `%edx')
define(`nents',  `%ecx')
define(`which',  `')

C Loop counters: i counts table entries still to scan, j counts limbs still
C to be produced by the four-limb loop.
define(`i',      `%ebp')
define(`j',      `%ebx')

C mpn_sec_tabselect -- copy one table entry to rp by scanning the whole table.
C
C Stack arguments, in order: rp, tp, n, nents, which.
C
C The table at tp holds nents entries of n four-byte limbs each, entry k
C starting 4*n*k bytes after tp.  For each group of limbs the code walks all
C nents entries, ANDs every loaded limb with a mask that is all ones only for
C the entry whose index equals which, and ORs the result into an accumulator
C that is finally stored at rp.  No branch and no address depends on which.
C
C The limbs are handled four at a time while at least four remain, then two
C and then one according to bits 1 and 0 of n.
C
C All scan loops test their counter at the bottom, so nents = 0 is not
C handled.
C
C MMX register use:
C   mm0  comparison mask for the current entry
C   mm1  index of the current entry, in both 32-bit lanes
C   mm2, mm3  limbs loaded from the current entry
C   mm4, mm5  accumulated result
C   mm6  which, in both 32-bit lanes
C   mm7  the constant 1, in both 32-bit lanes

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_sec_tabselect)
	C Save the four integer registers that are overwritten below.
	push	%ebx
	push	%esi
	push	%edi
	push	%ebp

	C Four pushes plus the return address put the first argument at
	C 20(%esp); the remaining arguments follow at 4-byte steps.
	mov	32(%esp), nents
	mov	28(%esp), n
	mov	24(%esp), tp
	mov	20(%esp), rp

	C mm7 = 1 in both lanes.  ebx is free here, j is initialised later.
	mov	$1, %ebx
	movd	%ebx, %mm7
	punpckldq %mm7, %mm7

	C mm6 = which in both lanes.
	movd	36(%esp), %mm6
	punpckldq %mm6, %mm6

	C j = n - 4.  A negative result skips the four-limb loop entirely.
	mov	n, j
	add	$-4, j
	js	L(end4)

C ---- four limbs per pass -------------------------------------------------
L(sel4):
	mov	nents, i
	mov	tp, %eax		C keep the address of entry 0 for later
	pxor	%mm1, %mm1		C entry index = 0
	pxor	%mm4, %mm4		C result limbs 0 and 1
	pxor	%mm5, %mm5		C result limbs 2 and 3
	ALIGN(16)
L(scan4):
	movq	%mm6, %mm0
	pcmpeqd	%mm1, %mm0		C all ones iff entry index equals which
	paddd	%mm7, %mm1		C entry index + 1
	movq	(tp), %mm2
	movq	8(tp), %mm3
	pand	%mm0, %mm2
	pand	%mm0, %mm3
	por	%mm2, %mm4
	por	%mm3, %mm5
	lea	(tp,n,4), tp		C same limb position, next entry
	add	$-1, i
	jne	L(scan4)

	movq	%mm4, (rp)
	movq	%mm5, 8(rp)

	lea	16(%eax), tp		C entry 0 again, four limbs further on
	lea	16(rp), rp
	add	$-4, j
	jns	L(sel4)
L(end4):

C ---- two limbs, when bit 1 of n is set -----------------------------------
	test	$2, %dl
	jz	L(chk1)

L(sel2):
	mov	nents, i
	mov	tp, %eax		C keep the address of entry 0 for later
	pxor	%mm1, %mm1
	pxor	%mm4, %mm4
	ALIGN(16)
L(scan2):
	movq	%mm6, %mm0
	pcmpeqd	%mm1, %mm0
	paddd	%mm7, %mm1
	movq	(tp), %mm2
	pand	%mm0, %mm2
	por	%mm2, %mm4
	lea	(tp,n,4), tp
	add	$-1, i
	jne	L(scan2)

	movq	%mm4, (rp)

	lea	8(%eax), tp		C entry 0 again, two limbs further on
	lea	8(rp), rp

C ---- one limb, when bit 0 of n is set ------------------------------------
L(chk1):
	test	$1, %dl
	jz	L(done)

L(sel1):
	mov	nents, i
	pxor	%mm1, %mm1
	pxor	%mm4, %mm4
	ALIGN(16)
L(scan1):
	movq	%mm6, %mm0
	pcmpeqd	%mm1, %mm0
	paddd	%mm7, %mm1
	movd	(tp), %mm2		C 32-bit load, only one limb is left
	pand	%mm0, %mm2
	por	%mm2, %mm4
	lea	(tp,n,4), tp
	add	$-1, i
	jne	L(scan1)

	movd	%mm4, (rp)

C ---- restore registers in reverse push order and leave MMX mode ----------
L(done):
	pop	%ebp
	pop	%edi
	pop	%esi
	pop	%ebx
	emms
	ret
EPILOGUE()