dnl  AMD64 logops.

dnl  Copyright 2004-2006, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')

C Limb-wise logical operations on two n-limb operands.
C
C Exactly one OPERATION_* symbol is expected to be defined by the build.  It
C selects the exported name (func), the instruction used as LOGOP, and one of
C three code variants:
C
C   VARIANT_1   rp[i] = up[i] LOGOP vp[i]	and_n,  ior_n,  xor_n
C   VARIANT_2   rp[i] = up[i] LOGOP ~vp[i]	andn_n, iorn_n, xnor_n
C   VARIANT_3   rp[i] = ~(up[i] LOGOP vp[i])	nand_n, nior_n
C
C All variants share one structure: vp[0] is preloaded, the three pointers are
C advanced past their last limb, n is negated so that (ptr,n,8) addresses limb
C 0 and counts upward, and a 4-way unrolled loop is entered at a point chosen
C from n mod 4.  The loop ends when adding 4 to n carries out, i.e. when the
C negative index reaches zero.
C
C NOTE(review): n = 0 is not handled.  It would take the L(b00) entry and run
C the four-limb body.  Callers are presumably required to pass n >= 1; confirm
C against the mpn interface documentation.


C	     cycles/limb
C AMD K8,K9	 1.5 with fluctuations for variant 2 and 3
C AMD K10	 1.5 with fluctuations for all variants
C Intel P4	 2.8/3.35/3.60 (variant1/variant2/variant3)
C Intel core2	 2
C Intel NHM	 2
C Intel SBR	 1.5/1.75/1.75
C Intel atom	 3.75
C VIA nano	 3.25

ifdef(`OPERATION_and_n',`
  define(`func',`mpn_and_n')
  define(`VARIANT_1')
  define(`LOGOP',`andq')')
ifdef(`OPERATION_andn_n',`
  define(`func',`mpn_andn_n')
  define(`VARIANT_2')
  define(`LOGOP',`andq')')
ifdef(`OPERATION_nand_n',`
  define(`func',`mpn_nand_n')
  define(`VARIANT_3')
  define(`LOGOP',`andq')')
ifdef(`OPERATION_ior_n',`
  define(`func',`mpn_ior_n')
  define(`VARIANT_1')
  define(`LOGOP',`orq')')
ifdef(`OPERATION_iorn_n',`
  define(`func',`mpn_iorn_n')
  define(`VARIANT_2')
  define(`LOGOP',`orq')')
ifdef(`OPERATION_nior_n',`
  define(`func',`mpn_nior_n')
  define(`VARIANT_3')
  define(`LOGOP',`orq')')
ifdef(`OPERATION_xor_n',`
  define(`func',`mpn_xor_n')
  define(`VARIANT_1')
  define(`LOGOP',`xorq')')
ifdef(`OPERATION_xnor_n',`
  define(`func',`mpn_xnor_n')
  define(`VARIANT_2')
  define(`LOGOP',`xorq')')


MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)

C INPUT PARAMETERS
define(`rp',`%rdi')
define(`up',`%rsi')
define(`vp',`%rdx')
define(`n',`%rcx')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()

ifdef(`VARIANT_1',`
C VARIANT_1: rp[i] = up[i] LOGOP vp[i]
C Scratch registers: %rax (n mod 4), %r8 and %r9 (limbs in flight).
	TEXT
	ALIGN(32)
PROLOGUE(func)
	FUNC_ENTRY(4)			C defined outside this file; presumably DOS64 argument setup, confirm
	movq	(vp), %r8		C preload vp[0] while vp still points at limb 0
	movl	R32(%rcx), R32(%rax)	C copy of n; only its low two bits are used
	leaq	(vp,n,8), vp		C move vp, up, rp just past their last limb
	leaq	(up,n,8), up
	leaq	(rp,n,8), rp
	negq	n			C n = -n, so (ptr,n,8) now addresses limb 0
	andl	$3, R32(%rax)		C choose the loop entry from n mod 4
	je	L(b00)
	cmpl	$2, R32(%rax)
	jc	L(b01)
	je	L(b10)

C n mod 4 = 3: finish limb 0 here, then bias n by -1 so the 16 and 24 offsets
C at L(e11) address limbs 1 and 2.
L(b11):	LOGOP	(up,n,8), %r8
	movq	%r8, (rp,n,8)
	decq	n
	jmp	L(e11)
C n mod 4 = 2: bias n by -2 so the 16 offset addresses limb 0 (already in %r8)
C and the 24 offset addresses limb 1.
L(b10):	addq	$-2, n
	jmp	L(e10)
C n mod 4 = 1: finish limb 0 here and return if it was the only limb.
L(b01):	LOGOP	(up,n,8), %r8
	movq	%r8, (rp,n,8)
	incq	n
	jz	L(ret)

C Main loop, four limbs per iteration.  n mod 4 = 0 enters at L(b00) with
C vp[0] already in %r8.
L(oop):	movq	(vp,n,8), %r8
L(b00):	movq	8(vp,n,8), %r9
	LOGOP	(up,n,8), %r8
	LOGOP	8(up,n,8), %r9
	nop				C no architectural effect; presumably scheduling or alignment, confirm before removing
	movq	%r8, (rp,n,8)
	movq	%r9, 8(rp,n,8)
L(e11):	movq	16(vp,n,8), %r8
L(e10):	movq	24(vp,n,8), %r9
	LOGOP	16(up,n,8), %r8
	LOGOP	24(up,n,8), %r9
	movq	%r8, 16(rp,n,8)
	movq	%r9, 24(rp,n,8)
	addq	$4, n			C carries out exactly when n reaches zero
	jnc	L(oop)
L(ret):	FUNC_EXIT()
	ret
EPILOGUE()
')

ifdef(`VARIANT_2',`
C VARIANT_2: rp[i] = up[i] LOGOP ~vp[i]
C Same structure as VARIANT_1, with each vp limb complemented right after it
C is loaded.
	TEXT
	ALIGN(32)
PROLOGUE(func)
	FUNC_ENTRY(4)			C defined outside this file; presumably DOS64 argument setup, confirm
	movq	(vp), %r8		C preload vp[0] while vp still points at limb 0
	notq	%r8			C ... and complement it
	movl	R32(%rcx), R32(%rax)	C copy of n; only its low two bits are used
	leaq	(vp,n,8), vp		C move vp, up, rp just past their last limb
	leaq	(up,n,8), up
	leaq	(rp,n,8), rp
	negq	n			C n = -n, so (ptr,n,8) now addresses limb 0
	andl	$3, R32(%rax)		C choose the loop entry from n mod 4
	je	L(b00)
	cmpl	$2, R32(%rax)
	jc	L(b01)
	je	L(b10)

C n mod 4 = 3: finish limb 0, then bias n by -1 for the L(e11) entry.
L(b11):	LOGOP	(up,n,8), %r8
	movq	%r8, (rp,n,8)
	decq	n
	jmp	L(e11)
C n mod 4 = 2: bias n by -2; %r8 already holds ~vp[0] for the L(e10) entry.
L(b10):	addq	$-2, n
	jmp	L(e10)
C Thirteen one-byte NOP encodings.  They sit after an unconditional jmp and
C before a label that is only reached by a jump, so they are never executed.
C Presumably padding that fixes the position of the code below; confirm
C before changing the count.
	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
C n mod 4 = 1: finish limb 0 and return if it was the only limb.
L(b01):	LOGOP	(up,n,8), %r8
	movq	%r8, (rp,n,8)
	incq	n
	jz	L(ret)

C Main loop, four limbs per iteration.  n mod 4 = 0 enters at L(b00) with
C ~vp[0] already in %r8.
L(oop):	movq	(vp,n,8), %r8
	notq	%r8
L(b00):	movq	8(vp,n,8), %r9
	notq	%r9
	LOGOP	(up,n,8), %r8
	LOGOP	8(up,n,8), %r9
	movq	%r8, (rp,n,8)
	movq	%r9, 8(rp,n,8)
L(e11):	movq	16(vp,n,8), %r8
	notq	%r8
L(e10):	movq	24(vp,n,8), %r9
	notq	%r9
	LOGOP	16(up,n,8), %r8
	LOGOP	24(up,n,8), %r9
	movq	%r8, 16(rp,n,8)
	movq	%r9, 24(rp,n,8)
	addq	$4, n			C carries out exactly when n reaches zero
	jnc	L(oop)
L(ret):	FUNC_EXIT()
	ret
EPILOGUE()
')

ifdef(`VARIANT_3',`
C VARIANT_3: rp[i] = ~(up[i] LOGOP vp[i])
C Same structure as VARIANT_1, with each result complemented before it is
C stored.
	TEXT
	ALIGN(32)
PROLOGUE(func)
	FUNC_ENTRY(4)			C defined outside this file; presumably DOS64 argument setup, confirm
	movq	(vp), %r8		C preload vp[0] while vp still points at limb 0
	movl	R32(%rcx), R32(%rax)	C copy of n; only its low two bits are used
	leaq	(vp,n,8), vp		C move vp, up, rp just past their last limb
	leaq	(up,n,8), up
	leaq	(rp,n,8), rp
	negq	n			C n = -n, so (ptr,n,8) now addresses limb 0
	andl	$3, R32(%rax)		C choose the loop entry from n mod 4
	je	L(b00)
	cmpl	$2, R32(%rax)
	jc	L(b01)
	je	L(b10)

C n mod 4 = 3: finish limb 0, then bias n by -1 for the L(e11) entry.
L(b11):	LOGOP	(up,n,8), %r8
	notq	%r8
	movq	%r8, (rp,n,8)
	decq	n
	jmp	L(e11)
C n mod 4 = 2: bias n by -2; %r8 already holds vp[0] for the L(e10) entry.
L(b10):	addq	$-2, n
	jmp	L(e10)
C Ten one-byte NOP encodings.  They sit after an unconditional jmp and before
C a label that is only reached by a jump, so they are never executed.
C Presumably padding that fixes the position of the code below; confirm
C before changing the count.
	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
C n mod 4 = 1: finish limb 0 and return if it was the only limb.
L(b01):	LOGOP	(up,n,8), %r8
	notq	%r8
	movq	%r8, (rp,n,8)
	incq	n
	jz	L(ret)

C Main loop, four limbs per iteration.  n mod 4 = 0 enters at L(b00) with
C vp[0] already in %r8.
L(oop):	movq	(vp,n,8), %r8
L(b00):	movq	8(vp,n,8), %r9
	LOGOP	(up,n,8), %r8
	notq	%r8
	LOGOP	8(up,n,8), %r9
	notq	%r9
	movq	%r8, (rp,n,8)
	movq	%r9, 8(rp,n,8)
L(e11):	movq	16(vp,n,8), %r8
L(e10):	movq	24(vp,n,8), %r9
	LOGOP	16(up,n,8), %r8
	notq	%r8
	LOGOP	24(up,n,8), %r9
	notq	%r9
	movq	%r8, 16(rp,n,8)
	movq	%r9, 24(rp,n,8)
	addq	$4, n			C carries out exactly when n reaches zero
	jnc	L(oop)
L(ret):	FUNC_EXIT()
	ret
EPILOGUE()
')