dnl  AMD64 logops.

dnl  Copyright 2004, 2005, 2006, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C AMD K8,K9	 1.5 with fluctuations for variant 2 and 3
C AMD K10	 1.5 with fluctuations for all variants
C Intel P4	 2.8/3.35/3.60 (variant1/variant2/variant3)
C Intel core2	 2
C Intel NHM	 2
C Intel SBR	 1.5/1.75/1.75
C Intel atom	 3.75
C VIA nano	 3.25

C OVERVIEW
C
C Every entry point combines two n-limb operands limb by limb and stores an
C n-limb result.  Exactly one code body is assembled, chosen by the
C OPERATION_xxx symbol that is defined when this file is processed:
C
C   VARIANT_1  (and_n, ior_n, xor_n)      rp[i] = up[i] LOGOP vp[i]
C   VARIANT_2  (andn_n, iorn_n, xnor_n)   rp[i] = up[i] LOGOP ~vp[i]
C   VARIANT_3  (nand_n, nior_n)           rp[i] = ~(up[i] LOGOP vp[i])
C
C All three bodies share one shape:
C   - vp[0] is fetched into r8 up front; then up, vp and rp are moved just
C     past their last limb and n is negated, so (ptr,n,8) walks forward while
C     the index counts up towards zero.
C   - rax = n mod 4 picks a lead-in (b0..b3) that deals with the odd limbs.
C   - The main loop processes four limbs per pass and stops when adding 4 to
C     the index carries, i.e. when the negative index reaches zero.
C   - n = 0 is not catered for: it would take the n mod 4 = 0 entry and run
C     the four-limb loop.  Callers must pass n >= 1.
C
C Scratch registers written: rax, r8, r9 (plus the four argument registers).

ifdef(`OPERATION_and_n',`
  define(`func',`mpn_and_n')
  define(`LOGOP',`andq')
  define(`VARIANT_1')')
ifdef(`OPERATION_andn_n',`
  define(`func',`mpn_andn_n')
  define(`LOGOP',`andq')
  define(`VARIANT_2')')
ifdef(`OPERATION_nand_n',`
  define(`func',`mpn_nand_n')
  define(`LOGOP',`andq')
  define(`VARIANT_3')')
ifdef(`OPERATION_ior_n',`
  define(`func',`mpn_ior_n')
  define(`LOGOP',`orq')
  define(`VARIANT_1')')
ifdef(`OPERATION_iorn_n',`
  define(`func',`mpn_iorn_n')
  define(`LOGOP',`orq')
  define(`VARIANT_2')')
ifdef(`OPERATION_nior_n',`
  define(`func',`mpn_nior_n')
  define(`LOGOP',`orq')
  define(`VARIANT_3')')
ifdef(`OPERATION_xor_n',`
  define(`func',`mpn_xor_n')
  define(`LOGOP',`xorq')
  define(`VARIANT_1')')
ifdef(`OPERATION_xnor_n',`
  define(`func',`mpn_xnor_n')
  define(`LOGOP',`xorq')
  define(`VARIANT_2')')


MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)

C INPUT PARAMETERS
define(`rp',`%rdi')	C destination limb pointer
define(`up',`%rsi')	C first source limb pointer
define(`vp',`%rdx')	C second source limb pointer (the one complemented in VARIANT_2)
define(`n',`%rcx')	C limb count, must be >= 1

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()

C ----------------------------------------------------------------------------
C VARIANT_1:  rp[i] = up[i] LOGOP vp[i]
C ----------------------------------------------------------------------------
ifdef(`VARIANT_1',`
	TEXT
	ALIGN(32)
PROLOGUE(func)
	FUNC_ENTRY(4)			C NOTE(review): presumably maps DOS64 argument registers onto the STD64 ones, confirm in the x86_64 defs
	movq	(vp), %r8		C r8 = vp[0], needed by every lead-in
	movl	R32(%rcx), R32(%rax)	C rax = low 32 bits of n (rcx is the n register)
	leaq	(vp,n,8), vp		C move all three pointers just past the last limb
	leaq	(up,n,8), up
	leaq	(rp,n,8), rp
	negq	n			C index = -n, counts up towards zero
	andl	$3, R32(%rax)		C rax = n mod 4
	jz	L(b0)
	cmpl	$2, R32(%rax)
	jb	L(b1)			C n mod 4 = 1
	je	L(b2)			C n mod 4 = 2

L(b3):	LOGOP	(up,n,8), %r8		C n mod 4 = 3: finish limb 0 on its own
	movq	%r8, (rp,n,8)
	decq	n			C bias index so offsets 16 and 24 address limbs 1 and 2
	jmp	L(m0)
L(b2):	addq	$-2, n			C bias index so offsets 16 and 24 address limbs 0 and 1
	jmp	L(m1)			C r8 already holds vp[0], skip its reload
L(b1):	LOGOP	(up,n,8), %r8		C finish limb 0 on its own
	movq	%r8, (rp,n,8)
	incq	n
	jz	L(end)			C n was 1, nothing left to do

L(top):	movq	(vp,n,8), %r8
L(b0):	movq	8(vp,n,8), %r9		C n mod 4 = 0 enters here with r8 = vp[0]
	LOGOP	(up,n,8), %r8
	LOGOP	8(up,n,8), %r9
	nop				C NOTE(review): presumably scheduling or alignment padding, confirm before removing
	movq	%r8, (rp,n,8)
	movq	%r9, 8(rp,n,8)
L(m0):	movq	16(vp,n,8), %r8
L(m1):	movq	24(vp,n,8), %r9
	LOGOP	16(up,n,8), %r8
	LOGOP	24(up,n,8), %r9
	movq	%r8, 16(rp,n,8)
	movq	%r9, 24(rp,n,8)
	addq	$4, n
	jnc	L(top)			C no carry while the index is still negative
L(end):	FUNC_EXIT()
	ret
EPILOGUE()
')

C ----------------------------------------------------------------------------
C VARIANT_2:  rp[i] = up[i] LOGOP ~vp[i]
C Each vp limb is complemented right after it is loaded.
C ----------------------------------------------------------------------------
ifdef(`VARIANT_2',`
	TEXT
	ALIGN(32)
PROLOGUE(func)
	FUNC_ENTRY(4)			C NOTE(review): presumably maps DOS64 argument registers onto the STD64 ones, confirm in the x86_64 defs
	movq	(vp), %r8		C r8 = vp[0], needed by every lead-in
	notq	%r8			C r8 = ~vp[0]
	movl	R32(%rcx), R32(%rax)	C rax = low 32 bits of n (rcx is the n register)
	leaq	(vp,n,8), vp		C move all three pointers just past the last limb
	leaq	(up,n,8), up
	leaq	(rp,n,8), rp
	negq	n			C index = -n, counts up towards zero
	andl	$3, R32(%rax)		C rax = n mod 4
	jz	L(b0)
	cmpl	$2, R32(%rax)
	jb	L(b1)			C n mod 4 = 1
	je	L(b2)			C n mod 4 = 2

L(b3):	LOGOP	(up,n,8), %r8		C n mod 4 = 3: finish limb 0 on its own
	movq	%r8, (rp,n,8)
	decq	n			C bias index so offsets 16 and 24 address limbs 1 and 2
	jmp	L(m0)
L(b2):	addq	$-2, n			C bias index so offsets 16 and 24 address limbs 0 and 1
	jmp	L(m1)			C r8 already holds ~vp[0], skip its reload
	C Thirteen one-byte NOPs (0x90).  They follow an unconditional jmp, so
	C they are never reached by fall-through.
	C NOTE(review): presumably they place the following code at a preferred
	C alignment, confirm before changing the count.
	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
L(b1):	LOGOP	(up,n,8), %r8		C finish limb 0 on its own
	movq	%r8, (rp,n,8)
	incq	n
	jz	L(end)			C n was 1, nothing left to do

L(top):	movq	(vp,n,8), %r8
	notq	%r8
L(b0):	movq	8(vp,n,8), %r9		C n mod 4 = 0 enters here with r8 = ~vp[0]
	notq	%r9
	LOGOP	(up,n,8), %r8
	LOGOP	8(up,n,8), %r9
	movq	%r8, (rp,n,8)
	movq	%r9, 8(rp,n,8)
L(m0):	movq	16(vp,n,8), %r8
	notq	%r8
L(m1):	movq	24(vp,n,8), %r9
	notq	%r9
	LOGOP	16(up,n,8), %r8
	LOGOP	24(up,n,8), %r9
	movq	%r8, 16(rp,n,8)
	movq	%r9, 24(rp,n,8)
	addq	$4, n
	jnc	L(top)			C no carry while the index is still negative
L(end):	FUNC_EXIT()
	ret
EPILOGUE()
')

C ----------------------------------------------------------------------------
C VARIANT_3:  rp[i] = ~(up[i] LOGOP vp[i])
C Each result limb is complemented right after LOGOP, before it is stored.
C ----------------------------------------------------------------------------
ifdef(`VARIANT_3',`
	TEXT
	ALIGN(32)
PROLOGUE(func)
	FUNC_ENTRY(4)			C NOTE(review): presumably maps DOS64 argument registers onto the STD64 ones, confirm in the x86_64 defs
	movq	(vp), %r8		C r8 = vp[0], needed by every lead-in
	movl	R32(%rcx), R32(%rax)	C rax = low 32 bits of n (rcx is the n register)
	leaq	(vp,n,8), vp		C move all three pointers just past the last limb
	leaq	(up,n,8), up
	leaq	(rp,n,8), rp
	negq	n			C index = -n, counts up towards zero
	andl	$3, R32(%rax)		C rax = n mod 4
	jz	L(b0)
	cmpl	$2, R32(%rax)
	jb	L(b1)			C n mod 4 = 1
	je	L(b2)			C n mod 4 = 2

L(b3):	LOGOP	(up,n,8), %r8		C n mod 4 = 3: finish limb 0 on its own
	notq	%r8
	movq	%r8, (rp,n,8)
	decq	n			C bias index so offsets 16 and 24 address limbs 1 and 2
	jmp	L(m0)
L(b2):	addq	$-2, n			C bias index so offsets 16 and 24 address limbs 0 and 1
	jmp	L(m1)			C r8 already holds vp[0], skip its reload
	C Ten one-byte NOPs (0x90).  They follow an unconditional jmp, so they
	C are never reached by fall-through.
	C NOTE(review): presumably they place the following code at a preferred
	C alignment, confirm before changing the count.
	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
L(b1):	LOGOP	(up,n,8), %r8		C finish limb 0 on its own
	notq	%r8
	movq	%r8, (rp,n,8)
	incq	n
	jz	L(end)			C n was 1, nothing left to do

L(top):	movq	(vp,n,8), %r8
L(b0):	movq	8(vp,n,8), %r9		C n mod 4 = 0 enters here with r8 = vp[0]
	LOGOP	(up,n,8), %r8
	notq	%r8
	LOGOP	8(up,n,8), %r9
	notq	%r9
	movq	%r8, (rp,n,8)
	movq	%r9, 8(rp,n,8)
L(m0):	movq	16(vp,n,8), %r8
L(m1):	movq	24(vp,n,8), %r9
	LOGOP	16(up,n,8), %r8
	notq	%r8
	LOGOP	24(up,n,8), %r9
	notq	%r9
	movq	%r8, 16(rp,n,8)
	movq	%r9, 24(rp,n,8)
	addq	$4, n
	jnc	L(top)			C no carry while the index is still negative
L(end):	FUNC_EXIT()
	ret
EPILOGUE()
')