//===---- X86InstrAMX.td - AMX Instruction Set Extension --*- tablegen -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel AMX instruction
// set.
//
// Naming used throughout this file (as visible from the definitions below):
//   * P<NAME>   - pseudo marked usesCustomInserter that takes u8imm operands
//                 instead of tile registers.
//   * P<NAME>V  - pseudo that takes GR16 operands followed by TILE/TILEPair
//                 register operands ("Pseudo instruction for RA").
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AMX instructions

// Tile configuration load/store and tile data load/store instructions.
// Suffix is appended to every record name and HasEGPR is added to the
// predicate list, so the multiclass can be instantiated once per encoding
// (see the two defm lines below: VEX with NoEGPR, EVEX with HasEGPR).
multiclass AMX_TILE_COMMON<string Suffix, Predicate HasEGPR> {
let Predicates = [HasAMXTILE, HasEGPR, In64BitMode] in {
  // LDTILECFG is modeled as defining all eight tile registers.
  let hasSideEffects = 1,
      Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
  def LDTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
                           "ldtilecfg\t$src",
                           [(int_x86_ldtilecfg addr:$src)]>,
                           T8, PS;
  let hasSideEffects = 1 in
  def STTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
                           "sttilecfg\t$src",
                           [(int_x86_sttilecfg addr:$src)]>,
                           T8, PD;
  let mayLoad = 1 in
  def TILELOADD#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
                           (ins sibmem:$src),
                           "tileloadd\t{$src, $dst|$dst, $src}", []>,
                           T8, XD;
  let mayLoad = 1 in
  def TILELOADDT1#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
                             (ins sibmem:$src),
                             "tileloaddt1\t{$src, $dst|$dst, $src}", []>,
                             T8, PD;
  let mayStore = 1 in
  def TILESTORED#Suffix : I<0x4b, MRMDestMemFSIB, (outs),
                            (ins sibmem:$dst, TILE:$src),
                            "tilestored\t{$src, $dst|$dst, $src}", []>,
                            T8, XS;
}
}

let SchedRW = [WriteSystem] in {
  defm "" : AMX_TILE_COMMON<"", NoEGPR>, VEX;
  defm "" : AMX_TILE_COMMON<"_EVEX", HasEGPR>, EVEX, NoCD8;

  let Predicates = [HasAMXTILE, In64BitMode] in {
    let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
    def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
                        "tilerelease", [(int_x86_tilerelease)]>, VEX, T8, PS;
    def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins),
                     "tilezero\t$dst", []>,
                     VEX, T8, XD;

    // Pseudo instruction for RA.
    let isPseudo = true, mayLoad = 1, hasSideEffects = 1,
        Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
    def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>;
    let isPseudo = true, mayLoad = 1 in
    def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                                     GR16:$src2,
                                                     opaquemem:$src3), []>;
    let isPseudo = true, mayLoad = 1 in
    def PTILELOADDT1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                                       GR16:$src2,
                                                       opaquemem:$src3), []>;
    let isPseudo = true, mayStore = 1 in
    def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
                                            GR16:$src2, opaquemem:$src3,
                                            TILE:$src4), []>;
    let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
        canFoldAsLoad = 1, usesCustomInserter = 1 in
    def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
                             [(set TILE:$dst, (int_x86_tilezero_internal
                               GR16:$src1, GR16:$src2))]>;

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      let mayLoad = 1 in
      def PTILELOADD : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
      let mayLoad = 1 in
      def PTILELOADDT1 : PseudoI<(outs), (ins u8imm:$src1,
                                          sibmem:$src2), []>;
      let mayStore = 1 in
      def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>;
      def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
                              [(int_x86_tilezero timm:$src)]>;
    }
  } // Predicates
} // SchedRW

// AMX-INT8
let Predicates = [HasAMXINT8, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    // $src1 is tied to $dst; the four variants share opcode 0x5e and differ
    // only in the mandatory prefix (XD / XS / PD / none).
    let Constraints = "$src1 = $dst" in {
      def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX, VVVV, T8, XD;
      def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX, VVVV, T8, XS;
      def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX, VVVV, T8, PD;
      def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX, VVVV, T8;
    }

    // Pseudo instruction for RA.
    let isPseudo = true, Constraints = "$src4 = $dst" in {
      def PTDPBSSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE:$dst,
                                (int_x86_tdpbssd_internal GR16:$src1, GR16:$src2,
                                GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
      def PTDPBSUDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE:$dst,
                                (int_x86_tdpbsud_internal GR16:$src1, GR16:$src2,
                                GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
      def PTDPBUSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE:$dst,
                                (int_x86_tdpbusd_internal GR16:$src1, GR16:$src2,
                                GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
      def PTDPBUUDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE:$dst,
                                (int_x86_tdpbuud_internal GR16:$src1, GR16:$src2,
                                GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
    }

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      def PTDPBSSD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbssd timm:$src1,
                               timm:$src2, timm:$src3)]>;
      def PTDPBSUD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbsud timm:$src1,
                               timm:$src2, timm:$src3)]>;
      def PTDPBUSD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbusd timm:$src1,
                               timm:$src2, timm:$src3)]>;
      def PTDPBUUD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbuud timm:$src1,
                               timm:$src2, timm:$src3)]>;
    }
  }
} // HasAMXINT8

// AMX-BF16
let Predicates = [HasAMXBF16, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    let Constraints = "$src1 = $dst" in
    def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                      []>, VEX, VVVV, T8, XS;

    // Pseudo instruction for RA.
    let isPseudo = true, Constraints = "$src4 = $dst" in
    def PTDPBF16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE:$dst,
                                (int_x86_tdpbf16ps_internal GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6))]>;

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      def PTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1,
                               u8imm:$src2, u8imm:$src3),
                               [(int_x86_tdpbf16ps timm:$src1,
                                 timm:$src2, timm:$src3)]>;
    }
  }
} // HasAMXBF16

// AMX-FP16
let Predicates = [HasAMXFP16, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    let Constraints = "$src1 = $dst" in {
      def TDPFP16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
                        (ins TILE:$src1, TILE:$src2, TILE:$src3),
                        "tdpfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                        []>, VEX, VVVV, T8, XD;
    }

    // Pseudo instruction for RA.
    let isPseudo = true, Constraints = "$src4 = $dst" in {
      def PTDPFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                GR16:$src2, GR16:$src3, TILE:$src4,
                                TILE:$src5, TILE:$src6),
                                [(set TILE:$dst,
                                  (int_x86_tdpfp16ps_internal GR16:$src1,
                                    GR16:$src2, GR16:$src3, TILE:$src4,
                                    TILE:$src5, TILE:$src6))]>;
    }

    let usesCustomInserter = 1 in {
      def PTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1,
                               u8imm:$src2, u8imm:$src3),
                               [(int_x86_tdpfp16ps timm:$src1,
                                 timm:$src2, timm:$src3)]>;
    }
  }
} // HasAMXFP16

// AMX-COMPLEX
let Predicates = [HasAMXCOMPLEX, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    let Constraints = "$src1 = $dst" in {
      def TCMMIMFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                           (ins TILE:$src1, TILE:$src2, TILE:$src3),
                           "tcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                           []>, T8, PD, VEX, VVVV;
      def TCMMRLFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                           (ins TILE:$src1, TILE:$src2, TILE:$src3),
                           "tcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                           []>, VEX, VVVV, WIG, T8;

    } // Constraints = "$src1 = $dst"

    let Constraints = "$src4 = $dst" in {
      def PTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6),
                                   [(set TILE:$dst,
                                     (int_x86_tcmmimfp16ps_internal GR16:$src1, GR16:$src2,
                                     GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
      def PTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6),
                                   [(set TILE:$dst,
                                     (int_x86_tcmmrlfp16ps_internal GR16:$src1, GR16:$src2,
                                     GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
    }

    let usesCustomInserter = 1 in {
      def PTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1,
                                  u8imm:$src2, u8imm:$src3),
                                  [(int_x86_tcmmimfp16ps timm:$src1,
                                    timm:$src2, timm:$src3)]>;
      def PTCMMRLFP16PS : PseudoI<(outs), (ins u8imm:$src1,
                                  u8imm:$src2, u8imm:$src3),
                                  [(int_x86_tcmmrlfp16ps timm:$src1,
                                    timm:$src2, timm:$src3)]>;
    }
  } // SchedRW = [WriteSystem]
}

// AMX-FP8
let Predicates = [HasAMXFP8, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    // Common shape of the four FP8 instructions: three TILE inputs with
    // $src1 tied to $dst, VEX/VVVV encoded. Opstr is the mnemonic.
    let Constraints = "$src1 = $dst" in {
      class AMX_FP8_BASE<bits<8> Opcode, string Opstr> :
        I<Opcode, MRMSrcReg4VOp3, (outs TILE:$dst),
          (ins TILE:$src1, TILE:$src2, TILE:$src3),
          !strconcat(Opstr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          []>, VEX, VVVV;
    }

    def TDPBF8PS : AMX_FP8_BASE<0xfd, "tdpbf8ps">, T_MAP5, PS;
    def TDPBHF8PS : AMX_FP8_BASE<0xfd, "tdpbhf8ps">, T_MAP5, XD;
    def TDPHBF8PS : AMX_FP8_BASE<0xfd, "tdphbf8ps">, T_MAP5, XS;
    def TDPHF8PS : AMX_FP8_BASE<0xfd, "tdphf8ps">, T_MAP5, PD;

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      def PTDPBF8PS : PseudoI<(outs),
                              (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                              [(int_x86_tdpbf8ps timm:$src1, timm:$src2,
                                timm:$src3)]>;
      def PTDPBHF8PS : PseudoI<(outs),
                               (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                               [(int_x86_tdpbhf8ps timm:$src1, timm:$src2,
                                 timm:$src3)]>;
      def PTDPHBF8PS : PseudoI<(outs),
                               (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                               [(int_x86_tdphbf8ps timm:$src1, timm:$src2,
                                 timm:$src3)]>;
      def PTDPHF8PS : PseudoI<(outs),
                              (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                              [(int_x86_tdphf8ps timm:$src1, timm:$src2,
                                timm:$src3)]>;
    }

    let Constraints = "$src4 = $dst" in {
      def PTDPBF8PSV : PseudoI<(outs TILE:$dst),
                               (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                    TILE:$src4, TILE:$src5, TILE:$src6),
                               [(set TILE:$dst,
                                 (int_x86_tdpbf8ps_internal GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6))]>;
      def PTDPBHF8PSV : PseudoI<(outs TILE:$dst),
                                (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                     TILE:$src4, TILE:$src5, TILE:$src6),
                                [(set TILE:$dst,
                                  (int_x86_tdpbhf8ps_internal GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6))]>;
      def PTDPHBF8PSV : PseudoI<(outs TILE:$dst),
                                (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                     TILE:$src4, TILE:$src5, TILE:$src6),
                                [(set TILE:$dst,
                                  (int_x86_tdphbf8ps_internal GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6))]>;
      def PTDPHF8PSV : PseudoI<(outs TILE:$dst),
                               (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                    TILE:$src4, TILE:$src5, TILE:$src6),
                               [(set TILE:$dst,
                                 (int_x86_tdphf8ps_internal GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6))]>;
    }
  }
}

// Pattern-less load/store pseudos operating on a TILEPair register.
let Predicates = [HasAMXTILE, In64BitMode], isPseudo = true,
    SchedRW = [WriteSystem] in {
  let mayStore = 1 in
  def PTILEPAIRSTORE : PseudoI<(outs), (ins opaquemem:$src1,
                               TILEPair:$src2), []>;
  let mayLoad = 1 in
  def PTILEPAIRLOAD : PseudoI<(outs TILEPair:$dst),
                              (ins opaquemem:$src), []>;
}

// Defines the four T2RPNTLVW* variants for one encoding:
//   Z0<rs><suffix>, Z0<rs>T1<suffix>  - PS prefix
//   Z1<rs><suffix>, Z1<rs>T1<suffix>  - PD prefix
// op1 is the opcode of the non-T1 forms and op2 the opcode of the T1 forms.
// rs is pasted into the record name and, lower-cased, into the mnemonic;
// suffix is only pasted into the record name.
multiclass T2RPNTLVW_Base<bits<8> op1, bits<8> op2, string rs, string suffix> {
  def Z0#rs#suffix : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst),
                       (ins sibmem:$src),
                       "t2rpntlvwz0" #!tolower(rs)# "\t{$src, $dst|$dst, $src}",
                       []>, PS;
  def Z0#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst),
                          (ins sibmem:$src),
                          "t2rpntlvwz0" #!tolower(rs)# "t1\t{$src, $dst|$dst, $src}",
                          []>, PS;
  def Z1#rs#suffix : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst),
                       (ins sibmem:$src),
                       "t2rpntlvwz1" #!tolower(rs)# "\t{$src, $dst|$dst, $src}",
                       []>, PD;
  def Z1#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst),
                          (ins sibmem:$src),
                          "t2rpntlvwz1" #!tolower(rs)# "t1\t{$src, $dst|$dst, $src}",
                          []>, PD;
}

let Predicates = [HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in
  defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "">, T8, VEX;

let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode],
    SchedRW = [WriteSystem] in
  defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "_EVEX">, T8, EVEX, NoCD8;

let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode],
    SchedRW = [WriteSystem] in
  defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "">, T_MAP5, VEX;

let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode],
    SchedRW = [WriteSystem] in
  defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "_EVEX">,
                   T_MAP5, EVEX, NoCD8;

// AMX-TRANSPOSE
let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    def TTRANSPOSED : I<0x5f, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
                        "ttransposed\t{$src, $dst|$dst, $src}", []>,
                        VEX, T8, XS;
    let isPseudo = true in {
      def PT2RPNTLVWZ0V : PseudoI<(outs TILEPair:$dst),
                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                       opaquemem:$src4),
                                  []>;
      def PT2RPNTLVWZ0T1V : PseudoI<(outs TILEPair:$dst),
                                    (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                         opaquemem:$src4),
                                    []>;
      def PT2RPNTLVWZ1V : PseudoI<(outs TILEPair:$dst),
                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                       opaquemem:$src4),
                                  []>;
      def PT2RPNTLVWZ1T1V : PseudoI<(outs TILEPair:$dst),
                                    (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                         opaquemem:$src4),
                                    []>;
    }

    def PTTRANSPOSEDV : PseudoI<(outs TILE:$dst),
                                (ins GR16:$src1, GR16:$src2, TILE:$src),
                                [(set TILE:$dst,
                                  (int_x86_ttransposed_internal GR16:$src1,
                                   GR16:$src2, TILE:$src))]>;

    let usesCustomInserter = 1 in {
      def PT2RPNTLVWZ0 : PseudoI<(outs), (ins u8imm:$dst,
                                 sibmem:$src1), []>;
      def PT2RPNTLVWZ0T1 : PseudoI<(outs), (ins u8imm:$dst,
                                   sibmem:$src1), []>;
      def PT2RPNTLVWZ1 : PseudoI<(outs), (ins u8imm:$dst,
                                 sibmem:$src1), []>;
      def PT2RPNTLVWZ1T1 : PseudoI<(outs), (ins u8imm:$dst,
                                   sibmem:$src1), []>;
      def PTTRANSPOSED : PseudoI<(outs), (ins u8imm:$dst, u8imm:$src),
                                 [(int_x86_ttransposed timm:$dst, timm:$src)]>;
    }
  }
} // HasAMXTRANSPOSE

// AMX-BF16 + AMX-TRANSPOSE
let Predicates = [HasAMXBF16, HasAMXTRANSPOSE, In64BitMode],
    SchedRW = [WriteSystem] in {
  let Constraints = "$src1 = $dst" in
  def TTDPBF16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                     (ins TILE:$src1, TILE:$src2, TILE:$src3),
                     "ttdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                     []>, VEX, VVVV, T8, XS;
  let Constraints = "$src4 = $dst" in
  def PTTDPBF16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                             GR16:$src2, GR16:$src3, TILE:$src4,
                             TILE:$src5, TILE:$src6),
                             [(set TILE:$dst,
                               (int_x86_ttdpbf16ps_internal GR16:$src1, GR16:$src2,
                                GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
  let usesCustomInserter = 1 in
  def PTTDPBF16PS : PseudoI<(outs),
                            (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                            [(int_x86_ttdpbf16ps timm:$src1, timm:$src2,
                              timm:$src3)]>;
}

// AMX-FP16 + AMX-TRANSPOSE
let Predicates = [HasAMXFP16, HasAMXTRANSPOSE, In64BitMode],
    SchedRW = [WriteSystem] in {
  let Constraints = "$src1 = $dst" in
  def TTDPFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                     (ins TILE:$src1, TILE:$src2, TILE:$src3),
                     "ttdpfp16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                     []>, VEX, VVVV, T8, XD;
  let Constraints = "$src4 = $dst" in
  def PTTDPFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                             GR16:$src2, GR16:$src3, TILE:$src4,
                             TILE:$src5, TILE:$src6),
                             [(set TILE:$dst,
                               (int_x86_ttdpfp16ps_internal GR16:$src1, GR16:$src2,
                                GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
  let usesCustomInserter = 1 in
  def PTTDPFP16PS : PseudoI<(outs),
                            (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                            [(int_x86_ttdpfp16ps timm:$src1, timm:$src2,
                              timm:$src3)]>;
}

// AMX-COMPLEX + AMX-TRANSPOSE
let Predicates = [HasAMXCOMPLEX, HasAMXTRANSPOSE, In64BitMode],
    SchedRW = [WriteSystem] in {
  let Constraints = "$src1 = $dst" in {
    def TTCMMIMFP16PS : I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
                          (ins TILE:$src1, TILE:$src2, TILE:$src3),
                          "ttcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                          []>, VEX, VVVV, T8, XD;
    def TTCMMRLFP16PS: I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
                         (ins TILE:$src1, TILE:$src2, TILE:$src3),
                         "ttcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                         []>, VEX, VVVV, T8, XS;
    def TCONJTCMMIMFP16PS : I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
                              (ins TILE:$src1, TILE:$src2, TILE:$src3),
                              "tconjtcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                              []>, VEX, VVVV, WIG, T8, PS;
  }
  def TCONJTFP16 : I<0x6b, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
                     "tconjtfp16\t{$src, $dst|$dst, $src}", []>, VEX, T8, PD;

  let Constraints = "$src4 = $dst" in {
    def PTTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6),
                                  [(set TILE:$dst,
                                    (int_x86_ttcmmimfp16ps_internal GR16:$src1, GR16:$src2,
                                     GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
    def PTTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6),
                                  [(set TILE:$dst,
                                    (int_x86_ttcmmrlfp16ps_internal GR16:$src1, GR16:$src2,
                                     GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
    def PTCONJTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                      GR16:$src2, GR16:$src3, TILE:$src4,
                                      TILE:$src5, TILE:$src6),
                                      [(set TILE:$dst,
                                        (int_x86_tconjtcmmimfp16ps_internal GR16:$src1, GR16:$src2,
                                         GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
  }
  def PTCONJTFP16V : PseudoI<(outs TILE:$dst),
                             (ins GR16:$src1, GR16:$src2, TILE:$src3),
                             [(set TILE:$dst,
                               (int_x86_tconjtfp16_internal GR16:$src1,
                                GR16:$src2, TILE:$src3))]>;

  let usesCustomInserter = 1 in {
    def PTTCMMIMFP16PS : PseudoI<(outs),
                                 (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                                 [(int_x86_ttcmmimfp16ps timm:$src1, timm:$src2,
                                   timm:$src3)]>;
    def PTTCMMRLFP16PS : PseudoI<(outs),
                                 (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                                 [(int_x86_ttcmmrlfp16ps timm:$src1, timm:$src2,
                                   timm:$src3)]>;
    def PTCONJTCMMIMFP16PS : PseudoI<(outs),
                                     (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                                     [(int_x86_tconjtcmmimfp16ps timm:$src1,
                                       timm:$src2, timm:$src3)]>;
    def PTCONJTFP16 : PseudoI<(outs), (ins u8imm:$dst, u8imm:$src),
                              [(int_x86_tconjtfp16 timm:$dst, timm:$src)]>;
  }
}

// AMX-MOVRS + AMX-TRANSPOSE pseudos
let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode],
    SchedRW = [WriteSystem] in {
  let isPseudo = true in {
    def PT2RPNTLVWZ0RSV : PseudoI<(outs TILEPair:$dst),
                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                       opaquemem:$src4),
                                  []>;
    def PT2RPNTLVWZ0RST1V : PseudoI<(outs TILEPair:$dst),
                                    (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                         opaquemem:$src4),
                                    []>;
    def PT2RPNTLVWZ1RSV : PseudoI<(outs TILEPair:$dst),
                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                       opaquemem:$src4),
                                  []>;
    def PT2RPNTLVWZ1RST1V : PseudoI<(outs TILEPair:$dst),
                                    (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                         opaquemem:$src4),
                                    []>;
  }
  let usesCustomInserter = 1 in {
    def PT2RPNTLVWZ0RS : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
    def PT2RPNTLVWZ0RST1 : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
    def PT2RPNTLVWZ1RS : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
    def PT2RPNTLVWZ1RST1 : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
  }
} // HasAMXMOVRS, HasAMXTRANSPOSE

// Defines <NAME><suffix> (tileloaddrs, XD prefix) and <NAME>T1<suffix>
// (tileloaddrst1, PD prefix); both use opcode 0x4a.
multiclass TILELOADDRS_Base<string suffix> {
  def suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1),
                 "tileloaddrs\t{$src1, $dst|$dst, $src1}", []>, T8, XD;
  def T1#suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1),
                    "tileloaddrst1\t{$src1, $dst|$dst, $src1}", []>, T8, PD;
}

let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in
  defm TILELOADDRS : TILELOADDRS_Base<"">, VEX;

let Predicates = [HasAMXMOVRS, HasEGPR, In64BitMode],
    SchedRW = [WriteSystem] in
  defm TILELOADDRS : TILELOADDRS_Base<"_EVEX">, EVEX, NoCD8;

let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
  let isPseudo = true, mayLoad = 1 in {
    def PTILELOADDRSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                                       GR16:$src2,
                                                       opaquemem:$src3), []>;
    def PTILELOADDRST1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                                         GR16:$src2,
                                                         opaquemem:$src3), []>;
  }

  let usesCustomInserter = 1, mayLoad = 1 in {
    def PTILELOADDRS : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
    def PTILELOADDRST1 : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
  }
} // HasAMXMOVRS, In64BitMode

// TCVTROWD2PS: rri takes an immediate second operand, rre a GR32 one.
multiclass m_tcvtrowd2ps {
  let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
    let SchedRW = [WriteSystem] in {
      def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
                    (ins TILE:$src1, i32u8imm:$src2),
                    "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    []>, TA, XS, EVEX, EVEX_V512;
      def rre : I<0x4A, MRMSrcReg4VOp3, (outs VR512:$dst),
                  (ins TILE:$src1, GR32:$src2),
                  "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  []>, T8, XS, EVEX, VVVV, EVEX_V512;
    }
  } // HasAMXAVX512, HasAVX10_2_512, In64BitMode
}

defm TCVTROWD2PS : m_tcvtrowd2ps;

let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    let usesCustomInserter = 1 in {
      def PTCVTROWD2PSrri : PseudoI<(outs VR512:$dst),
                                    (ins u8imm:$src1, i32u8imm:$src2),
                                    [(set VR512:$dst,
                                      (int_x86_tcvtrowd2ps timm:$src1,
                                       imm:$src2))]>;
      def PTCVTROWD2PSrre : PseudoI<(outs VR512:$dst),
                                    (ins u8imm:$src1, GR32:$src2),
                                    [(set VR512:$dst,
                                      (int_x86_tcvtrowd2ps timm:$src1,
                                       GR32:$src2))]>;
    }

    def PTCVTROWD2PSrriV : PseudoI<(outs VR512:$dst),
                                   (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
                                   [(set VR512:$dst,
                                     (int_x86_tcvtrowd2ps_internal GR16:$src1, GR16:$src2,
                                      TILE:$src3, imm:$src4))]>;
    def PTCVTROWD2PSrreV : PseudoI<(outs VR512:$dst),
                                   (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
                                   [(set VR512:$dst,
                                     (int_x86_tcvtrowd2ps_internal GR16:$src1, GR16:$src2,
                                      TILE:$src3, GR32:$src4))]>;
    def PTCVTROWPS2BF16HrriV : PseudoI<(outs VR512:$dst),
                                       (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
                                       [(set VR512:$dst,
                                         (int_x86_tcvtrowps2bf16h_internal GR16:$src1, GR16:$src2,
                                          TILE:$src3, imm:$src4))]>;
    def PTCVTROWPS2BF16HrreV : PseudoI<(outs VR512:$dst),
                                       (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
                                       [(set VR512:$dst,
                                         (int_x86_tcvtrowps2bf16h_internal GR16:$src1, GR16:$src2,
                                          TILE:$src3, GR32:$src4))]>;
    def PTCVTROWPS2BF16LrriV : PseudoI<(outs VR512:$dst),
                                       (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
                                       [(set VR512:$dst,
                                         (int_x86_tcvtrowps2bf16l_internal GR16:$src1, GR16:$src2,
                                          TILE:$src3, imm:$src4))]>;
    def PTCVTROWPS2BF16LrreV : PseudoI<(outs VR512:$dst),
                                       (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
                                       [(set VR512:$dst,
                                         (int_x86_tcvtrowps2bf16l_internal GR16:$src1, GR16:$src2,
                                          TILE:$src3, GR32:$src4))]>;
    def PTCVTROWPS2PHHrriV : PseudoI<(outs VR512:$dst),
                                     (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
                                     [(set VR512:$dst,
                                       (int_x86_tcvtrowps2phh_internal GR16:$src1, GR16:$src2,
                                        TILE:$src3, imm:$src4))]>;
    def PTCVTROWPS2PHHrreV : PseudoI<(outs VR512:$dst),
                                     (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
                                     [(set VR512:$dst,
                                       (int_x86_tcvtrowps2phh_internal GR16:$src1, GR16:$src2,
                                        TILE:$src3, GR32:$src4))]>;
    def PTCVTROWPS2PHLrriV : PseudoI<(outs VR512:$dst),
                                     (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
                                     [(set VR512:$dst,
                                       (int_x86_tcvtrowps2phl_internal GR16:$src1, GR16:$src2,
                                        TILE:$src3, imm:$src4))]>;
    def PTCVTROWPS2PHLrreV : PseudoI<(outs VR512:$dst),
                                     (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
                                     [(set VR512:$dst,
                                       (int_x86_tcvtrowps2phl_internal GR16:$src1, GR16:$src2,
                                        TILE:$src3, GR32:$src4))]>;
  }
}

// Defines, for one mnemonic Opstr:
//   rre / rri        - real instructions with a GR32 / immediate second
//                      operand, using Opcode1+P1 and Opcode2+P2 respectively.
//   P<NAME>rre / rri - custom-inserter pseudos taking a u8imm first operand,
//                      matched against the intrinsic named "int_x86_"#Opstr.
multiclass AMXAVX512_BASE<bits<8> Opcode1, bits<8> Opcode2, string Opstr,
                          Prefix P1, Prefix P2> {
  let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode],
      SchedRW = [WriteSystem] in {
    let OpPrefix = P1 in
    def rre : I<Opcode1, MRMSrcReg4VOp3, (outs VR512:$dst),
                (ins TILE:$src1, GR32:$src2),
                !strconcat(Opstr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                []>, EVEX, VVVV, EVEX_V512, T8;
    let OpPrefix = P2 in
    def rri : Ii8<Opcode2, MRMSrcReg, (outs VR512:$dst),
                  (ins TILE:$src1, i32u8imm:$src2),
                  !strconcat(Opstr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  []>, EVEX, EVEX_V512, TA;
    let usesCustomInserter = 1 in {
      def "P"#NAME#"rre" : PseudoI<(outs VR512:$dst),
                                   (ins u8imm:$src1, GR32:$src2),
                                   [(set VR512:$dst,
                                     (!cast<Intrinsic>("int_x86_"#Opstr)
                                      timm:$src1, GR32:$src2))]>;
      def "P"#NAME#"rri" : PseudoI<(outs VR512:$dst),
                                   (ins u8imm:$src1, i32u8imm:$src2),
                                   [(set VR512:$dst,
                                     (!cast<Intrinsic>("int_x86_"#Opstr)
                                      timm:$src1, imm:$src2))]>;
    }
  }
}

defm TCVTROWPS2PHH : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2phh", PS, PS>;
defm TCVTROWPS2PHL : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2phl", PD, XD>;
defm TCVTROWPS2BF16H : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2bf16h", XD, XD>;
defm TCVTROWPS2BF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2bf16l", XS, XS>;

// TILEMOVROW: rri takes an immediate second operand, rre a GR32 one.
multiclass m_tilemovrow {
  let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
    let SchedRW = [WriteSystem] in {
      def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
                    (ins TILE:$src1, u8imm:$src2),
                    "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    []>, TA, PD, EVEX, EVEX_V512;
      def rre : I<0x4A, MRMSrcReg4VOp3, (outs VR512:$dst),
                  (ins TILE:$src1, GR32:$src2),
                  "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  []>, T8, PD, EVEX, VVVV, EVEX_V512;
    }
  } // HasAMXAVX512, HasAVX10_2_512, In64BitMode
}

defm TILEMOVROW : m_tilemovrow;

let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    let usesCustomInserter = 1 in {
      def PTILEMOVROWrri : PseudoI<(outs VR512:$dst),
                                   (ins u8imm:$src1, i32u8imm:$src2),
                                   [(set VR512:$dst,
                                     (int_x86_tilemovrow timm:$src1,
                                      imm:$src2))]>;
      def PTILEMOVROWrre : PseudoI<(outs VR512:$dst),
                                   (ins u8imm:$src1, GR32:$src2),
                                   [(set VR512:$dst,
                                     (int_x86_tilemovrow timm:$src1,
                                      GR32:$src2))]>;
    }

    def PTILEMOVROWrriV : PseudoI<(outs VR512:$dst),
                                  (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
                                  [(set VR512:$dst,
                                    (int_x86_tilemovrow_internal GR16:$src1, GR16:$src2,
                                     TILE:$src3, imm:$src4))]>;
    def PTILEMOVROWrreV : PseudoI<(outs VR512:$dst),
                                  (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
                                  [(set VR512:$dst,
                                    (int_x86_tilemovrow_internal GR16:$src1, GR16:$src2,
                                     TILE:$src3, GR32:$src4))]>;
  }
}

// AMX-TF32
let Predicates = [HasAMXTF32, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    let Constraints = "$src1 = $dst" in {
      def TMMULTF32PS: I<0x48, MRMSrcReg4VOp3, (outs TILE:$dst),
                         (ins TILE:$src1, TILE:$src2, TILE:$src3),
                         "tmmultf32ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         []>, VEX, VVVV, T8, PD;
    }
    let Constraints = "$src4 = $dst" in {
      def PTMMULTF32PSV : PseudoI<(outs TILE:$dst),
                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                       TILE:$src4, TILE:$src5, TILE:$src6),
                                  [(set TILE:$dst,
                                    (int_x86_tmmultf32ps_internal GR16:$src1,
                                     GR16:$src2, GR16:$src3, TILE:$src4,
                                     TILE:$src5, TILE:$src6))]>;
    }
    let usesCustomInserter = 1 in {
      def PTMMULTF32PS : PseudoI<(outs),
                                 (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                                 [(int_x86_tmmultf32ps timm:$src1, timm:$src2,
                                   timm:$src3)]>;
    }
  } // SchedRW = [WriteSystem]
} // HasAMXTF32

// AMX-TF32 + AMX-TRANSPOSE
let Predicates = [HasAMXTF32, HasAMXTRANSPOSE, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    let Constraints = "$src1 = $dst" in {
      def TTMMULTF32PS: I<0x48, MRMSrcReg4VOp3, (outs TILE:$dst),
                          (ins TILE:$src1, TILE:$src2, TILE:$src3),
                          "ttmmultf32ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                          []>, VEX, VVVV, T8, PS;
    }
    let Constraints = "$src4 = $dst" in {
      def PTTMMULTF32PSV : PseudoI<(outs TILE:$dst),
                                   (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                        TILE:$src4, TILE:$src5, TILE:$src6),
                                   [(set TILE:$dst,
                                     (int_x86_ttmmultf32ps_internal GR16:$src1,
                                      GR16:$src2, GR16:$src3, TILE:$src4,
                                      TILE:$src5, TILE:$src6))]>;
    }
    let usesCustomInserter = 1 in {
      def PTTMMULTF32PS : PseudoI<(outs),
                                  (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                                  [(int_x86_ttmmultf32ps timm:$src1, timm:$src2,
                                    timm:$src3)]>;
    }
  } // SchedRW = [WriteSystem]
} // HasAMXTF32, HasAMXTRANSPOSE