//===---- X86InstrAMX.td - AMX Instruction Set Extension --*- tablegen -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel AMX instruction
// set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AMX instructions

// Tile configuration, load and store instructions. The multiclass is
// instantiated twice below: once with an empty suffix under VEX and once with
// an "_EVEX" suffix under EVEX, NoCD8.
//
// NOTE(review): the template parameter list of this multiclass is not visible
// in this copy of the file. The body refers to `Suffix` and `HasEGPR`, and the
// defm lines below pass a string and a predicate, so a parameter list of that
// shape presumably belongs here -- TODO restore from the original file.
multiclass AMX_TILE_COMMON {
let Predicates = [HasAMXTILE, HasEGPR, In64BitMode] in {
  // LDTILECFG is modelled as defining every tile register TMM0-TMM7.
  let hasSideEffects = 1,
      Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
  def LDTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
                           "ldtilecfg\t$src",
                           [(int_x86_ldtilecfg addr:$src)]>, T8, PS;
  let hasSideEffects = 1 in
  def STTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
                           "sttilecfg\t$src",
                           [(int_x86_sttilecfg addr:$src)]>, T8, PD;
  let mayLoad = 1 in
  def TILELOADD#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
                           (ins sibmem:$src),
                           "tileloadd\t{$src, $dst|$dst, $src}", []>, T8, XD;
  let mayLoad = 1 in
  def TILELOADDT1#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
                             (ins sibmem:$src),
                             "tileloaddt1\t{$src, $dst|$dst, $src}", []>, T8, PD;
  let mayStore = 1 in
  def TILESTORED#Suffix : I<0x4b, MRMDestMemFSIB, (outs),
                            (ins sibmem:$dst, TILE:$src),
                            "tilestored\t{$src, $dst|$dst, $src}", []>, T8, XS;
}
}

let SchedRW = [WriteSystem] in {
  defm "" : AMX_TILE_COMMON<"", NoEGPR>, VEX;
  defm "" : AMX_TILE_COMMON<"_EVEX", HasEGPR>, EVEX, NoCD8;

  let Predicates = [HasAMXTILE, In64BitMode] in {
    let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
    def TILERELEASE : I<0x49, MRM_C0, (outs), (ins), "tilerelease",
                        [(int_x86_tilerelease)]>, VEX, T8, PS;
    def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins), "tilezero\t$dst",
                     []>, VEX, T8, XD;

    // Pseudo instruction for RA.
    let isPseudo = true, mayLoad = 1, hasSideEffects = 1,
        Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
    def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>;

    // The "V" pseudos below operate on TILE register operands and carry
    // additional GR16 operands ahead of the memory/tile operand.
    let isPseudo = true, mayLoad = 1 in
    def PTILELOADDV : PseudoI<(outs TILE:$dst),
                              (ins GR16:$src1, GR16:$src2, opaquemem:$src3),
                              []>;
    let isPseudo = true, mayLoad = 1 in
    def PTILELOADDT1V : PseudoI<(outs TILE:$dst),
                                (ins GR16:$src1, GR16:$src2, opaquemem:$src3),
                                []>;
    let isPseudo = true, mayStore = 1 in
    def PTILESTOREDV : PseudoI<(outs),
                               (ins GR16:$src1, GR16:$src2, opaquemem:$src3,
                                TILE:$src4), []>;
    let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
        canFoldAsLoad = 1, usesCustomInserter = 1 in
    def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
                             [(set TILE:$dst, (int_x86_tilezero_internal
                               GR16:$src1, GR16:$src2))]>;

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      let mayLoad = 1 in
      def PTILELOADD : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
      let mayLoad = 1 in
      def PTILELOADDT1 : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
      let mayStore = 1 in
      def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>;
      def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
                              [(int_x86_tilezero timm:$src)]>;
    }
  } // Predicates
} // SchedRW

// AMX-INT8
let Predicates = [HasAMXINT8, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    // $src1 is tied to $dst, so it does not appear in the assembly string.
    let Constraints = "$src1 = $dst" in {
      def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX, VVVV, T8, XD;
      def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX, VVVV, T8, XS;
      def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX, VVVV, T8, PD;
      def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX, VVVV, T8;
    }

    // Pseudo instruction for RA.
    let isPseudo = true, Constraints = "$src4 = $dst" in {
      def PTDPBSSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE: $dst,
                                (int_x86_tdpbssd_internal GR16:$src1, GR16:$src2,
                                 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
      def PTDPBSUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE: $dst,
                                (int_x86_tdpbsud_internal GR16:$src1, GR16:$src2,
                                 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
      def PTDPBUSDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE: $dst,
                                (int_x86_tdpbusd_internal GR16:$src1, GR16:$src2,
                                 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
      def PTDPBUUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE: $dst,
                                (int_x86_tdpbuud_internal GR16:$src1, GR16:$src2,
                                 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
    }

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      def PTDPBSSD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbssd timm:$src1,
                               timm:$src2, timm:$src3)]>;
      def PTDPBSUD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbsud timm:$src1,
                               timm:$src2, timm:$src3)]>;
      def PTDPBUSD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbusd timm:$src1,
                               timm:$src2, timm:$src3)]>;
      def PTDPBUUD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbuud timm:$src1,
                               timm:$src2, timm:$src3)]>;
    }
  } // SchedRW = [WriteSystem]
} // HasAMXINT8

// AMX-BF16
let Predicates = [HasAMXBF16, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    let Constraints = "$src1 = $dst" in
    def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                      []>, VEX, VVVV, T8, XS;

    // Pseudo instruction for RA.
    let isPseudo = true, Constraints = "$src4 = $dst" in
    def PTDPBF16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE: $dst,
                                (int_x86_tdpbf16ps_internal GR16:$src1,
                                 GR16:$src2, GR16:$src3, TILE:$src4,
                                 TILE:$src5, TILE:$src6))]>;

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      def PTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1,
                               u8imm:$src2, u8imm:$src3),
                               [(int_x86_tdpbf16ps timm:$src1,
                                 timm:$src2, timm:$src3)]>;
    }
  } // SchedRW = [WriteSystem]
} // HasAMXBF16

// AMX-FP16
let Predicates = [HasAMXFP16, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    let Constraints = "$src1 = $dst" in {
      def TDPFP16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
                        (ins TILE:$src1, TILE:$src2, TILE:$src3),
                        "tdpfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                        []>, VEX, VVVV, T8, XD;
    }

    // Pseudo instruction for RA.
    let isPseudo = true, Constraints = "$src4 = $dst" in {
      def PTDPFP16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
                                GR16:$src2, GR16:$src3, TILE:$src4,
                                TILE:$src5, TILE:$src6),
                                [(set TILE: $dst,
                                  (int_x86_tdpfp16ps_internal GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6))]>;
    }

    let usesCustomInserter = 1 in {
      def PTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1,
                               u8imm:$src2, u8imm:$src3),
                               [(int_x86_tdpfp16ps timm:$src1,
                                 timm:$src2, timm:$src3)]>;
    }
  } // SchedRW = [WriteSystem]
} // HasAMXFP16

// AMX-COMPLEX
let Predicates = [HasAMXCOMPLEX, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    let Constraints = "$src1 = $dst" in {
      def TCMMIMFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                           (ins TILE:$src1, TILE:$src2, TILE:$src3),
                           "tcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                           []>, T8, PD, VEX, VVVV;
      def TCMMRLFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                           (ins TILE:$src1, TILE:$src2, TILE:$src3),
                           "tcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                           []>, VEX, VVVV, WIG, T8;
    } // Constraints = "$src1 = $dst"

    let Constraints = "$src4 = $dst" in {
      def PTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6),
                                   [(set TILE: $dst,
                                     (int_x86_tcmmimfp16ps_internal GR16:$src1,
                                      GR16:$src2, GR16:$src3, TILE:$src4,
                                      TILE:$src5, TILE:$src6))]>;
      def PTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6),
                                   [(set TILE: $dst,
                                     (int_x86_tcmmrlfp16ps_internal GR16:$src1,
                                      GR16:$src2, GR16:$src3, TILE:$src4,
                                      TILE:$src5, TILE:$src6))]>;
    }

    let usesCustomInserter = 1 in {
      def PTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1,
                                  u8imm:$src2, u8imm:$src3),
                                  [(int_x86_tcmmimfp16ps timm:$src1,
                                    timm:$src2, timm:$src3)]>;
      def PTCMMRLFP16PS : PseudoI<(outs), (ins u8imm:$src1,
                                  u8imm:$src2, u8imm:$src3),
                                  [(int_x86_tcmmrlfp16ps timm:$src1,
                                    timm:$src2, timm:$src3)]>;
    }
  } // SchedRW = [WriteSystem]
} // HasAMXCOMPLEX

// AMX-FP8
let Predicates = [HasAMXFP8, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    let Constraints = "$src1 = $dst" in {
      // NOTE(review): this class header looks truncated in this copy of the
      // file (unbalanced `>`, and `I` has no template arguments). Presumably
      // the parameter list opener and the arguments to `I` were lost --
      // TODO restore from the original file.
      class AMX_FP8_BASE Opcode, string Opstr> : I, VEX, VVVV;
    }

    def TDPBF8PS : AMX_FP8_BASE<0xfd, "tdpbf8ps">, T_MAP5, PS;
    def TDPBHF8PS : AMX_FP8_BASE<0xfd, "tdpbhf8ps">, T_MAP5, XD;
    def TDPHBF8PS : AMX_FP8_BASE<0xfd, "tdphbf8ps">, T_MAP5, XS;
    def TDPHF8PS : AMX_FP8_BASE<0xfd, "tdphf8ps">, T_MAP5, PD;

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      def PTDPBF8PS : PseudoI<(outs),
                              (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                              [(int_x86_tdpbf8ps timm:$src1, timm:$src2,
                                timm:$src3)]>;
      def PTDPBHF8PS : PseudoI<(outs),
                               (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                               [(int_x86_tdpbhf8ps timm:$src1, timm:$src2,
                                 timm:$src3)]>;
      def PTDPHBF8PS : PseudoI<(outs),
                               (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                               [(int_x86_tdphbf8ps timm:$src1, timm:$src2,
                                 timm:$src3)]>;
      def PTDPHF8PS : PseudoI<(outs),
                              (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                              [(int_x86_tdphf8ps timm:$src1, timm:$src2,
                                timm:$src3)]>;
    }

    let Constraints = "$src4 = $dst" in {
      def PTDPBF8PSV : PseudoI<(outs TILE:$dst),
                               (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                TILE:$src4, TILE:$src5, TILE:$src6),
                               [(set TILE:$dst,
                                 (int_x86_tdpbf8ps_internal GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6))]>;
      def PTDPBHF8PSV : PseudoI<(outs TILE:$dst),
                                (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                 TILE:$src4, TILE:$src5, TILE:$src6),
                                [(set TILE:$dst,
                                  (int_x86_tdpbhf8ps_internal GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6))]>;
      def PTDPHBF8PSV : PseudoI<(outs TILE:$dst),
                                (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                 TILE:$src4, TILE:$src5, TILE:$src6),
                                [(set TILE:$dst,
                                  (int_x86_tdphbf8ps_internal GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6))]>;
      def PTDPHF8PSV : PseudoI<(outs TILE:$dst),
                               (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                TILE:$src4, TILE:$src5, TILE:$src6),
                               [(set TILE:$dst,
                                 (int_x86_tdphf8ps_internal GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6))]>;
    }
  } // SchedRW = [WriteSystem]
} // HasAMXFP8

// Pseudos that store/load a TILEPair operand to/from memory.
let Predicates = [HasAMXTILE, In64BitMode], isPseudo = true,
    SchedRW = [WriteSystem] in {
  let mayStore = 1 in
  def PTILEPAIRSTORE : PseudoI<(outs), (ins opaquemem:$src1,
                               TILEPair:$src2), []>;
  let mayLoad = 1 in
  def PTILEPAIRLOAD : PseudoI<(outs TILEPair:$dst),
                              (ins opaquemem:$src), []>;
}

// NOTE(review): this multiclass looks truncated in this copy of the file: the
// parameter list is missing its opener and each `I` has no template arguments
// (opcode, format, operands, asm string). The defm lines below pass two 8-bit
// opcodes and two strings -- TODO restore from the original file.
multiclass T2RPNTLVW_Base op1, bits<8> op2, string rs, string suffix> {
  def Z0#rs#suffix : I, PS;
  def Z0#rs#T1#suffix : I, PS;
  def Z1#rs#suffix : I, PD;
  def Z1#rs#T1#suffix : I, PD;
}

// Four instantiations: {plain, "RS"} x {VEX, EVEX}. The "RS" variants
// additionally require HasAMXMOVRS; the EVEX variants require HasEGPR.
let Predicates = [HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in
  defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "">, T8, VEX;

let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode],
    SchedRW = [WriteSystem] in
  defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "_EVEX">, T8, EVEX, NoCD8;

let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode],
    SchedRW = [WriteSystem] in
  defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "">, T_MAP5, VEX;

let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode],
    SchedRW = [WriteSystem] in
  defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "_EVEX">, T_MAP5, EVEX,
                   NoCD8;

// AMX-TRANSPOSE
let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    def TTRANSPOSED : I<0x5f, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
                        "ttransposed\t{$src, $dst|$dst, $src}", []>,
                        VEX, T8, XS;
    let isPseudo = true in {
      def PT2RPNTLVWZ0V : PseudoI<(outs TILEPair:$dst),
                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                   opaquemem:$src4), []>;
      def PT2RPNTLVWZ0T1V : PseudoI<(outs TILEPair:$dst),
                                    (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                     opaquemem:$src4), []>;
      def PT2RPNTLVWZ1V : PseudoI<(outs TILEPair:$dst),
                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                   opaquemem:$src4), []>;
      def PT2RPNTLVWZ1T1V : PseudoI<(outs TILEPair:$dst),
                                    (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                     opaquemem:$src4), []>;
    }

    def PTTRANSPOSEDV : PseudoI<(outs TILE:$dst),
                                (ins GR16:$src1, GR16:$src2, TILE:$src),
                                [(set TILE: $dst,
                                  (int_x86_ttransposed_internal GR16:$src1,
                                   GR16:$src2, TILE:$src))]>;

    let usesCustomInserter = 1 in {
      def PT2RPNTLVWZ0 : PseudoI<(outs), (ins u8imm:$dst,
                                 sibmem:$src1), []>;
      def PT2RPNTLVWZ0T1 : PseudoI<(outs), (ins u8imm:$dst,
                                   sibmem:$src1), []>;
      def PT2RPNTLVWZ1 : PseudoI<(outs), (ins u8imm:$dst,
                                 sibmem:$src1), []>;
      def PT2RPNTLVWZ1T1 : PseudoI<(outs), (ins u8imm:$dst,
                                   sibmem:$src1), []>;
      def PTTRANSPOSED : PseudoI<(outs), (ins u8imm:$dst, u8imm:$src),
                                 [(int_x86_ttransposed timm:$dst, timm:$src)]>;
    }
  } // SchedRW = [WriteSystem]
} // HasAMXTRANSPOSE

// AMX-BF16 + AMX-TRANSPOSE
let Predicates = [HasAMXBF16, HasAMXTRANSPOSE, In64BitMode],
    SchedRW = [WriteSystem] in {
  let Constraints = "$src1 = $dst" in
  def TTDPBF16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                     (ins TILE:$src1, TILE:$src2, TILE:$src3),
                     "ttdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                     []>, VEX, VVVV, T8,XS;
  let Constraints = "$src4 = $dst" in
  def PTTDPBF16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                             GR16:$src2, GR16:$src3, TILE:$src4,
                             TILE:$src5, TILE:$src6),
                             [(set TILE: $dst,
                               (int_x86_ttdpbf16ps_internal GR16:$src1,
                                GR16:$src2, GR16:$src3, TILE:$src4,
                                TILE:$src5, TILE:$src6))]>;
  let usesCustomInserter = 1 in
  def PTTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2,
                            u8imm:$src3),
                            [(int_x86_ttdpbf16ps timm:$src1, timm:$src2,
                              timm:$src3)]>;
} // HasAMXBF16, HasAMXTRANSPOSE

// AMX-FP16 + AMX-TRANSPOSE
let Predicates = [HasAMXFP16, HasAMXTRANSPOSE, In64BitMode],
    SchedRW = [WriteSystem] in {
  let Constraints = "$src1 = $dst" in
  def TTDPFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                     (ins TILE:$src1, TILE:$src2, TILE:$src3),
                     "ttdpfp16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                     []>, VEX, VVVV, T8,XD;
  let Constraints = "$src4 = $dst" in
  def PTTDPFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                             GR16:$src2, GR16:$src3, TILE:$src4,
                             TILE:$src5, TILE:$src6),
                             [(set TILE: $dst,
                               (int_x86_ttdpfp16ps_internal GR16:$src1,
                                GR16:$src2, GR16:$src3, TILE:$src4,
                                TILE:$src5, TILE:$src6))]>;
  let usesCustomInserter = 1 in
  def PTTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2,
                            u8imm:$src3),
                            [(int_x86_ttdpfp16ps timm:$src1, timm:$src2,
                              timm:$src3)]>;
} // HasAMXFP16, HasAMXTRANSPOSE

// AMX-COMPLEX + AMX-TRANSPOSE
let Predicates = [HasAMXCOMPLEX, HasAMXTRANSPOSE, In64BitMode],
    SchedRW = [WriteSystem] in {
  let Constraints = "$src1 = $dst" in {
    def TTCMMIMFP16PS : I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
                          (ins TILE:$src1, TILE:$src2, TILE:$src3),
                          "ttcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                          []>, VEX, VVVV, T8,XD;
    def TTCMMRLFP16PS: I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
                         (ins TILE:$src1, TILE:$src2, TILE:$src3),
                         "ttcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                         []>, VEX, VVVV, T8,XS;
    def TCONJTCMMIMFP16PS : I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
                              (ins TILE:$src1, TILE:$src2, TILE:$src3),
                              "tconjtcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                              []>, VEX, VVVV, WIG, T8,PS;
  }
  def TCONJTFP16 : I<0x6b, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
                     "tconjtfp16\t{$src, $dst|$dst, $src}", []>, VEX, T8,PD;

  let Constraints = "$src4 = $dst" in {
    def PTTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6),
                                  [(set TILE: $dst,
                                    (int_x86_ttcmmimfp16ps_internal GR16:$src1,
                                     GR16:$src2, GR16:$src3, TILE:$src4,
                                     TILE:$src5, TILE:$src6))]>;
    def PTTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6),
                                  [(set TILE: $dst,
                                    (int_x86_ttcmmrlfp16ps_internal GR16:$src1,
                                     GR16:$src2, GR16:$src3, TILE:$src4,
                                     TILE:$src5, TILE:$src6))]>;
    def PTCONJTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                      GR16:$src2, GR16:$src3, TILE:$src4,
                                      TILE:$src5, TILE:$src6),
                                      [(set TILE: $dst,
                                        (int_x86_tconjtcmmimfp16ps_internal GR16:$src1,
                                         GR16:$src2, GR16:$src3, TILE:$src4,
                                         TILE:$src5, TILE:$src6))]>;
  }
  def PTCONJTFP16V : PseudoI<(outs TILE:$dst),
                             (ins GR16:$src1, GR16:$src2, TILE:$src3),
                             [(set TILE: $dst,
                               (int_x86_tconjtfp16_internal GR16:$src1,
                                GR16:$src2, TILE:$src3))]>;

  let usesCustomInserter = 1 in {
    def PTTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2,
                                 u8imm:$src3),
                                 [(int_x86_ttcmmimfp16ps timm:$src1,
                                   timm:$src2, timm:$src3)]>;
    def PTTCMMRLFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2,
                                 u8imm:$src3),
                                 [(int_x86_ttcmmrlfp16ps timm:$src1,
                                   timm:$src2, timm:$src3)]>;
    def PTCONJTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2,
                                     u8imm:$src3),
                                     [(int_x86_tconjtcmmimfp16ps timm:$src1,
                                       timm:$src2, timm:$src3)]>;
    def PTCONJTFP16 : PseudoI<(outs), (ins u8imm:$dst, u8imm:$src),
                              [(int_x86_tconjtfp16 timm:$dst, timm:$src)]>;
  }
} // HasAMXCOMPLEX, HasAMXTRANSPOSE

// AMX-MOVRS + AMX-TRANSPOSE pseudos
let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode],
    SchedRW = [WriteSystem] in {
  let isPseudo = true in {
    def PT2RPNTLVWZ0RSV : PseudoI<(outs TILEPair:$dst),
                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                   opaquemem:$src4), []>;
    def PT2RPNTLVWZ0RST1V : PseudoI<(outs TILEPair:$dst),
                                    (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                     opaquemem:$src4), []>;
    def PT2RPNTLVWZ1RSV : PseudoI<(outs TILEPair:$dst),
                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                   opaquemem:$src4), []>;
    def PT2RPNTLVWZ1RST1V : PseudoI<(outs TILEPair:$dst),
                                    (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                     opaquemem:$src4), []>;
  }
  let usesCustomInserter = 1 in {
    def PT2RPNTLVWZ0RS : PseudoI<(outs), (ins u8imm:$dst,
                                 sibmem:$src1), []>;
    def PT2RPNTLVWZ0RST1 : PseudoI<(outs), (ins u8imm:$dst,
                                   sibmem:$src1), []>;
    def PT2RPNTLVWZ1RS : PseudoI<(outs), (ins u8imm:$dst,
                                 sibmem:$src1), []>;
    def PT2RPNTLVWZ1RST1 : PseudoI<(outs), (ins u8imm:$dst,
                                   sibmem:$src1), []>;
  }
} // HasAMXMOVRS, HasAMXTRANSPOSE

// NOTE(review): the template parameter list of this multiclass is not visible
// in this copy of the file. The body refers to `suffix` and the defm lines
// below pass a single string, so a one-string parameter list presumably
// belongs here -- TODO restore from the original file.
multiclass TILELOADDRS_Base {
  def suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1),
                 "tileloaddrs\t{$src1, $dst|$dst, $src1}", []>, T8, XD;
  def T1#suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
                    (ins sibmem:$src1),
                    "tileloaddrst1\t{$src1, $dst|$dst, $src1}", []>, T8, PD;
}

let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in
  defm TILELOADDRS : TILELOADDRS_Base<"">, VEX;

let Predicates = [HasAMXMOVRS, HasEGPR, In64BitMode],
    SchedRW = [WriteSystem] in
  defm TILELOADDRS : TILELOADDRS_Base<"_EVEX">, EVEX, NoCD8;

let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
  let isPseudo = true, mayLoad = 1 in {
    def PTILELOADDRSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                GR16:$src2, opaquemem:$src3), []>;
    def PTILELOADDRST1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                  GR16:$src2, opaquemem:$src3), []>;
  }

  let usesCustomInserter = 1, mayLoad = 1 in {
    def PTILELOADDRS : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
    def PTILELOADDRST1 : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
  }
} // HasAMXMOVRS, In64BitMode

// TCVTROWD2PS: an immediate form (rri) and a GR32 form (rre), both producing
// a VR512 result from a TILE source.
multiclass m_tcvtrowd2ps {
  let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
    let SchedRW = [WriteSystem] in {
      def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
                    (ins TILE:$src1, i32u8imm:$src2),
                    "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    []>, TA,XS, EVEX, EVEX_V512;
      def rre : I<0x4A, MRMSrcReg4VOp3, (outs VR512:$dst),
                  (ins TILE:$src1, GR32:$src2),
                  "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  []>, T8,XS, EVEX, VVVV, EVEX_V512;
    }
  } // HasAMXAVX512, HasAVX10_2_512, In64BitMode
}

defm TCVTROWD2PS : m_tcvtrowd2ps;

let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    let usesCustomInserter = 1 in {
      def PTCVTROWD2PSrri : PseudoI<(outs VR512:$dst),
                                    (ins u8imm:$src1, i32u8imm:$src2),
                                    [(set VR512:$dst,
                                      (int_x86_tcvtrowd2ps timm:$src1,
                                       imm:$src2))]>;
      def PTCVTROWD2PSrre : PseudoI<(outs VR512:$dst),
                                    (ins u8imm:$src1, GR32:$src2),
                                    [(set VR512:$dst,
                                      (int_x86_tcvtrowd2ps timm:$src1,
                                       GR32:$src2))]>;
    }

    def PTCVTROWD2PSrriV : PseudoI<(outs VR512:$dst),
                                   (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                    i32u8imm:$src4),
                                   [(set VR512: $dst,
                                     (int_x86_tcvtrowd2ps_internal GR16:$src1,
                                      GR16:$src2, TILE:$src3, imm:$src4))]>;
    def PTCVTROWD2PSrreV : PseudoI<(outs VR512:$dst),
                                   (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                    GR32:$src4),
                                   [(set VR512: $dst,
                                     (int_x86_tcvtrowd2ps_internal GR16:$src1,
                                      GR16:$src2, TILE:$src3, GR32:$src4))]>;
    def PTCVTROWPS2BF16HrriV : PseudoI<(outs VR512:$dst),
                                       (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                        i32u8imm:$src4),
                                       [(set VR512: $dst,
                                         (int_x86_tcvtrowps2bf16h_internal GR16:$src1,
                                          GR16:$src2, TILE:$src3, imm:$src4))]>;
    def PTCVTROWPS2BF16HrreV : PseudoI<(outs VR512:$dst),
                                       (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                        GR32:$src4),
                                       [(set VR512: $dst,
                                         (int_x86_tcvtrowps2bf16h_internal GR16:$src1,
                                          GR16:$src2, TILE:$src3, GR32:$src4))]>;
    def PTCVTROWPS2BF16LrriV : PseudoI<(outs VR512:$dst),
                                       (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                        i32u8imm:$src4),
                                       [(set VR512: $dst,
                                         (int_x86_tcvtrowps2bf16l_internal GR16:$src1,
                                          GR16:$src2, TILE:$src3, imm:$src4))]>;
    def PTCVTROWPS2BF16LrreV : PseudoI<(outs VR512:$dst),
                                       (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                        GR32:$src4),
                                       [(set VR512: $dst,
                                         (int_x86_tcvtrowps2bf16l_internal GR16:$src1,
                                          GR16:$src2, TILE:$src3, GR32:$src4))]>;
    def PTCVTROWPS2PHHrriV : PseudoI<(outs VR512:$dst),
                                     (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                      i32u8imm:$src4),
                                     [(set VR512: $dst,
                                       (int_x86_tcvtrowps2phh_internal GR16:$src1,
                                        GR16:$src2, TILE:$src3, imm:$src4))]>;
    def PTCVTROWPS2PHHrreV : PseudoI<(outs VR512:$dst),
                                     (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                      GR32:$src4),
                                     [(set VR512: $dst,
                                       (int_x86_tcvtrowps2phh_internal GR16:$src1,
                                        GR16:$src2, TILE:$src3, GR32:$src4))]>;
    def PTCVTROWPS2PHLrriV : PseudoI<(outs VR512:$dst),
                                     (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                      i32u8imm:$src4),
                                     [(set VR512: $dst,
                                       (int_x86_tcvtrowps2phl_internal GR16:$src1,
                                        GR16:$src2, TILE:$src3, imm:$src4))]>;
    def PTCVTROWPS2PHLrreV : PseudoI<(outs VR512:$dst),
                                     (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                      GR32:$src4),
                                     [(set VR512: $dst,
                                       (int_x86_tcvtrowps2phl_internal GR16:$src1,
                                        GR16:$src2, TILE:$src3, GR32:$src4))]>;
  }
}

// Defines, per instantiation, an `rre` and an `rri` instruction (with
// OpPrefix P1 and P2 respectively) plus the matching "P"#NAME#"rre"/"rri"
// immediate-tile pseudos bound to the "int_x86_"#Opstr intrinsic.
//
// NOTE(review): this multiclass looks truncated in this copy of the file: the
// parameter list is missing its opener, `I` and `Ii8` have no template
// arguments, and `!cast` has no type argument -- TODO restore from the
// original file.
multiclass AMXAVX512_BASE Opcode1, bits<8> Opcode2, string Opstr,
                          Prefix P1, Prefix P2> {
  let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode],
      SchedRW = [WriteSystem] in {
    let OpPrefix = P1 in
    def rre : I, EVEX, VVVV, EVEX_V512, T8;
    let OpPrefix = P2 in
    def rri : Ii8, EVEX, EVEX_V512, TA;
    let usesCustomInserter = 1 in {
      def "P"#NAME#"rre" : PseudoI<(outs VR512:$dst),
                                   (ins u8imm:$src1, GR32:$src2),
                                   [(set VR512:$dst,
                                     (!cast("int_x86_"#Opstr)
                                      timm:$src1, GR32:$src2))]>;
      def "P"#NAME#"rri" : PseudoI<(outs VR512:$dst),
                                   (ins u8imm:$src1, i32u8imm:$src2),
                                   [(set VR512:$dst,
                                     (!cast("int_x86_"#Opstr)
                                      timm:$src1, imm:$src2))]>;
    }
  }
}

defm TCVTROWPS2PHH : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2phh", PS, PS>;
defm TCVTROWPS2PHL : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2phl", PD, XD>;
defm TCVTROWPS2BF16H : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2bf16h", XD, XD>;
defm TCVTROWPS2BF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2bf16l", XS, XS>;

// TILEMOVROW: an immediate form (rri) and a GR32 form (rre), both producing
// a VR512 result from a TILE source.
multiclass m_tilemovrow {
  let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
    let SchedRW = [WriteSystem] in {
      def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
                    (ins TILE:$src1, u8imm:$src2),
                    "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    []>, TA,PD, EVEX, EVEX_V512;
      def rre : I<0x4A, MRMSrcReg4VOp3, (outs VR512:$dst),
                  (ins TILE:$src1, GR32:$src2),
                  "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  []>, T8,PD, EVEX, VVVV, EVEX_V512;
    }
  } // HasAMXAVX512, HasAVX10_2_512, In64BitMode
}

defm TILEMOVROW : m_tilemovrow;

let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    let usesCustomInserter = 1 in {
      def PTILEMOVROWrri : PseudoI<(outs VR512:$dst),
                                   (ins u8imm:$src1, i32u8imm:$src2),
                                   [(set VR512:$dst,
                                     (int_x86_tilemovrow timm:$src1,
                                      imm:$src2))]>;
      def PTILEMOVROWrre : PseudoI<(outs VR512:$dst),
                                   (ins u8imm:$src1, GR32:$src2),
                                   [(set VR512:$dst,
                                     (int_x86_tilemovrow timm:$src1,
                                      GR32:$src2))]>;
    }

    def PTILEMOVROWrriV : PseudoI<(outs VR512:$dst),
                                  (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                   i32u8imm:$src4),
                                  [(set VR512: $dst,
                                    (int_x86_tilemovrow_internal GR16:$src1,
                                     GR16:$src2, TILE:$src3, imm:$src4))]>;
    def PTILEMOVROWrreV : PseudoI<(outs VR512:$dst),
                                  (ins GR16:$src1, GR16:$src2, TILE:$src3,
                                   GR32:$src4),
                                  [(set VR512: $dst,
                                    (int_x86_tilemovrow_internal GR16:$src1,
                                     GR16:$src2, TILE:$src3, GR32:$src4))]>;
  }
}

// AMX-TF32
let Predicates = [HasAMXTF32, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    let Constraints = "$src1 = $dst" in {
      def TMMULTF32PS: I<0x48, MRMSrcReg4VOp3, (outs TILE:$dst),
                         (ins TILE:$src1, TILE:$src2, TILE:$src3),
                         "tmmultf32ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         []>, VEX, VVVV, T8, PD;
    }
    let Constraints = "$src4 = $dst" in {
      def PTMMULTF32PSV : PseudoI<(outs TILE:$dst),
                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                   TILE:$src4, TILE:$src5, TILE:$src6),
                                  [(set TILE:$dst,
                                    (int_x86_tmmultf32ps_internal GR16:$src1,
                                     GR16:$src2, GR16:$src3, TILE:$src4,
                                     TILE:$src5, TILE:$src6))]>;
    }
    let usesCustomInserter = 1 in {
      def PTMMULTF32PS : PseudoI<(outs),
                                 (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                                 [(int_x86_tmmultf32ps timm:$src1, timm:$src2,
                                   timm:$src3)]>;
    }
  } // SchedRW = [WriteSystem]
} // HasAMXTF32

// AMX-TF32 + AMX-TRANSPOSE
let Predicates = [HasAMXTF32, HasAMXTRANSPOSE, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    let Constraints = "$src1 = $dst" in {
      def TTMMULTF32PS: I<0x48, MRMSrcReg4VOp3, (outs TILE:$dst),
                          (ins TILE:$src1, TILE:$src2, TILE:$src3),
                          "ttmmultf32ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                          []>, VEX, VVVV, T8, PS;
    }
    let Constraints = "$src4 = $dst" in {
      def PTTMMULTF32PSV : PseudoI<(outs TILE:$dst),
                                   (ins GR16:$src1, GR16:$src2, GR16:$src3,
                                    TILE:$src4, TILE:$src5, TILE:$src6),
                                   [(set TILE:$dst,
                                     (int_x86_ttmmultf32ps_internal GR16:$src1,
                                      GR16:$src2, GR16:$src3, TILE:$src4,
                                      TILE:$src5, TILE:$src6))]>;
    }
    let usesCustomInserter = 1 in {
      def PTTMMULTF32PS : PseudoI<(outs),
                                  (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
                                  [(int_x86_ttmmultf32ps timm:$src1, timm:$src2,
                                    timm:$src3)]>;
    }
  } // SchedRW = [WriteSystem]
} // HasAMXTF32, HasAMXTRANSPOSE