xref: /llvm-project/llvm/lib/Target/X86/X86InstrAMX.td (revision 48803bc8c7be25745a0e623e6753261c07281b06)
1//===---- X86InstrAMX.td - AMX Instruction Set Extension --*- tablegen -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the instructions that make up the Intel AMX instruction
10// set.
11//
12//===----------------------------------------------------------------------===//
13
14//===----------------------------------------------------------------------===//
15// AMX instructions
16
// AMX_TILE_COMMON - tile configuration, load and store instructions.
//
// Defines LDTILECFG, STTILECFG, TILELOADD, TILELOADDT1 and TILESTORED, each
// with `Suffix` appended to the record name.
//   Suffix  - string pasted onto every record name.
//   HasEGPR - extra predicate listed next to HasAMXTILE and In64BitMode.
//             NOTE(review): this parameter has the same name as the HasEGPR
//             predicate that one instantiation in this file passes in;
//             inside the multiclass the name presumably refers to the
//             parameter -- confirm.
// No VEX/EVEX mixin is applied here; the instantiations in this file add it
// at the defm site.
17multiclass AMX_TILE_COMMON<string Suffix, Predicate HasEGPR> {
18let Predicates = [HasAMXTILE, HasEGPR, In64BitMode] in {
  // LDTILECFG and STTILECFG share opcode 0x49 and form MRM0m; they differ in
  // the prefix mixin (PS vs. PD). Both set hasSideEffects; only LDTILECFG
  // lists TMM0-TMM7 in Defs.
19  let hasSideEffects = 1,
20      Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
21  def LDTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
22                           "ldtilecfg\t$src",
23                           [(int_x86_ldtilecfg addr:$src)]>,
24                         T8, PS;
25  let hasSideEffects = 1 in
26  def STTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
27                           "sttilecfg\t$src",
28                           [(int_x86_sttilecfg addr:$src)]>,
29                         T8, PD;
  // The three data-movement instructions share opcode 0x4b and carry an
  // empty pattern list; they are told apart by the prefix mixin
  // (XD / PD / XS). The loads use MRMSrcMemFSIB, the store MRMDestMemFSIB.
30  let mayLoad = 1 in
31  def TILELOADD#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
32                           (ins sibmem:$src),
33                           "tileloadd\t{$src, $dst|$dst, $src}", []>,
34                         T8, XD;
35  let mayLoad = 1 in
36  def TILELOADDT1#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
37                             (ins sibmem:$src),
38                             "tileloaddt1\t{$src, $dst|$dst, $src}", []>,
39                           T8, PD;
40  let mayStore = 1 in
41  def TILESTORED#Suffix : I<0x4b, MRMDestMemFSIB, (outs),
42                            (ins sibmem:$dst, TILE:$src),
43                            "tilestored\t{$src, $dst|$dst, $src}", []>,
44                          T8, XS;
45}
46}
47
// AMX-TILE: real instructions plus the pseudos built on top of them.
// Everything in this block gets SchedRW = [WriteSystem].
48let SchedRW = [WriteSystem] in {
  // Instantiate AMX_TILE_COMMON twice with an empty defm name, so the record
  // names are just the def name plus the suffix: unsuffixed records with VEX
  // under NoEGPR, and "_EVEX" records with EVEX, NoCD8 under HasEGPR.
49  defm "" : AMX_TILE_COMMON<"", NoEGPR>, VEX;
50  defm "" : AMX_TILE_COMMON<"_EVEX", HasEGPR>, EVEX, NoCD8;
51
52  let Predicates = [HasAMXTILE, In64BitMode] in {
    // TILERELEASE and TILEZERO are VEX-only here and share opcode 0x49 with
    // the config instructions; only TILERELEASE lists TMM0-TMM7 in Defs.
53    let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
54    def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
55                        "tilerelease", [(int_x86_tilerelease)]>, VEX, T8, PS;
56    def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins),
57                     "tilezero\t$dst", []>,
58                     VEX, T8, XD;
59
60    // Pseudo instructions for RA.
    // The *V pseudos take GR16 operands in addition to their memory/tile
    // operands (NOTE(review): presumably the tile shape -- confirm). Apart
    // from PTILEZEROV they have empty pattern lists.
61    let isPseudo = true, mayLoad = 1, hasSideEffects = 1,
62        Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
63    def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>;
64    let isPseudo = true, mayLoad = 1 in
65    def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
66                                                     GR16:$src2,
67                                                     opaquemem:$src3), []>;
68    let isPseudo = true, mayLoad = 1 in
69    def PTILELOADDT1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
70                                                       GR16:$src2,
71                                                       opaquemem:$src3), []>;
72    let isPseudo = true, mayStore = 1 in
73    def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
74                                            GR16:$src2, opaquemem:$src3,
75                                            TILE:$src4), []>;
    // PTILEZEROV is selected from int_x86_tilezero_internal and is also
    // marked rematerializable, as cheap as a move, and foldable as a load.
76    let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
77        canFoldAsLoad = 1, usesCustomInserter = 1 in
78      def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
79                                [(set TILE:$dst, (int_x86_tilezero_internal
80                                  GR16:$src1, GR16:$src2))]>;
81
82    let usesCustomInserter = 1 in {
83      // Pseudo instructions, using immediates instead of tile registers.
84      // To be translated to the actual instructions in X86ISelLowering.cpp
      // Only PTILEZERO carries a selection pattern in this group.
85      let mayLoad = 1 in
86      def PTILELOADD : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
87      let mayLoad = 1 in
88      def PTILELOADDT1 : PseudoI<(outs), (ins u8imm:$src1,
89                                          sibmem:$src2), []>;
      // NOTE(review): the store pseudo uses i8mem for its address while the
      // two load pseudos above use sibmem -- confirm this is intentional.
90      let mayStore = 1 in
91      def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>;
92      def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
93                              [(int_x86_tilezero timm:$src)]>;
94    }
95  } // Predicates
96} // SchedRW
97
// AMX-INT8: TDPBSSD / TDPBSUD / TDPBUSD / TDPBUUD and their pseudos.
98let Predicates = [HasAMXINT8, In64BitMode] in {
99  let SchedRW = [WriteSystem] in {
    // Real instructions: all four use opcode 0x5e with form MRMSrcReg4VOp3
    // and tie $src1 to $dst. They are told apart by the prefix mixin
    // (XD, XS, PD); TDPBUUD lists no prefix mixin after T8.
100    let Constraints = "$src1 = $dst" in {
101      def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
102                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
103                      "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
104                      VEX, VVVV, T8, XD;
105      def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
106                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
107                      "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
108                      VEX, VVVV, T8, XS;
109      def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
110                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
111                      "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
112                      VEX, VVVV, T8, PD;
113      def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
114                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
115                      "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
116                      VEX, VVVV, T8;
117    }
118
119    // Pseudo instructions for RA.
    // Each *V pseudo takes three GR16 operands followed by three TILE
    // operands, ties the first TILE input ($src4) to $dst, and is selected
    // from the matching int_x86_*_internal intrinsic.
120    let isPseudo = true, Constraints = "$src4 = $dst" in {
121      def PTDPBSSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
122                              GR16:$src2, GR16:$src3, TILE:$src4,
123                              TILE:$src5, TILE:$src6),
124                              [(set TILE: $dst,
125                              (int_x86_tdpbssd_internal GR16:$src1, GR16:$src2,
126                              GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
127      def PTDPBSUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
128                              GR16:$src2, GR16:$src3, TILE:$src4,
129                              TILE:$src5, TILE:$src6),
130                              [(set TILE: $dst,
131                              (int_x86_tdpbsud_internal GR16:$src1, GR16:$src2,
132                               GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
133      def PTDPBUSDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
134                              GR16:$src2, GR16:$src3, TILE:$src4,
135                              TILE:$src5, TILE:$src6),
136                              [(set TILE: $dst,
137                              (int_x86_tdpbusd_internal GR16:$src1, GR16:$src2,
138                              GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
139      def PTDPBUUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
140                              GR16:$src2, GR16:$src3, TILE:$src4,
141                              TILE:$src5, TILE:$src6),
142                              [(set TILE: $dst,
143                              (int_x86_tdpbuud_internal GR16:$src1, GR16:$src2,
144                              GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
145    }
146
147    let usesCustomInserter = 1 in {
148      // Pseudo instructions, using immediates instead of tile registers.
149      // To be translated to the actual instructions in X86ISelLowering.cpp
      // Each is selected from the immediate-operand intrinsic of the same
      // name (three timm operands).
150      def PTDPBSSD : PseudoI<(outs), (ins u8imm:$src1,
151                             u8imm:$src2, u8imm:$src3),
152                             [(int_x86_tdpbssd timm:$src1,
153                               timm:$src2, timm:$src3)]>;
154      def PTDPBSUD : PseudoI<(outs), (ins u8imm:$src1,
155                             u8imm:$src2, u8imm:$src3),
156                             [(int_x86_tdpbsud timm:$src1,
157                               timm:$src2, timm:$src3)]>;
158      def PTDPBUSD : PseudoI<(outs), (ins u8imm:$src1,
159                             u8imm:$src2, u8imm:$src3),
160                             [(int_x86_tdpbusd timm:$src1,
161                               timm:$src2, timm:$src3)]>;
162      def PTDPBUUD : PseudoI<(outs), (ins u8imm:$src1,
163                             u8imm:$src2, u8imm:$src3),
164                             [(int_x86_tdpbuud timm:$src1,
165                               timm:$src2, timm:$src3)]>;
166    }
167  }
168} // Predicates = [HasAMXINT8, In64BitMode]
169
// AMX-BF16: TDPBF16PS and its pseudos.
170let Predicates = [HasAMXBF16, In64BitMode] in {
171  let SchedRW = [WriteSystem] in {
    // Real instruction: opcode 0x5c, form MRMSrcReg4VOp3, prefix mixin XS,
    // with $src1 tied to $dst.
172    let Constraints = "$src1 = $dst" in
173    def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
174                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
175                      "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
176                      []>, VEX, VVVV, T8, XS;
177
178    // Pseudo instruction for RA.
    // Three GR16 operands followed by three TILE operands; the first TILE
    // input ($src4) is tied to $dst. Selected from
    // int_x86_tdpbf16ps_internal.
179    let isPseudo = true, Constraints = "$src4 = $dst" in
180      def PTDPBF16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
181                                 GR16:$src2, GR16:$src3, TILE:$src4,
182                                 TILE:$src5, TILE:$src6),
183                                 [(set TILE: $dst,
184                                  (int_x86_tdpbf16ps_internal GR16:$src1,
185                                   GR16:$src2, GR16:$src3, TILE:$src4,
186                                   TILE:$src5, TILE:$src6))]>;
187
188    let usesCustomInserter = 1 in {
189      // Pseudo instructions, using immediates instead of tile registers.
190      // To be translated to the actual instructions in X86ISelLowering.cpp
191      def PTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1,
192                               u8imm:$src2, u8imm:$src3),
193                               [(int_x86_tdpbf16ps timm:$src1,
194                                 timm:$src2, timm:$src3)]>;
195    }
196  }
197} // Predicates = [HasAMXBF16, In64BitMode]
198
199// AMX-FP16
// TDPFP16PS and its pseudos.
200let Predicates = [HasAMXFP16, In64BitMode] in {
201  let SchedRW = [WriteSystem] in {
    // Real instruction: same opcode (0x5c) and form as TDPBF16PS in this
    // file, but with prefix mixin XD.
    // NOTE(review): the asm string prints $src1 where the INT8/BF16
    // instructions in this file print $dst. The two operands are tied by the
    // Constraints below, so the output is presumably identical -- confirm.
202    let Constraints = "$src1 = $dst" in {
203      def TDPFP16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
204                        (ins TILE:$src1, TILE:$src2, TILE:$src3),
205                        "tdpfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
206                        []>, VEX, VVVV, T8, XD;
207    }
208
209    // Pseudo instruction for RA.
    // Three GR16 operands followed by three TILE operands; the first TILE
    // input ($src4) is tied to $dst. Selected from
    // int_x86_tdpfp16ps_internal.
210    let isPseudo = true, Constraints = "$src4 = $dst" in {
211      def PTDPFP16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
212                                 GR16:$src2, GR16:$src3, TILE:$src4,
213                                 TILE:$src5, TILE:$src6),
214                                 [(set TILE: $dst,
215                                  (int_x86_tdpfp16ps_internal GR16:$src1,
216                                   GR16:$src2, GR16:$src3, TILE:$src4,
217                                   TILE:$src5, TILE:$src6))]>;
218    }
219
    // Immediate-operand pseudo, selected from int_x86_tdpfp16ps and expanded
    // through the custom-inserter hook.
220    let  usesCustomInserter = 1 in {
221      def PTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1,
222                               u8imm:$src2, u8imm:$src3),
223                               [(int_x86_tdpfp16ps timm:$src1,
224                                 timm:$src2, timm:$src3)]>;
225    }
226  }
227} // Predicates = [HasAMXFP16, In64BitMode]
228
// AMX-COMPLEX: TCMMIMFP16PS / TCMMRLFP16PS and their pseudos.
229let Predicates = [HasAMXCOMPLEX, In64BitMode] in {
230  let SchedRW = [WriteSystem] in {
    // Real instructions: both use opcode 0x6c with form MRMSrcReg4VOp3 and
    // tie $src1 to $dst. TCMMIMFP16PS adds the PD mixin; TCMMRLFP16PS lists
    // WIG and no prefix mixin after T8. The asm strings print $src1 (tied to
    // $dst) as the destination.
231    let Constraints = "$src1 = $dst" in {
232      def TCMMIMFP16PS   : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
233                            (ins TILE:$src1, TILE:$src2, TILE:$src3),
234                            "tcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
235                            []>, T8, PD, VEX, VVVV;
236      def TCMMRLFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
237                            (ins TILE:$src1, TILE:$src2, TILE:$src3),
238                            "tcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
239                            []>, VEX, VVVV, WIG, T8;
240
241    } // Constraints = "$src1 = $dst"
242
    // *V pseudos: three GR16 operands followed by three TILE operands, with
    // $src4 tied to $dst, selected from the *_internal intrinsics.
    // NOTE(review): unlike the corresponding INT8/BF16/FP16 groups in this
    // file, this `let` does not set isPseudo = true -- confirm whether that
    // is intentional.
243    let Constraints = "$src4 = $dst" in {
244      def PTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
245                                  GR16:$src2, GR16:$src3, TILE:$src4,
246                                  TILE:$src5, TILE:$src6),
247                                  [(set TILE: $dst,
248                                  (int_x86_tcmmimfp16ps_internal GR16:$src1, GR16:$src2,
249                                   GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
250      def PTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
251                                  GR16:$src2, GR16:$src3, TILE:$src4,
252                                  TILE:$src5, TILE:$src6),
253                                  [(set TILE: $dst,
254                                  (int_x86_tcmmrlfp16ps_internal GR16:$src1, GR16:$src2,
255                                   GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
256    }
257
    // Immediate-operand pseudos, selected from the intrinsics of the same
    // name and expanded through the custom-inserter hook.
258    let usesCustomInserter = 1 in {
259      def PTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1,
260                                u8imm:$src2, u8imm:$src3),
261                                [(int_x86_tcmmimfp16ps timm:$src1,
262                                  timm:$src2, timm:$src3)]>;
263      def PTCMMRLFP16PS : PseudoI<(outs), (ins u8imm:$src1,
264                                u8imm:$src2, u8imm:$src3),
265                                [(int_x86_tcmmrlfp16ps timm:$src1,
266                                  timm:$src2, timm:$src3)]>;
267    }
268  } // SchedRW = [WriteSystem]
269} // Predicates = [HasAMXCOMPLEX, In64BitMode]
270
271// AMX-FP8
// TDPBF8PS / TDPBHF8PS / TDPHBF8PS / TDPHF8PS and their pseudos.
272let Predicates = [HasAMXFP8, In64BitMode] in {
273  let SchedRW = [WriteSystem] in {
    // AMX_FP8_BASE - shared shape of the four real instructions.
    //   Opcode - 8-bit opcode forwarded to I<>.
    //   Opstr  - mnemonic; the operand string is appended with !strconcat.
    // The class is declared inside the Constraints `let`, so $src1 is tied
    // to $dst. It applies VEX and VVVV; map and prefix mixins are added at
    // each def.
274    let Constraints = "$src1 = $dst" in {
275      class AMX_FP8_BASE<bits<8> Opcode, string Opstr> :
276        I<Opcode, MRMSrcReg4VOp3, (outs TILE:$dst),
277          (ins TILE:$src1, TILE:$src2, TILE:$src3),
278          !strconcat(Opstr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
279          []>, VEX, VVVV;
280    }
281
    // All four share opcode 0xfd in T_MAP5 and differ only in the prefix
    // mixin (PS / XD / XS / PD).
282    def TDPBF8PS : AMX_FP8_BASE<0xfd, "tdpbf8ps">, T_MAP5, PS;
283    def TDPBHF8PS : AMX_FP8_BASE<0xfd, "tdpbhf8ps">, T_MAP5, XD;
284    def TDPHBF8PS : AMX_FP8_BASE<0xfd, "tdphbf8ps">, T_MAP5, XS;
285    def TDPHF8PS : AMX_FP8_BASE<0xfd, "tdphf8ps">, T_MAP5, PD;
286
287    let usesCustomInserter = 1 in {
288      // Pseudo instructions, using immediates instead of tile registers.
289      // To be translated to the actual instructions in X86ISelLowering.cpp
290      def PTDPBF8PS : PseudoI<(outs),
291                              (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
292                              [(int_x86_tdpbf8ps timm:$src1, timm:$src2,
293                                timm:$src3)]>;
294      def PTDPBHF8PS : PseudoI<(outs),
295                               (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
296                               [(int_x86_tdpbhf8ps timm:$src1, timm:$src2,
297                                 timm:$src3)]>;
298      def PTDPHBF8PS : PseudoI<(outs),
299                               (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
300                               [(int_x86_tdphbf8ps timm:$src1, timm:$src2,
301                                 timm:$src3)]>;
302      def PTDPHF8PS : PseudoI<(outs),
303                              (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
304                              [(int_x86_tdphf8ps timm:$src1, timm:$src2,
305                                timm:$src3)]>;
306    }
307
    // *V pseudos: three GR16 operands followed by three TILE operands, with
    // $src4 tied to $dst, selected from the *_internal intrinsics.
    // NOTE(review): unlike the corresponding INT8/BF16/FP16 groups in this
    // file, this `let` does not set isPseudo = true -- confirm whether that
    // is intentional.
308    let Constraints = "$src4 = $dst" in {
309      def PTDPBF8PSV : PseudoI<(outs TILE:$dst),
310                               (ins GR16:$src1, GR16:$src2, GR16:$src3,
311                                    TILE:$src4, TILE:$src5, TILE:$src6),
312                               [(set TILE:$dst,
313                                (int_x86_tdpbf8ps_internal GR16:$src1,
314                                 GR16:$src2, GR16:$src3, TILE:$src4,
315                                 TILE:$src5, TILE:$src6))]>;
316      def PTDPBHF8PSV : PseudoI<(outs TILE:$dst),
317                               (ins GR16:$src1, GR16:$src2, GR16:$src3,
318                                    TILE:$src4, TILE:$src5, TILE:$src6),
319                               [(set TILE:$dst,
320                                (int_x86_tdpbhf8ps_internal GR16:$src1,
321                                 GR16:$src2, GR16:$src3, TILE:$src4,
322                                 TILE:$src5, TILE:$src6))]>;
323      def PTDPHBF8PSV : PseudoI<(outs TILE:$dst),
324                               (ins GR16:$src1, GR16:$src2, GR16:$src3,
325                                    TILE:$src4, TILE:$src5, TILE:$src6),
326                               [(set TILE:$dst,
327                                (int_x86_tdphbf8ps_internal GR16:$src1,
328                                 GR16:$src2, GR16:$src3, TILE:$src4,
329                                 TILE:$src5, TILE:$src6))]>;
330      def PTDPHF8PSV : PseudoI<(outs TILE:$dst),
331                               (ins GR16:$src1, GR16:$src2, GR16:$src3,
332                                    TILE:$src4, TILE:$src5, TILE:$src6),
333                               [(set TILE:$dst,
334                                (int_x86_tdphf8ps_internal GR16:$src1,
335                                 GR16:$src2, GR16:$src3, TILE:$src4,
336                                 TILE:$src5, TILE:$src6))]>;
337    }
338  }
339} // Predicates = [HasAMXFP8, In64BitMode]
340
// Store/load pseudos for the TILEPair register class. Both take an opaquemem
// address, have empty pattern lists, and are marked isPseudo; the store is
// mayStore and the load is mayLoad.
341let Predicates = [HasAMXTILE, In64BitMode], isPseudo = true, SchedRW = [WriteSystem] in {
342  let mayStore = 1 in
343  def PTILEPAIRSTORE : PseudoI<(outs), (ins opaquemem:$src1, TILEPair:$src2), []>;
344  let mayLoad = 1 in
345  def PTILEPAIRLOAD : PseudoI<(outs TILEPair:$dst), (ins opaquemem:$src), []>;
346}
347
// T2RPNTLVW_Base - the four t2rpntlvwz{0,1}[rs][t1] instructions.
//   op1    - opcode of the two variants without the "t1" suffix.
//   op2    - opcode of the two "t1" variants.
//   rs     - infix pasted into the record name as-is and into the mnemonic
//            in lower case (via !tolower).
//   suffix - string pasted onto the end of every record name.
// The Z0 records carry the PS mixin and the Z1 records PD. Every record
// writes a TILEPair from a sibmem operand and has an empty pattern list.
// Opcode map and VEX/EVEX mixins are left to the defm site.
// NOTE(review): no mayLoad = 1 is set here, unlike TILELOADD/TILELOADDT1 in
// AMX_TILE_COMMON -- confirm whether that is intentional.
348multiclass T2RPNTLVW_Base<bits<8> op1, bits<8> op2, string rs, string suffix> {
349  def Z0#rs#suffix    : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
350                          "t2rpntlvwz0" #!tolower(rs)# "\t{$src, $dst|$dst, $src}", []>, PS;
351  def Z0#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
352                          "t2rpntlvwz0" #!tolower(rs)# "t1\t{$src, $dst|$dst, $src}", []>, PS;
353  def Z1#rs#suffix    : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
354                          "t2rpntlvwz1" #!tolower(rs)# "\t{$src, $dst|$dst, $src}", []>, PD;
355  def Z1#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
356                          "t2rpntlvwz1" #!tolower(rs)# "t1\t{$src, $dst|$dst, $src}", []>, PD;
357}
358
// Four instantiations of T2RPNTLVW_Base, all under SchedRW = [WriteSystem]:
//   1. opcodes 0x6e/0x6f, no infix, T8 + VEX.
//   2. same opcodes, "_EVEX" suffix, T8 + EVEX, NoCD8, additionally gated on
//      HasEGPR.
//   3. opcodes 0xf8/0xf9, "RS" infix, T_MAP5 + VEX, additionally gated on
//      HasAMXMOVRS.
//   4. same as 3 with "_EVEX" suffix, EVEX, NoCD8, and HasEGPR.
// NOTE(review): the VEX instantiations here do not list NoEGPR, unlike the
// VEX instantiation of AMX_TILE_COMMON in this file -- confirm.
359let Predicates = [HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in
360  defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "">, T8, VEX;
361
362let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
363  defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "_EVEX">, T8, EVEX, NoCD8;
364
365let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in
366  defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "">, T_MAP5, VEX;
367
368let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
369  defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "_EVEX">, T_MAP5, EVEX, NoCD8;
370
// AMX-TRANSPOSE: TTRANSPOSED plus the pseudos for it and for the
// T2RPNTLVW* instructions.
371let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
372  let SchedRW = [WriteSystem] in {
    // Real instruction: opcode 0x5f, form MRMSrcReg, VEX + T8 + XS, with an
    // empty pattern list.
373    def TTRANSPOSED : I<0x5f, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
374                        "ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8, XS;
    // TILEPair-producing pseudos: three GR16 operands plus an opaquemem
    // address, all with empty pattern lists.
375    let isPseudo = true in {
376      def PT2RPNTLVWZ0V : PseudoI<(outs TILEPair:$dst),
377                                  (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
378                                  []>;
379      def PT2RPNTLVWZ0T1V : PseudoI<(outs TILEPair:$dst),
380                                  (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
381                                  []>;
382      def PT2RPNTLVWZ1V : PseudoI<(outs TILEPair:$dst),
383                                  (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
384                                  []>;
385      def PT2RPNTLVWZ1T1V : PseudoI<(outs TILEPair:$dst),
386                                  (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
387                                  []>;
388    }
389
    // Selected from int_x86_ttransposed_internal.
    // NOTE(review): this def sits outside the `isPseudo = true` scope that
    // covers the four pseudos above -- confirm whether that is intentional.
390    def PTTRANSPOSEDV : PseudoI<(outs TILE:$dst),
391                                (ins GR16:$src1, GR16:$src2, TILE:$src),
392                                [(set TILE: $dst,
393                                 (int_x86_ttransposed_internal GR16:$src1, GR16:$src2,
394                                  TILE:$src))]>;
395
    // Immediate-operand pseudos expanded through the custom-inserter hook.
    // Only PTTRANSPOSED carries a selection pattern.
396    let usesCustomInserter = 1 in {
397      def PT2RPNTLVWZ0 : PseudoI<(outs), (ins u8imm:$dst,
398                                 sibmem:$src1), []>;
399      def PT2RPNTLVWZ0T1 : PseudoI<(outs), (ins u8imm:$dst,
400                                   sibmem:$src1), []>;
401      def PT2RPNTLVWZ1 : PseudoI<(outs), (ins u8imm:$dst,
402                                 sibmem:$src1), []>;
403      def PT2RPNTLVWZ1T1 : PseudoI<(outs), (ins u8imm:$dst,
404                                   sibmem:$src1), []>;
405      def PTTRANSPOSED : PseudoI<(outs), (ins u8imm:$dst, u8imm:$src),
406                                 [(int_x86_ttransposed timm:$dst, timm:$src)]>;
407    }
408  }
409} // Predicates = [HasAMXTRANSPOSE, In64BitMode]
410
// TTDPBF16PS (requires both HasAMXBF16 and HasAMXTRANSPOSE) and its pseudos.
411let Predicates = [HasAMXBF16, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
  // Real instruction: opcode 0x6c, form MRMSrcReg4VOp3, T8 + XS, with $src1
  // tied to $dst.
412  let Constraints = "$src1 = $dst" in
413    def TTDPBF16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
414                       (ins TILE:$src1, TILE:$src2, TILE:$src3),
415                       "ttdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
416                       []>, VEX, VVVV, T8,XS;
  // *V pseudo: three GR16 operands followed by three TILE operands, $src4
  // tied to $dst, selected from int_x86_ttdpbf16ps_internal.
417  let Constraints = "$src4 = $dst" in
418    def PTTDPBF16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
419                                GR16:$src2, GR16:$src3, TILE:$src4,
420                                TILE:$src5, TILE:$src6),
421                                [(set TILE: $dst,
422                                  (int_x86_ttdpbf16ps_internal GR16:$src1, GR16:$src2,
423                                   GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
  // Immediate-operand pseudo, selected from int_x86_ttdpbf16ps.
424  let usesCustomInserter = 1 in
425    def PTTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
426                              [(int_x86_ttdpbf16ps timm:$src1, timm:$src2, timm:$src3)]>;
427}
428
// TTDPFP16PS (requires both HasAMXFP16 and HasAMXTRANSPOSE) and its pseudos.
429let Predicates = [HasAMXFP16, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
  // Real instruction: same opcode (0x6c) and form as TTDPBF16PS in this file,
  // but with prefix mixin XD; $src1 is tied to $dst.
430  let Constraints = "$src1 = $dst" in
431    def TTDPFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
432                       (ins TILE:$src1, TILE:$src2, TILE:$src3),
433                       "ttdpfp16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
434                       []>, VEX, VVVV, T8,XD;
  // *V pseudo: three GR16 operands followed by three TILE operands, $src4
  // tied to $dst, selected from int_x86_ttdpfp16ps_internal.
435  let Constraints = "$src4 = $dst" in
436    def PTTDPFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
437                                GR16:$src2, GR16:$src3, TILE:$src4,
438                                TILE:$src5, TILE:$src6),
439                                [(set TILE: $dst,
440                                  (int_x86_ttdpfp16ps_internal GR16:$src1, GR16:$src2,
441                                   GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
  // Immediate-operand pseudo, selected from int_x86_ttdpfp16ps.
442  let usesCustomInserter = 1 in
443    def PTTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
444                              [(int_x86_ttdpfp16ps timm:$src1, timm:$src2, timm:$src3)]>;
445}
446
// Instructions requiring both HasAMXCOMPLEX and HasAMXTRANSPOSE:
// TTCMMIMFP16PS, TTCMMRLFP16PS, TCONJTCMMIMFP16PS, TCONJTFP16, and their
// pseudos.
447let Predicates = [HasAMXCOMPLEX, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
  // Three-source instructions: all use opcode 0x6b with form MRMSrcReg4VOp3
  // and tie $src1 to $dst; they are told apart by the prefix mixin
  // (XD / XS / PS). The asm strings print $src1 (tied to $dst) as the
  // destination.
448  let Constraints = "$src1 = $dst" in {
449    def TTCMMIMFP16PS : I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
450                          (ins TILE:$src1, TILE:$src2, TILE:$src3),
451                          "ttcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
452                          []>, VEX, VVVV, T8,XD;
453    def TTCMMRLFP16PS: I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
454                         (ins TILE:$src1, TILE:$src2, TILE:$src3),
455                         "ttcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
456                         []>, VEX, VVVV, T8,XS;
457    def TCONJTCMMIMFP16PS : I<0x6b, MRMSrcReg4VOp3, (outs TILE:$dst),
458                          (ins TILE:$src1, TILE:$src2, TILE:$src3),
459                          "tconjtcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
460                          []>, VEX, VVVV, WIG, T8,PS;
461  }
  // Single-source instruction: same opcode 0x6b, but form MRMSrcReg, the PD
  // mixin, and no tied operand.
462  def TCONJTFP16 : I<0x6b, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
463                     "tconjtfp16\t{$src, $dst|$dst, $src}", []>, VEX, T8,PD;
464
  // *V pseudos for the three-source instructions: three GR16 operands
  // followed by three TILE operands, $src4 tied to $dst, selected from the
  // *_internal intrinsics.
465  let Constraints = "$src4 = $dst" in {
466    def PTTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
467                                  GR16:$src2, GR16:$src3, TILE:$src4,
468                                  TILE:$src5, TILE:$src6),
469                                  [(set TILE: $dst,
470                                    (int_x86_ttcmmimfp16ps_internal GR16:$src1, GR16:$src2,
471                                     GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
472    def PTTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
473                                  GR16:$src2, GR16:$src3, TILE:$src4,
474                                  TILE:$src5, TILE:$src6),
475                                  [(set TILE: $dst,
476                                    (int_x86_ttcmmrlfp16ps_internal GR16:$src1, GR16:$src2,
477                                     GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
478    def PTCONJTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
479                                      GR16:$src2, GR16:$src3, TILE:$src4,
480                                      TILE:$src5, TILE:$src6),
481                                      [(set TILE: $dst,
482                                        (int_x86_tconjtcmmimfp16ps_internal GR16:$src1, GR16:$src2,
483                                         GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
484  }
  // *V pseudo for TCONJTFP16: two GR16 operands and one TILE input, with no
  // tied operand.
485  def PTCONJTFP16V : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2, TILE:$src3),
486                             [(set TILE: $dst, (int_x86_tconjtfp16_internal GR16:$src1, GR16:$src2, TILE:$src3))]>;
487
  // Immediate-operand pseudos, selected from the intrinsics of the same name
  // and expanded through the custom-inserter hook.
488  let usesCustomInserter = 1 in {
489    def PTTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
490                                 [(int_x86_ttcmmimfp16ps timm:$src1, timm:$src2, timm:$src3)]>;
491    def PTTCMMRLFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
492                                 [(int_x86_ttcmmrlfp16ps timm:$src1, timm:$src2, timm:$src3)]>;
493    def PTCONJTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
494                                     [(int_x86_tconjtcmmimfp16ps timm:$src1, timm:$src2, timm:$src3)]>;
495    def PTCONJTFP16 : PseudoI<(outs), (ins u8imm:$dst, u8imm:$src),
496                              [(int_x86_tconjtfp16 timm:$dst, timm:$src)]>;
497  }
498}
499
// Pseudos for the "RS" T2RPNTLVW variants (require HasAMXMOVRS and
// HasAMXTRANSPOSE). All have empty pattern lists.
500let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
  // TILEPair-producing pseudos: three GR16 operands plus an opaquemem
  // address.
501  let isPseudo = true in {
502    def PT2RPNTLVWZ0RSV   : PseudoI<(outs TILEPair:$dst),
503                              (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
504                              []>;
505    def PT2RPNTLVWZ0RST1V : PseudoI<(outs TILEPair:$dst),
506                              (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
507                              []>;
508    def PT2RPNTLVWZ1RSV   : PseudoI<(outs TILEPair:$dst),
509                              (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
510                              []>;
511    def PT2RPNTLVWZ1RST1V : PseudoI<(outs TILEPair:$dst),
512                              (ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
513                              []>;
514  }
  // Immediate-operand pseudos (u8imm destination plus a sibmem address),
  // expanded through the custom-inserter hook.
515  let  usesCustomInserter = 1 in {
516    def PT2RPNTLVWZ0RS   : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
517    def PT2RPNTLVWZ0RST1 : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
518    def PT2RPNTLVWZ1RS   : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
519    def PT2RPNTLVWZ1RST1 : PseudoI<(outs), (ins u8imm:$dst, sibmem:$src1), []>;
520  }
521} // HasAMXMOVRS, HasAMXTRANSPOSE
522
// TILELOADDRS_Base - the tileloaddrs / tileloaddrst1 instructions.
//   suffix - string used as the record-name suffix; the first def is named
//            by `suffix` alone and the second by "T1" followed by `suffix`.
// Both records use opcode 0x4a with form MRMSrcMemFSIB in T8 and differ in
// the prefix mixin (XD vs. PD). VEX/EVEX mixins are left to the defm site.
// NOTE(review): no mayLoad = 1 is set here, unlike TILELOADD/TILELOADDT1 in
// AMX_TILE_COMMON -- confirm whether that is intentional.
523multiclass TILELOADDRS_Base<string suffix> {
524  def suffix    : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1),
525                    "tileloaddrs\t{$src1, $dst|$dst, $src1}", []>, T8, XD;
526  def T1#suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1),
527                    "tileloaddrst1\t{$src1, $dst|$dst, $src1}", []>, T8, PD;
528}
529
// Two instantiations of TILELOADDRS_Base: an unsuffixed VEX form, and an
// "_EVEX" form with EVEX, NoCD8 that is additionally gated on HasEGPR.
530let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in
531  defm TILELOADDRS : TILELOADDRS_Base<"">, VEX;
532
533let Predicates = [HasAMXMOVRS, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
534  defm TILELOADDRS : TILELOADDRS_Base<"_EVEX">, EVEX, NoCD8;
535
// Pseudos for the tileloaddrs instructions. All are mayLoad and have empty
// pattern lists.
536let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
  // TILE-producing pseudos: two GR16 operands plus an opaquemem address.
537  let isPseudo = true, mayLoad = 1 in {
538    def PTILELOADDRSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
539                                                  GR16:$src2,
540                                                  opaquemem:$src3), []>;
541    def PTILELOADDRST1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
542                                                    GR16:$src2,
543                                                    opaquemem:$src3), []>;
544  }
545
  // Immediate-operand pseudos (u8imm plus a sibmem address), expanded through
  // the custom-inserter hook.
546  let usesCustomInserter = 1, mayLoad = 1 in {
547    def PTILELOADDRS : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
548    def PTILELOADDRST1 : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
549  }
550} // HasAMXMOVRS, In64BitMode
551
// m_tcvtrowd2ps - the two encodings of tcvtrowd2ps, both producing a VR512
// from a TILE source with an empty pattern list.
//   rri - second operand is an immediate (i32u8imm): Ii8, opcode 0x7, form
//         MRMSrcReg, TA + XS.
//   rre - second operand is a GR32: I, opcode 0x4A, form MRMSrcReg4VOp3,
//         T8 + XS, with VVVV.
// Predicates and SchedRW are set inside the multiclass, so the defm site
// needs no enclosing `let`.
552multiclass m_tcvtrowd2ps {
553  let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
554    let SchedRW = [WriteSystem] in {
555      def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
556                    (ins TILE:$src1, i32u8imm:$src2),
557                    "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
558                    []>, TA,XS, EVEX, EVEX_V512;
559      def rre : I<0x4A, MRMSrcReg4VOp3, (outs VR512:$dst),
560                  (ins TILE:$src1, GR32:$src2),
561                  "tcvtrowd2ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
562                  []>, T8,XS, EVEX, VVVV, EVEX_V512;
563    }
564  } // HasAMXAVX512, HasAVX10_2_512, In64BitMode
565}
566
// Instantiates m_tcvtrowd2ps, yielding TCVTROWD2PSrri and TCVTROWD2PSrre.
567defm TCVTROWD2PS : m_tcvtrowd2ps;
568
// Pseudo instructions selected from the row-conversion intrinsics. All of
// them produce a VR512 value, require HasAMXAVX512, HasAVX10_2_512 and
// In64BitMode, and are scheduled as WriteSystem.
569let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
570  let SchedRW = [WriteSystem] in {
       // Pseudos matched from int_x86_tcvtrowd2ps and marked
       // usesCustomInserter. $src1 is matched as a target immediate (timm);
       // $src2 is an immediate (rri) or a GR32 register (rre).
       // NOTE(review): $src1 presumably names the source tile by number -
       // confirm against the custom inserter.
571    let  usesCustomInserter = 1 in {
572      def PTCVTROWD2PSrri : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2),
573                                    [(set VR512:$dst, (int_x86_tcvtrowd2ps timm:$src1, imm:$src2))]>;
574      def PTCVTROWD2PSrre : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, GR32:$src2),
575                                    [(set VR512:$dst, (int_x86_tcvtrowd2ps timm:$src1, GR32:$src2))]>;
576    }
577
       // "V"-suffixed pseudos matched from the corresponding *_internal
       // intrinsics. Each takes two GR16 operands ($src1, $src2), a TILE
       // operand ($src3) and a fourth operand that is an immediate (rriV) or
       // a GR32 register (rreV). They are outside the usesCustomInserter
       // scope above.
       // NOTE(review): the two GR16 operands presumably carry the tile
       // shape - confirm against the intrinsic definitions.
578    def PTCVTROWD2PSrriV : PseudoI<(outs VR512:$dst),
579                                   (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
580                                    [(set VR512: $dst,
581                                      (int_x86_tcvtrowd2ps_internal GR16:$src1, GR16:$src2,
582                                       TILE:$src3, imm:$src4))]>;
583    def PTCVTROWD2PSrreV : PseudoI<(outs VR512:$dst),
584                                   (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
585                                   [(set VR512: $dst,
586                                     (int_x86_tcvtrowd2ps_internal GR16:$src1, GR16:$src2,
587                                      TILE:$src3, GR32:$src4))]>;
       // tcvtrowps2bf16h: immediate and register forms.
588    def PTCVTROWPS2BF16HrriV : PseudoI<(outs VR512:$dst),
589                                       (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
590                                       [(set VR512: $dst,
591                                         (int_x86_tcvtrowps2bf16h_internal GR16:$src1, GR16:$src2,
592                                          TILE:$src3, imm:$src4))]>;
593    def PTCVTROWPS2BF16HrreV : PseudoI<(outs VR512:$dst),
594                                       (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
595                                       [(set VR512: $dst,
596                                         (int_x86_tcvtrowps2bf16h_internal GR16:$src1, GR16:$src2,
597                                          TILE:$src3, GR32:$src4))]>;
       // tcvtrowps2bf16l: immediate and register forms.
598    def PTCVTROWPS2BF16LrriV : PseudoI<(outs VR512:$dst),
599                                       (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
600                                       [(set VR512: $dst,
601                                         (int_x86_tcvtrowps2bf16l_internal GR16:$src1, GR16:$src2,
602                                          TILE:$src3, imm:$src4))]>;
603    def PTCVTROWPS2BF16LrreV : PseudoI<(outs VR512:$dst),
604                                       (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
605                                       [(set VR512: $dst,
606                                         (int_x86_tcvtrowps2bf16l_internal GR16:$src1, GR16:$src2,
607                                          TILE:$src3, GR32:$src4))]>;
       // tcvtrowps2phh: immediate and register forms.
608    def PTCVTROWPS2PHHrriV : PseudoI<(outs VR512:$dst),
609                                     (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
610                                     [(set VR512: $dst,
611                                       (int_x86_tcvtrowps2phh_internal GR16:$src1, GR16:$src2,
612                                        TILE:$src3, imm:$src4))]>;
613    def PTCVTROWPS2PHHrreV : PseudoI<(outs VR512:$dst),
614                                     (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
615                                     [(set VR512: $dst,
616                                       (int_x86_tcvtrowps2phh_internal GR16:$src1, GR16:$src2,
617                                        TILE:$src3, GR32:$src4))]>;
       // tcvtrowps2phl: immediate and register forms.
618    def PTCVTROWPS2PHLrriV : PseudoI<(outs VR512:$dst),
619                                     (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
620                                     [(set VR512: $dst,
621                                       (int_x86_tcvtrowps2phl_internal GR16:$src1, GR16:$src2,
622                                        TILE:$src3, imm:$src4))]>;
623    def PTCVTROWPS2PHLrreV : PseudoI<(outs VR512:$dst),
624                                     (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
625                                     [(set VR512: $dst,
626                                       (int_x86_tcvtrowps2phl_internal GR16:$src1, GR16:$src2,
627                                        TILE:$src3, GR32:$src4))]>;
628  }
629}
630
// AMXAVX512_BASE - shared shape for a TILE -> VR512 instruction that has a
// register-operand form (rre) and an immediate-operand form (rri), plus one
// custom-inserter pseudo for each form.
//
//   Opcode1 - opcode byte of the rre form.
//   Opcode2 - opcode byte of the rri form.
//   Opstr   - assembly mnemonic; also appended to "int_x86_" to look up the
//             intrinsic that the pseudos match.
//   P1      - OpPrefix applied to the rre form.
//   P2      - OpPrefix applied to the rri form.
631multiclass AMXAVX512_BASE<bits<8> Opcode1, bits<8> Opcode2, string Opstr,
632                                Prefix P1, Prefix P2> {
633  let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode], SchedRW = [WriteSystem] in {
       // Register form: TILE $src1 and GR32 $src2, no selection pattern.
634    let OpPrefix = P1 in
635      def rre : I<Opcode1, MRMSrcReg4VOp3, (outs VR512:$dst),
636                  (ins TILE:$src1, GR32:$src2),
637                  !strconcat(Opstr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
638                  []>, EVEX, VVVV, EVEX_V512, T8;
       // Immediate form: TILE $src1 and 8-bit immediate $src2, no selection
       // pattern.
639    let OpPrefix = P2 in
640      def rri : Ii8<Opcode2, MRMSrcReg, (outs VR512:$dst),
641                    (ins TILE:$src1, i32u8imm:$src2),
642                    !strconcat(Opstr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
643                    []>, EVEX, EVEX_V512, TA;
       // Pseudos named "P" # NAME # "rre" / "rri" (e.g. PTCVTROWPS2PHHrre for
       // defm TCVTROWPS2PHH). They match int_x86_<Opstr> with $src1 as a
       // target immediate and are marked usesCustomInserter.
644    let usesCustomInserter = 1 in {
645      def "P"#NAME#"rre" : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, GR32:$src2),
646                                   [(set VR512:$dst,
647                                    (!cast<Intrinsic>("int_x86_"#Opstr) timm:$src1, GR32:$src2))]>;
648      def "P"#NAME#"rri" : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2),
649                                   [(set VR512:$dst,
650                                    (!cast<Intrinsic>("int_x86_"#Opstr) timm:$src1, imm:$src2))]>;
651    }
652  }
653}
654
// Instantiations of AMXAVX512_BASE. Argument order: rre opcode, rri opcode,
// mnemonic (also the intrinsic suffix), rre prefix, rri prefix.
// The H forms use rri opcode 0x07 and the L forms use 0x77; all four share
// rre opcode 0x6d and differ there only by prefix.
655defm TCVTROWPS2PHH : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2phh", PS, PS>;
656defm TCVTROWPS2PHL : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2phl", PD, XD>;
657defm TCVTROWPS2BF16H : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2bf16h", XD, XD>;
658defm TCVTROWPS2BF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2bf16l", XS, XS>;
659
// m_tilemovrow - the two machine-instruction forms printed as "tilemovrow".
// Both read a TILE operand ($src1), write a VR512 register ($dst), carry an
// empty selection pattern, and are guarded by HasAMXAVX512, HasAVX10_2_512
// and In64BitMode with SchedRW = [WriteSystem].
660multiclass m_tilemovrow {
661  let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
662    let SchedRW = [WriteSystem] in {
         // rri: $src2 is an 8-bit immediate; opcode 0x7, form MRMSrcReg, with
         // TA, PD, EVEX and EVEX_V512.
         // NOTE(review): the immediate is typed u8imm here, while the other
         // rri forms in this file section use i32u8imm - confirm this is
         // intentional.
663      def rri : Ii8<0x7, MRMSrcReg, (outs VR512:$dst),
664                    (ins TILE:$src1, u8imm:$src2),
665                    "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}",
666                    []>, TA,PD, EVEX, EVEX_V512;
         // rre: $src2 is a GR32 register; opcode 0x4A, form MRMSrcReg4VOp3,
         // with T8, PD, EVEX, VVVV and EVEX_V512.
667      def rre : I<0x4A, MRMSrcReg4VOp3, (outs VR512:$dst),
668                  (ins TILE:$src1, GR32:$src2),
669                  "tilemovrow\t{$src2, $src1, $dst|$dst, $src1, $src2}",
670                  []>, T8,PD, EVEX, VVVV, EVEX_V512;
671    }
672  } // HasAMXAVX512, HasAVX10_2_512, In64BitMode
673}
674
// Instantiate m_tilemovrow under the TILEMOVROW prefix; the multiclass's
// `rri` and `rre` defs become TILEMOVROWrri and TILEMOVROWrre.
675defm TILEMOVROW : m_tilemovrow;
676
// Pseudo instructions selected from the tilemovrow intrinsics. All of them
// produce a VR512 value, require HasAMXAVX512, HasAVX10_2_512 and
// In64BitMode, and are scheduled as WriteSystem.
677let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
678  let SchedRW = [WriteSystem] in {
       // Pseudos matched from int_x86_tilemovrow and marked
       // usesCustomInserter. $src1 is matched as a target immediate (timm);
       // $src2 is an immediate (rri) or a GR32 register (rre).
       // NOTE(review): $src1 presumably names the source tile by number -
       // confirm against the custom inserter.
679    let  usesCustomInserter = 1 in {
680      def PTILEMOVROWrri : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, i32u8imm:$src2),
681                                   [(set VR512:$dst, (int_x86_tilemovrow timm:$src1, imm:$src2))]>;
682      def PTILEMOVROWrre : PseudoI<(outs VR512:$dst), (ins u8imm:$src1, GR32:$src2),
683                                   [(set VR512:$dst, (int_x86_tilemovrow timm:$src1, GR32:$src2))]>;
684    }
685
       // "V"-suffixed pseudos matched from int_x86_tilemovrow_internal. Each
       // takes two GR16 operands ($src1, $src2), a TILE operand ($src3) and a
       // fourth operand that is an immediate (rriV) or a GR32 register (rreV).
       // NOTE(review): the two GR16 operands presumably carry the tile
       // shape - confirm against the intrinsic definition.
686    def PTILEMOVROWrriV : PseudoI<(outs VR512:$dst),
687                                  (ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
688                                  [(set VR512: $dst,
689                                    (int_x86_tilemovrow_internal GR16:$src1, GR16:$src2,
690                                     TILE:$src3, imm:$src4))]>;
691    def PTILEMOVROWrreV : PseudoI<(outs VR512:$dst),
692                                  (ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
693                                  [(set VR512: $dst,
694                                    (int_x86_tilemovrow_internal GR16:$src1, GR16:$src2,
695                                     TILE:$src3, GR32:$src4))]>;
696  }
697}
698
// tmmultf32ps: the machine instruction plus its two selection pseudos.
// Everything here requires HasAMXTF32 and In64BitMode and is scheduled as
// WriteSystem.
699let Predicates = [HasAMXTF32, In64BitMode] in {
700  let SchedRW = [WriteSystem] in {
       // Machine instruction (opcode 0x48, VEX, VVVV, T8, PD). $src1 is tied
       // to $dst, so the assembly string prints only $dst, $src2 and $src3.
701    let Constraints = "$src1 = $dst" in {
702      def TMMULTF32PS: I<0x48, MRMSrcReg4VOp3, (outs TILE:$dst),
703                         (ins TILE:$src1, TILE:$src2, TILE:$src3),
704                         "tmmultf32ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
705                         []>, VEX, VVVV, T8, PD;
706    }
       // Pseudo matched from int_x86_tmmultf32ps_internal: three GR16
       // operands followed by three TILE operands, with $src4 tied to $dst.
       // NOTE(review): the GR16 operands presumably carry the tile
       // dimensions - confirm against the intrinsic definition.
707    let Constraints = "$src4 = $dst" in {
708      def PTMMULTF32PSV : PseudoI<(outs TILE:$dst),
709                                  (ins GR16:$src1, GR16:$src2, GR16:$src3,
710                                   TILE:$src4, TILE:$src5, TILE:$src6),
711                                  [(set TILE:$dst,
712                                    (int_x86_tmmultf32ps_internal GR16:$src1,
713                                     GR16:$src2, GR16:$src3, TILE:$src4,
714                                     TILE:$src5, TILE:$src6))]>;
715    }
       // Pseudo matched from int_x86_tmmultf32ps: three target immediates, no
       // outputs, marked usesCustomInserter.
       // NOTE(review): the immediates presumably are tile register numbers -
       // confirm against the custom inserter.
716    let usesCustomInserter = 1 in {
717      def PTMMULTF32PS : PseudoI<(outs),
718                                 (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
719                                 [(int_x86_tmmultf32ps timm:$src1, timm:$src2,
720                                   timm:$src3)]>;
721    }
722  } // SchedRW = [WriteSystem]
723} // HasAMXTF32, In64BitMode
724
// ttmmultf32ps: the machine instruction plus its two selection pseudos.
// Everything here requires HasAMXTF32, HasAMXTRANSPOSE and In64BitMode and
// is scheduled as WriteSystem.
725let Predicates = [HasAMXTF32, HasAMXTRANSPOSE, In64BitMode] in {
726  let SchedRW = [WriteSystem] in {
       // Machine instruction (opcode 0x48, VEX, VVVV, T8, PS). $src1 is tied
       // to $dst, so the assembly string prints only $dst, $src2 and $src3.
727    let Constraints = "$src1 = $dst" in {
728      def TTMMULTF32PS: I<0x48, MRMSrcReg4VOp3, (outs TILE:$dst),
729                         (ins TILE:$src1, TILE:$src2, TILE:$src3),
730                         "ttmmultf32ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
731                         []>, VEX, VVVV, T8, PS;
732    }
       // Pseudo matched from int_x86_ttmmultf32ps_internal: three GR16
       // operands followed by three TILE operands, with $src4 tied to $dst.
       // NOTE(review): the GR16 operands presumably carry the tile
       // dimensions - confirm against the intrinsic definition.
733    let Constraints = "$src4 = $dst" in {
734      def PTTMMULTF32PSV : PseudoI<(outs TILE:$dst),
735                                   (ins GR16:$src1, GR16:$src2, GR16:$src3,
736                                    TILE:$src4, TILE:$src5, TILE:$src6),
737                                   [(set TILE:$dst,
738                                     (int_x86_ttmmultf32ps_internal GR16:$src1,
739                                      GR16:$src2, GR16:$src3, TILE:$src4,
740                                      TILE:$src5, TILE:$src6))]>;
741    }
       // Pseudo matched from int_x86_ttmmultf32ps: three target immediates,
       // no outputs, marked usesCustomInserter.
       // NOTE(review): the immediates presumably are tile register numbers -
       // confirm against the custom inserter.
742    let usesCustomInserter = 1 in {
743      def PTTMMULTF32PS : PseudoI<(outs),
744                                  (ins u8imm:$src1, u8imm:$src2, u8imm:$src3),
745                                  [(int_x86_ttmmultf32ps timm:$src1, timm:$src2,
746                                    timm:$src3)]>;
747    }
748  } // SchedRW = [WriteSystem]
749} // HasAMXTF32, HasAMXTRANSPOSE, In64BitMode
750