# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=x86_64-- -run-pass=fastpretileconfig -o - %s | FileCheck %s

--- |
  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-unknown-unknown"

  @buf = dso_local global [1024 x i8] zeroinitializer, align 16
  @buf2 = dso_local global [1024 x i8] zeroinitializer, align 16

  define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) local_unnamed_addr #0 {
  entry:
    %tobool.not = icmp eq i32 %cond, 0
    br i1 %tobool.not, label %if.else, label %if.then

  if.then:                                          ; preds = %entry
    %0 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, ptr @buf, i64 32)
    %1 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, ptr @buf, i64 32)
    %2 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, ptr @buf, i64 32)
    br label %if.end

  if.else:                                          ; preds = %entry
    %3 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, ptr @buf2, i64 32)
    %4 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, ptr @buf2, i64 32)
    %5 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, ptr @buf2, i64 32)
    br label %if.end

  if.end:                                           ; preds = %if.else, %if.then
    %a.sroa.1094.0.in = phi x86_amx [ %3, %if.else ], [ %0, %if.then ]
    %b.sroa.1069.0.in = phi x86_amx [ %4, %if.else ], [ %1, %if.then ]
    %c.sroa.1044.0.in = phi x86_amx [ %5, %if.else ], [ %2, %if.then ]
    %6 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col, i16 8, x86_amx %c.sroa.1044.0.in, x86_amx %a.sroa.1094.0.in, x86_amx %b.sroa.1069.0.in)
    tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, ptr @buf, i64 32, x86_amx %6)
    ret void
  }

  declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, ptr, i64) #1
  declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) #1
  declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx) #1

  attributes #0 = { "target-features"="+amx-int8,+avx512f" }
  attributes #1 = { nounwind "target-features"="+amx-int8,+avx512f" }

...
---
name:            test_api
alignment:       16
tracksRegLiveness: true
registers:
  - { id: 0, class: tile }
  - { id: 1, class: tile }
  - { id: 2, class: tile }
  - { id: 3, class: tile }
  - { id: 4, class: tile }
  - { id: 5, class: tile }
  - { id: 6, class: tile }
  - { id: 7, class: tile }
  - { id: 8, class: tile }
  - { id: 9, class: gr32 }
  - { id: 10, class: gr32 }
  - { id: 11, class: gr32 }
  - { id: 12, class: gr16 }
  - { id: 13, class: gr16 }
  - { id: 14, class: gr64 }
  - { id: 15, class: gr64_nosp }
  - { id: 16, class: gr16 }
  - { id: 17, class: gr64 }
  - { id: 18, class: gr64_nosp }
  - { id: 19, class: gr16 }
  - { id: 20, class: gr16 }
  - { id: 21, class: tile }
  - { id: 22, class: gr64 }
  - { id: 23, class: gr64_nosp }
liveins:
  - { reg: '$edi', virtual-reg: '%9' }
  - { reg: '$esi', virtual-reg: '%10' }
  - { reg: '$edx', virtual-reg: '%11' }
frameInfo:
  maxAlignment:    1
machineFunctionInfo:
  amxProgModel: ManagedRA
body:             |
  ; CHECK-LABEL: name: test_api
  ; CHECK: bb.0.entry:
  ; CHECK-NEXT:   successors: %bb.2(0x30000000), %bb.1(0x50000000)
  ; CHECK-NEXT:   liveins: $edi, $esi, $edx
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[AVX512_512_SET0_:%[0-9]+]]:vr512 = AVX512_512_SET0
  ; CHECK-NEXT:   VMOVUPSZmr %stack.3, 1, $noreg, 0, $noreg, [[AVX512_512_SET0_]] :: (store (s512) into %stack.3, align 4)
  ; CHECK-NEXT:   MOV8mi %stack.3, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.3, align 4)
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gr32 = COPY killed $edx
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gr32 = COPY killed $esi
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gr32 = COPY killed $edi
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gr16 = COPY killed [[COPY]].sub_16bit
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:gr16 = COPY killed [[COPY1]].sub_16bit
  ; CHECK-NEXT:   TEST32rr killed [[COPY2]], [[COPY2]], implicit-def $eflags
  ; CHECK-NEXT:   JCC_1 %bb.2, 4, implicit killed $eflags
  ; CHECK-NEXT:   JMP_1 %bb.1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1.if.then:
  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[MOV32ri64_:%[0-9]+]]:gr64 = MOV32ri64 @buf
  ; CHECK-NEXT:   [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32
  ; CHECK-NEXT:   [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 8
  ; CHECK-NEXT:   PLDTILECFGV %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.3, align 4)
  ; CHECK-NEXT:   [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg
  ; CHECK-NEXT:   [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[MOV16ri]], [[MOV32ri64_]], 1, [[MOV32ri64_1]], 0, $noreg
  ; CHECK-NEXT:   [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   TILESTORED %stack.2, 1, killed [[MOV64ri]], 0, $noreg, [[PTILELOADDV]] :: (store (s8192) into %stack.2)
  ; CHECK-NEXT:   [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.1, 1, $noreg, 0, $noreg
  ; CHECK-NEXT:   [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri]], [[COPY3]], [[MOV32ri64_]], 1, [[MOV32ri64_1]], 0, $noreg
  ; CHECK-NEXT:   [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   TILESTORED %stack.1, 1, killed [[MOV64ri1]], 0, $noreg, [[PTILELOADDV1]] :: (store (s8192) into %stack.1)
  ; CHECK-NEXT:   [[LEA64r2:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg
  ; CHECK-NEXT:   [[PTILELOADDV2:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[COPY3]], killed [[MOV32ri64_]], 1, killed [[MOV32ri64_1]], 0, $noreg
  ; CHECK-NEXT:   [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   TILESTORED %stack.0, 1, killed [[MOV64ri2]], 0, $noreg, [[PTILELOADDV2]] :: (store (s8192) into %stack.0)
  ; CHECK-NEXT:   JMP_1 %bb.3
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2.if.else:
  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[MOV32ri64_2:%[0-9]+]]:gr64 = MOV32ri64 @buf2
  ; CHECK-NEXT:   [[MOV32ri64_3:%[0-9]+]]:gr64_nosp = MOV32ri64 32
  ; CHECK-NEXT:   [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8
  ; CHECK-NEXT:   PLDTILECFGV %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.3, align 4)
  ; CHECK-NEXT:   [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.6, 1, $noreg, 0, $noreg
  ; CHECK-NEXT:   [[PTILELOADDV3:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[MOV16ri1]], [[MOV32ri64_2]], 1, [[MOV32ri64_3]], 0, $noreg
  ; CHECK-NEXT:   [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   TILESTORED %stack.6, 1, killed [[MOV64ri3]], 0, $noreg, [[PTILELOADDV3]] :: (store (s8192) into %stack.6)
  ; CHECK-NEXT:   [[LEA64r4:%[0-9]+]]:gr64_nosp = LEA64r %stack.5, 1, $noreg, 0, $noreg
  ; CHECK-NEXT:   [[PTILELOADDV4:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[COPY3]], [[MOV32ri64_2]], 1, [[MOV32ri64_3]], 0, $noreg
  ; CHECK-NEXT:   [[MOV64ri4:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   TILESTORED %stack.5, 1, killed [[MOV64ri4]], 0, $noreg, [[PTILELOADDV4]] :: (store (s8192) into %stack.5)
  ; CHECK-NEXT:   [[LEA64r5:%[0-9]+]]:gr64_nosp = LEA64r %stack.4, 1, $noreg, 0, $noreg
  ; CHECK-NEXT:   [[PTILELOADDV5:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[COPY3]], killed [[MOV32ri64_2]], 1, killed [[MOV32ri64_3]], 0, $noreg
  ; CHECK-NEXT:   [[MOV64ri5:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   TILESTORED %stack.4, 1, killed [[MOV64ri5]], 0, $noreg, [[PTILELOADDV5]] :: (store (s8192) into %stack.4)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.3.if.end:
  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[MOV16ri1]], %bb.2
  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:gr16 = PHI [[COPY4]], %bb.1, [[COPY4]], %bb.2
  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.1, [[LEA64r3]], %bb.2
  ; CHECK-NEXT:   [[PHI3:%[0-9]+]]:gr16 = PHI [[COPY3]], %bb.1, [[COPY3]], %bb.2
  ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[MOV16ri1]], %bb.2
  ; CHECK-NEXT:   [[PHI5:%[0-9]+]]:gr64_nosp = PHI [[LEA64r1]], %bb.1, [[LEA64r4]], %bb.2
  ; CHECK-NEXT:   [[PHI6:%[0-9]+]]:gr16 = PHI [[COPY3]], %bb.1, [[COPY3]], %bb.2
  ; CHECK-NEXT:   [[PHI7:%[0-9]+]]:gr16 = PHI [[COPY4]], %bb.1, [[COPY4]], %bb.2
  ; CHECK-NEXT:   [[PHI8:%[0-9]+]]:gr64_nosp = PHI [[LEA64r2]], %bb.1, [[LEA64r5]], %bb.2
  ; CHECK-NEXT:   PLDTILECFGV %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.3, align 4)
  ; CHECK-NEXT:   [[MOV64ri6:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   [[PTILELOADDV6:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri6]], 0, $noreg
  ; CHECK-NEXT:   [[MOV64ri7:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   [[PTILELOADDV7:%[0-9]+]]:tile = PTILELOADDV [[PHI4]], [[PHI3]], [[PHI5]], 1, killed [[MOV64ri7]], 0, $noreg
  ; CHECK-NEXT:   [[MOV64ri8:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT:   [[PTILELOADDV8:%[0-9]+]]:tile = PTILELOADDV [[PHI7]], [[PHI6]], [[PHI8]], 1, killed [[MOV64ri8]], 0, $noreg
  ; CHECK-NEXT:   [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 8
  ; CHECK-NEXT:   [[PTDPBSSDV:%[0-9]+]]:tile = PTDPBSSDV [[COPY4]], [[COPY3]], killed [[MOV16ri2]], killed [[PTILELOADDV8]], killed [[PTILELOADDV6]], killed [[PTILELOADDV7]]
  ; CHECK-NEXT:   [[MOV32ri64_4:%[0-9]+]]:gr64 = MOV32ri64 @buf
  ; CHECK-NEXT:   [[MOV32ri64_5:%[0-9]+]]:gr64_nosp = MOV32ri64 32
  ; CHECK-NEXT:   PTILESTOREDV killed [[COPY4]], killed [[COPY3]], killed [[MOV32ri64_4]], 1, killed [[MOV32ri64_5]], 0, $noreg, killed [[PTDPBSSDV]]
  ; CHECK-NEXT:   RET 0
  bb.0.entry:
    successors: %bb.2(0x30000000), %bb.1(0x50000000)
    liveins: $edi, $esi, $edx


    %11:gr32 = COPY killed $edx
    %10:gr32 = COPY killed $esi
    %9:gr32 = COPY killed $edi
    %13:gr16 = COPY killed %11.sub_16bit
    %12:gr16 = COPY killed %10.sub_16bit
    TEST32rr killed %9, %9, implicit-def $eflags
    JCC_1 %bb.2, 4, implicit killed $eflags
    JMP_1 %bb.1

  bb.1.if.then:
    %14:gr64 = MOV32ri64 @buf
    %15:gr64_nosp = MOV32ri64 32
    %16:gr16 = MOV16ri 8
    %0:tile = PTILELOADDV %12, %16, %14, 1, %15, 0, $noreg
    %1:tile = PTILELOADDV killed %16, %13, %14, 1, %15, 0, $noreg
    %2:tile = PTILELOADDV %12, %13, killed %14, 1, killed %15, 0, $noreg
    JMP_1 %bb.3

  bb.2.if.else:
    %17:gr64 = MOV32ri64 @buf2
    %18:gr64_nosp = MOV32ri64 32
    %19:gr16 = MOV16ri 8
    %3:tile = PTILELOADDV %12, %19, %17, 1, %18, 0, $noreg
    %4:tile = PTILELOADDV killed %19, %13, %17, 1, %18, 0, $noreg
    %5:tile = PTILELOADDV %12, %13, killed %17, 1, killed %18, 0, $noreg

  bb.3.if.end:


    %6:tile = PHI %0, %bb.1, %3, %bb.2
    %7:tile = PHI %1, %bb.1, %4, %bb.2
    %8:tile = PHI %2, %bb.1, %5, %bb.2
    %20:gr16 = MOV16ri 8
    %21:tile = PTDPBSSDV %12, %13, killed %20, killed %8, killed %6, killed %7
    %22:gr64 = MOV32ri64 @buf
    %23:gr64_nosp = MOV32ri64 32
    PTILESTOREDV killed %12, killed %13, killed %22, 1, killed %23, 0, $noreg, killed %21
    RET 0

...