# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=x86_64-- -run-pass=fastpretileconfig -o - %s | FileCheck %s

--- |
  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-unknown-unknown"

  @buf = dso_local global [1024 x i8] zeroinitializer, align 16
  @buf2 = dso_local global [1024 x i8] zeroinitializer, align 16

  define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) local_unnamed_addr #0 {
  entry:
    %tobool.not = icmp eq i32 %cond, 0
    br i1 %tobool.not, label %if.else, label %if.then

  if.then:                                          ; preds = %entry
    %0 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, ptr @buf, i64 32)
    %1 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, ptr @buf, i64 32)
    %2 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, ptr @buf, i64 32)
    br label %if.end

  if.else:                                          ; preds = %entry
    %3 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, ptr @buf2, i64 32)
    %4 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, ptr @buf2, i64 32)
    %5 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, ptr @buf2, i64 32)
    br label %if.end

  if.end:                                           ; preds = %if.else, %if.then
    %a.sroa.1094.0.in = phi x86_amx [ %3, %if.else ], [ %0, %if.then ]
    %b.sroa.1069.0.in = phi x86_amx [ %4, %if.else ], [ %1, %if.then ]
    %c.sroa.1044.0.in = phi x86_amx [ %5, %if.else ], [ %2, %if.then ]
    %6 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col, i16 8, x86_amx %c.sroa.1044.0.in, x86_amx %a.sroa.1094.0.in, x86_amx %b.sroa.1069.0.in)
    tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, ptr @buf, i64 32, x86_amx %6)
    ret void
  }

  declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, ptr, i64) #1
  declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) #1
  declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx) #1

  attributes #0 = { "target-features"="+amx-int8,+avx512f" }
  attributes #1 = { nounwind "target-features"="+amx-int8,+avx512f" }

...
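# test_api loads tiles in both arms of a diamond and joins them with x86_amx
# PHIs in if.end. The autogenerated assertions in the MIR body below record
# what fastpretileconfig is expected to produce for that shape: a zeroed tile
# configuration slot (%stack.3) whose first byte is set to 1, a PLDTILECFGV
# reload of that slot ahead of each block that defines tile registers, spills
# of the tiles that are live across the PHIs to stack slots via TILESTORED,
# and tile PHIs rewritten into PHIs over the spill-slot addresses, with the
# values reloaded by PTILELOADDV before the PTDPBSSDV/PTILESTOREDV users.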
---
name:            test_api
alignment:       16
tracksRegLiveness: true
registers:
  - { id: 0, class: tile }
  - { id: 1, class: tile }
  - { id: 2, class: tile }
  - { id: 3, class: tile }
  - { id: 4, class: tile }
  - { id: 5, class: tile }
  - { id: 6, class: tile }
  - { id: 7, class: tile }
  - { id: 8, class: tile }
  - { id: 9, class: gr32 }
  - { id: 10, class: gr32 }
  - { id: 11, class: gr32 }
  - { id: 12, class: gr16 }
  - { id: 13, class: gr16 }
  - { id: 14, class: gr64 }
  - { id: 15, class: gr64_nosp }
  - { id: 16, class: gr16 }
  - { id: 17, class: gr64 }
  - { id: 18, class: gr64_nosp }
  - { id: 19, class: gr16 }
  - { id: 20, class: gr16 }
  - { id: 21, class: tile }
  - { id: 22, class: gr64 }
  - { id: 23, class: gr64_nosp }
liveins:
  - { reg: '$edi', virtual-reg: '%9' }
  - { reg: '$esi', virtual-reg: '%10' }
  - { reg: '$edx', virtual-reg: '%11' }
frameInfo:
  maxAlignment:    1
machineFunctionInfo:
  amxProgModel:    ManagedRA
body:             |
  ; CHECK-LABEL: name: test_api
  ; CHECK: bb.0.entry:
  ; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.1(0x50000000)
  ; CHECK-NEXT: liveins: $edi, $esi, $edx
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[AVX512_512_SET0_:%[0-9]+]]:vr512 = AVX512_512_SET0
  ; CHECK-NEXT: VMOVUPSZmr %stack.3, 1, $noreg, 0, $noreg, [[AVX512_512_SET0_]] :: (store (s512) into %stack.3, align 4)
  ; CHECK-NEXT: MOV8mi %stack.3, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.3, align 4)
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY killed $edx
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY killed $esi
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY killed $edi
  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY killed [[COPY]].sub_16bit
  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY killed [[COPY1]].sub_16bit
  ; CHECK-NEXT: TEST32rr killed [[COPY2]], [[COPY2]], implicit-def $eflags
  ; CHECK-NEXT: JCC_1 %bb.2, 4, implicit killed $eflags
  ; CHECK-NEXT: JMP_1 %bb.1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1.if.then:
  ; CHECK-NEXT: successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64 = MOV32ri64 @buf
  ; CHECK-NEXT: [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32
  ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 8
  ; CHECK-NEXT: PLDTILECFGV %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.3, align 4)
  ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg
  ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[MOV16ri]], [[MOV32ri64_]], 1, [[MOV32ri64_1]], 0, $noreg
  ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT: TILESTORED %stack.2, 1, killed [[MOV64ri]], 0, $noreg, [[PTILELOADDV]] :: (store (s8192) into %stack.2)
  ; CHECK-NEXT: [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.1, 1, $noreg, 0, $noreg
  ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri]], [[COPY3]], [[MOV32ri64_]], 1, [[MOV32ri64_1]], 0, $noreg
  ; CHECK-NEXT: [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT: TILESTORED %stack.1, 1, killed [[MOV64ri1]], 0, $noreg, [[PTILELOADDV1]] :: (store (s8192) into %stack.1)
  ; CHECK-NEXT: [[LEA64r2:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg
  ; CHECK-NEXT: [[PTILELOADDV2:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[COPY3]], killed [[MOV32ri64_]], 1, killed [[MOV32ri64_1]], 0, $noreg
  ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT: TILESTORED %stack.0, 1, killed [[MOV64ri2]], 0, $noreg, [[PTILELOADDV2]] :: (store (s8192) into %stack.0)
  ; CHECK-NEXT: JMP_1 %bb.3
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2.if.else:
  ; CHECK-NEXT: successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[MOV32ri64_2:%[0-9]+]]:gr64 = MOV32ri64 @buf2
  ; CHECK-NEXT: [[MOV32ri64_3:%[0-9]+]]:gr64_nosp = MOV32ri64 32
  ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 8
  ; CHECK-NEXT: PLDTILECFGV %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.3, align 4)
  ; CHECK-NEXT: [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.6, 1, $noreg, 0, $noreg
  ; CHECK-NEXT: [[PTILELOADDV3:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[MOV16ri1]], [[MOV32ri64_2]], 1, [[MOV32ri64_3]], 0, $noreg
  ; CHECK-NEXT: [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT: TILESTORED %stack.6, 1, killed [[MOV64ri3]], 0, $noreg, [[PTILELOADDV3]] :: (store (s8192) into %stack.6)
  ; CHECK-NEXT: [[LEA64r4:%[0-9]+]]:gr64_nosp = LEA64r %stack.5, 1, $noreg, 0, $noreg
  ; CHECK-NEXT: [[PTILELOADDV4:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[COPY3]], [[MOV32ri64_2]], 1, [[MOV32ri64_3]], 0, $noreg
  ; CHECK-NEXT: [[MOV64ri4:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT: TILESTORED %stack.5, 1, killed [[MOV64ri4]], 0, $noreg, [[PTILELOADDV4]] :: (store (s8192) into %stack.5)
  ; CHECK-NEXT: [[LEA64r5:%[0-9]+]]:gr64_nosp = LEA64r %stack.4, 1, $noreg, 0, $noreg
  ; CHECK-NEXT: [[PTILELOADDV5:%[0-9]+]]:tile = PTILELOADDV [[COPY4]], [[COPY3]], killed [[MOV32ri64_2]], 1, killed [[MOV32ri64_3]], 0, $noreg
  ; CHECK-NEXT: [[MOV64ri5:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT: TILESTORED %stack.4, 1, killed [[MOV64ri5]], 0, $noreg, [[PTILELOADDV5]] :: (store (s8192) into %stack.4)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.3.if.end:
  ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[MOV16ri1]], %bb.2
  ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gr16 = PHI [[COPY4]], %bb.1, [[COPY4]], %bb.2
  ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.1, [[LEA64r3]], %bb.2
  ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gr16 = PHI [[COPY3]], %bb.1, [[COPY3]], %bb.2
  ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[MOV16ri1]], %bb.2
  ; CHECK-NEXT: [[PHI5:%[0-9]+]]:gr64_nosp = PHI [[LEA64r1]], %bb.1, [[LEA64r4]], %bb.2
  ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gr16 = PHI [[COPY3]], %bb.1, [[COPY3]], %bb.2
  ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gr16 = PHI [[COPY4]], %bb.1, [[COPY4]], %bb.2
  ; CHECK-NEXT: [[PHI8:%[0-9]+]]:gr64_nosp = PHI [[LEA64r2]], %bb.1, [[LEA64r5]], %bb.2
  ; CHECK-NEXT: PLDTILECFGV %stack.3, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.3, align 4)
  ; CHECK-NEXT: [[MOV64ri6:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT: [[PTILELOADDV6:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri6]], 0, $noreg
  ; CHECK-NEXT: [[MOV64ri7:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT: [[PTILELOADDV7:%[0-9]+]]:tile = PTILELOADDV [[PHI4]], [[PHI3]], [[PHI5]], 1, killed [[MOV64ri7]], 0, $noreg
  ; CHECK-NEXT: [[MOV64ri8:%[0-9]+]]:gr64_nosp = MOV64ri 64
  ; CHECK-NEXT: [[PTILELOADDV8:%[0-9]+]]:tile = PTILELOADDV [[PHI7]], [[PHI6]], [[PHI8]], 1, killed [[MOV64ri8]], 0, $noreg
  ; CHECK-NEXT: [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 8
  ; CHECK-NEXT: [[PTDPBSSDV:%[0-9]+]]:tile = PTDPBSSDV [[COPY4]], [[COPY3]], killed [[MOV16ri2]], killed [[PTILELOADDV8]], killed [[PTILELOADDV6]], killed [[PTILELOADDV7]]
  ; CHECK-NEXT: [[MOV32ri64_4:%[0-9]+]]:gr64 = MOV32ri64 @buf
  ; CHECK-NEXT: [[MOV32ri64_5:%[0-9]+]]:gr64_nosp = MOV32ri64 32
  ; CHECK-NEXT: PTILESTOREDV killed [[COPY4]], killed [[COPY3]], killed [[MOV32ri64_4]], 1, killed [[MOV32ri64_5]], 0, $noreg, killed [[PTDPBSSDV]]
  ; CHECK-NEXT: RET 0
  bb.0.entry:
    successors: %bb.2(0x30000000), %bb.1(0x50000000)
    liveins: $edi, $esi, $edx

    %11:gr32 = COPY killed $edx
    %10:gr32 = COPY killed $esi
    %9:gr32 = COPY killed $edi
    %13:gr16 = COPY killed %11.sub_16bit
    %12:gr16 = COPY killed %10.sub_16bit
    TEST32rr killed %9, %9, implicit-def $eflags
    JCC_1 %bb.2, 4, implicit killed $eflags
    JMP_1 %bb.1

  bb.1.if.then:
    %14:gr64 = MOV32ri64 @buf
    %15:gr64_nosp = MOV32ri64 32
    %16:gr16 = MOV16ri 8
    %0:tile = PTILELOADDV %12, %16, %14, 1, %15, 0, $noreg
    %1:tile = PTILELOADDV killed %16, %13, %14, 1, %15, 0, $noreg
    %2:tile = PTILELOADDV %12, %13, killed %14, 1, killed %15, 0, $noreg
    JMP_1 %bb.3

  bb.2.if.else:
    %17:gr64 = MOV32ri64 @buf2
    %18:gr64_nosp = MOV32ri64 32
    %19:gr16 = MOV16ri 8
    %3:tile = PTILELOADDV %12, %19, %17, 1, %18, 0, $noreg
    %4:tile = PTILELOADDV killed %19, %13, %17, 1, %18, 0, $noreg
    %5:tile = PTILELOADDV %12, %13, killed %17, 1, killed %18, 0, $noreg

  bb.3.if.end:
    %6:tile = PHI %0, %bb.1, %3, %bb.2
    %7:tile = PHI %1, %bb.1, %4, %bb.2
    %8:tile = PHI %2, %bb.1, %5, %bb.2
    %20:gr16 = MOV16ri 8
    %21:tile = PTDPBSSDV %12, %13, killed %20, killed %8, killed %6, killed %7
    %22:gr64 = MOV32ri64 @buf
    %23:gr64_nosp = MOV32ri64 32
    PTILESTOREDV killed %12, killed %13, killed %22, 1, killed %23, 0, $noreg, killed %21
    RET 0

...