1//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9let WantsRoot = true in { 10 def FlatOffset : ComplexPattern<iPTR, 2, "SelectFlatOffset", [], [], -10>; 11 def GlobalOffset : ComplexPattern<iPTR, 2, "SelectGlobalOffset", [], [], -10>; 12 def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [], -10>; 13 14 def GlobalSAddr : ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [], -10>; 15 def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [], -10>; 16 def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [], -10>; 17} 18 19//===----------------------------------------------------------------------===// 20// FLAT classes 21//===----------------------------------------------------------------------===// 22 23class FLAT_Pseudo<string opName, dag outs, dag ins, 24 string asmOps, list<dag> pattern=[]> : 25 InstSI<outs, ins, "", pattern>, 26 SIMCInstr<NAME, SIEncodingFamily.NONE> { 27 28 let isPseudo = 1; 29 let isCodeGenOnly = 1; 30 31 let FLAT = 1; 32 33 let UseNamedOperandTable = 1; 34 let hasSideEffects = 0; 35 let SchedRW = [WriteVMEM]; 36 37 string Mnemonic = opName; 38 string AsmOperands = asmOps; 39 40 bits<1> is_flat_global = 0; 41 bits<1> is_flat_scratch = 0; 42 43 bits<1> has_vdst = 1; 44 45 // We need to distinguish having saddr and enabling saddr because 46 // saddr is only valid for scratch and global instructions. Pre-gfx9 47 // these bits were reserved, so we also don't necessarily want to 48 // set these bits to the disabled value for the original flat 49 // segment instructions. 50 bits<1> has_saddr = 0; 51 bits<1> enabled_saddr = 0; 52 bits<7> saddr_value = 0; 53 bits<1> has_vaddr = 1; 54 55 bits<1> has_data = 1; 56 bits<1> has_glc = 1; 57 bits<1> glcValue = 0; 58 bits<1> has_dlc = 1; 59 bits<1> dlcValue = 0; 60 bits<1> has_sccb = 1; 61 bits<1> sccbValue = 0; 62 bits<1> has_sve = 0; // Scratch VGPR Enable 63 bits<1> lds = 0; 64 bits<1> sve = 0; 65 bits<1> has_offset = 1; 66 67 let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts, 68 !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace)); 69 70 // TODO: M0 if it could possibly access LDS (before gfx9? only)? 71 let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]); 72 73 // Internally, FLAT instruction are executed as both an LDS and a 74 // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT 75 // and are not considered done until both have been decremented. 76 let VM_CNT = 1; 77 let LGKM_CNT = !not(!or(is_flat_global, is_flat_scratch)); 78 79 let FlatGlobal = is_flat_global; 80 81 let FlatScratch = is_flat_scratch; 82} 83 84class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : 85 InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>, 86 Enc64 { 87 88 let isPseudo = 0; 89 let isCodeGenOnly = 0; 90 91 let FLAT = 1; 92 93 // copy relevant pseudo op flags 94 let SubtargetPredicate = ps.SubtargetPredicate; 95 let AsmMatchConverter = ps.AsmMatchConverter; 96 let OtherPredicates = ps.OtherPredicates; 97 let TSFlags = ps.TSFlags; 98 let UseNamedOperandTable = ps.UseNamedOperandTable; 99 let SchedRW = ps.SchedRW; 100 let mayLoad = ps.mayLoad; 101 let mayStore = ps.mayStore; 102 let IsAtomicRet = ps.IsAtomicRet; 103 let IsAtomicNoRet = ps.IsAtomicNoRet; 104 let VM_CNT = ps.VM_CNT; 105 let LGKM_CNT = ps.LGKM_CNT; 106 let VALU = ps.VALU; 107 let Uses = ps.Uses; 108 let Defs = ps.Defs; 109 let isConvergent = ps.isConvergent; 110 111 // encoding fields 112 bits<8> vaddr; 113 bits<10> vdata; 114 bits<7> saddr; 115 bits<10> vdst; 116 117 bits<5> cpol; 118 119 // Only valid on gfx9 120 bits<1> lds = ps.lds; // LDS DMA for global and scratch 121 122 // Segment, 00=flat, 01=scratch, 10=global, 11=reserved 123 bits<2> seg = {ps.is_flat_global, ps.is_flat_scratch}; 124 125 // Signed offset. Highest bit ignored for flat and treated as 12-bit 126 // unsigned for flat accesses. 127 bits<13> offset; 128 // GFX90A+ only: instruction uses AccVGPR for data 129 bits<1> acc = !if(ps.has_vdst, vdst{9}, !if(ps.has_data, vdata{9}, 0)); 130 131 // We don't use tfe right now, and it was removed in gfx9. 132 bits<1> tfe = 0; 133 134 // Only valid on GFX9+ 135 let Inst{12-0} = offset; 136 let Inst{13} = !if(ps.has_sve, ps.sve, lds); 137 let Inst{15-14} = seg; 138 139 let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue); 140 let Inst{17} = cpol{CPolBit.SLC}; 141 let Inst{24-18} = op; 142 let Inst{31-26} = 0x37; // Encoding. 143 let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); 144 let Inst{47-40} = !if(ps.has_data, vdata{7-0}, ?); 145 let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0); 146 147 // 54-48 is reserved. 148 let Inst{55} = acc; // nv on GFX9+, TFE before. AccVGPR for data on GFX90A. 149 let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, ?); 150} 151 152class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : 153 InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>, 154 Enc96 { 155 156 let FLAT = 1; 157 158 // copy relevant pseudo op flags 159 let SubtargetPredicate = ps.SubtargetPredicate; 160 let WaveSizePredicate = ps.WaveSizePredicate; 161 let AsmMatchConverter = ps.AsmMatchConverter; 162 let OtherPredicates = ps.OtherPredicates; 163 let TSFlags = ps.TSFlags; 164 let UseNamedOperandTable = ps.UseNamedOperandTable; 165 let SchedRW = ps.SchedRW; 166 let mayLoad = ps.mayLoad; 167 let mayStore = ps.mayStore; 168 let IsAtomicRet = ps.IsAtomicRet; 169 let IsAtomicNoRet = ps.IsAtomicNoRet; 170 let VM_CNT = ps.VM_CNT; 171 let LGKM_CNT = ps.LGKM_CNT; 172 let VALU = ps.VALU; 173 let Uses = ps.Uses; 174 let Defs = ps.Defs; 175 let isConvergent = ps.isConvergent; 176 177 bits<7> saddr; 178 bits<8> vdst; 179 bits<6> cpol; 180 bits<8> vdata; // vsrc 181 bits<8> vaddr; 182 bits<24> offset; 183 184 let Inst{6-0} = !if(ps.enabled_saddr, saddr, SGPR_NULL_gfx11plus.Index); 185 let Inst{21-14} = op; 186 let Inst{31-26} = 0x3b; 187 let Inst{39-32} = !if(ps.has_vdst, vdst, ?); 188 let Inst{49} = ps.sve; 189 let Inst{54-53} = cpol{2-1}; // th{2-1} 190 let Inst{52} = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0} 191 let Inst{51-50} = cpol{4-3}; // scope 192 let Inst{62-55} = !if(ps.has_data, vdata{7-0}, ?); 193 let Inst{71-64} = !if(ps.has_vaddr, vaddr, ?); 194 let Inst{95-72} = !if(ps.has_offset, offset, ?); 195} 196 197class GlobalSaddrTable <bit is_saddr, string Name = ""> { 198 bit IsSaddr = is_saddr; 199 string SaddrOp = Name; 200} 201 202// TODO: Is exec allowed for saddr? The disabled value 0x7f is the 203// same encoding value as exec_hi, so it isn't possible to use that if 204// saddr is 32-bit (which isn't handled here yet). 205class FLAT_Load_Pseudo <string opName, RegisterClass regClass, 206 bit HasTiedOutput = 0, 207 bit HasSaddr = 0, bit EnableSaddr = 0, 208 RegisterOperand vdata_op = getLdStRegisterOperand<regClass>.ret> : FLAT_Pseudo< 209 opName, 210 (outs vdata_op:$vdst), 211 !con( 212 !if(EnableSaddr, 213 (ins SReg_64_XEXEC_XNULL:$saddr, VGPR_32:$vaddr), 214 (ins VReg_64:$vaddr)), 215 (ins flat_offset:$offset), 216 // FIXME: Operands with default values do not work with following non-optional operands. 217 !if(HasTiedOutput, (ins CPol:$cpol, vdata_op:$vdst_in), 218 (ins CPol_0:$cpol))), 219 " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> { 220 let has_data = 0; 221 let mayLoad = 1; 222 let has_saddr = HasSaddr; 223 let enabled_saddr = EnableSaddr; 224 225 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); 226 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); 227} 228 229class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass, 230 bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo< 231 opName, 232 (outs), 233 !con( 234 !if(EnableSaddr, 235 (ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64_XEXEC_XNULL:$saddr), 236 (ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)), 237 (ins flat_offset:$offset, CPol_0:$cpol)), 238 " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> { 239 let mayLoad = 0; 240 let mayStore = 1; 241 let has_vdst = 0; 242 let has_saddr = HasSaddr; 243 let enabled_saddr = EnableSaddr; 244} 245 246multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> { 247 let is_flat_global = 1 in { 248 def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>, 249 GlobalSaddrTable<0, opName>; 250 def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>, 251 GlobalSaddrTable<1, opName>; 252 } 253} 254 255class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass, 256 bit HasTiedOutput = 0, bit EnableSaddr = 0> : FLAT_Pseudo< 257 opName, 258 (outs regClass:$vdst), 259 !con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)), 260 (ins flat_offset:$offset, CPol_0:$cpol), 261 !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))), 262 " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 263 let is_flat_global = 1; 264 let has_data = 0; 265 let mayLoad = 1; 266 let has_vaddr = 0; 267 let has_saddr = 1; 268 let enabled_saddr = EnableSaddr; 269 270 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); 271 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); 272} 273 274multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass, 275 bit HasTiedOutput = 0> { 276 def "" : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput>, 277 GlobalSaddrTable<0, opName>; 278 def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, 1>, 279 GlobalSaddrTable<1, opName>; 280} 281 282multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> { 283 let is_flat_global = 1 in { 284 def "" : FLAT_Store_Pseudo<opName, regClass, 1>, 285 GlobalSaddrTable<0, opName>; 286 def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>, 287 GlobalSaddrTable<1, opName>; 288 } 289} 290 291class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo< 292 opName, 293 (outs ), 294 !con( 295 !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)), 296 (ins flat_offset:$offset, CPol_0:$cpol)), 297 " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> { 298 let LGKM_CNT = 1; 299 let is_flat_global = 1; 300 let lds = 1; 301 let has_data = 0; 302 let has_vdst = 0; 303 let mayLoad = 1; 304 let mayStore = 1; 305 let has_saddr = 1; 306 let enabled_saddr = EnableSaddr; 307 let VALU = 1; 308 let Uses = [M0, EXEC]; 309 let SchedRW = [WriteVMEM, WriteLDS]; 310} 311 312multiclass FLAT_Global_Load_LDS_Pseudo<string opName> { 313 def "" : FLAT_Global_Load_LDS_Pseudo<opName>, 314 GlobalSaddrTable<0, opName>; 315 def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1>, 316 GlobalSaddrTable<1, opName>; 317} 318 319class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass, 320 bit EnableSaddr = 0> : FLAT_Pseudo< 321 opName, 322 (outs), 323 !con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)), 324 (ins flat_offset:$offset, CPol:$cpol)), 325 " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 326 let is_flat_global = 1; 327 let mayLoad = 0; 328 let mayStore = 1; 329 let has_vdst = 0; 330 let has_vaddr = 0; 331 let has_saddr = 1; 332 let enabled_saddr = EnableSaddr; 333} 334 335multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass> { 336 def "" : FLAT_Global_Store_AddTid_Pseudo<opName, regClass>, 337 GlobalSaddrTable<0, opName>; 338 def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, 1>, 339 GlobalSaddrTable<1, opName>; 340} 341 342class FLAT_Global_Invalidate_Writeback<string opName, SDPatternOperator node = null_frag> : 343 FLAT_Pseudo<opName, (outs), (ins CPol:$cpol), "$cpol", [(node)]> { 344 345 let AsmMatchConverter = ""; 346 347 let hasSideEffects = 1; 348 let mayLoad = 0; 349 let mayStore = 0; 350 let is_flat_global = 1; 351 352 let has_offset = 0; 353 let has_saddr = 0; 354 let enabled_saddr = 0; 355 let saddr_value = 0; 356 let has_vdst = 0; 357 let has_data = 0; 358 let has_vaddr = 0; 359 let has_glc = 0; 360 let has_dlc = 0; 361 let glcValue = 0; 362 let dlcValue = 0; 363 let has_sccb = 0; 364 let sccbValue = 0; 365 let has_sve = 0; 366 let lds = 0; 367 let sve = 0; 368} 369 370class FlatScratchInst <string sv_op, string mode> { 371 string SVOp = sv_op; 372 string Mode = mode; 373} 374 375class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass, 376 bit HasTiedOutput = 0, 377 bit EnableSaddr = 0, 378 bit EnableSVE = 0, 379 bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> 380 : FLAT_Pseudo< 381 opName, 382 (outs getLdStRegisterOperand<regClass>.ret:$vdst), 383 !con( 384 !if(EnableSVE, 385 (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset), 386 !if(EnableSaddr, 387 (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset), 388 !if(EnableVaddr, 389 (ins VGPR_32:$vaddr, flat_offset:$offset), 390 (ins flat_offset:$offset)))), 391 !if(HasTiedOutput, (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in), 392 (ins CPol_0:$cpol))), 393 " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 394 let is_flat_scratch = 1; 395 let has_data = 0; 396 let mayLoad = 1; 397 let has_saddr = 1; 398 let enabled_saddr = EnableSaddr; 399 let has_vaddr = EnableVaddr; 400 let has_sve = EnableSVE; 401 let sve = EnableVaddr; 402 403 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); 404 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); 405} 406 407class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0, 408 bit EnableSVE = 0, 409 bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr)), 410 RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret> : FLAT_Pseudo< 411 opName, 412 (outs), 413 !if(EnableSVE, 414 (ins vdata_op:$vdata, VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol), 415 !if(EnableSaddr, 416 (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol), 417 !if(EnableVaddr, 418 (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol), 419 (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol)))), 420 " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 421 let is_flat_scratch = 1; 422 let mayLoad = 0; 423 let mayStore = 1; 424 let has_vdst = 0; 425 let has_saddr = 1; 426 let enabled_saddr = EnableSaddr; 427 let has_vaddr = EnableVaddr; 428 let has_sve = EnableSVE; 429 let sve = EnableVaddr; 430} 431 432multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedOutput = 0> { 433 def "" : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput>, 434 FlatScratchInst<opName, "SV">; 435 def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1>, 436 FlatScratchInst<opName, "SS">; 437 438 let SubtargetPredicate = HasFlatScratchSVSMode in 439 def _SVS : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1, 1>, 440 FlatScratchInst<opName, "SVS">; 441 442 let SubtargetPredicate = HasFlatScratchSTMode in 443 def _ST : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0, 0>, 444 FlatScratchInst<opName, "ST">; 445} 446 447multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> { 448 def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>, 449 FlatScratchInst<opName, "SV">; 450 def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>, 451 FlatScratchInst<opName, "SS">; 452 453 let SubtargetPredicate = HasFlatScratchSVSMode in 454 def _SVS : FLAT_Scratch_Store_Pseudo<opName, regClass, 1, 1>, 455 FlatScratchInst<opName, "SVS">; 456 457 let SubtargetPredicate = HasFlatScratchSTMode in 458 def _ST : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0, 0>, 459 FlatScratchInst<opName, "ST">; 460} 461 462class FLAT_Scratch_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0, 463 bit EnableSVE = 0, 464 bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> : FLAT_Pseudo< 465 opName, 466 (outs ), 467 !if(EnableSVE, 468 (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol), 469 !if(EnableSaddr, 470 (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol), 471 !if(EnableVaddr, 472 (ins VGPR_32:$vaddr, flat_offset:$offset, CPol:$cpol), 473 (ins flat_offset:$offset, CPol:$cpol)))), 474 " "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 475 476 let LGKM_CNT = 1; 477 let is_flat_scratch = 1; 478 let lds = 1; 479 let has_data = 0; 480 let has_vdst = 0; 481 let mayLoad = 1; 482 let mayStore = 1; 483 let has_saddr = 1; 484 let enabled_saddr = EnableSaddr; 485 let has_vaddr = EnableVaddr; 486 let has_sve = EnableSVE; 487 let sve = EnableVaddr; 488 let VALU = 1; 489 let Uses = [M0, EXEC]; 490 let SchedRW = [WriteVMEM, WriteLDS]; 491} 492 493multiclass FLAT_Scratch_Load_LDS_Pseudo<string opName> { 494 def "" : FLAT_Scratch_Load_LDS_Pseudo<opName>, 495 FlatScratchInst<opName, "SV">; 496 def _SADDR : FLAT_Scratch_Load_LDS_Pseudo<opName, 1>, 497 FlatScratchInst<opName, "SS">; 498 def _SVS : FLAT_Scratch_Load_LDS_Pseudo<opName, 1, 1>, 499 FlatScratchInst<opName, "SVS">; 500 def _ST : FLAT_Scratch_Load_LDS_Pseudo<opName, 0, 0, 0>, 501 FlatScratchInst<opName, "ST">; 502} 503 504class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins, 505 string asm, list<dag> pattern = []> : 506 FLAT_Pseudo<opName, outs, ins, asm, pattern> { 507 let mayLoad = 1; 508 let mayStore = 1; 509 let has_glc = 0; 510 let glcValue = 0; 511 let has_vdst = 0; 512 let has_sccb = 1; 513 let sccbValue = 0; 514 let IsAtomicNoRet = 1; 515} 516 517class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins, 518 string asm, list<dag> pattern = []> 519 : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> { 520 let has_vdst = 1; 521 let glcValue = 1; 522 let sccbValue = 0; 523 let IsAtomicNoRet = 0; 524 let IsAtomicRet = 1; 525} 526 527multiclass FLAT_Atomic_Pseudo_NO_RTN< 528 string opName, 529 RegisterClass vdst_rc, 530 ValueType vt, 531 ValueType data_vt = vt, 532 RegisterClass data_rc = vdst_rc, 533 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { 534 def "" : FLAT_AtomicNoRet_Pseudo <opName, 535 (outs), 536 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol), 537 " $vaddr, $vdata$offset$cpol">, 538 GlobalSaddrTable<0, opName> { 539 let FPAtomic = data_vt.isFP; 540 let AddedComplexity = -1; // Prefer global atomics if available 541 } 542} 543 544multiclass FLAT_Atomic_Pseudo_RTN< 545 string opName, 546 RegisterClass vdst_rc, 547 ValueType vt, 548 ValueType data_vt = vt, 549 RegisterClass data_rc = vdst_rc, 550 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { 551 def _RTN : FLAT_AtomicRet_Pseudo <opName, 552 (outs getLdStRegisterOperand<vdst_rc>.ret:$vdst), 553 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol), 554 " $vdst, $vaddr, $vdata$offset$cpol">, 555 GlobalSaddrTable<0, opName#"_rtn"> { 556 let FPAtomic = data_vt.isFP; 557 let AddedComplexity = -1; // Prefer global atomics if available 558 } 559} 560 561multiclass FLAT_Atomic_Pseudo< 562 string opName, 563 RegisterClass vdst_rc, 564 ValueType vt, 565 ValueType data_vt = vt, 566 RegisterClass data_rc = vdst_rc, 567 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { 568 defm "" : FLAT_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc, data_op>; 569 defm "" : FLAT_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc, data_op>; 570} 571 572multiclass FLAT_Global_Atomic_Pseudo_NO_RTN< 573 string opName, 574 RegisterClass vdst_rc, 575 ValueType vt, 576 ValueType data_vt = vt, 577 RegisterClass data_rc = vdst_rc, 578 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { 579 580 let is_flat_global = 1 in { 581 def "" : FLAT_AtomicNoRet_Pseudo <opName, 582 (outs), 583 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol), 584 " $vaddr, $vdata, off$offset$cpol">, 585 GlobalSaddrTable<0, opName> { 586 let has_saddr = 1; 587 let FPAtomic = data_vt.isFP; 588 } 589 590 def _SADDR : FLAT_AtomicNoRet_Pseudo <opName, 591 (outs), 592 (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64_XEXEC_XNULL:$saddr, flat_offset:$offset, CPol_0:$cpol), 593 " $vaddr, $vdata, $saddr$offset$cpol">, 594 GlobalSaddrTable<1, opName> { 595 let has_saddr = 1; 596 let enabled_saddr = 1; 597 let FPAtomic = data_vt.isFP; 598 } 599 } 600} 601 602multiclass FLAT_Global_Atomic_Pseudo_RTN< 603 string opName, 604 RegisterClass vdst_rc, 605 ValueType vt, 606 ValueType data_vt = vt, 607 RegisterClass data_rc = vdst_rc, 608 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret, 609 RegisterOperand vdst_op = getLdStRegisterOperand<vdst_rc>.ret> { 610 611 let is_flat_global = 1 in { 612 def _RTN : FLAT_AtomicRet_Pseudo <opName, 613 (outs vdst_op:$vdst), 614 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol), 615 " $vdst, $vaddr, $vdata, off$offset$cpol">, 616 GlobalSaddrTable<0, opName#"_rtn"> { 617 let has_saddr = 1; 618 let FPAtomic = data_vt.isFP; 619 } 620 621 def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName, 622 (outs vdst_op:$vdst), 623 (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64_XEXEC_XNULL:$saddr, flat_offset:$offset, CPol_GLC1:$cpol), 624 " $vdst, $vaddr, $vdata, $saddr$offset$cpol">, 625 GlobalSaddrTable<1, opName#"_rtn"> { 626 let has_saddr = 1; 627 let enabled_saddr = 1; 628 let FPAtomic = data_vt.isFP; 629 } 630 } 631} 632 633multiclass FLAT_Global_Atomic_Pseudo< 634 string opName, 635 RegisterClass vdst_rc, 636 ValueType vt, 637 ValueType data_vt = vt, 638 RegisterClass data_rc = vdst_rc> { 639 defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc>; 640 defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc>; 641} 642 643//===----------------------------------------------------------------------===// 644// Flat Instructions 645//===----------------------------------------------------------------------===// 646 647def FLAT_LOAD_UBYTE : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>; 648def FLAT_LOAD_SBYTE : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>; 649def FLAT_LOAD_USHORT : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>; 650def FLAT_LOAD_SSHORT : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>; 651def FLAT_LOAD_DWORD : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>; 652def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>; 653def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>; 654def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>; 655 656def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>; 657def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>; 658def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>; 659def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>; 660def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>; 661def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>; 662 663let SubtargetPredicate = HasD16LoadStore in { 664let TiedSourceNotRead = 1 in { 665def FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>; 666def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>; 667def FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>; 668def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>; 669def FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>; 670def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>; 671} 672 673def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>; 674def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>; 675} 676 677defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap", 678 VGPR_32, i32, v2i32, VReg_64>; 679 680defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2", 681 VReg_64, i64, v2i64, VReg_128>; 682 683defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap", 684 VGPR_32, i32>; 685 686defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2", 687 VReg_64, i64>; 688 689defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add", 690 VGPR_32, i32>; 691 692defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub", 693 VGPR_32, i32>; 694 695defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin", 696 VGPR_32, i32>; 697 698defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin", 699 VGPR_32, i32>; 700 701defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax", 702 VGPR_32, i32>; 703 704defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax", 705 VGPR_32, i32>; 706 707defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and", 708 VGPR_32, i32>; 709 710defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or", 711 VGPR_32, i32>; 712 713defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor", 714 VGPR_32, i32>; 715 716defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc", 717 VGPR_32, i32>; 718 719defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec", 720 VGPR_32, i32>; 721 722defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2", 723 VReg_64, i64>; 724 725defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2", 726 VReg_64, i64>; 727 728defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2", 729 VReg_64, i64>; 730 731defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2", 732 VReg_64, i64>; 733 734defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2", 735 VReg_64, i64>; 736 737defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2", 738 VReg_64, i64>; 739 740defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2", 741 VReg_64, i64>; 742 743defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2", 744 VReg_64, i64>; 745 746defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2", 747 VReg_64, i64>; 748 749defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2", 750 VReg_64, i64>; 751 752defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2", 753 VReg_64, i64>; 754 755// GFX7-, GFX10-only flat instructions. 756let SubtargetPredicate = isGFX7GFX10 in { 757defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2", 758 VReg_64, f64, v2f64, VReg_128>; 759} // End SubtargetPredicate = isGFX7GFX10 760 761 762// The names may be flat_atomic_fmin_x2 on some subtargets, but we 763// choose this as the canonical name. 764let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in { 765defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo <"flat_atomic_min_f64", 766 VReg_64, f64>; 767 768defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo <"flat_atomic_max_f64", 769 VReg_64, f64>; 770} 771 772let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { 773defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>; 774defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>; 775} 776 777let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { 778 defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>; 779 defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>; 780} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst 781 782let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in { 783 defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>; 784 let FPAtomic = 1 in 785 defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2i16>; 786} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts 787 788let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst, FPAtomic = 1 in 789 defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2i16>; 790 791// GFX7-, GFX10-, GFX11-only flat instructions. 792let SubtargetPredicate = isGFX7GFX10GFX11 in { 793 794defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap", 795 VGPR_32, f32, v2f32, VReg_64>; 796 797defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin", 798 VGPR_32, f32>; 799 800defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax", 801 VGPR_32, f32>; 802 803} // End SubtargetPredicate = isGFX7GFX10GFX11 804 805// GFX940-, GFX11-only flat instructions. 806let SubtargetPredicate = HasFlatAtomicFaddF32Inst in { 807 defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>; 808} // End SubtargetPredicate = HasFlatAtomicFaddF32Inst 809 810let SubtargetPredicate = isGFX12Plus in { 811 defm FLAT_ATOMIC_CSUB_U32 : FLAT_Atomic_Pseudo <"flat_atomic_csub_u32", VGPR_32, i32>; 812 defm FLAT_ATOMIC_COND_SUB_U32 : FLAT_Atomic_Pseudo <"flat_atomic_cond_sub_u32", VGPR_32, i32>; 813} // End SubtargetPredicate = isGFX12Plus 814 815defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>; 816defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>; 817defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>; 818defm GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>; 819defm GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>; 820defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>; 821defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>; 822defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>; 823 824let TiedSourceNotRead = 1 in { 825defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>; 826defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>; 827defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>; 828defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>; 829defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>; 830defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>; 831} 832 833let OtherPredicates = [HasGFX10_BEncoding] in 834defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>; 835 836defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>; 837defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>; 838defm GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>; 839defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>; 840defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>; 841defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>; 842let OtherPredicates = [HasGFX10_BEncoding] in 843defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>; 844 845defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>; 846defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>; 847 848defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap", 849 VGPR_32, i32, v2i32, VReg_64>; 850 851defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2", 852 VReg_64, i64, v2i64, VReg_128>; 853 854defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap", 855 VGPR_32, i32>; 856 857defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2", 858 VReg_64, i64>; 859 860defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add", 861 VGPR_32, i32>; 862 863defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub", 864 VGPR_32, i32>; 865 866defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin", 867 VGPR_32, i32>; 868 869defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin", 870 VGPR_32, i32>; 871 872defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax", 873 VGPR_32, i32>; 874 875defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax", 876 VGPR_32, i32>; 877 878defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and", 879 VGPR_32, i32>; 880 881defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or", 882 VGPR_32, i32>; 883 884defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor", 885 VGPR_32, i32>; 886 887defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc", 888 VGPR_32, i32>; 889 890defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec", 891 VGPR_32, i32>; 892 893defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2", 894 VReg_64, i64>; 895 896defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2", 897 VReg_64, i64>; 898 899defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2", 900 VReg_64, i64>; 901 902defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2", 903 VReg_64, i64>; 904 905defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2", 906 VReg_64, i64>; 907 908defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2", 909 VReg_64, i64>; 910 911defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2", 912 VReg_64, i64>; 913 914defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2", 915 VReg_64, i64>; 916 917defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2", 918 VReg_64, i64>; 919 920defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2", 921 VReg_64, i64>; 922 923defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2", 924 VReg_64, i64>; 925 926let SubtargetPredicate = HasGFX10_BEncoding in { 927 defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo <"global_atomic_csub", 928 VGPR_32, i32>; 929} 930 931defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">; 932defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">; 933defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ushort">; 934defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">; 935defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">; 936 937let SubtargetPredicate = HasGFX950Insts in { 938defm GLOBAL_LOAD_LDS_DWORDX3 : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwordx3">; 939defm GLOBAL_LOAD_LDS_DWORDX4 : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dwordx4">; 940} 941 942let SubtargetPredicate = isGFX12Plus in { 943 defm GLOBAL_ATOMIC_COND_SUB_U32 : FLAT_Global_Atomic_Pseudo <"global_atomic_cond_sub_u32", VGPR_32, i32>; 944 defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>; 945 946 def GLOBAL_INV : FLAT_Global_Invalidate_Writeback<"global_inv">; 947 def GLOBAL_WB : FLAT_Global_Invalidate_Writeback<"global_wb">; 948 def GLOBAL_WBINV : FLAT_Global_Invalidate_Writeback<"global_wbinv">; 949} // End SubtargetPredicate = isGFX12Plus 950 951defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>; 952defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>; 953defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>; 954defm SCRATCH_LOAD_SSHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>; 955defm SCRATCH_LOAD_DWORD : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>; 956defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>; 957defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>; 958defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>; 959 960let TiedSourceNotRead = 1 in { 961defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32, 1>; 962defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32, 1>; 963defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32, 1>; 964defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32, 1>; 965defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32, 1>; 966defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32, 1>; 967} 968 969defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>; 970defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>; 971defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>; 972defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>; 973defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>; 974defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>; 975 976defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>; 977defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>; 978 979defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">; 980defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">; 981defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">; 982defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sshort">; 983defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">; 984 985let SubtargetPredicate = isGFX12Plus in { 986 let Uses = [EXEC, M0] in { 987 defm GLOBAL_LOAD_BLOCK : FLAT_Global_Load_Pseudo <"global_load_block", VReg_1024>; 988 defm GLOBAL_STORE_BLOCK : FLAT_Global_Store_Pseudo <"global_store_block", VReg_1024>; 989 } 990 let Uses = [EXEC, FLAT_SCR, M0] in { 991 defm SCRATCH_LOAD_BLOCK : FLAT_Scratch_Load_Pseudo <"scratch_load_block", VReg_1024>; 992 defm SCRATCH_STORE_BLOCK : FLAT_Scratch_Store_Pseudo <"scratch_store_block", VReg_1024>; 993 } 994 995 let WaveSizePredicate = isWave32 in { 996 let Mnemonic = "global_load_tr_b128" in 997 defm GLOBAL_LOAD_TR_B128_w32 : FLAT_Global_Load_Pseudo <"global_load_tr_b128_w32", VReg_128>; 998 let Mnemonic = "global_load_tr_b64" in 999 defm GLOBAL_LOAD_TR_B64_w32 : FLAT_Global_Load_Pseudo <"global_load_tr_b64_w32", VReg_64>; 1000 } 1001 let WaveSizePredicate = isWave64 in { 1002 let Mnemonic = "global_load_tr_b128" in 1003 defm GLOBAL_LOAD_TR_B128_w64 : FLAT_Global_Load_Pseudo <"global_load_tr_b128_w64", VReg_64>; 1004 let Mnemonic = "global_load_tr_b64" in 1005 defm GLOBAL_LOAD_TR_B64_w64 : FLAT_Global_Load_Pseudo <"global_load_tr_b64_w64", VGPR_32>; 1006 } 1007} // End SubtargetPredicate = isGFX12Plus 1008 1009let SubtargetPredicate = isGFX10Plus in { 1010 defm GLOBAL_ATOMIC_FCMPSWAP : 1011 FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>; 1012 defm GLOBAL_ATOMIC_FMIN : 1013 FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>; 1014 defm GLOBAL_ATOMIC_FMAX : 1015 FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>; 1016 defm GLOBAL_ATOMIC_FCMPSWAP_X2 : 1017 FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>; 1018} // End SubtargetPredicate = isGFX10Plus 1019 1020let OtherPredicates = [HasAtomicFaddNoRtnInsts] in 1021 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN < 1022 "global_atomic_add_f32", VGPR_32, f32 1023 >; 1024let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in 1025 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN < 1026 "global_atomic_pk_add_f16", VGPR_32, v2f16 1027 >; 1028let OtherPredicates = [HasAtomicFaddRtnInsts] in 1029 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN < 1030 "global_atomic_add_f32", VGPR_32, f32 1031 >; 1032let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in 1033 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN < 1034 "global_atomic_pk_add_f16", VGPR_32, v2f16 1035 >; 1036 1037//===----------------------------------------------------------------------===// 1038// Flat Patterns 1039//===----------------------------------------------------------------------===// 1040 1041// Patterns for global loads with no offset. 1042class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1043 (vt (node (FlatOffset i64:$vaddr, i32:$offset))), 1044 (inst $vaddr, $offset) 1045>; 1046 1047class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1048 (node (FlatOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in), 1049 (inst $vaddr, $offset, 0, $in) 1050>; 1051 1052class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1053 (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in), 1054 (inst $vaddr, $offset, 0, $in) 1055>; 1056 1057class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1058 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$in)), 1059 (inst $saddr, $voffset, $offset, 0, $in) 1060>; 1061 1062class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1063 (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))), 1064 (inst $vaddr, $offset) 1065>; 1066 1067class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1068 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))), 1069 (inst $saddr, $voffset, $offset, 0) 1070>; 1071 1072class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1073 ValueType vt> : GCNPat < 1074 (node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset)), 1075 (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 1076>; 1077 1078class GlobalAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1079 ValueType vt, ValueType data_vt = vt> : GCNPat < 1080 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), data_vt:$data)), 1081 (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset) 1082>; 1083 1084class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1085 ValueType vt> : GCNPat < 1086 (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$data), 1087 (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 1088>; 1089 1090class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1091 (node vt:$data, (FlatOffset i64:$vaddr, i32:$offset)), 1092 (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset) 1093>; 1094 1095class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1096 (node vt:$data, (GlobalOffset i64:$vaddr, i32:$offset)), 1097 (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset) 1098>; 1099 1100class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, 1101 ValueType vt, ValueType data_vt = vt> : GCNPat < 1102 // atomic store follows atomic binop convention so the address comes 1103 // first. 1104 (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data), 1105 (inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset) 1106>; 1107 1108multiclass FlatAtomicNoRtnPatBase <string inst, string node, ValueType vt, 1109 ValueType data_vt = vt> { 1110 1111 defvar noRtnNode = !cast<PatFrags>(node); 1112 1113 let AddedComplexity = 1 in 1114 def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)), 1115 (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>; 1116} 1117 1118multiclass FlatAtomicNoRtnPatWithAddrSpace<string inst, string node, string addrSpaceSuffix, 1119 ValueType vt> : 1120 FlatAtomicNoRtnPatBase<inst, node # "_noret_" # addrSpaceSuffix, vt, vt>; 1121 1122multiclass FlatAtomicNoRtnPat <string inst, string node, ValueType vt, 1123 ValueType data_vt = vt, bit isIntr = 0> : 1124 FlatAtomicNoRtnPatBase<inst, node # "_noret" # !if(isIntr, "", "_"#vt), vt, data_vt>; 1125 1126 1127multiclass FlatAtomicRtnPatBase <string inst, string node, ValueType vt, 1128 ValueType data_vt = vt> { 1129 1130 defvar rtnNode = !cast<SDPatternOperator>(node); 1131 1132 def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)), 1133 (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>; 1134} 1135 1136multiclass FlatAtomicRtnPatWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1137 ValueType vt> : 1138 FlatAtomicRtnPatBase<inst, intr # "_" # addrSpaceSuffix, vt, vt>; 1139 1140multiclass FlatAtomicRtnPat <string inst, string node, ValueType vt, 1141 ValueType data_vt = vt, bit isIntr = 0> : 1142 FlatAtomicRtnPatBase<inst, node # !if(isIntr, "", "_"#vt), vt, data_vt>; 1143 1144 1145multiclass FlatAtomicPat <string inst, string node, ValueType vt, 1146 ValueType data_vt = vt, bit isIntr = 0> : 1147 FlatAtomicRtnPat<inst, node, vt, data_vt, isIntr>, 1148 FlatAtomicNoRtnPat<inst, node, vt, data_vt, isIntr>; 1149 1150multiclass FlatAtomicIntrNoRtnPat <string inst, string node, ValueType vt, 1151 ValueType data_vt = vt> { 1152 defm : FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>; 1153} 1154 1155multiclass FlatAtomicIntrRtnPat <string inst, string node, ValueType vt, 1156 ValueType data_vt = vt> { 1157 defm : FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>; 1158} 1159 1160multiclass FlatAtomicIntrPat <string inst, string node, ValueType vt, 1161 ValueType data_vt = vt> : 1162 FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>, 1163 FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>; 1164 1165class FlatSignedAtomicPatBase <FLAT_Pseudo inst, SDPatternOperator node, 1166 ValueType vt, ValueType data_vt = vt> : GCNPat < 1167 (vt (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data)), 1168 (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset) 1169>; 1170 1171multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt, 1172 ValueType data_vt = vt, int complexity = 0, 1173 bit isIntr = 0> { 1174 defvar rtnNode = !cast<SDPatternOperator>(node # !if(isIntr, "", "_" # vt)); 1175 defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt)); 1176 1177 let AddedComplexity = complexity in 1178 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>; 1179 1180 let AddedComplexity = !add(complexity, 1) in 1181 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>; 1182} 1183 1184class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1185 (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))), 1186 (inst $vaddr, $offset) 1187>; 1188 1189class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1190 (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset), vt:$in), 1191 (inst $vaddr, $offset, 0, $in) 1192>; 1193 1194class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1195 (node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset)), 1196 (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset) 1197>; 1198 1199class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1200 (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))), 1201 (inst $saddr, $offset) 1202>; 1203 1204class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1205 (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset), vt:$in)), 1206 (inst $saddr, $offset, 0, $in) 1207>; 1208 1209class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1210 ValueType vt> : GCNPat < 1211 (node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset)), 1212 (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 1213>; 1214 1215class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1216 (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset))), 1217 (inst $vaddr, $saddr, $offset, 0) 1218>; 1219 1220class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1221 ValueType vt> : GCNPat < 1222 (node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset)), 1223 (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset) 1224>; 1225 1226class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1227 (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset), vt:$in)), 1228 (inst $vaddr, $saddr, $offset, 0, $in) 1229>; 1230 1231multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1232 def : FlatLoadSignedPat <inst, node, vt> { 1233 let AddedComplexity = 10; 1234 } 1235 1236 def : GlobalLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1237 let AddedComplexity = 11; 1238 } 1239} 1240 1241multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1242 def : FlatSignedLoadPat_D16 <inst, node, vt> { 1243 let AddedComplexity = 10; 1244 } 1245 1246 def : GlobalLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1247 let AddedComplexity = 11; 1248 } 1249} 1250 1251multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node, 1252 ValueType vt> { 1253 def : FlatStoreSignedPat <inst, node, vt> { 1254 let AddedComplexity = 10; 1255 } 1256 1257 def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1258 let AddedComplexity = 11; 1259 } 1260} 1261 1262multiclass GlobalFLATAtomicPatsNoRtnBase<string inst, string node, ValueType vt, 1263 ValueType data_vt = vt> { 1264 let AddedComplexity = 11 in 1265 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), !cast<SDPatternOperator>(node), vt, data_vt>; 1266 1267 let AddedComplexity = 13 in 1268 def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<SDPatternOperator>(node), vt, data_vt>; 1269} 1270 1271multiclass GlobalFLATAtomicPatsRtnBase<string inst, string node, ValueType vt, 1272 ValueType data_vt = vt, bit isPatFrags = 0> { 1273 defvar rtnNode = !if(isPatFrags, !cast<PatFrags>(node), !cast<SDPatternOperator>(node)); 1274 1275 let AddedComplexity = 10 in 1276 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>; 1277 1278 let AddedComplexity = 12 in 1279 def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, vt, data_vt>; 1280} 1281 1282multiclass GlobalFLATAtomicPatsNoRtn<string inst, string node, ValueType vt, 1283 ValueType data_vt = vt, bit isIntr = 0> : 1284 GlobalFLATAtomicPatsNoRtnBase<inst, node # "_noret" # !if(isIntr, "", "_" # vt), vt, data_vt>; 1285 1286multiclass GlobalFLATAtomicPatsRtn<string inst, string node, ValueType vt, 1287 ValueType data_vt = vt, bit isIntr = 0> : 1288 GlobalFLATAtomicPatsRtnBase<inst, node # !if(isIntr, "", "_" # vt), vt, data_vt>; 1289 1290multiclass GlobalFLATAtomicPats<string inst, string node, ValueType vt, 1291 ValueType data_vt = vt, bit isIntr = 0> : 1292 GlobalFLATAtomicPatsNoRtn<inst, node, vt, data_vt, isIntr>, 1293 GlobalFLATAtomicPatsRtn<inst, node, vt, data_vt, isIntr>; 1294 1295multiclass GlobalFLATAtomicPatsNoRtnWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1296 ValueType vt, ValueType data_vt = vt> : 1297 GlobalFLATAtomicPatsNoRtnBase<inst, intr # "_noret_" # addrSpaceSuffix, vt, data_vt>; 1298 1299multiclass GlobalFLATAtomicPatsRtnWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1300 ValueType vt, ValueType data_vt = vt> : 1301 GlobalFLATAtomicPatsRtnBase<inst, intr # "_" # addrSpaceSuffix, vt, data_vt, /*isPatFrags*/ 1>; 1302 1303multiclass GlobalFLATAtomicPatsWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1304 ValueType vt, ValueType data_vt = vt> : 1305 GlobalFLATAtomicPatsNoRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>, 1306 GlobalFLATAtomicPatsRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>; 1307 1308multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt, 1309 ValueType data_vt = vt> { 1310 defm : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>; 1311} 1312 1313multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1314 def : ScratchLoadSignedPat <inst, node, vt> { 1315 let AddedComplexity = 25; 1316 } 1317 1318 def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1319 let AddedComplexity = 26; 1320 } 1321 1322 def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1323 let SubtargetPredicate = HasFlatScratchSVSMode; 1324 let AddedComplexity = 27; 1325 } 1326} 1327 1328multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node, 1329 ValueType vt> { 1330 def : ScratchStoreSignedPat <inst, node, vt> { 1331 let AddedComplexity = 25; 1332 } 1333 1334 def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1335 let AddedComplexity = 26; 1336 } 1337 1338 def : ScratchStoreSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1339 let SubtargetPredicate = HasFlatScratchSVSMode; 1340 let AddedComplexity = 27; 1341 } 1342} 1343 1344multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1345 def : ScratchLoadSignedPat_D16 <inst, node, vt> { 1346 let AddedComplexity = 25; 1347 } 1348 1349 def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1350 let AddedComplexity = 26; 1351 } 1352 1353 def : ScratchLoadSVaddrPat_D16 <!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1354 let SubtargetPredicate = HasFlatScratchSVSMode; 1355 let AddedComplexity = 27; 1356 } 1357} 1358 1359let OtherPredicates = [HasFlatAddressSpace] in { 1360 1361def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>; 1362def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>; 1363def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i32>; 1364def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i16>; 1365def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>; 1366def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>; 1367def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_zext_16_flat, i32>; 1368def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_zext_16_flat, i16>; 1369def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>; 1370def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>; 1371def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>; 1372def : FlatLoadPat <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i32>; 1373def : FlatLoadPat <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i16>; 1374def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>; 1375def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>; 1376def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>; 1377def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>; 1378def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>; 1379def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>; 1380def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>; 1381def : FlatLoadPat <FLAT_LOAD_SSHORT, atomic_load_sext_16_flat, i32>; 1382def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>; 1383 1384def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>; 1385def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>; 1386 1387def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>; 1388def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>; 1389 1390foreach vt = Reg32Types.types in { 1391def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>; 1392def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>; 1393} 1394 1395foreach vt = VReg_64.RegTypes in { 1396def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>; 1397def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>; 1398} 1399 1400def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>; 1401 1402foreach vt = VReg_128.RegTypes in { 1403def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>; 1404def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>; 1405} 1406 1407def : FlatStorePat <FLAT_STORE_DWORD, atomic_store_32_flat, i32>; 1408def : FlatStorePat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>; 1409def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i32>; 1410def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>; 1411def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>; 1412def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>; 1413 1414foreach as = [ "flat", "global" ] in { 1415defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>; 1416defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>; 1417defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>; 1418defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>; 1419defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>; 1420defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>; 1421defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>; 1422defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>; 1423defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>; 1424defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>; 1425defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>; 1426defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>; 1427defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>; 1428 1429defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>; 1430defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>; 1431defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>; 1432defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>; 1433defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>; 1434defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>; 1435defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>; 1436defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>; 1437defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>; 1438defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>; 1439defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>; 1440defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>; 1441defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>; 1442 1443let SubtargetPredicate = HasAtomicFMinFMaxF32FlatInsts in { 1444defm : FlatAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_"#as, f32>; 1445defm : FlatAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_"#as, f32>; 1446} 1447 1448let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in { 1449defm : FlatAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_"#as, f64>; 1450defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_"#as, f64>; 1451} 1452 1453} // end foreach as 1454 1455let SubtargetPredicate = isGFX12Plus in { 1456 defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32 >; 1457 1458 let OtherPredicates = [HasAtomicCSubNoRtnInsts] in 1459 defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>; 1460} 1461 1462def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>; 1463def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>; 1464 1465let OtherPredicates = [HasD16LoadStore] in { 1466def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>; 1467def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>; 1468} 1469 1470let OtherPredicates = [D16PreservesUnusedBits] in { 1471// TODO: Handle atomic loads 1472def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>; 1473def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>; 1474def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>; 1475def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>; 1476def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>; 1477def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>; 1478 1479def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>; 1480def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>; 1481def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>; 1482def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>; 1483def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>; 1484def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>; 1485} 1486 1487} // End OtherPredicates = [HasFlatAddressSpace] 1488 1489let OtherPredicates = [HasFlatGlobalInsts] in { 1490 1491defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i32>; 1492defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i16>; 1493defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_zext_8_global, i32>; 1494defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_zext_8_global, i16>; 1495defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i32>; 1496defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i16>; 1497defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i32>; 1498defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i16>; 1499defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, atomic_load_sext_8_global, i32>; 1500defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, atomic_load_sext_8_global, i16>; 1501defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>; 1502defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>; 1503defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>; 1504defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>; 1505defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>; 1506defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>; 1507defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>; 1508defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>; 1509defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>; 1510defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, atomic_load_sext_16_global, i32>; 1511defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i32>; 1512defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>; 1513 1514foreach vt = Reg32Types.types in { 1515defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>; 1516defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>; 1517} 1518 1519foreach vt = VReg_64.RegTypes in { 1520defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, load_global, vt>; 1521defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, store_global, vt>; 1522} 1523 1524defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX3, load_global, v3i32>; 1525 1526foreach vt = VReg_128.RegTypes in { 1527defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX4, load_global, vt>; 1528defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, store_global, vt>; 1529} 1530 1531// There is no distinction for atomic load lowering during selection; 1532// the memory legalizer will set the cache bits and insert the 1533// appropriate waits. 1534defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>; 1535defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>; 1536 1537defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>; 1538defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>; 1539defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>; 1540defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>; 1541defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>; 1542 1543let OtherPredicates = [HasD16LoadStore] in { 1544defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>; 1545defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>; 1546} 1547 1548let OtherPredicates = [D16PreservesUnusedBits] in { 1549// TODO: Handle atomic loads 1550defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>; 1551defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>; 1552defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>; 1553defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>; 1554defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>; 1555defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>; 1556 1557defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>; 1558defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>; 1559defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>; 1560defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>; 1561defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>; 1562defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>; 1563} 1564 1565defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i32>; 1566defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>; 1567defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i32>; 1568defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i16>; 1569defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, atomic_store_32_global, i32>; 1570defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, i64>; 1571 1572defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", "atomic_load_add_global", i32>; 1573defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>; 1574defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", "atomic_load_uinc_wrap_global", i32>; 1575defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", "atomic_load_udec_wrap_global", i32>; 1576defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", "atomic_load_and_global", i32>; 1577defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", "atomic_load_max_global", i32>; 1578defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", "atomic_load_umax_global", i32>; 1579defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN", "atomic_load_min_global", i32>; 1580defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN", "atomic_load_umin_global", i32>; 1581defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR", "atomic_load_or_global", i32>; 1582defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP", "atomic_swap_global", i32>; 1583defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_global", i32, v2i32>; 1584defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>; 1585defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>; 1586 1587let OtherPredicates = [HasAtomicCSubNoRtnInsts] in 1588defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>; 1589 1590defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>; 1591defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>; 1592defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_load_uinc_wrap_global", i64>; 1593defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", "atomic_load_udec_wrap_global", i64>; 1594defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", "atomic_load_and_global", i64>; 1595defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", "atomic_load_max_global", i64>; 1596defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", "atomic_load_umax_global", i64>; 1597defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2", "atomic_load_min_global", i64>; 1598defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2", "atomic_load_umin_global", i64>; 1599defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2", "atomic_load_or_global", i64>; 1600defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", "atomic_swap_global", i64>; 1601defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>; 1602defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i64>; 1603 1604let SubtargetPredicate = isGFX12Plus in { 1605 defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "global_addrspace", i32>; 1606 1607 let OtherPredicates = [HasAtomicCSubNoRtnInsts] in 1608 defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "global_addrspace", i32>; 1609} 1610 1611let OtherPredicates = [isGFX12Plus] in { 1612 defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ORDERED_ADD_B64", "int_amdgcn_global_atomic_ordered_add_b64", i64, i64, /* isIntr */ 1>; 1613 1614 let WaveSizePredicate = isWave32 in { 1615 defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w32, int_amdgcn_global_load_tr_b64, v2i32>; 1616 foreach vt = [v8i16, v8f16, v8bf16] in 1617 defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w32, int_amdgcn_global_load_tr_b128, vt>; 1618 } 1619 let WaveSizePredicate = isWave64 in { 1620 defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w64, int_amdgcn_global_load_tr_b64, i32>; 1621 foreach vt = [v4i16, v4f16, v4bf16] in 1622 defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w64, int_amdgcn_global_load_tr_b128, vt>; 1623 } 1624} 1625 1626let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in { 1627defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>; 1628defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>; 1629} 1630 1631let SubtargetPredicate = HasAtomicFMinFMaxF32FlatInsts in { 1632defm : FlatAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>; 1633defm : FlatAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>; 1634} 1635 1636let OtherPredicates = [isGFX12Only] in { 1637 // FIXME: Remove these intrinsics 1638 defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin_num", f32>; 1639 defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax_num", f32>; 1640 defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin_num", f32>; 1641 defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax_num", f32>; 1642} 1643 1644let OtherPredicates = [HasAtomicFaddNoRtnInsts] in { 1645defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>; 1646} 1647 1648let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in { 1649defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_global", v2f16>; 1650} 1651 1652let OtherPredicates = [HasAtomicFaddRtnInsts] in { 1653defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>; 1654} 1655 1656let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in { 1657defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_global", v2f16>; 1658} 1659 1660let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in { 1661defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>; 1662defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>; 1663} 1664 1665let OtherPredicates = [HasFlatBufferGlobalAtomicFaddF64Inst] in { 1666defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>; 1667defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>; 1668} 1669 1670let OtherPredicates = [HasFlatAtomicFaddF32Inst] in { 1671defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>; 1672} 1673 1674let OtherPredicates = [HasAtomicFlatPkAdd16Insts] in { 1675defm : FlatAtomicPat <"FLAT_ATOMIC_PK_ADD_F16", "atomic_load_fadd_flat", v2f16>; 1676defm : FlatAtomicPat <"FLAT_ATOMIC_PK_ADD_BF16", "atomic_load_fadd_flat", v2bf16>; 1677} 1678 1679let OtherPredicates = [HasAtomicGlobalPkAddBF16Inst] in 1680defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "atomic_load_fadd_global", v2bf16>; 1681} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 1682 1683let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in { 1684 1685defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i32>; 1686defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i32>; 1687defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i32>; 1688defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>; 1689defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>; 1690defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>; 1691defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private, i32>; 1692defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>; 1693defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>; 1694defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private, i16>; 1695 1696foreach vt = Reg32Types.types in { 1697defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORD, load_private, vt>; 1698defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>; 1699} 1700 1701foreach vt = VReg_64.RegTypes in { 1702defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX2, load_private, vt>; 1703defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX2, store_private, vt>; 1704} 1705 1706defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX3, load_private, v3i32>; 1707 1708foreach vt = VReg_128.RegTypes in { 1709defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX4, load_private, vt>; 1710defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX4, store_private, vt>; 1711} 1712 1713defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i32>; 1714defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i16>; 1715defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, truncstorei16_private, i32>; 1716defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, store_private, i16>; 1717defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private, v3i32>; 1718 1719let OtherPredicates = [HasD16LoadStore, HasFlatScratchInsts, EnableFlatScratch] in { 1720defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>; 1721defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI, truncstorei8_hi16_private, i32>; 1722} 1723 1724let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in { 1725defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>; 1726defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>; 1727defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2i16>; 1728defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2f16>; 1729defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2i16>; 1730defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2f16>; 1731 1732defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2i16>; 1733defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2f16>; 1734defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2i16>; 1735defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2f16>; 1736defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2i16>; 1737defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f16>; 1738} 1739 1740} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch] 1741 1742//===----------------------------------------------------------------------===// 1743// Target 1744//===----------------------------------------------------------------------===// 1745 1746//===----------------------------------------------------------------------===// 1747// CI 1748//===----------------------------------------------------------------------===// 1749 1750class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps, string asmName = ps.Mnemonic> : 1751 FLAT_Real <op, ps, asmName>, 1752 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> { 1753 let AssemblerPredicate = isGFX7Only; 1754 let DecoderNamespace="GFX7"; 1755} 1756 1757def FLAT_LOAD_UBYTE_ci : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>; 1758def FLAT_LOAD_SBYTE_ci : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>; 1759def FLAT_LOAD_USHORT_ci : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>; 1760def FLAT_LOAD_SSHORT_ci : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>; 1761def FLAT_LOAD_DWORD_ci : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>; 1762def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>; 1763def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>; 1764def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>; 1765 1766def FLAT_STORE_BYTE_ci : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>; 1767def FLAT_STORE_SHORT_ci : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>; 1768def FLAT_STORE_DWORD_ci : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>; 1769def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>; 1770def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>; 1771def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>; 1772 1773multiclass FLAT_Real_Atomics_ci <bits<7> op, string opName = NAME, 1774 string asmName = !cast<FLAT_Pseudo>(opName).Mnemonic> { 1775 defvar ps = !cast<FLAT_Pseudo>(opName); 1776 defvar ps_rtn = !cast<FLAT_Pseudo>(opName#"_RTN"); 1777 1778 def _ci : FLAT_Real_ci<op, ps, asmName>; 1779 def _RTN_ci : FLAT_Real_ci<op, ps_rtn, asmName>; 1780} 1781 1782defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30>; 1783defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31>; 1784defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32>; 1785defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33>; 1786defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35>; 1787defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36>; 1788defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37>; 1789defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38>; 1790defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39>; 1791defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a>; 1792defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b>; 1793defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c>; 1794defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d>; 1795defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50>; 1796defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51>; 1797defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52>; 1798defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53>; 1799defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55>; 1800defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56>; 1801defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_ci <0x57>; 1802defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58>; 1803defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59>; 1804defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a>; 1805defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b>; 1806defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c>; 1807defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d>; 1808 1809// CI Only flat instructions 1810defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e>; 1811defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f>; 1812defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40>; 1813defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e>; 1814defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f, "FLAT_ATOMIC_MIN_F64", "flat_atomic_fmin_x2">; 1815defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, "FLAT_ATOMIC_MAX_F64", "flat_atomic_fmax_x2">; 1816 1817 1818//===----------------------------------------------------------------------===// 1819// VI 1820//===----------------------------------------------------------------------===// 1821 1822class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> : 1823 FLAT_Real <op, ps>, 1824 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> { 1825 let AssemblerPredicate = isGFX8GFX9; 1826 let DecoderNamespace = "GFX8"; 1827 1828 let Inst{25} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); 1829 let AsmString = ps.Mnemonic # 1830 !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands); 1831} 1832 1833multiclass FLAT_Real_AllAddr_vi<bits<7> op, 1834 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { 1835 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>; 1836 def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>; 1837} 1838 1839class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> : 1840 FLAT_Real <op, ps>, 1841 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> { 1842 let AssemblerPredicate = isGFX940Plus; 1843 let DecoderNamespace = "GFX9"; 1844 let Inst{13} = ps.sve; 1845 let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); 1846} 1847 1848multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> { 1849 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> { 1850 let AssemblerPredicate = isGFX8GFX9NotGFX940; 1851 let OtherPredicates = [isGFX8GFX9NotGFX940]; 1852 } 1853 def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> { 1854 let DecoderNamespace = "GFX9"; 1855 } 1856 let AssemblerPredicate = isGFX940Plus, SubtargetPredicate = isGFX940Plus in { 1857 def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>; 1858 def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>; 1859 def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>; 1860 } 1861} 1862 1863multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op, 1864 string pre_gfx940_name = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).Mnemonic), 1865 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { 1866 1867 let OtherPredicates = [isGFX8GFX9NotGFX940] in { 1868 def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> { 1869 let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds"; 1870 } 1871 def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> { 1872 let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds"; 1873 } 1874 } 1875 1876 let SubtargetPredicate = isGFX940Plus in { 1877 def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>; 1878 def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 1879 } 1880} 1881 1882multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> { 1883 defm "" : FLAT_Real_AllAddr_LDS<op, pre_gfx940_op>; 1884 let SubtargetPredicate = isGFX940Plus in { 1885 def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>; 1886 def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>; 1887 } 1888} 1889 1890def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>; 1891def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>; 1892def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>; 1893def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>; 1894def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>; 1895def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>; 1896def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>; 1897def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>; 1898 1899def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>; 1900def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>; 1901def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>; 1902def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>; 1903def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>; 1904def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>; 1905def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>; 1906def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>; 1907 1908def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>; 1909def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>; 1910def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>; 1911def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>; 1912def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>; 1913def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>; 1914 1915multiclass FLAT_Real_Atomics_vi <bits<7> op, 1916 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { 1917 defvar ps = !cast<FLAT_Pseudo>(NAME); 1918 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>; 1919 def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>; 1920} 1921 1922multiclass FLAT_Global_Real_Atomics_vi<bits<7> op, 1923 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> : 1924 FLAT_Real_AllAddr_vi<op, has_sccb> { 1925 def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>; 1926 def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>; 1927} 1928 1929 1930defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40>; 1931defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41>; 1932defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42>; 1933defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43>; 1934defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44>; 1935defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45>; 1936defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_vi <0x46>; 1937defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47>; 1938defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48>; 1939defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_vi <0x49>; 1940defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a>; 1941defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b>; 1942defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_vi <0x4c>; 1943defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_vi <0x60>; 1944defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61>; 1945defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_vi <0x62>; 1946defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_vi <0x63>; 1947defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_vi <0x64>; 1948defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_vi <0x65>; 1949defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_vi <0x66>; 1950defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_vi <0x67>; 1951defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_vi <0x68>; 1952defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_vi <0x69>; 1953defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a>; 1954defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b>; 1955defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c>; 1956 1957defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>; 1958defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>; 1959defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>; 1960defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>; 1961defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>; 1962defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>; 1963defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>; 1964defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>; 1965 1966defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_vi <0x20>; 1967defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>; 1968defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_vi <0x22>; 1969defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>; 1970defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_vi <0x24>; 1971defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>; 1972 1973defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>; 1974defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>; 1975defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>; 1976defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>; 1977defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>; 1978defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; 1979defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; 1980defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; 1981 1982defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_LDS <0x026, 0x10>; 1983defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_LDS <0x027, 0x11>; 1984defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>; 1985defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>; 1986defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS <0x02a, 0x14>; 1987 1988defm GLOBAL_LOAD_LDS_DWORDX3 : FLAT_Real_AllAddr_LDS <0x07e, 0x07e>; 1989defm GLOBAL_LOAD_LDS_DWORDX4 : FLAT_Real_AllAddr_LDS <0x07d, 0x07d>; 1990 1991 1992defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>; 1993defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>; 1994defm GLOBAL_ATOMIC_ADD : FLAT_Global_Real_Atomics_vi <0x42>; 1995defm GLOBAL_ATOMIC_SUB : FLAT_Global_Real_Atomics_vi <0x43>; 1996defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Real_Atomics_vi <0x44>; 1997defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Real_Atomics_vi <0x45>; 1998defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Real_Atomics_vi <0x46>; 1999defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Real_Atomics_vi <0x47>; 2000defm GLOBAL_ATOMIC_AND : FLAT_Global_Real_Atomics_vi <0x48>; 2001defm GLOBAL_ATOMIC_OR : FLAT_Global_Real_Atomics_vi <0x49>; 2002defm GLOBAL_ATOMIC_XOR : FLAT_Global_Real_Atomics_vi <0x4a>; 2003defm GLOBAL_ATOMIC_INC : FLAT_Global_Real_Atomics_vi <0x4b>; 2004defm GLOBAL_ATOMIC_DEC : FLAT_Global_Real_Atomics_vi <0x4c>; 2005defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Real_Atomics_vi <0x60>; 2006defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>; 2007defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Real_Atomics_vi <0x62>; 2008defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Real_Atomics_vi <0x63>; 2009defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Real_Atomics_vi <0x64>; 2010defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Real_Atomics_vi <0x65>; 2011defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Real_Atomics_vi <0x66>; 2012defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Real_Atomics_vi <0x67>; 2013defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Real_Atomics_vi <0x68>; 2014defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Real_Atomics_vi <0x69>; 2015defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Real_Atomics_vi <0x6a>; 2016defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Real_Atomics_vi <0x6b>; 2017defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Real_Atomics_vi <0x6c>; 2018 2019defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_SVE_LDS <0x026, 0x10>; 2020defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_SVE_LDS <0x027, 0x11>; 2021defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_LDS <0x028, 0x12>; 2022defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_LDS <0x029, 0x13>; 2023defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_AllAddr_SVE_LDS <0x02a, 0x14>; 2024 2025defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_SVE_vi <0x10>; 2026defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_SVE_vi <0x11>; 2027defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_SVE_vi <0x12>; 2028defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_SVE_vi <0x13>; 2029defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_SVE_vi <0x14>; 2030defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x15>; 2031defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x16>; 2032defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x17>; 2033defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_SVE_vi <0x18>; 2034defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x19>; 2035defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_SVE_vi <0x20>; 2036defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x21>; 2037defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_SVE_vi <0x22>; 2038defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x23>; 2039defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_SVE_vi <0x24>; 2040defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x25>; 2041defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_SVE_vi <0x1a>; 2042defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x1b>; 2043defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_SVE_vi <0x1c>; 2044defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x1d>; 2045defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x1e>; 2046defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x1f>; 2047 2048let SubtargetPredicate = isGFX8GFX9NotGFX940 in { 2049 // These instructions are encoded differently on gfx90* and gfx940. 2050 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>; 2051 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; 2052} 2053 2054let SubtargetPredicate = isGFX90AOnly in { 2055 defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_vi<0x4f, 0>; 2056 defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_vi<0x50, 0>; 2057 defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_vi<0x51, 0>; 2058 defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>; 2059 defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>; 2060 defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>; 2061} // End SubtargetPredicate = isGFX90AOnly 2062 2063multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> { 2064 def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>; 2065 def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 2066} 2067 2068multiclass FLAT_Real_Atomics_gfx940 <bits<7> op> { 2069 defvar ps = !cast<FLAT_Pseudo>(NAME); 2070 def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>; 2071 def _RTN_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>; 2072} 2073 2074multiclass FLAT_Global_Real_Atomics_gfx940<bits<7> op> : 2075 FLAT_Real_AllAddr_gfx940<op> { 2076 def _RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>; 2077 def _SADDR_RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>; 2078} 2079 2080let SubtargetPredicate = isGFX940Plus in { 2081 // These instructions are encoded differently on gfx90* and gfx940. 2082 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_gfx940 <0x04d>; 2083 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx940 <0x04e>; 2084 2085 defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx940<0x4f>; 2086 defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx940<0x50>; 2087 defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx940<0x51>; 2088 defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>; 2089 defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>; 2090 defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>; 2091 defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>; 2092 defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>; 2093 defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>; 2094 defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>; 2095} // End SubtargetPredicate = isGFX940Plus 2096 2097//===----------------------------------------------------------------------===// 2098// GFX10. 2099//===----------------------------------------------------------------------===// 2100 2101class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : 2102 FLAT_Real<op, ps, opName>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> { 2103 let AssemblerPredicate = isGFX10Only; 2104 let DecoderNamespace = "GFX10"; 2105 2106 let Inst{11-0} = offset{11-0}; 2107 let Inst{12} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue); 2108 let Inst{54-48} = !cond(ps.enabled_saddr : saddr, 2109 !and(ps.is_flat_scratch, !not(ps.has_vaddr)) : EXEC_HI.Index{6-0}, // ST mode 2110 true : SGPR_NULL_gfxpre11.Index{6-0}); 2111 let Inst{55} = 0; 2112} 2113 2114multiclass FLAT_Real_Base_gfx10<bits<7> op, string psName = NAME, 2115 string asmName = !cast<FLAT_Pseudo>(psName).Mnemonic> { 2116 def _gfx10 : 2117 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(psName), asmName>; 2118} 2119 2120multiclass FLAT_Real_RTN_gfx10<bits<7> op, string psName = NAME, 2121 string asmName = !cast<FLAT_Pseudo>(psName).Mnemonic> { 2122 def _RTN_gfx10 : 2123 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(psName#"_RTN"), asmName>; 2124} 2125 2126multiclass FLAT_Real_SADDR_gfx10<bits<7> op, string psName = NAME, 2127 string asmName = !cast<FLAT_Pseudo>(psName#"_SADDR").Mnemonic> { 2128 def _SADDR_gfx10 : 2129 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(psName#"_SADDR"), asmName>; 2130} 2131 2132multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op, string psName = NAME, 2133 string asmName = !cast<FLAT_Pseudo>(psName#"_SADDR_RTN").Mnemonic> { 2134 def _SADDR_RTN_gfx10 : 2135 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(psName#"_SADDR_RTN"), asmName>; 2136} 2137 2138multiclass FLAT_Real_ST_gfx10<bits<7> op> { 2139 def _ST_gfx10 : 2140 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")>; 2141} 2142 2143multiclass FLAT_Real_AllAddr_gfx10<bits<7> op, string OpName = NAME, 2144 string asmName = !cast<FLAT_Pseudo>(OpName).Mnemonic> : 2145 FLAT_Real_Base_gfx10<op, OpName, asmName>, 2146 FLAT_Real_SADDR_gfx10<op, OpName, asmName>; 2147 2148multiclass FLAT_Real_Atomics_gfx10<bits<7> op, string OpName = NAME, 2149 string asmName = !cast<FLAT_Pseudo>(OpName).Mnemonic> : 2150 FLAT_Real_Base_gfx10<op, OpName, asmName>, 2151 FLAT_Real_RTN_gfx10<op, OpName, asmName>; 2152 2153multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op, string OpName = NAME, 2154 string asmName = !cast<FLAT_Pseudo>(OpName).Mnemonic> : 2155 FLAT_Real_AllAddr_gfx10<op, OpName, asmName>, 2156 FLAT_Real_RTN_gfx10<op, OpName, asmName>, 2157 FLAT_Real_SADDR_RTN_gfx10<op, OpName, asmName>; 2158 2159multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op, string OpName = NAME> : 2160 FLAT_Real_RTN_gfx10<op, OpName>, 2161 FLAT_Real_SADDR_RTN_gfx10<op, OpName>; 2162 2163multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> : 2164 FLAT_Real_Base_gfx10<op>, 2165 FLAT_Real_SADDR_gfx10<op>, 2166 FLAT_Real_ST_gfx10<op>; 2167 2168multiclass FLAT_Real_AllAddr_LDS_gfx10<bits<7> op, 2169 string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).Mnemonic)> { 2170 let AsmString = opname # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in 2171 defm "" : FLAT_Real_Base_gfx10<op>; 2172 2173 let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in 2174 defm "" : FLAT_Real_SADDR_gfx10<op>; 2175} 2176 2177multiclass FLAT_Real_ScratchAllAddr_LDS_gfx10<bits<7> op, 2178 string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).Mnemonic)> { 2179 defm "" : FLAT_Real_AllAddr_LDS_gfx10<op>; 2180 2181 let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_ST").AsmOperands # " lds" in 2182 defm "" : FLAT_Real_ST_gfx10<op>; 2183} 2184 2185// ENC_FLAT. 2186defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>; 2187defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>; 2188defm FLAT_LOAD_USHORT : FLAT_Real_Base_gfx10<0x00a>; 2189defm FLAT_LOAD_SSHORT : FLAT_Real_Base_gfx10<0x00b>; 2190defm FLAT_LOAD_DWORD : FLAT_Real_Base_gfx10<0x00c>; 2191defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx10<0x00d>; 2192defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx10<0x00e>; 2193defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx10<0x00f>; 2194defm FLAT_STORE_BYTE : FLAT_Real_Base_gfx10<0x018>; 2195defm FLAT_STORE_BYTE_D16_HI : FLAT_Real_Base_gfx10<0x019>; 2196defm FLAT_STORE_SHORT : FLAT_Real_Base_gfx10<0x01a>; 2197defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>; 2198defm FLAT_STORE_DWORD : FLAT_Real_Base_gfx10<0x01c>; 2199defm FLAT_STORE_DWORDX2 : FLAT_Real_Base_gfx10<0x01d>; 2200defm FLAT_STORE_DWORDX4 : FLAT_Real_Base_gfx10<0x01e>; 2201defm FLAT_STORE_DWORDX3 : FLAT_Real_Base_gfx10<0x01f>; 2202defm FLAT_LOAD_UBYTE_D16 : FLAT_Real_Base_gfx10<0x020>; 2203defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx10<0x021>; 2204defm FLAT_LOAD_SBYTE_D16 : FLAT_Real_Base_gfx10<0x022>; 2205defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx10<0x023>; 2206defm FLAT_LOAD_SHORT_D16 : FLAT_Real_Base_gfx10<0x024>; 2207defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x025>; 2208defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_gfx10<0x030>; 2209defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_gfx10<0x031>; 2210defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_gfx10<0x032>; 2211defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_gfx10<0x033>; 2212defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_gfx10<0x035>; 2213defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_gfx10<0x036>; 2214defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_gfx10<0x037>; 2215defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_gfx10<0x038>; 2216defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_gfx10<0x039>; 2217defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_gfx10<0x03a>; 2218defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_gfx10<0x03b>; 2219defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_gfx10<0x03c>; 2220defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_gfx10<0x03d>; 2221defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx10<0x03e>; 2222defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_gfx10<0x03f>; 2223defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_gfx10<0x040>; 2224defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_gfx10<0x050>; 2225defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x051>; 2226defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_gfx10<0x052>; 2227defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_gfx10<0x053>; 2228defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_gfx10<0x055>; 2229defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_gfx10<0x056>; 2230defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_gfx10<0x057>; 2231defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_gfx10<0x058>; 2232defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_gfx10<0x059>; 2233defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_gfx10<0x05a>; 2234defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx10<0x05b>; 2235defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx10<0x05c>; 2236defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx10<0x05d>; 2237defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>; 2238defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_gfx10<0x05f, "FLAT_ATOMIC_MIN_F64", "flat_atomic_fmin_x2">; 2239defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_gfx10<0x060, "FLAT_ATOMIC_MAX_F64", "flat_atomic_fmax_x2">; 2240 2241 2242// ENC_FLAT_GLBL. 2243defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>; 2244defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>; 2245defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>; 2246defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>; 2247defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>; 2248defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>; 2249defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>; 2250defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>; 2251defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>; 2252defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>; 2253defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>; 2254defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>; 2255defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>; 2256defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>; 2257defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>; 2258defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>; 2259defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>; 2260defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>; 2261defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>; 2262defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>; 2263defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>; 2264defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>; 2265defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>; 2266defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>; 2267defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>; 2268defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>; 2269defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_gfx10<0x034>; 2270defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>; 2271defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>; 2272defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>; 2273defm GLOBAL_ATOMIC_UMAX : FLAT_Real_GlblAtomics_gfx10<0x038>; 2274defm GLOBAL_ATOMIC_AND : FLAT_Real_GlblAtomics_gfx10<0x039>; 2275defm GLOBAL_ATOMIC_OR : FLAT_Real_GlblAtomics_gfx10<0x03a>; 2276defm GLOBAL_ATOMIC_XOR : FLAT_Real_GlblAtomics_gfx10<0x03b>; 2277defm GLOBAL_ATOMIC_INC : FLAT_Real_GlblAtomics_gfx10<0x03c>; 2278defm GLOBAL_ATOMIC_DEC : FLAT_Real_GlblAtomics_gfx10<0x03d>; 2279defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x03e>; 2280defm GLOBAL_ATOMIC_FMIN : FLAT_Real_GlblAtomics_gfx10<0x03f>; 2281defm GLOBAL_ATOMIC_FMAX : FLAT_Real_GlblAtomics_gfx10<0x040>; 2282defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x050>; 2283defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x051>; 2284defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Real_GlblAtomics_gfx10<0x052>; 2285defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Real_GlblAtomics_gfx10<0x053>; 2286defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x055>; 2287defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x056>; 2288defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x057>; 2289defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x058>; 2290defm GLOBAL_ATOMIC_AND_X2 : FLAT_Real_GlblAtomics_gfx10<0x059>; 2291defm GLOBAL_ATOMIC_OR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05a>; 2292defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05b>; 2293defm GLOBAL_ATOMIC_INC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05c>; 2294defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>; 2295defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>; 2296defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f, "GLOBAL_ATOMIC_MIN_F64", "global_atomic_fmin_x2">; 2297defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060, "GLOBAL_ATOMIC_MAX_F64", "global_atomic_fmax_x2">; 2298defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x016>; 2299defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>; 2300 2301defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_LDS_gfx10 <0x008>; 2302defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_LDS_gfx10 <0x009>; 2303defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS_gfx10 <0x00a>; 2304defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS_gfx10 <0x00b>; 2305defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS_gfx10 <0x00c>; 2306 2307// ENC_FLAT_SCRATCH. 2308defm SCRATCH_LOAD_UBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x008>; 2309defm SCRATCH_LOAD_SBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x009>; 2310defm SCRATCH_LOAD_USHORT : FLAT_Real_ScratchAllAddr_gfx10<0x00a>; 2311defm SCRATCH_LOAD_SSHORT : FLAT_Real_ScratchAllAddr_gfx10<0x00b>; 2312defm SCRATCH_LOAD_DWORD : FLAT_Real_ScratchAllAddr_gfx10<0x00c>; 2313defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_ScratchAllAddr_gfx10<0x00d>; 2314defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_ScratchAllAddr_gfx10<0x00e>; 2315defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_ScratchAllAddr_gfx10<0x00f>; 2316defm SCRATCH_STORE_BYTE : FLAT_Real_ScratchAllAddr_gfx10<0x018>; 2317defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x019>; 2318defm SCRATCH_STORE_SHORT : FLAT_Real_ScratchAllAddr_gfx10<0x01a>; 2319defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x01b>; 2320defm SCRATCH_STORE_DWORD : FLAT_Real_ScratchAllAddr_gfx10<0x01c>; 2321defm SCRATCH_STORE_DWORDX2 : FLAT_Real_ScratchAllAddr_gfx10<0x01d>; 2322defm SCRATCH_STORE_DWORDX4 : FLAT_Real_ScratchAllAddr_gfx10<0x01e>; 2323defm SCRATCH_STORE_DWORDX3 : FLAT_Real_ScratchAllAddr_gfx10<0x01f>; 2324defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x020>; 2325defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x021>; 2326defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x022>; 2327defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x023>; 2328defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x024>; 2329defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x025>; 2330 2331defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x008>; 2332defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x009>; 2333defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00a>; 2334defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00b>; 2335defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00c>; 2336 2337//===----------------------------------------------------------------------===// 2338// GFX11 2339//===----------------------------------------------------------------------===// 2340 2341class get_FLAT_ps<string name> { 2342 string Mnemonic = !cast<FLAT_Pseudo>(name).Mnemonic; 2343} 2344 2345multiclass FLAT_Real_gfx11 <bits<7> op, 2346 string name = get_FLAT_ps<NAME>.Mnemonic> { 2347 defvar ps = !cast<FLAT_Pseudo>(NAME); 2348 def _gfx11 : FLAT_Real <op, ps, name>, 2349 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX11> { 2350 let AssemblerPredicate = isGFX11Only; 2351 let DecoderNamespace = "GFX11"; 2352 2353 let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue); 2354 let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue); 2355 let Inst{15} = cpol{CPolBit.SLC}; 2356 let Inst{17-16} = seg; 2357 let Inst{54-48} = !if(ps.enabled_saddr, saddr, SGPR_NULL_gfx11plus.Index); 2358 let Inst{55} = ps.sve; 2359 } 2360} 2361 2362multiclass FLAT_Aliases_gfx11<string name> { 2363 defvar ps = get_FLAT_ps<NAME>; 2364 if !ne(ps.Mnemonic, name) then 2365 def : AMDGPUMnemonicAlias<ps.Mnemonic, name> { 2366 let AssemblerPredicate = isGFX11Only; 2367 } 2368} 2369 2370multiclass FLAT_Real_Base_gfx11<bits<7> op, 2371 string name = get_FLAT_ps<NAME>.Mnemonic> : 2372 FLAT_Aliases_gfx11<name>, 2373 FLAT_Real_gfx11<op, name>; 2374 2375multiclass FLAT_Real_Atomics_gfx11<bits<7> op, 2376 string name = get_FLAT_ps<NAME>.Mnemonic> : 2377 FLAT_Real_Base_gfx11<op, name> { 2378 defm _RTN : FLAT_Real_gfx11<op, name>; 2379} 2380 2381multiclass GLOBAL_Real_AllAddr_gfx11<bits<7> op, 2382 string name = get_FLAT_ps<NAME>.Mnemonic> : 2383 FLAT_Real_Base_gfx11<op, name> { 2384 defm _SADDR : FLAT_Real_gfx11<op, name>; 2385} 2386 2387multiclass GLOBAL_Real_Atomics_gfx11<bits<7> op, 2388 string name = get_FLAT_ps<NAME>.Mnemonic> : 2389 GLOBAL_Real_AllAddr_gfx11<op, name> { 2390 defm _RTN : FLAT_Real_gfx11<op, name>; 2391 defm _SADDR_RTN : FLAT_Real_gfx11<op, name>; 2392} 2393 2394multiclass SCRATCH_Real_AllAddr_gfx11<bits<7> op, 2395 string name = get_FLAT_ps<NAME>.Mnemonic> : 2396 FLAT_Real_Base_gfx11<op, name> { 2397 defm _SADDR : FLAT_Real_gfx11<op, name>; 2398 defm _ST : FLAT_Real_gfx11<op, name>; 2399 defm _SVS : FLAT_Real_gfx11<op, name>; 2400} 2401 2402// ENC_FLAT. 2403defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx11<0x010, "flat_load_u8">; 2404defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx11<0x011, "flat_load_i8">; 2405defm FLAT_LOAD_USHORT : FLAT_Real_Base_gfx11<0x012, "flat_load_u16">; 2406defm FLAT_LOAD_SSHORT : FLAT_Real_Base_gfx11<0x013, "flat_load_i16">; 2407defm FLAT_LOAD_DWORD : FLAT_Real_Base_gfx11<0x014, "flat_load_b32">; 2408defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx11<0x015, "flat_load_b64">; 2409defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx11<0x016, "flat_load_b96">; 2410defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx11<0x017, "flat_load_b128">; 2411defm FLAT_STORE_BYTE : FLAT_Real_Base_gfx11<0x018, "flat_store_b8">; 2412defm FLAT_STORE_SHORT : FLAT_Real_Base_gfx11<0x019, "flat_store_b16">; 2413defm FLAT_STORE_DWORD : FLAT_Real_Base_gfx11<0x01a, "flat_store_b32">; 2414defm FLAT_STORE_DWORDX2 : FLAT_Real_Base_gfx11<0x01b, "flat_store_b64">; 2415defm FLAT_STORE_DWORDX3 : FLAT_Real_Base_gfx11<0x01c, "flat_store_b96">; 2416defm FLAT_STORE_DWORDX4 : FLAT_Real_Base_gfx11<0x01d, "flat_store_b128">; 2417defm FLAT_LOAD_UBYTE_D16 : FLAT_Real_Base_gfx11<0x01e, "flat_load_d16_u8">; 2418defm FLAT_LOAD_SBYTE_D16 : FLAT_Real_Base_gfx11<0x01f, "flat_load_d16_i8">; 2419defm FLAT_LOAD_SHORT_D16 : FLAT_Real_Base_gfx11<0x020, "flat_load_d16_b16">; 2420defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx11<0x021, "flat_load_d16_hi_u8">; 2421defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx11<0x022, "flat_load_d16_hi_i8">; 2422defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx11<0x023, "flat_load_d16_hi_b16">; 2423defm FLAT_STORE_BYTE_D16_HI : FLAT_Real_Base_gfx11<0x024, "flat_store_d16_hi_b8">; 2424defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx11<0x025, "flat_store_d16_hi_b16">; 2425defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_gfx11<0x033, "flat_atomic_swap_b32">; 2426defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_gfx11<0x034, "flat_atomic_cmpswap_b32">; 2427defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_gfx11<0x035, "flat_atomic_add_u32">; 2428defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_gfx11<0x036, "flat_atomic_sub_u32">; 2429defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_gfx11<0x038, "flat_atomic_min_i32">; 2430defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_gfx11<0x039, "flat_atomic_min_u32">; 2431defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_gfx11<0x03a, "flat_atomic_max_i32">; 2432defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_gfx11<0x03b, "flat_atomic_max_u32">; 2433defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_gfx11<0x03c, "flat_atomic_and_b32">; 2434defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_gfx11<0x03d, "flat_atomic_or_b32">; 2435defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_gfx11<0x03e, "flat_atomic_xor_b32">; 2436defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_gfx11<0x03f, "flat_atomic_inc_u32">; 2437defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_gfx11<0x040, "flat_atomic_dec_u32">; 2438defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_gfx11<0x041, "flat_atomic_swap_b64">; 2439defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_gfx11<0x042, "flat_atomic_cmpswap_b64">; 2440defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_gfx11<0x043, "flat_atomic_add_u64">; 2441defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_gfx11<0x044, "flat_atomic_sub_u64">; 2442defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_gfx11<0x045, "flat_atomic_min_i64">; 2443defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_gfx11<0x046, "flat_atomic_min_u64">; 2444defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_gfx11<0x047, "flat_atomic_max_i64">; 2445defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_gfx11<0x048, "flat_atomic_max_u64">; 2446defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_gfx11<0x049, "flat_atomic_and_b64">; 2447defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_gfx11<0x04a, "flat_atomic_or_b64">; 2448defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx11<0x04b, "flat_atomic_xor_b64">; 2449defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx11<0x04c, "flat_atomic_inc_u64">; 2450defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx11<0x04d, "flat_atomic_dec_u64">; 2451defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx11<0x050, "flat_atomic_cmpswap_f32">; 2452defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_gfx11<0x051, "flat_atomic_min_f32">; 2453defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_gfx11<0x052, "flat_atomic_max_f32">; 2454defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_gfx11<0x056>; 2455 2456// ENC_FLAT_GLBL. 2457defm GLOBAL_LOAD_UBYTE : GLOBAL_Real_AllAddr_gfx11<0x010, "global_load_u8">; 2458defm GLOBAL_LOAD_SBYTE : GLOBAL_Real_AllAddr_gfx11<0x011, "global_load_i8">; 2459defm GLOBAL_LOAD_USHORT : GLOBAL_Real_AllAddr_gfx11<0x012, "global_load_u16">; 2460defm GLOBAL_LOAD_SSHORT : GLOBAL_Real_AllAddr_gfx11<0x013, "global_load_i16">; 2461defm GLOBAL_LOAD_DWORD : GLOBAL_Real_AllAddr_gfx11<0x014, "global_load_b32">; 2462defm GLOBAL_LOAD_DWORDX2 : GLOBAL_Real_AllAddr_gfx11<0x015, "global_load_b64">; 2463defm GLOBAL_LOAD_DWORDX3 : GLOBAL_Real_AllAddr_gfx11<0x016, "global_load_b96">; 2464defm GLOBAL_LOAD_DWORDX4 : GLOBAL_Real_AllAddr_gfx11<0x017, "global_load_b128">; 2465defm GLOBAL_STORE_BYTE : GLOBAL_Real_AllAddr_gfx11<0x018, "global_store_b8">; 2466defm GLOBAL_STORE_SHORT : GLOBAL_Real_AllAddr_gfx11<0x019, "global_store_b16">; 2467defm GLOBAL_STORE_DWORD : GLOBAL_Real_AllAddr_gfx11<0x01a, "global_store_b32">; 2468defm GLOBAL_STORE_DWORDX2 : GLOBAL_Real_AllAddr_gfx11<0x01b, "global_store_b64">; 2469defm GLOBAL_STORE_DWORDX3 : GLOBAL_Real_AllAddr_gfx11<0x01c, "global_store_b96">; 2470defm GLOBAL_STORE_DWORDX4 : GLOBAL_Real_AllAddr_gfx11<0x01d, "global_store_b128">; 2471defm GLOBAL_LOAD_UBYTE_D16 : GLOBAL_Real_AllAddr_gfx11<0x01e, "global_load_d16_u8">; 2472defm GLOBAL_LOAD_SBYTE_D16 : GLOBAL_Real_AllAddr_gfx11<0x01f, "global_load_d16_i8">; 2473defm GLOBAL_LOAD_SHORT_D16 : GLOBAL_Real_AllAddr_gfx11<0x020, "global_load_d16_b16">; 2474defm GLOBAL_LOAD_UBYTE_D16_HI : GLOBAL_Real_AllAddr_gfx11<0x021, "global_load_d16_hi_u8">; 2475defm GLOBAL_LOAD_SBYTE_D16_HI : GLOBAL_Real_AllAddr_gfx11<0x022, "global_load_d16_hi_i8">; 2476defm GLOBAL_LOAD_SHORT_D16_HI : GLOBAL_Real_AllAddr_gfx11<0x023, "global_load_d16_hi_b16">; 2477defm GLOBAL_STORE_BYTE_D16_HI : GLOBAL_Real_AllAddr_gfx11<0x024, "global_store_d16_hi_b8">; 2478defm GLOBAL_STORE_SHORT_D16_HI : GLOBAL_Real_AllAddr_gfx11<0x025, "global_store_d16_hi_b16">; 2479defm GLOBAL_LOAD_DWORD_ADDTID : GLOBAL_Real_AllAddr_gfx11<0x028, "global_load_addtid_b32">; 2480defm GLOBAL_STORE_DWORD_ADDTID : GLOBAL_Real_AllAddr_gfx11<0x029, "global_store_addtid_b32">; 2481defm GLOBAL_ATOMIC_SWAP : GLOBAL_Real_Atomics_gfx11<0x033, "global_atomic_swap_b32">; 2482defm GLOBAL_ATOMIC_CMPSWAP : GLOBAL_Real_Atomics_gfx11<0x034, "global_atomic_cmpswap_b32">; 2483defm GLOBAL_ATOMIC_ADD : GLOBAL_Real_Atomics_gfx11<0x035, "global_atomic_add_u32">; 2484defm GLOBAL_ATOMIC_SUB : GLOBAL_Real_Atomics_gfx11<0x036, "global_atomic_sub_u32">; 2485defm GLOBAL_ATOMIC_CSUB : GLOBAL_Real_Atomics_gfx11<0x037, "global_atomic_csub_u32">; 2486defm GLOBAL_ATOMIC_SMIN : GLOBAL_Real_Atomics_gfx11<0x038, "global_atomic_min_i32">; 2487defm GLOBAL_ATOMIC_UMIN : GLOBAL_Real_Atomics_gfx11<0x039, "global_atomic_min_u32">; 2488defm GLOBAL_ATOMIC_SMAX : GLOBAL_Real_Atomics_gfx11<0x03a, "global_atomic_max_i32">; 2489defm GLOBAL_ATOMIC_UMAX : GLOBAL_Real_Atomics_gfx11<0x03b, "global_atomic_max_u32">; 2490defm GLOBAL_ATOMIC_AND : GLOBAL_Real_Atomics_gfx11<0x03c, "global_atomic_and_b32">; 2491defm GLOBAL_ATOMIC_OR : GLOBAL_Real_Atomics_gfx11<0x03d, "global_atomic_or_b32">; 2492defm GLOBAL_ATOMIC_XOR : GLOBAL_Real_Atomics_gfx11<0x03e, "global_atomic_xor_b32">; 2493defm GLOBAL_ATOMIC_INC : GLOBAL_Real_Atomics_gfx11<0x03f, "global_atomic_inc_u32">; 2494defm GLOBAL_ATOMIC_DEC : GLOBAL_Real_Atomics_gfx11<0x040, "global_atomic_dec_u32">; 2495defm GLOBAL_ATOMIC_SWAP_X2 : GLOBAL_Real_Atomics_gfx11<0x041, "global_atomic_swap_b64">; 2496defm GLOBAL_ATOMIC_CMPSWAP_X2 : GLOBAL_Real_Atomics_gfx11<0x042, "global_atomic_cmpswap_b64">; 2497defm GLOBAL_ATOMIC_ADD_X2 : GLOBAL_Real_Atomics_gfx11<0x043, "global_atomic_add_u64">; 2498defm GLOBAL_ATOMIC_SUB_X2 : GLOBAL_Real_Atomics_gfx11<0x044, "global_atomic_sub_u64">; 2499defm GLOBAL_ATOMIC_SMIN_X2 : GLOBAL_Real_Atomics_gfx11<0x045, "global_atomic_min_i64">; 2500defm GLOBAL_ATOMIC_UMIN_X2 : GLOBAL_Real_Atomics_gfx11<0x046, "global_atomic_min_u64">; 2501defm GLOBAL_ATOMIC_SMAX_X2 : GLOBAL_Real_Atomics_gfx11<0x047, "global_atomic_max_i64">; 2502defm GLOBAL_ATOMIC_UMAX_X2 : GLOBAL_Real_Atomics_gfx11<0x048, "global_atomic_max_u64">; 2503defm GLOBAL_ATOMIC_AND_X2 : GLOBAL_Real_Atomics_gfx11<0x049, "global_atomic_and_b64">; 2504defm GLOBAL_ATOMIC_OR_X2 : GLOBAL_Real_Atomics_gfx11<0x04a, "global_atomic_or_b64">; 2505defm GLOBAL_ATOMIC_XOR_X2 : GLOBAL_Real_Atomics_gfx11<0x04b, "global_atomic_xor_b64">; 2506defm GLOBAL_ATOMIC_INC_X2 : GLOBAL_Real_Atomics_gfx11<0x04c, "global_atomic_inc_u64">; 2507defm GLOBAL_ATOMIC_DEC_X2 : GLOBAL_Real_Atomics_gfx11<0x04d, "global_atomic_dec_u64">; 2508defm GLOBAL_ATOMIC_FCMPSWAP : GLOBAL_Real_Atomics_gfx11<0x050, "global_atomic_cmpswap_f32">; 2509defm GLOBAL_ATOMIC_FMIN : GLOBAL_Real_Atomics_gfx11<0x051, "global_atomic_min_f32">; 2510defm GLOBAL_ATOMIC_FMAX : GLOBAL_Real_Atomics_gfx11<0x052, "global_atomic_max_f32">; 2511defm GLOBAL_ATOMIC_ADD_F32 : GLOBAL_Real_Atomics_gfx11<0x056>; 2512 2513// ENC_FLAT_SCRATCH. 2514defm SCRATCH_LOAD_UBYTE : SCRATCH_Real_AllAddr_gfx11<0x10, "scratch_load_u8">; 2515defm SCRATCH_LOAD_SBYTE : SCRATCH_Real_AllAddr_gfx11<0x11, "scratch_load_i8">; 2516defm SCRATCH_LOAD_USHORT : SCRATCH_Real_AllAddr_gfx11<0x12, "scratch_load_u16">; 2517defm SCRATCH_LOAD_SSHORT : SCRATCH_Real_AllAddr_gfx11<0x13, "scratch_load_i16">; 2518defm SCRATCH_LOAD_DWORD : SCRATCH_Real_AllAddr_gfx11<0x14, "scratch_load_b32">; 2519defm SCRATCH_LOAD_DWORDX2 : SCRATCH_Real_AllAddr_gfx11<0x15, "scratch_load_b64">; 2520defm SCRATCH_LOAD_DWORDX3 : SCRATCH_Real_AllAddr_gfx11<0x16, "scratch_load_b96">; 2521defm SCRATCH_LOAD_DWORDX4 : SCRATCH_Real_AllAddr_gfx11<0x17, "scratch_load_b128">; 2522defm SCRATCH_STORE_BYTE : SCRATCH_Real_AllAddr_gfx11<0x18, "scratch_store_b8">; 2523defm SCRATCH_STORE_SHORT : SCRATCH_Real_AllAddr_gfx11<0x19, "scratch_store_b16">; 2524defm SCRATCH_STORE_DWORD : SCRATCH_Real_AllAddr_gfx11<0x1a, "scratch_store_b32">; 2525defm SCRATCH_STORE_DWORDX2 : SCRATCH_Real_AllAddr_gfx11<0x1b, "scratch_store_b64">; 2526defm SCRATCH_STORE_DWORDX3 : SCRATCH_Real_AllAddr_gfx11<0x1c, "scratch_store_b96">; 2527defm SCRATCH_STORE_DWORDX4 : SCRATCH_Real_AllAddr_gfx11<0x1d, "scratch_store_b128">; 2528defm SCRATCH_LOAD_UBYTE_D16 : SCRATCH_Real_AllAddr_gfx11<0x1e, "scratch_load_d16_u8">; 2529defm SCRATCH_LOAD_SBYTE_D16 : SCRATCH_Real_AllAddr_gfx11<0x1f, "scratch_load_d16_i8">; 2530defm SCRATCH_LOAD_SHORT_D16 : SCRATCH_Real_AllAddr_gfx11<0x20, "scratch_load_d16_b16">; 2531defm SCRATCH_LOAD_UBYTE_D16_HI : SCRATCH_Real_AllAddr_gfx11<0x21, "scratch_load_d16_hi_u8">; 2532defm SCRATCH_LOAD_SBYTE_D16_HI : SCRATCH_Real_AllAddr_gfx11<0x22, "scratch_load_d16_hi_i8">; 2533defm SCRATCH_LOAD_SHORT_D16_HI : SCRATCH_Real_AllAddr_gfx11<0x23, "scratch_load_d16_hi_b16">; 2534defm SCRATCH_STORE_BYTE_D16_HI : SCRATCH_Real_AllAddr_gfx11<0x24, "scratch_store_d16_hi_b8">; 2535defm SCRATCH_STORE_SHORT_D16_HI : SCRATCH_Real_AllAddr_gfx11<0x25, "scratch_store_d16_hi_b16">; 2536 2537//===----------------------------------------------------------------------===// 2538// GFX12 2539//===----------------------------------------------------------------------===// 2540 2541multiclass VFLAT_Real_gfx12 <bits<8> op, string name = get_FLAT_ps<NAME>.Mnemonic> { 2542 defvar ps = !cast<FLAT_Pseudo>(NAME); 2543 def _gfx12 : VFLAT_Real <op, ps, name>, 2544 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX12> { 2545 let AssemblerPredicate = isGFX12Only; 2546 let DecoderNamespace = "GFX12"; 2547 2548 let Inst{25-24} = {ps.is_flat_global, ps.is_flat_scratch}; 2549 } 2550} 2551 2552multiclass VFLAT_Aliases_gfx12<string name, string alias = name> { 2553 defvar ps = get_FLAT_ps<NAME>; 2554 let AssemblerPredicate = isGFX12Only in { 2555 if !ne(ps.Mnemonic, name) then 2556 def : AMDGPUMnemonicAlias<ps.Mnemonic, name>; 2557 if !ne(alias, name) then 2558 def : AMDGPUMnemonicAlias<alias, name>; 2559 } 2560} 2561 2562multiclass VFLAT_Real_Base_gfx12<bits<8> op, 2563 string name = get_FLAT_ps<NAME>.Mnemonic, 2564 string alias = name> : 2565 VFLAT_Aliases_gfx12<name, alias>, 2566 VFLAT_Real_gfx12<op, name>; 2567 2568multiclass VFLAT_Real_Atomics_gfx12<bits<8> op, 2569 string name = get_FLAT_ps<NAME>.Mnemonic, 2570 string alias = name> : 2571 VFLAT_Real_Base_gfx12<op, name, alias> { 2572 defm _RTN : VFLAT_Real_gfx12<op, name>; 2573} 2574 2575multiclass VGLOBAL_Real_AllAddr_gfx12<bits<8> op, 2576 string name = get_FLAT_ps<NAME>.Mnemonic, 2577 string alias = name> : 2578 VFLAT_Real_Base_gfx12<op, name, alias> { 2579 defm _SADDR : VFLAT_Real_gfx12<op, name>; 2580} 2581 2582multiclass VGLOBAL_Real_AllAddr_gfx12_w64<bits<8> op, 2583 string name = get_FLAT_ps<NAME>.Mnemonic> : 2584 VFLAT_Aliases_gfx12<name> { 2585 let DecoderNamespace = "GFX12W64" in { 2586 defm "" : VFLAT_Real_gfx12<op, name>; 2587 defm _SADDR : VFLAT_Real_gfx12<op, name>; 2588 } 2589} 2590 2591multiclass VGLOBAL_Real_Atomics_gfx12<bits<8> op, 2592 string name = get_FLAT_ps<NAME>.Mnemonic, 2593 string alias = name> : 2594 VGLOBAL_Real_AllAddr_gfx12<op, name, alias> { 2595 defm _RTN : VFLAT_Real_gfx12<op, name>; 2596 defm _SADDR_RTN : VFLAT_Real_gfx12<op, name>; 2597} 2598 2599multiclass VSCRATCH_Real_AllAddr_gfx12<bits<8> op, 2600 string name = get_FLAT_ps<NAME>.Mnemonic> : 2601 VFLAT_Real_Base_gfx12<op, name> { 2602 defm _SADDR : VFLAT_Real_gfx12<op, name>; 2603 defm _ST : VFLAT_Real_gfx12<op, name>; 2604 defm _SVS : VFLAT_Real_gfx12<op, name>; 2605} 2606 2607// ENC_VFLAT. 2608defm FLAT_LOAD_UBYTE : VFLAT_Real_Base_gfx12<0x010, "flat_load_u8">; 2609defm FLAT_LOAD_SBYTE : VFLAT_Real_Base_gfx12<0x011, "flat_load_i8">; 2610defm FLAT_LOAD_USHORT : VFLAT_Real_Base_gfx12<0x012, "flat_load_u16">; 2611defm FLAT_LOAD_SSHORT : VFLAT_Real_Base_gfx12<0x013, "flat_load_i16">; 2612defm FLAT_LOAD_DWORD : VFLAT_Real_Base_gfx12<0x014, "flat_load_b32">; 2613defm FLAT_LOAD_DWORDX2 : VFLAT_Real_Base_gfx12<0x015, "flat_load_b64">; 2614defm FLAT_LOAD_DWORDX3 : VFLAT_Real_Base_gfx12<0x016, "flat_load_b96">; 2615defm FLAT_LOAD_DWORDX4 : VFLAT_Real_Base_gfx12<0x017, "flat_load_b128">; 2616defm FLAT_STORE_BYTE : VFLAT_Real_Base_gfx12<0x018, "flat_store_b8">; 2617defm FLAT_STORE_SHORT : VFLAT_Real_Base_gfx12<0x019, "flat_store_b16">; 2618defm FLAT_STORE_DWORD : VFLAT_Real_Base_gfx12<0x01a, "flat_store_b32">; 2619defm FLAT_STORE_DWORDX2 : VFLAT_Real_Base_gfx12<0x01b, "flat_store_b64">; 2620defm FLAT_STORE_DWORDX3 : VFLAT_Real_Base_gfx12<0x01c, "flat_store_b96">; 2621defm FLAT_STORE_DWORDX4 : VFLAT_Real_Base_gfx12<0x01d, "flat_store_b128">; 2622defm FLAT_LOAD_UBYTE_D16 : VFLAT_Real_Base_gfx12<0x01e, "flat_load_d16_u8">; 2623defm FLAT_LOAD_SBYTE_D16 : VFLAT_Real_Base_gfx12<0x01f, "flat_load_d16_i8">; 2624defm FLAT_LOAD_SHORT_D16 : VFLAT_Real_Base_gfx12<0x020, "flat_load_d16_b16">; 2625defm FLAT_LOAD_UBYTE_D16_HI : VFLAT_Real_Base_gfx12<0x021, "flat_load_d16_hi_u8">; 2626defm FLAT_LOAD_SBYTE_D16_HI : VFLAT_Real_Base_gfx12<0x022, "flat_load_d16_hi_i8">; 2627defm FLAT_LOAD_SHORT_D16_HI : VFLAT_Real_Base_gfx12<0x023, "flat_load_d16_hi_b16">; 2628defm FLAT_STORE_BYTE_D16_HI : VFLAT_Real_Base_gfx12<0x024, "flat_store_d16_hi_b8">; 2629defm FLAT_STORE_SHORT_D16_HI : VFLAT_Real_Base_gfx12<0x025, "flat_store_d16_hi_b16">; 2630defm FLAT_ATOMIC_SWAP : VFLAT_Real_Atomics_gfx12<0x033, "flat_atomic_swap_b32">; 2631defm FLAT_ATOMIC_CMPSWAP : VFLAT_Real_Atomics_gfx12<0x034, "flat_atomic_cmpswap_b32">; 2632defm FLAT_ATOMIC_ADD : VFLAT_Real_Atomics_gfx12<0x035, "flat_atomic_add_u32">; 2633defm FLAT_ATOMIC_SUB : VFLAT_Real_Atomics_gfx12<0x036, "flat_atomic_sub_u32">; 2634defm FLAT_ATOMIC_CSUB_U32 : VFLAT_Real_Atomics_gfx12<0x037, "flat_atomic_sub_clamp_u32">; 2635defm FLAT_ATOMIC_SMIN : VFLAT_Real_Atomics_gfx12<0x038, "flat_atomic_min_i32">; 2636defm FLAT_ATOMIC_UMIN : VFLAT_Real_Atomics_gfx12<0x039, "flat_atomic_min_u32">; 2637defm FLAT_ATOMIC_SMAX : VFLAT_Real_Atomics_gfx12<0x03a, "flat_atomic_max_i32">; 2638defm FLAT_ATOMIC_UMAX : VFLAT_Real_Atomics_gfx12<0x03b, "flat_atomic_max_u32">; 2639defm FLAT_ATOMIC_AND : VFLAT_Real_Atomics_gfx12<0x03c, "flat_atomic_and_b32">; 2640defm FLAT_ATOMIC_OR : VFLAT_Real_Atomics_gfx12<0x03d, "flat_atomic_or_b32">; 2641defm FLAT_ATOMIC_XOR : VFLAT_Real_Atomics_gfx12<0x03e, "flat_atomic_xor_b32">; 2642defm FLAT_ATOMIC_INC : VFLAT_Real_Atomics_gfx12<0x03f, "flat_atomic_inc_u32">; 2643defm FLAT_ATOMIC_DEC : VFLAT_Real_Atomics_gfx12<0x040, "flat_atomic_dec_u32">; 2644defm FLAT_ATOMIC_SWAP_X2 : VFLAT_Real_Atomics_gfx12<0x041, "flat_atomic_swap_b64">; 2645defm FLAT_ATOMIC_CMPSWAP_X2 : VFLAT_Real_Atomics_gfx12<0x042, "flat_atomic_cmpswap_b64">; 2646defm FLAT_ATOMIC_ADD_X2 : VFLAT_Real_Atomics_gfx12<0x043, "flat_atomic_add_u64">; 2647defm FLAT_ATOMIC_SUB_X2 : VFLAT_Real_Atomics_gfx12<0x044, "flat_atomic_sub_u64">; 2648defm FLAT_ATOMIC_SMIN_X2 : VFLAT_Real_Atomics_gfx12<0x045, "flat_atomic_min_i64">; 2649defm FLAT_ATOMIC_UMIN_X2 : VFLAT_Real_Atomics_gfx12<0x046, "flat_atomic_min_u64">; 2650defm FLAT_ATOMIC_SMAX_X2 : VFLAT_Real_Atomics_gfx12<0x047, "flat_atomic_max_i64">; 2651defm FLAT_ATOMIC_UMAX_X2 : VFLAT_Real_Atomics_gfx12<0x048, "flat_atomic_max_u64">; 2652defm FLAT_ATOMIC_AND_X2 : VFLAT_Real_Atomics_gfx12<0x049, "flat_atomic_and_b64">; 2653defm FLAT_ATOMIC_OR_X2 : VFLAT_Real_Atomics_gfx12<0x04a, "flat_atomic_or_b64">; 2654defm FLAT_ATOMIC_XOR_X2 : VFLAT_Real_Atomics_gfx12<0x04b, "flat_atomic_xor_b64">; 2655defm FLAT_ATOMIC_INC_X2 : VFLAT_Real_Atomics_gfx12<0x04c, "flat_atomic_inc_u64">; 2656defm FLAT_ATOMIC_DEC_X2 : VFLAT_Real_Atomics_gfx12<0x04d, "flat_atomic_dec_u64">; 2657defm FLAT_ATOMIC_COND_SUB_U32 : VFLAT_Real_Atomics_gfx12<0x050>; 2658defm FLAT_ATOMIC_FMIN : VFLAT_Real_Atomics_gfx12<0x051, "flat_atomic_min_num_f32", "flat_atomic_min_f32">; 2659defm FLAT_ATOMIC_FMAX : VFLAT_Real_Atomics_gfx12<0x052, "flat_atomic_max_num_f32", "flat_atomic_max_f32">; 2660defm FLAT_ATOMIC_ADD_F32 : VFLAT_Real_Atomics_gfx12<0x056>; 2661defm FLAT_ATOMIC_PK_ADD_F16 : VFLAT_Real_Atomics_gfx12<0x059>; 2662defm FLAT_ATOMIC_PK_ADD_BF16 : VFLAT_Real_Atomics_gfx12<0x05a>; 2663 2664// ENC_VGLOBAL. 2665defm GLOBAL_LOAD_UBYTE : VGLOBAL_Real_AllAddr_gfx12<0x010, "global_load_u8">; 2666defm GLOBAL_LOAD_SBYTE : VGLOBAL_Real_AllAddr_gfx12<0x011, "global_load_i8">; 2667defm GLOBAL_LOAD_USHORT : VGLOBAL_Real_AllAddr_gfx12<0x012, "global_load_u16">; 2668defm GLOBAL_LOAD_SSHORT : VGLOBAL_Real_AllAddr_gfx12<0x013, "global_load_i16">; 2669defm GLOBAL_LOAD_DWORD : VGLOBAL_Real_AllAddr_gfx12<0x014, "global_load_b32">; 2670defm GLOBAL_LOAD_DWORDX2 : VGLOBAL_Real_AllAddr_gfx12<0x015, "global_load_b64">; 2671defm GLOBAL_LOAD_DWORDX3 : VGLOBAL_Real_AllAddr_gfx12<0x016, "global_load_b96">; 2672defm GLOBAL_LOAD_DWORDX4 : VGLOBAL_Real_AllAddr_gfx12<0x017, "global_load_b128">; 2673defm GLOBAL_STORE_BYTE : VGLOBAL_Real_AllAddr_gfx12<0x018, "global_store_b8">; 2674defm GLOBAL_STORE_SHORT : VGLOBAL_Real_AllAddr_gfx12<0x019, "global_store_b16">; 2675defm GLOBAL_STORE_DWORD : VGLOBAL_Real_AllAddr_gfx12<0x01a, "global_store_b32">; 2676defm GLOBAL_STORE_DWORDX2 : VGLOBAL_Real_AllAddr_gfx12<0x01b, "global_store_b64">; 2677defm GLOBAL_STORE_DWORDX3 : VGLOBAL_Real_AllAddr_gfx12<0x01c, "global_store_b96">; 2678defm GLOBAL_STORE_DWORDX4 : VGLOBAL_Real_AllAddr_gfx12<0x01d, "global_store_b128">; 2679defm GLOBAL_LOAD_UBYTE_D16 : VGLOBAL_Real_AllAddr_gfx12<0x01e, "global_load_d16_u8">; 2680defm GLOBAL_LOAD_SBYTE_D16 : VGLOBAL_Real_AllAddr_gfx12<0x01f, "global_load_d16_i8">; 2681defm GLOBAL_LOAD_SHORT_D16 : VGLOBAL_Real_AllAddr_gfx12<0x020, "global_load_d16_b16">; 2682defm GLOBAL_LOAD_UBYTE_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x021, "global_load_d16_hi_u8">; 2683defm GLOBAL_LOAD_SBYTE_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x022, "global_load_d16_hi_i8">; 2684defm GLOBAL_LOAD_SHORT_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x023, "global_load_d16_hi_b16">; 2685defm GLOBAL_STORE_BYTE_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x024, "global_store_d16_hi_b8">; 2686defm GLOBAL_STORE_SHORT_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x025, "global_store_d16_hi_b16">; 2687defm GLOBAL_LOAD_DWORD_ADDTID : VGLOBAL_Real_AllAddr_gfx12<0x028, "global_load_addtid_b32">; 2688defm GLOBAL_STORE_DWORD_ADDTID : VGLOBAL_Real_AllAddr_gfx12<0x029, "global_store_addtid_b32">; 2689defm GLOBAL_LOAD_BLOCK : VGLOBAL_Real_AllAddr_gfx12<0x053>; 2690defm GLOBAL_STORE_BLOCK : VGLOBAL_Real_AllAddr_gfx12<0x054>; 2691 2692defm GLOBAL_ATOMIC_SWAP : VGLOBAL_Real_Atomics_gfx12<0x033, "global_atomic_swap_b32">; 2693defm GLOBAL_ATOMIC_CMPSWAP : VGLOBAL_Real_Atomics_gfx12<0x034, "global_atomic_cmpswap_b32">; 2694defm GLOBAL_ATOMIC_ADD : VGLOBAL_Real_Atomics_gfx12<0x035, "global_atomic_add_u32">; 2695defm GLOBAL_ATOMIC_SUB : VGLOBAL_Real_Atomics_gfx12<0x036, "global_atomic_sub_u32">; 2696defm GLOBAL_ATOMIC_CSUB : VGLOBAL_Real_Atomics_gfx12<0x037, "global_atomic_sub_clamp_u32", "global_atomic_csub_u32">; 2697defm GLOBAL_ATOMIC_SMIN : VGLOBAL_Real_Atomics_gfx12<0x038, "global_atomic_min_i32">; 2698defm GLOBAL_ATOMIC_UMIN : VGLOBAL_Real_Atomics_gfx12<0x039, "global_atomic_min_u32">; 2699defm GLOBAL_ATOMIC_SMAX : VGLOBAL_Real_Atomics_gfx12<0x03a, "global_atomic_max_i32">; 2700defm GLOBAL_ATOMIC_UMAX : VGLOBAL_Real_Atomics_gfx12<0x03b, "global_atomic_max_u32">; 2701defm GLOBAL_ATOMIC_AND : VGLOBAL_Real_Atomics_gfx12<0x03c, "global_atomic_and_b32">; 2702defm GLOBAL_ATOMIC_OR : VGLOBAL_Real_Atomics_gfx12<0x03d, "global_atomic_or_b32">; 2703defm GLOBAL_ATOMIC_XOR : VGLOBAL_Real_Atomics_gfx12<0x03e, "global_atomic_xor_b32">; 2704defm GLOBAL_ATOMIC_INC : VGLOBAL_Real_Atomics_gfx12<0x03f, "global_atomic_inc_u32">; 2705defm GLOBAL_ATOMIC_DEC : VGLOBAL_Real_Atomics_gfx12<0x040, "global_atomic_dec_u32">; 2706defm GLOBAL_ATOMIC_SWAP_X2 : VGLOBAL_Real_Atomics_gfx12<0x041, "global_atomic_swap_b64">; 2707defm GLOBAL_ATOMIC_CMPSWAP_X2 : VGLOBAL_Real_Atomics_gfx12<0x042, "global_atomic_cmpswap_b64">; 2708defm GLOBAL_ATOMIC_ADD_X2 : VGLOBAL_Real_Atomics_gfx12<0x043, "global_atomic_add_u64">; 2709defm GLOBAL_ATOMIC_SUB_X2 : VGLOBAL_Real_Atomics_gfx12<0x044, "global_atomic_sub_u64">; 2710defm GLOBAL_ATOMIC_SMIN_X2 : VGLOBAL_Real_Atomics_gfx12<0x045, "global_atomic_min_i64">; 2711defm GLOBAL_ATOMIC_UMIN_X2 : VGLOBAL_Real_Atomics_gfx12<0x046, "global_atomic_min_u64">; 2712defm GLOBAL_ATOMIC_SMAX_X2 : VGLOBAL_Real_Atomics_gfx12<0x047, "global_atomic_max_i64">; 2713defm GLOBAL_ATOMIC_UMAX_X2 : VGLOBAL_Real_Atomics_gfx12<0x048, "global_atomic_max_u64">; 2714defm GLOBAL_ATOMIC_AND_X2 : VGLOBAL_Real_Atomics_gfx12<0x049, "global_atomic_and_b64">; 2715defm GLOBAL_ATOMIC_OR_X2 : VGLOBAL_Real_Atomics_gfx12<0x04a, "global_atomic_or_b64">; 2716defm GLOBAL_ATOMIC_XOR_X2 : VGLOBAL_Real_Atomics_gfx12<0x04b, "global_atomic_xor_b64">; 2717defm GLOBAL_ATOMIC_INC_X2 : VGLOBAL_Real_Atomics_gfx12<0x04c, "global_atomic_inc_u64">; 2718defm GLOBAL_ATOMIC_DEC_X2 : VGLOBAL_Real_Atomics_gfx12<0x04d, "global_atomic_dec_u64">; 2719defm GLOBAL_ATOMIC_COND_SUB_U32 : VGLOBAL_Real_Atomics_gfx12<0x050>; 2720defm GLOBAL_ATOMIC_FMIN : VGLOBAL_Real_Atomics_gfx12<0x051, "global_atomic_min_num_f32", "global_atomic_min_f32">; 2721defm GLOBAL_ATOMIC_FMAX : VGLOBAL_Real_Atomics_gfx12<0x052, "global_atomic_max_num_f32", "global_atomic_max_f32">; 2722defm GLOBAL_ATOMIC_ADD_F32 : VGLOBAL_Real_Atomics_gfx12<0x056>; 2723 2724defm GLOBAL_LOAD_TR_B128_w32 : VGLOBAL_Real_AllAddr_gfx12<0x057>; 2725defm GLOBAL_LOAD_TR_B64_w32 : VGLOBAL_Real_AllAddr_gfx12<0x058>; 2726 2727defm GLOBAL_LOAD_TR_B128_w64 : VGLOBAL_Real_AllAddr_gfx12_w64<0x057>; 2728defm GLOBAL_LOAD_TR_B64_w64 : VGLOBAL_Real_AllAddr_gfx12_w64<0x058>; 2729 2730defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : VGLOBAL_Real_Atomics_gfx12<0x073>; 2731defm GLOBAL_ATOMIC_PK_ADD_F16 : VGLOBAL_Real_Atomics_gfx12<0x059>; 2732defm GLOBAL_ATOMIC_PK_ADD_BF16 : VGLOBAL_Real_Atomics_gfx12<0x05a>; 2733 2734defm GLOBAL_INV : VFLAT_Real_Base_gfx12<0x02b>; 2735defm GLOBAL_WB : VFLAT_Real_Base_gfx12<0x02c>; 2736defm GLOBAL_WBINV : VFLAT_Real_Base_gfx12<0x04f>; 2737 2738// ENC_VSCRATCH. 2739defm SCRATCH_LOAD_UBYTE : VSCRATCH_Real_AllAddr_gfx12<0x10, "scratch_load_u8">; 2740defm SCRATCH_LOAD_SBYTE : VSCRATCH_Real_AllAddr_gfx12<0x11, "scratch_load_i8">; 2741defm SCRATCH_LOAD_USHORT : VSCRATCH_Real_AllAddr_gfx12<0x12, "scratch_load_u16">; 2742defm SCRATCH_LOAD_SSHORT : VSCRATCH_Real_AllAddr_gfx12<0x13, "scratch_load_i16">; 2743defm SCRATCH_LOAD_DWORD : VSCRATCH_Real_AllAddr_gfx12<0x14, "scratch_load_b32">; 2744defm SCRATCH_LOAD_DWORDX2 : VSCRATCH_Real_AllAddr_gfx12<0x15, "scratch_load_b64">; 2745defm SCRATCH_LOAD_DWORDX3 : VSCRATCH_Real_AllAddr_gfx12<0x16, "scratch_load_b96">; 2746defm SCRATCH_LOAD_DWORDX4 : VSCRATCH_Real_AllAddr_gfx12<0x17, "scratch_load_b128">; 2747defm SCRATCH_STORE_BYTE : VSCRATCH_Real_AllAddr_gfx12<0x18, "scratch_store_b8">; 2748defm SCRATCH_STORE_SHORT : VSCRATCH_Real_AllAddr_gfx12<0x19, "scratch_store_b16">; 2749defm SCRATCH_STORE_DWORD : VSCRATCH_Real_AllAddr_gfx12<0x1a, "scratch_store_b32">; 2750defm SCRATCH_STORE_DWORDX2 : VSCRATCH_Real_AllAddr_gfx12<0x1b, "scratch_store_b64">; 2751defm SCRATCH_STORE_DWORDX3 : VSCRATCH_Real_AllAddr_gfx12<0x1c, "scratch_store_b96">; 2752defm SCRATCH_STORE_DWORDX4 : VSCRATCH_Real_AllAddr_gfx12<0x1d, "scratch_store_b128">; 2753defm SCRATCH_LOAD_UBYTE_D16 : VSCRATCH_Real_AllAddr_gfx12<0x1e, "scratch_load_d16_u8">; 2754defm SCRATCH_LOAD_SBYTE_D16 : VSCRATCH_Real_AllAddr_gfx12<0x1f, "scratch_load_d16_i8">; 2755defm SCRATCH_LOAD_SHORT_D16 : VSCRATCH_Real_AllAddr_gfx12<0x20, "scratch_load_d16_b16">; 2756defm SCRATCH_LOAD_UBYTE_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x21, "scratch_load_d16_hi_u8">; 2757defm SCRATCH_LOAD_SBYTE_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x22, "scratch_load_d16_hi_i8">; 2758defm SCRATCH_LOAD_SHORT_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x23, "scratch_load_d16_hi_b16">; 2759defm SCRATCH_STORE_BYTE_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x24, "scratch_store_d16_hi_b8">; 2760defm SCRATCH_STORE_SHORT_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x25, "scratch_store_d16_hi_b16">; 2761 2762defm SCRATCH_LOAD_BLOCK : VSCRATCH_Real_AllAddr_gfx12<0x53>; 2763defm SCRATCH_STORE_BLOCK : VSCRATCH_Real_AllAddr_gfx12<0x54>; 2764