//===-- ROCDLOps.td - ROCDL IR dialect op definition file --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is the ROCDL IR operation definition file.
//
//===----------------------------------------------------------------------===//

#ifndef ROCDLIR_OPS
#define ROCDLIR_OPS

include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td"
include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
include "mlir/Interfaces/SideEffectInterfaces.td"

//===----------------------------------------------------------------------===//
// ROCDL dialect definitions
//===----------------------------------------------------------------------===//

// The `rocdl` dialect. It depends on the LLVM dialect, verifies its own
// discardable attributes (hasOperationAttrVerify), and exposes the attribute
// names and address-space constants declared in extraClassDeclaration.
def ROCDL_Dialect : Dialect {
  let name = "rocdl";
  let cppNamespace = "::mlir::ROCDL";
  let dependentDialects = ["LLVM::LLVMDialect"];
  let hasOperationAttrVerify = 1;

  let extraClassDeclaration = [{
    /// Get the name of the attribute used to annotate external kernel
    /// functions.
    static StringRef getKernelFuncAttrName() { return "rocdl.kernel"; }
    static constexpr ::llvm::StringLiteral getFlatWorkGroupSizeAttrName() {
      return ::llvm::StringLiteral("rocdl.flat_work_group_size");
    }
    static constexpr ::llvm::StringLiteral getReqdWorkGroupSizeAttrName() {
      return ::llvm::StringLiteral("rocdl.reqd_work_group_size");
    }
    /// MLIR's gpu-related infrastructure effectively assume uniform workgroup
    /// sizes, so this attribute defaults to "true" on `rocdl.kernel` functions.
    /// It is provided here to allow overriding this assumption.
    static constexpr ::llvm::StringLiteral getUniformWorkGroupSizeAttrName() {
      return ::llvm::StringLiteral("rocdl.uniform_work_group_size");
    }

    /// The address space value that represents global memory.
    static constexpr unsigned kGlobalMemoryAddressSpace = 1;
    /// The address space value that represents shared memory.
    static constexpr unsigned kSharedMemoryAddressSpace = 3;
    /// The address space value that represents private memory.
    static constexpr unsigned kPrivateMemoryAddressSpace = 5;
  }];

  // Discardable attributes in the `rocdl.` namespace, each with the C++
  // attribute type it must carry.
  let discardableAttrs = (ins
    "::mlir::UnitAttr":$kernel,
    "::mlir::DenseI32ArrayAttr":$reqd_work_group_size,
    "::mlir::StringAttr":$flat_work_group_size,
    "::mlir::IntegerAttr":$max_flat_work_group_size,
    "::mlir::IntegerAttr":$waves_per_eu,
    "::mlir::BoolAttr":$unsafe_fp_atomics,
    // Correspond to LLVM metadata of the same name
    "::mlir::UnitAttr":$last_use,
    "::mlir::UnitAttr":$no_remote_memory,
    "::mlir::UnitAttr":$no_fine_grained_memory,
    "::mlir::UnitAttr":$ignore_denormal_mode
  );

  let useDefaultAttributePrinterParser = 1;
}

//===----------------------------------------------------------------------===//
// ROCDL attribute definitions
//===----------------------------------------------------------------------===//

// Base class for ROCDL attributes: an AttrDef in ROCDL_Dialect whose mnemonic
// is `attrMnemonic`.
class ROCDL_Attr<string attrName, string attrMnemonic, list<Trait> traits = []>
    : AttrDef<ROCDL_Dialect, attrName, traits> {
  let mnemonic = attrMnemonic;
}


//===----------------------------------------------------------------------===//
// ROCDL op definitions
//===----------------------------------------------------------------------===//

// Base class for ROCDL ops that are not declared through LLVM_IntrOpBase.
class ROCDL_Op<string mnemonic, list<Trait> traits = []> :
  LLVM_OpBase<ROCDL_Dialect, mnemonic, traits> {
}

// Pure intrinsic op with one result and no overloaded types. The intrinsic
// enum name is "amdgcn_" followed by the mnemonic with '.' replaced by '_'.
class ROCDL_IntrPure1Op<string mnemonic> :
  LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
  "amdgcn_" # !subst(".", "_", mnemonic), [], [], [Pure], 1>;

// General intrinsic op. Forwards its parameters to LLVM_IntrOpBase using the
// same "amdgcn_" naming scheme as above.
// NOTE(review): the two literal 0 arguments fill LLVM_IntrOpBase parameters
// that this class does not expose; see LLVMOpBase.td for their meaning.
class ROCDL_IntrOp<string mnemonic, list<int> overloadedResults,
  list<int> overloadedOperands, list<Trait> traits, int numResults,
  int requiresAccessGroup = 0, int requiresAliasAnalysis = 0, list<int> immArgPositions = [],
  list<string> immArgAttrNames = []> :
  LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
    "amdgcn_" # !subst(".", "_", mnemonic), overloadedResults,
    overloadedOperands, traits, numResults, requiresAccessGroup,
    requiresAliasAnalysis, 0, 0, immArgPositions, immArgAttrNames>;

//===----------------------------------------------------------------------===//
// ROCDL special register op definitions
//===----------------------------------------------------------------------===//

// Pure single-result ID register read with an optional constant `range`
// attribute. The LLVM and MLIR builders are the base builders with the
// range-attribute export/import snippets spliced in before the coda.
class ROCDL_SpecialIdRegisterOp<string mnemonic> :
    ROCDL_IntrPure1Op<mnemonic>,
    Arguments<(ins OptionalAttr<LLVM_ConstantRangeAttr>:$range)> {
  string llvmBuilder = baseLlvmBuilder # setRangeRetAttrCode # baseLlvmBuilderCoda;
  string mlirBuilder = baseMlirBuilder # importRangeRetAttrCode # baseMlirBuilderCoda;

  let assemblyFormat = "(`range` $range^)? attr-dict `:` type($res)";

  // Temporary builder until Nvidia ops also support range attributes.
  // Builds the op from a result type only, passing a null range attribute.
  let builders = [
    OpBuilder<(ins "Type":$resultType), [{
      build($_builder, $_state, resultType, ::mlir::LLVM::ConstantRangeAttr{});
    }]>
  ];
}

// Pure op translated to a call of `device_function` with the constant
// `parameter`, via createDimGetterFunctionCall. Also takes an optional
// `range` attribute.
class ROCDL_DimGetterFunctionOp<string mnemonic, string device_function,
                             int parameter, list<Trait> traits = []> :
  ROCDL_Op<mnemonic, !listconcat(traits, [Pure])>,
  Results<(outs LLVM_Type:$res)>, Arguments<(ins OptionalAttr<LLVM_ConstantRangeAttr>:$range)> {
  string llvmBuilder = "$res = createDimGetterFunctionCall(builder, op, \""
  # device_function # "\", " # parameter # ");";
  let assemblyFormat = "(`range` $range^)? attr-dict `:` type($res)";

  // Temporary builder until Nvidia ops also support range attributes.
  // Builds the op from a result type only, passing a null range attribute.
  let builders = [
    OpBuilder<(ins "Type":$resultType), [{
      build($_builder, $_state, resultType, ::mlir::LLVM::ConstantRangeAttr{});
    }]>
  ];
}

//===----------------------------------------------------------------------===//
// Wave-level primitives

// Pure `mbcnt.<mnemonic>` intrinsic taking two i32 operands.
class ROCDL_MbcntOp<string mnemonic> :
    ROCDL_IntrPure1Op<"mbcnt." # mnemonic>,
  Arguments<(ins I32:$in0, I32:$in1)> {
  let assemblyFormat = [{
    $in0 `,` $in1 attr-dict `:` `(` type($in0) `,` type($in1) `)` `->` type($res)
   }];
}

def ROCDL_MbcntLoOp : ROCDL_MbcntOp<"lo">;
def ROCDL_MbcntHiOp : ROCDL_MbcntOp<"hi">;

// Translated to llvm.amdgcn.ds.swizzle with operands (src, offset).
def ROCDL_DsSwizzleOp :
ROCDL_Op<"ds_swizzle">,
Results<(outs I32:$res)>,
Arguments<(ins I32:$src,
               I32:$offset)>
{
  string llvmBuilder = [{
    $res = createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_ds_swizzle, {$src, $offset});
  }];
  let assemblyFormat = [{
    $src `,` $offset  attr-dict `:` `(` type($src) `,` type($offset) `)` `->` type($res)
   }];
}

// Translated to llvm.amdgcn.ds.bpermute with operands (index, src).
def ROCDL_DsBpermuteOp :
ROCDL_Op<"ds_bpermute">,
Results<(outs I32:$res)>,
Arguments<(ins I32:$index,
               I32:$src)>
{
  string llvmBuilder = [{
    $res = createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_ds_bpermute, {$index, $src});
  }];
  let assemblyFormat = [{
    $index `,` $src  attr-dict `:` `(` type($index) `,` type($src) `)` `->` type($res)
   }];
}

// Translated to llvm.amdgcn.ballot, overloaded on the op's result type.
def ROCDL_BallotOp :
  ROCDL_Op<"ballot">,
  Results<(outs LLVM_Type:$res)>,
  Arguments<(ins I1:$pred)> {
  let summary = "Vote across thread group";

  let description = [{
      Ballot provides a bit mask containing the 1-bit predicate value from each lane.
      The nth bit of the result contains the 1 bit contributed by the nth warp lane.
  }];

  string llvmBuilder = [{
      $res = createIntrinsicCall(builder,
            llvm::Intrinsic::amdgcn_ballot, {$pred}, {$_resultType});
  }];

  let assemblyFormat = "$pred attr-dict `:` type($res)";
}

// Intrinsic overloaded on operand 0; AllTypesMatch ties `res` to `src0`.
def ROCDL_ReadlaneOp : ROCDL_IntrOp<"readlane", [], [0], [AllTypesMatch<["res", "src0"]>], 1>,
  Arguments<(ins LLVM_Type:$src0,
                 I32:$src1)> {
  let results = (outs LLVM_Type:$res);
  let summary = "Get the value in the specific lane.";

  let description = [{
    Get the value in lane `src1` from input `src0`.
  }];

  let assemblyFormat = [{
    $src0 `,` $src1  attr-dict `:` `(` type($src0) `,` type($src1) `)` `->` type($res)
  }];
}

//===----------------------------------------------------------------------===//
// Thread index and Block index

def ROCDL_ThreadIdXOp : ROCDL_SpecialIdRegisterOp<"workitem.id.x">;
def ROCDL_ThreadIdYOp : ROCDL_SpecialIdRegisterOp<"workitem.id.y">;
def ROCDL_ThreadIdZOp : ROCDL_SpecialIdRegisterOp<"workitem.id.z">;

def ROCDL_BlockIdXOp : ROCDL_SpecialIdRegisterOp<"workgroup.id.x">;
def ROCDL_BlockIdYOp : ROCDL_SpecialIdRegisterOp<"workgroup.id.y">;
def ROCDL_BlockIdZOp : ROCDL_SpecialIdRegisterOp<"workgroup.id.z">;

//===----------------------------------------------------------------------===//
// Thread range and Block range

// The integer argument selects the dimension passed to the device function
// (0, 1, 2 for x, y, z).
def ROCDL_BlockDimXOp : ROCDL_DimGetterFunctionOp<"workgroup.dim.x",
                                               "__ockl_get_local_size", 0>;

def ROCDL_BlockDimYOp : ROCDL_DimGetterFunctionOp<"workgroup.dim.y",
                                               "__ockl_get_local_size", 1>;

def ROCDL_BlockDimZOp : ROCDL_DimGetterFunctionOp<"workgroup.dim.z",
                                               "__ockl_get_local_size", 2>;

def ROCDL_GridDimXOp : ROCDL_DimGetterFunctionOp<"grid.dim.x",
                                               "__ockl_get_num_groups", 0>;

def ROCDL_GridDimYOp : ROCDL_DimGetterFunctionOp<"grid.dim.y",
                                               "__ockl_get_num_groups", 1>;

def ROCDL_GridDimZOp : ROCDL_DimGetterFunctionOp<"grid.dim.z",
                                               "__ockl_get_num_groups", 2>;

//===----------------------------------------------------------------------===//
// Synchronization primitives

// Emits the waitcnt instruction. The bitfield's semantics depend
// on the target chipset
def ROCDL_WaitcntOp : ROCDL_Op<"waitcnt">, Arguments<(ins I32Attr:$bitfield)> {
  string llvmBuilder = [{
    createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_waitcnt,
      {builder.getInt32($bitfield)});
  }];
  let assemblyFormat = "attr-dict $bitfield";
}

// Emits a bare llvm.amdgcn.s.barrier call with no fences.
def ROCDL_SBarrierOp : ROCDL_Op<"s.barrier"> {
  string llvmBuilder = [{
    createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_barrier);
  }];
  let assemblyFormat = "attr-dict";
}

// Emits a release fence, llvm.amdgcn.s.barrier, then an acquire fence. Both
// fences use the "workgroup" sync scope.
def ROCDL_BarrierOp : ROCDL_Op<"barrier"> {
  string llvmBuilder = [{
    llvm::LLVMContext &llvmContext = builder.getContext();
    builder.CreateFence(llvm::AtomicOrdering::Release,
                        llvmContext.getOrInsertSyncScopeID("workgroup"));
    createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_barrier);
    builder.CreateFence(llvm::AtomicOrdering::Acquire,
                        llvmContext.getOrInsertSyncScopeID("workgroup"));
  }];
  let assemblyFormat = "attr-dict";
}

// The `id` attribute is passed as the intrinsic's immediate argument 0.
def ROCDL_BarrierSignalOp : ROCDL_IntrOp<"s.barrier.signal", [], [], [], 0, 0, 0, [0], ["id"]>,
  Arguments<(ins I32Attr:$id)> {
  let results = (outs);
  let assemblyFormat = "$id attr-dict";
}

// Overrides the default builder to pass `id` as an i16 constant.
def ROCDL_BarrierWaitOp : ROCDL_IntrOp<"s.barrier.wait", [], [], [], 0, 0, 0, [0], ["id"]>,
  Arguments<(ins I16Attr:$id)> {
  let results = (outs);
  let assemblyFormat = "$id attr-dict";
  string llvmBuilder =
    "createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_barrier_wait,builder.getInt16(op.getId()));";
}

// The `id` attribute is passed as the intrinsic's immediate argument 0.
def ROCDL_WaitDscntOp: ROCDL_IntrOp<"s.wait.dscnt", [], [], [], 0, 0, 0, [0], ["id"]>,
  Arguments<(ins I16Attr:$id)> {
  let results = (outs);
  let assemblyFormat = "$id attr-dict";
}

// Passes `priority` to llvm.amdgcn.s.setprio as an i16 constant.
def ROCDL_SetPrioOp : ROCDL_IntrOp<"s.setprio", [], [], [], 0>,
  Arguments<(ins I16Attr:$priority)> {
  let results = (outs);
  let assemblyFormat = "$priority attr-dict";
  string llvmBuilder =
    "createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_setprio,builder.getInt16(op.getPriority()));";
}

// Passes `mask` to llvm.amdgcn.sched.barrier as an i32 constant.
def ROCDL_SchedBarrier : ROCDL_IntrOp<"sched.barrier", [], [], [], 0>,
  Arguments<(ins I32Attr:$mask)> {
  let results = (outs);
  let assemblyFormat = "$mask attr-dict";
  string llvmBuilder =
    "createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_sched_barrier,builder.getInt32(op.getMask()));";
}

// Passes (mask, size, groupId) to llvm.amdgcn.sched.group.barrier as i32
// constants, in that order.
def ROCDL_SchedGroupBarrier : ROCDL_IntrOp<"sched.group.barrier", [], [], [], 0>,
  Arguments<(ins I32Attr:$mask, I32Attr:$size, I32Attr:$groupId)> {
  let results = (outs);
  let assemblyFormat = "$mask `,` $size `,` $groupId attr-dict";
  string llvmBuilder = [{
    createIntrinsicCall(builder,
      llvm::Intrinsic::amdgcn_sched_group_barrier,
      {builder.getInt32(op.getMask()), builder.getInt32(op.getSize()), builder.getInt32(op.getGroupId())});
  }];
}

// Passes `variant` to llvm.amdgcn.iglp.opt as an i32 constant.
def ROCDL_IglpOpt : ROCDL_IntrOp<"iglp.opt", [], [], [], 0>,
  Arguments<(ins I32Attr:$variant)> {
  let results = (outs);
  let assemblyFormat = "$variant attr-dict";
  string llvmBuilder =
    "createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_iglp_opt,builder.getInt32(op.getVariant()));";
}

//===---------------------------------------------------------------------===//
// Xdlops intrinsics

// Single-result MFMA intrinsic with variadic operands and no overloaded
// types. The assembly prints the operands followed by a functional type.
class ROCDL_Mfma_IntrOp<string mnemonic, list<Trait> traits = []> :
  LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
                  "amdgcn_" # !subst(".","_", mnemonic),
                  [], [], traits, 1>,
  Arguments<(ins Variadic<LLVM_Type>:$args)> {
  let assemblyFormat =
    "$args attr-dict `:` functional-type($args, $res)";
}

//===---------------------------------------------------------------------===//
// MFMA intrinsics with overloaded operands
// Same shape as ROCDL_Mfma_IntrOp, but the caller lists which operand
// positions are overloaded.
class ROCDL_Mfma_OO_IntrOp<string mnemonic, list<int> overloadedOperands,
                           list<Trait> traits = []> :
  LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
                  "amdgcn_" # !subst(".","_", mnemonic),
                  [], overloadedOperands, traits, 1>,
  Arguments<(ins Variadic<LLVM_Type>:$args)> {
  let assemblyFormat =
    "$args attr-dict `:` functional-type($args, $res)";
}

// Available on all CDNA.
def ROCDL_mfma_f32_32x32x1f32 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x1f32">;
def ROCDL_mfma_f32_16x16x1f32 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x1f32">;
def ROCDL_mfma_f32_4x4x1f32 : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x1f32">;
def ROCDL_mfma_f32_32x32x2f32 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x2f32">;
def ROCDL_mfma_f32_16x16x4f32 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x4f32">;
def ROCDL_mfma_f32_32x32x4f16 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x4f16">;
def ROCDL_mfma_f32_16x16x4f16 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x4f16">;
def ROCDL_mfma_f32_4x4x4f16 : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x4f16">;
def ROCDL_mfma_f32_32x32x8f16 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x8f16">;
def ROCDL_mfma_f32_16x16x16f16 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x16f16">;
def ROCDL_mfma_i32_32x32x4i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x4i8">;
def ROCDL_mfma_i32_16x16x4i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x4i8">;
def ROCDL_mfma_i32_4x4x4i8 : ROCDL_Mfma_IntrOp<"mfma.i32.4x4x4i8">;
def ROCDL_mfma_i32_32x32x8i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x8i8">;
def ROCDL_mfma_i32_16x16x16i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x16i8">;
def ROCDL_mfma_f32_32x32x2bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x2bf16">;
def ROCDL_mfma_f32_16x16x2bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x2bf16">;
def ROCDL_mfma_f32_4x4x2bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x2bf16">;
def ROCDL_mfma_f32_32x32x4bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x4bf16">;
def ROCDL_mfma_f32_16x16x8bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x8bf16">;
// New in gfx90a.
// bf16 MFMA variants whose mnemonics carry the `.1k` suffix. Each def below
// instantiates ROCDL_Mfma_IntrOp with the given mnemonic.
def ROCDL_mfma_f32_32x32x4bf16_1k : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x4bf16.1k">;
def ROCDL_mfma_f32_16x16x4bf16_1k : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x4bf16.1k">;
def ROCDL_mfma_f32_4x4x4bf16_1k : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x4bf16.1k">;
def ROCDL_mfma_f32_32x32x8bf16_1k : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x8bf16.1k">;
def ROCDL_mfma_f32_16x16x16bf16_1k : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x16bf16.1k">;
// Note: in gfx940, unlike in gfx90a, the f64 xdlops use the "blgp" argument as a
// NEG bitfield. See IntrinsicsAMDGPU.td for more info.
def ROCDL_mfma_f64_16x16x4f64 : ROCDL_Mfma_IntrOp<"mfma.f64.16x16x4f64">;
def ROCDL_mfma_f64_4x4x4f64 : ROCDL_Mfma_IntrOp<"mfma.f64.4x4x4f64">;
// New in gfx940.
def ROCDL_mfma_i32_16x16x32_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x32.i8">;
def ROCDL_mfma_i32_32x32x16_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x16.i8">;
def ROCDL_mfma_f32_16x16x8_xf32 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x8.xf32">;
def ROCDL_mfma_f32_32x32x4_xf32 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x4.xf32">;
// 8-bit float variants: the two trailing mnemonic components name the bf8/fp8
// formats, one per component.
def ROCDL_mfma_f32_16x16x32_bf8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf8.bf8">;
def ROCDL_mfma_f32_16x16x32_bf8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf8.fp8">;
def ROCDL_mfma_f32_16x16x32_fp8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.fp8.bf8">;
def ROCDL_mfma_f32_16x16x32_fp8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.fp8.fp8">;
def ROCDL_mfma_f32_32x32x16_bf8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf8.bf8">;
def ROCDL_mfma_f32_32x32x16_bf8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf8.fp8">;
def ROCDL_mfma_f32_32x32x16_fp8_bf8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.fp8.bf8">;
def ROCDL_mfma_f32_32x32x16_fp8_fp8 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.fp8.fp8">;
// New in gfx950.
// Each def below instantiates ROCDL_Mfma_IntrOp with the given mnemonic.
def ROCDL_mfma_f32_16x16x32_bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf16">;
def ROCDL_mfma_i32_16x16x64_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x64.i8">;
def ROCDL_mfma_f32_16x16x32_f16 : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.f16">;
def ROCDL_mfma_f32_32x32x16_bf16 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf16">;
def ROCDL_mfma_i32_32x32x32_i8 : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x32.i8">;
def ROCDL_mfma_f32_32x32x16_f16 : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.f16">;
// The scaled variants use ROCDL_Mfma_OO_IntrOp with operand positions 0 and 1
// marked as overloaded.
def ROCDL_mfma_scale_f32_16x16x128_f8f6f4 : ROCDL_Mfma_OO_IntrOp<"mfma.scale.f32.16x16x128.f8f6f4", [0,1]>;
def ROCDL_mfma_scale_f32_32x32x64_f8f6f4 : ROCDL_Mfma_OO_IntrOp<"mfma.scale.f32.32x32x64.f8f6f4", [0,1]>;

// 2:4 Sparsity ops (GFX940)
// These reuse ROCDL_Mfma_IntrOp, so they have the same variadic-operand,
// single-result form as the dense MFMA ops.
def ROCDL_smfmac_f32_16x16x32_f16 : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x32.f16">;
def ROCDL_smfmac_f32_32x32x16_f16 : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x16.f16">;
def ROCDL_smfmac_f32_16x16x32_bf16 : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x32.bf16">;
def ROCDL_smfmac_f32_32x32x16_bf16 : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x16.bf16">;
def ROCDL_smfmac_i32_16x16x64_i8 : ROCDL_Mfma_IntrOp<"smfmac.i32.16x16x64.i8">;
def ROCDL_smfmac_i32_32x32x32_i8 : ROCDL_Mfma_IntrOp<"smfmac.i32.32x32x32.i8">;
def ROCDL_smfmac_f32_16x16x64_bf8_bf8 : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x64.bf8.bf8">;
def ROCDL_smfmac_f32_16x16x64_bf8_fp8 : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x64.bf8.fp8">;
def ROCDL_smfmac_f32_16x16x64_fp8_bf8 : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x64.fp8.bf8">;
def ROCDL_smfmac_f32_16x16x64_fp8_fp8 : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x64.fp8.fp8">;
def ROCDL_smfmac_f32_32x32x32_bf8_bf8 : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x32.bf8.bf8">;
def ROCDL_smfmac_f32_32x32x32_bf8_fp8 : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x32.bf8.fp8">;
def ROCDL_smfmac_f32_32x32x32_fp8_bf8 : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x32.fp8.bf8">;
def ROCDL_smfmac_f32_32x32x32_fp8_fp8 : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x32.fp8.fp8">;

//===---------------------------------------------------------------------===//
// WMMA intrinsics
// Single-result WMMA intrinsic with variadic operands. The result (index 0)
// is always overloaded; the caller lists the overloaded operand positions.
class ROCDL_Wmma_IntrOp<string mnemonic, list<int> overloadedOperands,
                        list<Trait> traits = []> :
  LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
                  "amdgcn_" # !subst(".","_", mnemonic),
                  [0], overloadedOperands, traits, 1>,
  Arguments<(ins Variadic<LLVM_Type>:$args)> {
  let assemblyFormat =
    "$args attr-dict `:` functional-type($args, $res)";
}

// Available from gfx11
def ROCDL_wmma_f32_16x16x16_f16 : ROCDL_Wmma_IntrOp<"wmma.f32.16x16x16.f16", [0]>;
def ROCDL_wmma_f32_16x16x16_bf16 : ROCDL_Wmma_IntrOp<"wmma.f32.16x16x16.bf16", [0]>;
def ROCDL_wmma_f16_16x16x16_f16 : ROCDL_Wmma_IntrOp<"wmma.f16.16x16x16.f16", [0]>;
def ROCDL_wmma_bf16_16x16x16_bf16 : ROCDL_Wmma_IntrOp<"wmma.bf16.16x16x16.bf16", [0]>;
def ROCDL_wmma_i32_16x16x16_iu8 : ROCDL_Wmma_IntrOp<"wmma.i32.16x16x16.iu8", [1]>;
def ROCDL_wmma_i32_16x16x16_iu4 : ROCDL_Wmma_IntrOp<"wmma.i32.16x16x16.iu4", [1]>;
// Available from gfx12
def ROCDL_wmma_f32_16x16x16_fp8 : ROCDL_Wmma_IntrOp<"wmma.f32.16x16x16.fp8_fp8", [1]>;
def ROCDL_wmma_f32_16x16x16_bf8 : ROCDL_Wmma_IntrOp<"wmma.f32.16x16x16.bf8_bf8", [1]>;

//===---------------------------------------------------------------------===//
// LDS transpose intrinsics (available in GFX950)

// Pointer type constraints for address space 1 (global) and 3 (LDS).
def ROCDLGlobalBuffer : LLVM_PointerInAddressSpace<1>;
def ROCDLBufferLDS : LLVM_PointerInAddressSpace<3>;

// Single-result intrinsic that reads through an LDS pointer (`MemRead`).
class ROCDL_LDS_Read_Tr_IntrOp<string mnemonic> :
  ROCDL_IntrOp<mnemonic, [1], [], [], 1>,
  Arguments<(ins Arg<ROCDLBufferLDS, "", [MemRead]>:$ptr)>{
  let assemblyFormat = "$ptr attr-dict `:` type($ptr) `->` type($res)";
}

def ROCDL_ds_read_tr4_b64 : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr4.b64">;
def ROCDL_ds_read_tr8_b64 : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr8.b64">;
def ROCDL_ds_read_tr6_b96 : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr6.b96">;
def ROCDL_ds_read_tr16_b64 : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr16.b64">;

//===---------------------------------------------------------------------===//
// Global load to LDS intrinsic (available in GFX950)

// Result-less intrinsic that reads through `globalPtr` and writes through
// `ldsPtr`, as declared by the operand memory effects.
def ROCDL_GlobalLoadLDSOp :
  ROCDL_IntrOp<"global.load.lds", [], [], [], 0>,
  Arguments<(ins Arg<ROCDLGlobalBuffer, "", [MemRead]>:$globalPtr,
                 Arg<ROCDLBufferLDS, "", [MemWrite]>:$ldsPtr,
                 I32:$size,
                 I32:$offset,
                 I32:$aux)> {
  let assemblyFormat = "operands attr-dict";
}

//===---------------------------------------------------------------------===//
// Operations on raw buffer resources (stride of 0, bounds checks either off or in
// raw buffer mode).
//===---------------------------------------------------------------------===//

// Buffer resources are pointers in address space 8.
def ROCDLBufferRsrc : LLVM_PointerInAddressSpace<8>;

// Pure intrinsic, overloaded on the base pointer type, that produces a
// buffer resource from a base pointer, stride, record count and flags.
def ROCDL_MakeBufferRsrcOp :
  ROCDL_IntrOp<"make.buffer.rsrc", [], [0], [Pure], 1>,
  Arguments<(ins LLVM_AnyPointer:$base,
                 I16:$stride,
                 I32:$numRecords,
                 I32:$flags)> {
  let results = (outs ROCDLBufferRsrc:$res);
  let assemblyFormat = "operands attr-dict `:` type($base) `to` type($res)";
}

// Load through a buffer resource; overloaded on the result type and carrying
// alias-analysis attributes (requiresAliasAnalysis = 1).
def ROCDL_RawPtrBufferLoadOp :
  ROCDL_IntrOp<"raw.ptr.buffer.load", [0], [], [], 1, 0, 1> {
  dag args = (ins Arg<ROCDLBufferRsrc, "", [MemRead]>:$rsrc,
                  I32:$offset,
                  I32:$soffset,
                  I32:$aux);
  let arguments = !con(args, aliasAttrs);
  let assemblyFormat = "operands attr-dict `:` type($res)";
  // The accessed pointer is the buffer resource operand, matching the store
  // and atomic ops below. This previously returned the loaded result, which
  // is not an operand of the op.
  let extraClassDefinition = [{
    ::llvm::SmallVector<::mlir::Value> $cppClass::getAccessedOperands() {
      return {getRsrc()};
    }
  }];
}

// Store through a buffer resource; overloaded on the stored data type
// (operand 0) and carrying alias-analysis attributes.
def ROCDL_RawPtrBufferStoreOp :
  ROCDL_IntrOp<"raw.ptr.buffer.store", [], [0], [], 0, 0, 1> {
  dag args = (ins LLVM_Type:$vdata,
                  Arg<ROCDLBufferRsrc, "", [MemWrite]>:$rsrc,
                  I32:$offset,
                  I32:$soffset,
                  I32:$aux);
  let arguments = !con(args, aliasAttrs);
  let assemblyFormat = "operands attr-dict `:` type($vdata)";
  let extraClassDefinition = [{
    ::llvm::SmallVector<::mlir::Value> $cppClass::getAccessedOperands() {
      return {getRsrc()};
    }
  }];

}

// Compare-and-swap through a buffer resource. `res`, `src` and `cmp` must
// all have the same type; the resource is both read and written.
def ROCDL_RawPtrBufferAtomicCmpSwap :
  ROCDL_IntrOp<"raw.ptr.buffer.atomic.cmpswap",
    [0], [], [AllTypesMatch<["res", "src", "cmp"]>], 1, 0, 1> {
  dag args = (ins LLVM_Type:$src,
                  LLVM_Type:$cmp,
                  Arg<ROCDLBufferRsrc, "", [MemRead, MemWrite]>:$rsrc,
                  I32:$offset,
                  I32:$soffset,
                  I32:$aux);
  let arguments = !con(args, aliasAttrs);
  let assemblyFormat = "operands attr-dict `:` type($res)";
  let extraClassDefinition = [{
    ::llvm::SmallVector<::mlir::Value> $cppClass::getAccessedOperands() {
      return {getRsrc()};
    }
  }];
}

// Result-less `raw.ptr.buffer.atomic.<op>` intrinsic, overloaded on the data
// type (operand 0). The resource is both read and written.
class ROCDL_RawPtrBufferAtomicNoRet<string op> :
  ROCDL_IntrOp<"raw.ptr.buffer.atomic." # op, [], [0], [], 0, 0, 1> {
  dag args = (ins LLVM_Type:$vdata,
                  Arg<ROCDLBufferRsrc, "", [MemRead, MemWrite]>:$rsrc,
                  I32:$offset,
                  I32:$soffset,
                  I32:$aux);
  let arguments = !con(args, aliasAttrs);
  let assemblyFormat = "operands attr-dict `:` type($vdata)";
  let extraClassDefinition = [{
    ::llvm::SmallVector<::mlir::Value> $cppClass::getAccessedOperands() {
      return {getRsrc()};
    }
  }];
}

def ROCDL_RawPtrBufferAtomicFmaxOp : ROCDL_RawPtrBufferAtomicNoRet<"fmax">;
def ROCDL_RawPtrBufferAtomicSmaxOp : ROCDL_RawPtrBufferAtomicNoRet<"smax">;
def ROCDL_RawPtrBufferAtomicUminOp : ROCDL_RawPtrBufferAtomicNoRet<"umin">;
// Note: not supported on all architectures
def ROCDL_RawPtrBufferAtomicFaddOp : ROCDL_RawPtrBufferAtomicNoRet<"fadd">;

//===---------------------------------------------------------------------===//
// Raw buffer load/store intrinsics

// Translated to llvm.amdgcn.raw.buffer.load, overloaded on the result type.
// The assembly format is hand-written (hasCustomAssemblyFormat).
def ROCDL_RawBufferLoadOp :
  ROCDL_Op<"raw.buffer.load">,
  Results<(outs LLVM_Type:$res)>,
  Arguments<(ins LLVM_Type:$rsrc,
                 LLVM_Type:$offset,
                 LLVM_Type:$soffset,
                 LLVM_Type:$aux)> {
  string llvmBuilder = [{
      $res = createIntrinsicCall(builder,
          llvm::Intrinsic::amdgcn_raw_buffer_load, {$rsrc, $offset,
          $soffset, $aux}, {$_resultType});
  }];
  let hasCustomAssemblyFormat = 1;
}

// Translated to llvm.amdgcn.raw.buffer.store, overloaded on the converted
// type of `vdata`.
def ROCDL_RawBufferStoreOp :
  ROCDL_Op<"raw.buffer.store">,
  Arguments<(ins LLVM_Type:$vdata,
                 LLVM_Type:$rsrc,
                 LLVM_Type:$offset,
                 LLVM_Type:$soffset,
                 LLVM_Type:$aux)>{
  string llvmBuilder = [{
    auto vdataType = moduleTranslation.convertType(op.getVdata().getType());
    createIntrinsicCall(builder,
          llvm::Intrinsic::amdgcn_raw_buffer_store, {$vdata, $rsrc,
          $offset, $soffset, $aux}, {vdataType});
  }];
  let hasCustomAssemblyFormat = 1;
}

// Translated to llvm.amdgcn.raw.buffer.atomic.cmpswap, overloaded on the
// result type. `res`, `src` and `cmp` must all have the same type.
def ROCDL_RawBufferAtomicCmpSwap :
  ROCDL_Op<"raw.buffer.atomic.cmpswap", [AllTypesMatch<["res", "src", "cmp"]>]>,
  Results<(outs LLVM_Type:$res)>,
  Arguments<(ins LLVM_Type:$src,
                 LLVM_Type:$cmp,
                 LLVM_Type:$rsrc,
                 I32:$offset,
                 I32:$soffset,
                 I32:$aux)>{
  string llvmBuilder = [{
      $res = createIntrinsicCall(builder,
          llvm::Intrinsic::amdgcn_raw_buffer_atomic_cmpswap, {$src, $cmp, $rsrc,
            $offset, $soffset, $aux}, {$_resultType});
  }];
  let assemblyFormat = [{
    attr-dict `(` operands `)` `:` type($res) `,` type($rsrc)
  }];
}

//===---------------------------------------------------------------------===//
// MI-100 and MI-200 buffer atomic floating point add intrinsic

// Translated to llvm.amdgcn.raw.buffer.atomic.fadd, overloaded on the
// converted type of `vdata`.
def ROCDL_RawBufferAtomicFAddOp :
  ROCDL_Op<"raw.buffer.atomic.fadd">,
  Arguments<(ins LLVM_Type:$vdata,
                 LLVM_Type:$rsrc,
                 LLVM_Type:$offset,
                 LLVM_Type:$soffset,
                 LLVM_Type:$aux)>{
  string llvmBuilder = [{
      auto vdataType = moduleTranslation.convertType(op.getVdata().getType());
      createIntrinsicCall(builder,
          llvm::Intrinsic::amdgcn_raw_buffer_atomic_fadd, {$vdata, $rsrc,
            $offset, $soffset, $aux}, {vdataType});
  }];
  let hasCustomAssemblyFormat = 1;
}

//===---------------------------------------------------------------------===//
// Buffer atomic floating point max intrinsic. GFX9 does not support fp32.

// Translated to llvm.amdgcn.raw.buffer.atomic.fmax. The intrinsic is
// overloaded on the LLVM type that `vdata`'s MLIR type converts to, and its
// return value is not bound to an op result. The assembly format is
// hand-written (hasCustomAssemblyFormat).
def ROCDL_RawBufferAtomicFMaxOp :
  ROCDL_Op<"raw.buffer.atomic.fmax">,
  Arguments<(ins LLVM_Type:$vdata,
                 LLVM_Type:$rsrc,
                 LLVM_Type:$offset,
                 LLVM_Type:$soffset,
                 LLVM_Type:$aux)>{
  string llvmBuilder = [{
      auto vdataType = moduleTranslation.convertType(op.getVdata().getType());
      createIntrinsicCall(builder,
          llvm::Intrinsic::amdgcn_raw_buffer_atomic_fmax, {$vdata, $rsrc,
            $offset, $soffset, $aux}, {vdataType});
  }];
  let hasCustomAssemblyFormat = 1;
}

//===---------------------------------------------------------------------===//
// Buffer atomic signed integer max intrinsic.

// Translated to llvm.amdgcn.raw.buffer.atomic.smax, with the same operand
// list and overload handling as the fmax op above.
def ROCDL_RawBufferAtomicSMaxOp :
  ROCDL_Op<"raw.buffer.atomic.smax">,
  Arguments<(ins LLVM_Type:$vdata,
                 LLVM_Type:$rsrc,
                 LLVM_Type:$offset,
                 LLVM_Type:$soffset,
                 LLVM_Type:$aux)>{
  string llvmBuilder = [{
      auto vdataType = moduleTranslation.convertType(op.getVdata().getType());
      createIntrinsicCall(builder,
          llvm::Intrinsic::amdgcn_raw_buffer_atomic_smax, {$vdata, $rsrc,
            $offset, $soffset, $aux}, {vdataType});
  }];
  let hasCustomAssemblyFormat = 1;
}

//===---------------------------------------------------------------------===//
// Buffer atomic unsigned integer min intrinsic.

// Translated to llvm.amdgcn.raw.buffer.atomic.umin, overloaded on the
// converted type of `vdata`. The assembly format is hand-written
// (hasCustomAssemblyFormat).
def ROCDL_RawBufferAtomicUMinOp :
  ROCDL_Op<"raw.buffer.atomic.umin">,
  Arguments<(ins LLVM_Type:$vdata,
                 LLVM_Type:$rsrc,
                 LLVM_Type:$offset,
                 LLVM_Type:$soffset,
                 LLVM_Type:$aux)>{
  string llvmBuilder = [{
      auto vdataType = moduleTranslation.convertType(op.getVdata().getType());
      createIntrinsicCall(builder,
          llvm::Intrinsic::amdgcn_raw_buffer_atomic_umin, {$vdata, $rsrc,
            $offset, $soffset, $aux}, {vdataType});
  }];
  let hasCustomAssemblyFormat = 1;
}

// DPP Update intrinsic
// `res`, `src` and `old` must share one type. The custom builder passes the
// four control attributes to llvm.amdgcn.update.dpp as constants (three i32
// and one i1), overloading the intrinsic on the converted type of `src`.
def ROCDL_DPPUpdateOp : ROCDL_IntrOp<"update.dpp", [], [0],
    [AllTypesMatch<["res", "src", "old"]>], 1>,
  Arguments<(ins LLVM_Type:$old, LLVM_Type:$src, I32Attr:$dppCtrl, I32Attr:$rowMask,
      I32Attr:$bankMask, I1Attr:$boundCtrl)> {
  let results = (outs LLVM_Type:$res);
  let assemblyFormat = [{
    attr-dict $old `,` $src `with` $dppCtrl `,` $rowMask `,` $bankMask `,` $boundCtrl `:` type($src)
  }];
  string llvmBuilder = [{
      auto vdataType = moduleTranslation.convertType(op.getSrc().getType());
      llvm::Value *args[] = {
        moduleTranslation.lookupValue(op.getOld()),
        moduleTranslation.lookupValue(op.getSrc()),
        builder.getInt32(op.getDppCtrl()),
        builder.getInt32(op.getRowMask()),
        builder.getInt32(op.getBankMask()),
        builder.getInt1(op.getBoundCtrl())
      };
      $res = createIntrinsicCall(builder,
        llvm::Intrinsic::amdgcn_update_dpp, args, {vdataType});
  }];
}

//===---------------------------------------------------------------------===//
// 16-bit float intrinsics
//===---------------------------------------------------------------------===//
def ROCDL_CvtPkRtz:
    ROCDL_IntrOp<"cvt.pkrtz", [], [], [Pure], 1>,
    Arguments<(ins F32:$srcA, F32:$srcB)> {
  let summary = "Convert two f32 input into a vector<2xf16>";
  let description = [{
    Convert two f32 values into a packed vector<2xf16>.
  }];
  let assemblyFormat = [{
    attr-dict $srcA `,` $srcB `:` type($res)
  }];
}

//===---------------------------------------------------------------------===//
// 8-bit float intrinsics
//===---------------------------------------------------------------------===//
// In the two unpacking ops below, `byteSel` selects a byte of `srcA`; the
// descriptions previously said "bit".
def ROCDL_CvtF32Bf8Op :
    ROCDL_IntrOp<"cvt.f32.bf8", [], [], [Pure], 1>,
    Arguments<(ins I32:$srcA, I32:$byteSel)> {
  let summary = "Convert bf8 to f32";
  let description = [{
    Convert 8-bit bf8 value from the `byteSel`th byte of `srcA` to fp32.
  }];
  let assemblyFormat = [{
    attr-dict $srcA `[` $byteSel `]` `:` type($res)
  }];
}

def ROCDL_CvtF32Fp8Op :
    ROCDL_IntrOp<"cvt.f32.fp8", [], [], [Pure], 1>,
    Arguments<(ins I32:$srcA, I32:$byteSel)> {
  let summary = "Convert fp8 to f32";
  let description = [{
    Convert 8-bit fp8 value from the `byteSel`th byte of `srcA` to fp32.
  }];
  let assemblyFormat = [{
    attr-dict $srcA `[` $byteSel `]` `:` type($res)
  }];
}

def ROCDL_CvtPkBf8F32Op :
    ROCDL_IntrOp<"cvt.pk.bf8.f32", [], [], [Pure], 1>,
    Arguments<(ins F32:$srcA, F32:$srcB, I32:$old, I1:$wordSel)> {
  let summary = "Convert two f32's to bf8";
  let description = [{
    Convert `srcA` and `srcB` to bf8 and store into the low/high word of
    `old`, preserving the other word.
  }];
  let assemblyFormat = [{
    attr-dict $srcA `,` $srcB `->` $old `[` $wordSel `]` `:` type($res)
  }];
}

def ROCDL_CvtPkFp8F32Op :
    ROCDL_IntrOp<"cvt.pk.fp8.f32", [], [], [Pure], 1>,
    Arguments<(ins F32:$srcA, F32:$srcB, I32:$old, I1:$wordSel)> {
  let summary = "Convert two f32's to fp8";
  let description = [{
    Convert `srcA` and `srcB` to fp8 and store into the low/high word of
    `old`, preserving the other word.
  }];
  let assemblyFormat = [{
    attr-dict $srcA `,` $srcB `->` $old `[` $wordSel `]` `:` type($res)
  }];
}

def ROCDL_CvtSrBf8F32Op :
    ROCDL_IntrOp<"cvt.sr.bf8.f32", [], [], [Pure], 1>,
    Arguments<(ins F32:$srcA, I32:$srcB, I32:$old, I32:$byteSel)> {
  let summary = "Convert f32 to bf8, stochastic rounding";
  let description = [{
    Convert `srcA` to bf8, adding the rounding factor from `srcB`,
    and store into the `byteSel`th byte of `old`, preserving the others.
  }];
  let assemblyFormat = [{
    attr-dict $srcA `,` $srcB `->` $old `[` $byteSel `]` `:` type($res)
  }];
}

def ROCDL_CvtSrFp8F32Op :
    ROCDL_IntrOp<"cvt.sr.fp8.f32", [], [], [Pure], 1>,
    Arguments<(ins F32:$srcA, I32:$srcB, I32:$old, I32:$byteSel)> {
  let summary = "Convert f32 to fp8, stochastic rounding";
  let description = [{
    Convert `srcA` to fp8, adding the rounding factor from `srcB`,
    and store into the `byteSel`th byte of `old`, preserving the others.
  }];
  let assemblyFormat = [{
    attr-dict $srcA `,` $srcB `->` $old `[` $byteSel `]` `:` type($res)
  }];
}

//===----------------------------------------------------------------------===//
// ROCDL target attribute.
//===----------------------------------------------------------------------===//

def ROCDL_TargetAttr :
    ROCDL_Attr<"ROCDLTarget", "target"> {
  let description = [{
    ROCDL target attribute for controlling compilation of AMDGPU targets. All
    parameters decay into default values if not present.

    Examples:

    1. Target with default values.
    ```
      gpu.module @mymodule [#rocdl.target] attributes {...} {
        ...
      }
    ```

    2. Target with `gfx90a` chip and fast math.
    ```
      gpu.module @mymodule [#rocdl.target<chip = "gfx90a", flags = {fast, no_wave64}>] {
        ...
841 } 842 ``` 843 }]; 844 let parameters = (ins 845 DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O, 846 StringRefParameter<"Target triple.", "\"amdgcn-amd-amdhsa\"">:$triple, 847 StringRefParameter<"Target chip.", "\"gfx900\"">:$chip, 848 StringRefParameter<"Target chip features.", "\"\"">:$features, 849 // Also update the default builder below and rocdl-attach-target in 850 // Dialect/GPU/Transforms/Passes.td . 851 StringRefParameter<"ABI version.", "\"500\"">:$abi, 852 OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags, 853 OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link 854 ); 855 let assemblyFormat = [{ 856 (`<` struct($O, $triple, $chip, $features, $abi, $flags, $link)^ `>`)? 857 }]; 858 let builders = [ 859 AttrBuilder<(ins CArg<"int", "2">:$optLevel, 860 CArg<"StringRef", "\"amdgcn-amd-amdhsa\"">:$triple, 861 CArg<"StringRef", "\"gfx900\"">:$chip, 862 CArg<"StringRef", "\"\"">:$features, 863 CArg<"StringRef", "\"500\"">:$abiVersion, 864 CArg<"DictionaryAttr", "nullptr">:$targetFlags, 865 CArg<"ArrayAttr", "nullptr">:$linkFiles), [{ 866 return Base::get($_ctxt, optLevel, triple, chip, features, abiVersion, 867 targetFlags, linkFiles); 868 }]> 869 ]; 870 let skipDefaultBuilders = 1; 871 let genVerifyDecl = 1; 872 let extraClassDeclaration = [{ 873 bool hasFlag(StringRef flag) const; 874 bool hasWave64() const; 875 bool hasFastMath() const; 876 bool hasDaz() const; 877 bool hasFiniteOnly() const; 878 bool hasUnsafeMath() const; 879 bool hasCorrectSqrt() const; 880 }]; 881 let extraClassDefinition = [{ 882 bool $cppClass::hasFlag(StringRef flag) const { 883 if (DictionaryAttr flags = getFlags()) 884 return flags.get(flag) != nullptr; 885 return false; 886 } 887 bool $cppClass::hasWave64() const { 888 return hasFlag("wave64") || !hasFlag("no_wave64"); 889 } 890 bool $cppClass::hasFastMath() const { 891 return hasFlag("fast"); 892 } 893 bool $cppClass::hasDaz() const { 894 return hasFlag("daz"); 
895 } 896 bool $cppClass::hasFiniteOnly() const { 897 return hasFlag("finite_only"); 898 } 899 bool $cppClass::hasUnsafeMath() const { 900 return hasFlag("unsafe_math"); 901 } 902 bool $cppClass::hasCorrectSqrt() const { 903 return !hasFlag("unsafe_sqrt"); 904 } 905 }]; 906} 907#endif // ROCDLIR_OPS 908