xref: /llvm-project/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td (revision 43a50deb63453cd3c800f097514d500536f9d436)
1//===-- ROCDLOps.td - ROCDL IR dialect op definition file --*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This is the ROCDL IR operation definition file.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef ROCDLIR_OPS
14#define ROCDLIR_OPS
15
16include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td"
17include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
18include "mlir/Interfaces/SideEffectInterfaces.td"
19
20//===----------------------------------------------------------------------===//
21// ROCDL dialect definitions
22//===----------------------------------------------------------------------===//
23
// The `rocdl` dialect. Its generated C++ lives in `::mlir::ROCDL` and it
// depends on the LLVM dialect.
def ROCDL_Dialect : Dialect {
  let name = "rocdl";
  let cppNamespace = "::mlir::ROCDL";
  let dependentDialects = ["LLVM::LLVMDialect"];

  // Generation switches for the dialect class.
  let hasOperationAttrVerify = 1;
  let useDefaultAttributePrinterParser = 1;

  // Discardable attributes declared by the dialect (each is spelled
  // `rocdl.<name>` when attached to an operation).
  let discardableAttrs = (ins
    "::mlir::UnitAttr":$kernel,
    "::mlir::DenseI32ArrayAttr":$reqd_work_group_size,
    "::mlir::StringAttr":$flat_work_group_size,
    "::mlir::IntegerAttr":$max_flat_work_group_size,
    "::mlir::IntegerAttr":$waves_per_eu,
    "::mlir::BoolAttr":$unsafe_fp_atomics,
    // Correspond to LLVM metadata of the same name
    "::mlir::UnitAttr":$last_use,
    "::mlir::UnitAttr":$no_remote_memory,
    "::mlir::UnitAttr":$no_fine_grained_memory,
    "::mlir::UnitAttr":$ignore_denormal_mode
  );

  // Attribute-name accessors and address-space constants injected verbatim
  // into the generated dialect class.
  let extraClassDeclaration = [{
    /// Get the name of the attribute used to annotate external kernel
    /// functions.
    static StringRef getKernelFuncAttrName() { return "rocdl.kernel"; }
    static constexpr ::llvm::StringLiteral getFlatWorkGroupSizeAttrName() {
      return ::llvm::StringLiteral("rocdl.flat_work_group_size");
    }
    static constexpr ::llvm::StringLiteral getReqdWorkGroupSizeAttrName() {
      return ::llvm::StringLiteral("rocdl.reqd_work_group_size");
    }
    /// MLIR's gpu-related infrastructure effectively assume uniform workgroup
    /// sizes, so this attribute defaults to "true" on `rocdl.kernel` functions.
    /// It is provided here to allow overriding this assumption.
    static constexpr ::llvm::StringLiteral getUniformWorkGroupSizeAttrName() {
      return ::llvm::StringLiteral("rocdl.uniform_work_group_size");
    }

    /// The address space value that represents global memory.
    static constexpr unsigned kGlobalMemoryAddressSpace = 1;
    /// The address space value that represents shared memory.
    static constexpr unsigned kSharedMemoryAddressSpace = 3;
    /// The address space value that represents private memory.
    static constexpr unsigned kPrivateMemoryAddressSpace = 5;
  }];
}
71
72//===----------------------------------------------------------------------===//
73// ROCDL attribute definitions
74//===----------------------------------------------------------------------===//
75
// Base class for attributes of the ROCDL dialect. `attrMnemonic` is used as
// the attribute's `mnemonic`.
class ROCDL_Attr<string attrName, string attrMnemonic,
                 list<Trait> traits = []>
  : AttrDef<ROCDL_Dialect, attrName, traits> {
  let mnemonic = attrMnemonic;
}
80
81
82//===----------------------------------------------------------------------===//
83// ROCDL op definitions
84//===----------------------------------------------------------------------===//
85
// Base class for ROCDL operations: an LLVM_OpBase bound to the ROCDL dialect.
class ROCDL_Op<string mnemonic, list<Trait> traits = []>
  : LLVM_OpBase<ROCDL_Dialect, mnemonic, traits>;
89
// Intrinsic op carrying the `Pure` trait with `numResults` = 1 and no
// overloaded results or operands. The intrinsic enum name is "amdgcn_"
// followed by the mnemonic with every "." replaced by "_".
class ROCDL_IntrPure1Op<string mnemonic>
  : LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
                    "amdgcn_" # !subst(".", "_", mnemonic),
                    [], [], [Pure], 1>;
93
// General base class for ROCDL intrinsic ops. The intrinsic enum name is
// "amdgcn_" followed by the mnemonic with every "." replaced by "_"; all other
// parameters are forwarded to LLVM_IntrOpBase. The two literal `0` arguments
// fill the LLVM_IntrOpBase parameters that sit between
// `requiresAliasAnalysis` and `immArgPositions`.
class ROCDL_IntrOp<string mnemonic,
                   list<int> overloadedResults,
                   list<int> overloadedOperands,
                   list<Trait> traits,
                   int numResults,
                   int requiresAccessGroup = 0,
                   int requiresAliasAnalysis = 0,
                   list<int> immArgPositions = [],
                   list<string> immArgAttrNames = []>
  : LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
                    "amdgcn_" # !subst(".", "_", mnemonic),
                    overloadedResults, overloadedOperands, traits, numResults,
                    requiresAccessGroup, requiresAliasAnalysis, 0, 0,
                    immArgPositions, immArgAttrNames>;
102
103//===----------------------------------------------------------------------===//
104// ROCDL special register op definitions
105//===----------------------------------------------------------------------===//
106
// Pure, single-result intrinsic op with an optional `range` attribute
// (an LLVM_ConstantRangeAttr). The range handling snippets are spliced
// between the base builder text and its coda for both translation directions.
class ROCDL_SpecialIdRegisterOp<string mnemonic>
  : ROCDL_IntrPure1Op<mnemonic>,
    Arguments<(ins OptionalAttr<LLVM_ConstantRangeAttr>:$range)> {
  let assemblyFormat = "(`range` $range^)? attr-dict `:` type($res)";

  string llvmBuilder =
      baseLlvmBuilder # setRangeRetAttrCode # baseLlvmBuilderCoda;
  string mlirBuilder =
      baseMlirBuilder # importRangeRetAttrCode # baseMlirBuilderCoda;

  // Temporary builder until Nvidia ops also support range attributes: builds
  // the op from a result type only, passing a default-constructed range.
  let builders = [
    OpBuilder<(ins "Type":$resultType), [{
      build($_builder, $_state, resultType, ::mlir::LLVM::ConstantRangeAttr{});
    }]>
  ];
}
122
// Pure op with one result and an optional `range` attribute. It is translated
// through `createDimGetterFunctionCall`, which receives the name of
// `device_function` and the integer `parameter`.
class ROCDL_DimGetterFunctionOp<string mnemonic, string device_function,
                                int parameter, list<Trait> traits = []>
  : ROCDL_Op<mnemonic, !listconcat(traits, [Pure])>,
    Results<(outs LLVM_Type:$res)>,
    Arguments<(ins OptionalAttr<LLVM_ConstantRangeAttr>:$range)> {
  let assemblyFormat = "(`range` $range^)? attr-dict `:` type($res)";

  string llvmBuilder = "$res = createDimGetterFunctionCall(builder, op, \""
  # device_function # "\", " # parameter # ");";

  // Temporary builder until Nvidia ops also support range attributes: builds
  // the op from a result type only, passing a default-constructed range.
  let builders = [
    OpBuilder<(ins "Type":$resultType), [{
      build($_builder, $_state, resultType, ::mlir::LLVM::ConstantRangeAttr{});
    }]>
  ];
}
138
139//===----------------------------------------------------------------------===//
140// Wave-level primitives
141
// Base class for the `mbcnt.<mnemonic>` intrinsic ops: pure, one result,
// two i32 operands.
class ROCDL_MbcntOp<string mnemonic>
  : ROCDL_IntrPure1Op<"mbcnt." # mnemonic>,
    Arguments<(ins I32:$in0, I32:$in1)> {
  let assemblyFormat = [{
    $in0 `,` $in1  attr-dict `:` `(` type($in0) `,` type($in1) `)` `->` type($res)
   }];
}
149
// `rocdl.mbcnt.lo` / `rocdl.mbcnt.hi`.
def ROCDL_MbcntLoOp
  : ROCDL_MbcntOp<"lo">;
def ROCDL_MbcntHiOp
  : ROCDL_MbcntOp<"hi">;
152
// `rocdl.ds_swizzle`: translated to a call of the `amdgcn_ds_swizzle`
// intrinsic with `$src` and `$offset` as arguments.
def ROCDL_DsSwizzleOp
  : ROCDL_Op<"ds_swizzle">,
    Results<(outs I32:$res)>,
    Arguments<(ins I32:$src, I32:$offset)> {
  let assemblyFormat = [{
    $src `,` $offset  attr-dict `:` `(` type($src) `,` type($offset) `)` `->` type($res)
   }];
  string llvmBuilder = [{
    $res = createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_ds_swizzle, {$src, $offset});
  }];
}
166
// `rocdl.ds_bpermute`: translated to a call of the `amdgcn_ds_bpermute`
// intrinsic with `$index` and `$src` as arguments.
def ROCDL_DsBpermuteOp
  : ROCDL_Op<"ds_bpermute">,
    Results<(outs I32:$res)>,
    Arguments<(ins I32:$index, I32:$src)> {
  let assemblyFormat = [{
    $index `,` $src  attr-dict `:` `(` type($index) `,` type($src) `)` `->` type($res)
   }];
  string llvmBuilder = [{
    $res = createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_ds_bpermute, {$index, $src});
  }];
}
180
// `rocdl.ballot`: translated to `amdgcn_ballot`, overloaded on the converted
// result type.
def ROCDL_BallotOp
  : ROCDL_Op<"ballot">,
    Results<(outs LLVM_Type:$res)>,
    Arguments<(ins I1:$pred)> {
  let summary = "Vote across thread group";
  let description = [{
      Ballot provides a bit mask containing the 1-bit predicate value from each lane.
      The nth bit of the result contains the 1 bit contributed by the nth warp lane.
  }];

  let assemblyFormat = "$pred attr-dict `:` type($res)";

  string llvmBuilder = [{
      $res = createIntrinsicCall(builder,
            llvm::Intrinsic::amdgcn_ballot, {$pred}, {$_resultType});
  }];
}
199
// `rocdl.readlane`: intrinsic op overloaded on operand 0; the result type must
// match the type of `$src0`.
def ROCDL_ReadlaneOp
  : ROCDL_IntrOp<"readlane", [], [0],
                 [AllTypesMatch<["res", "src0"]>], 1>,
    Arguments<(ins LLVM_Type:$src0, I32:$src1)> {
  let summary = "Get the value in the specific lane.";
  let description = [{
    Get the value in lane `src1` from input `src0`.
  }];

  let results = (outs LLVM_Type:$res);

  let assemblyFormat = [{
    $src0 `,` $src1  attr-dict `:` `(` type($src0) `,` type($src1) `)` `->` type($res)
   }];
}
214
215//===----------------------------------------------------------------------===//
216// Thread index and Block index
217
// Thread index: the `workitem.id.{x,y,z}` intrinsics.
def ROCDL_ThreadIdXOp
  : ROCDL_SpecialIdRegisterOp<"workitem.id.x">;
def ROCDL_ThreadIdYOp
  : ROCDL_SpecialIdRegisterOp<"workitem.id.y">;
def ROCDL_ThreadIdZOp
  : ROCDL_SpecialIdRegisterOp<"workitem.id.z">;

// Block index: the `workgroup.id.{x,y,z}` intrinsics.
def ROCDL_BlockIdXOp
  : ROCDL_SpecialIdRegisterOp<"workgroup.id.x">;
def ROCDL_BlockIdYOp
  : ROCDL_SpecialIdRegisterOp<"workgroup.id.y">;
def ROCDL_BlockIdZOp
  : ROCDL_SpecialIdRegisterOp<"workgroup.id.z">;
225
226//===----------------------------------------------------------------------===//
227// Thread range and Block range
228
// Block dimensions: calls to `__ockl_get_local_size` with parameter 0, 1, 2.
def ROCDL_BlockDimXOp
  : ROCDL_DimGetterFunctionOp<"workgroup.dim.x", "__ockl_get_local_size", 0>;

def ROCDL_BlockDimYOp
  : ROCDL_DimGetterFunctionOp<"workgroup.dim.y", "__ockl_get_local_size", 1>;

def ROCDL_BlockDimZOp
  : ROCDL_DimGetterFunctionOp<"workgroup.dim.z", "__ockl_get_local_size", 2>;

// Grid dimensions: calls to `__ockl_get_num_groups` with parameter 0, 1, 2.
def ROCDL_GridDimXOp
  : ROCDL_DimGetterFunctionOp<"grid.dim.x", "__ockl_get_num_groups", 0>;

def ROCDL_GridDimYOp
  : ROCDL_DimGetterFunctionOp<"grid.dim.y", "__ockl_get_num_groups", 1>;

def ROCDL_GridDimZOp
  : ROCDL_DimGetterFunctionOp<"grid.dim.z", "__ockl_get_num_groups", 2>;
246
247//===----------------------------------------------------------------------===//
248// Synchronization primitives
249
// Emits the waitcnt instruction. The bitfield's semantics depend
// on the target chipset.
def ROCDL_WaitcntOp
  : ROCDL_Op<"waitcnt">,
    Arguments<(ins I32Attr:$bitfield)> {
  let assemblyFormat = "attr-dict $bitfield";
  // The attribute value is passed to `amdgcn_s_waitcnt` as an i32 constant.
  string llvmBuilder = [{
    createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_waitcnt,
      {builder.getInt32($bitfield)});
  }];
}
259
// `rocdl.s.barrier`: translated to a bare `amdgcn_s_barrier` intrinsic call.
def ROCDL_SBarrierOp
  : ROCDL_Op<"s.barrier"> {
  let assemblyFormat = "attr-dict";
  string llvmBuilder = [{
    createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_barrier);
  }];
}
266
// `rocdl.barrier`: translated to an `amdgcn_s_barrier` call bracketed by a
// release fence before and an acquire fence after, both in the "workgroup"
// sync scope.
def ROCDL_BarrierOp
  : ROCDL_Op<"barrier"> {
  let assemblyFormat = "attr-dict";
  string llvmBuilder = [{
    llvm::LLVMContext &llvmContext = builder.getContext();
    builder.CreateFence(llvm::AtomicOrdering::Release,
                        llvmContext.getOrInsertSyncScopeID("workgroup"));
    createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_barrier);
    builder.CreateFence(llvm::AtomicOrdering::Acquire,
                        llvmContext.getOrInsertSyncScopeID("workgroup"));
  }];
}
278
// `rocdl.s.barrier.signal`: no results; intrinsic argument 0 is an immediate
// taken from the i32 attribute `id`.
def ROCDL_BarrierSignalOp
  : ROCDL_IntrOp<"s.barrier.signal", [], [], [], 0, 0, 0, [0], ["id"]>,
    Arguments<(ins I32Attr:$id)> {
  let results = (outs);
  let assemblyFormat = "$id attr-dict";
}
284
// `rocdl.s.barrier.wait`: no results; the i16 attribute `id` is passed to
// `amdgcn_s_barrier_wait` as an i16 constant by the explicit builder below.
def ROCDL_BarrierWaitOp
  : ROCDL_IntrOp<"s.barrier.wait", [], [], [], 0, 0, 0, [0], ["id"]>,
    Arguments<(ins I16Attr:$id)> {
  let results = (outs);
  let assemblyFormat = "$id attr-dict";
  string llvmBuilder =
    "createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_barrier_wait,builder.getInt16(op.getId()));";
}
292
// `rocdl.s.wait.dscnt`: no results; intrinsic argument 0 is an immediate taken
// from the i16 attribute `id`.
def ROCDL_WaitDscntOp
  : ROCDL_IntrOp<"s.wait.dscnt", [], [], [], 0, 0, 0, [0], ["id"]>,
    Arguments<(ins I16Attr:$id)> {
  let results = (outs);
  let assemblyFormat = "$id attr-dict";
}
298
// `rocdl.s.setprio`: no results; the i16 attribute `priority` is passed to
// `amdgcn_s_setprio` as an i16 constant.
def ROCDL_SetPrioOp
  : ROCDL_IntrOp<"s.setprio", [], [], [], 0>,
    Arguments<(ins I16Attr:$priority)> {
  let results = (outs);
  let assemblyFormat = "$priority attr-dict";
  string llvmBuilder =
    "createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_s_setprio,builder.getInt16(op.getPriority()));";
}
306
// `rocdl.sched.barrier`: no results; the i32 attribute `mask` is passed to
// `amdgcn_sched_barrier` as an i32 constant.
def ROCDL_SchedBarrier
  : ROCDL_IntrOp<"sched.barrier", [], [], [], 0>,
    Arguments<(ins I32Attr:$mask)> {
  let results = (outs);
  let assemblyFormat = "$mask attr-dict";
  string llvmBuilder =
    "createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_sched_barrier,builder.getInt32(op.getMask()));";
}
314
// `rocdl.sched.group.barrier`: no results; the i32 attributes `mask`, `size`
// and `groupId` are passed, in that order, to `amdgcn_sched_group_barrier`
// as i32 constants.
def ROCDL_SchedGroupBarrier
  : ROCDL_IntrOp<"sched.group.barrier", [], [], [], 0>,
    Arguments<(ins I32Attr:$mask, I32Attr:$size, I32Attr:$groupId)> {
  let results = (outs);
  let assemblyFormat = "$mask `,` $size `,` $groupId attr-dict";
  string llvmBuilder = [{
    createIntrinsicCall(builder,
      llvm::Intrinsic::amdgcn_sched_group_barrier,
      {builder.getInt32(op.getMask()), builder.getInt32(op.getSize()), builder.getInt32(op.getGroupId())});
  }];
}
325
// `rocdl.iglp.opt`: no results; the i32 attribute `variant` is passed to
// `amdgcn_iglp_opt` as an i32 constant.
def ROCDL_IglpOpt
  : ROCDL_IntrOp<"iglp.opt", [], [], [], 0>,
    Arguments<(ins I32Attr:$variant)> {
  let results = (outs);
  let assemblyFormat = "$variant attr-dict";
  string llvmBuilder =
    "createIntrinsicCall(builder, llvm::Intrinsic::amdgcn_iglp_opt,builder.getInt32(op.getVariant()));";
}
333
334//===---------------------------------------------------------------------===//
335// Xdlops intrinsics
336
// Base class for MFMA intrinsic ops without type overloading: one result and
// a variadic list of LLVM-typed operands, printed with a functional type.
class ROCDL_Mfma_IntrOp<string mnemonic, list<Trait> traits = []>
  : LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
                    "amdgcn_" # !subst(".","_", mnemonic),
                    [], [], traits, 1>,
    Arguments<(ins Variadic<LLVM_Type>:$args)> {
  let assemblyFormat =
    "$args attr-dict `:` functional-type($args, $res)";
}
345
346//===---------------------------------------------------------------------===//
347// MFMA intrinsics with overloaded operands
// Same shape as the plain MFMA base class, but the intrinsic is overloaded on
// the operand positions listed in `overloadedOperands`.
class ROCDL_Mfma_OO_IntrOp<string mnemonic, list<int> overloadedOperands,
                           list<Trait> traits = []>
  : LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
                    "amdgcn_" # !subst(".","_", mnemonic),
                    [], overloadedOperands, traits, 1>,
    Arguments<(ins Variadic<LLVM_Type>:$args)> {
  let assemblyFormat =
    "$args attr-dict `:` functional-type($args, $res)";
}
357
// Available on all CDNA.
def ROCDL_mfma_f32_32x32x1f32
  : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x1f32">;
def ROCDL_mfma_f32_16x16x1f32
  : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x1f32">;
def ROCDL_mfma_f32_4x4x1f32
  : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x1f32">;
def ROCDL_mfma_f32_32x32x2f32
  : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x2f32">;
def ROCDL_mfma_f32_16x16x4f32
  : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x4f32">;
def ROCDL_mfma_f32_32x32x4f16
  : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x4f16">;
def ROCDL_mfma_f32_16x16x4f16
  : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x4f16">;
def ROCDL_mfma_f32_4x4x4f16
  : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x4f16">;
def ROCDL_mfma_f32_32x32x8f16
  : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x8f16">;
def ROCDL_mfma_f32_16x16x16f16
  : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x16f16">;
def ROCDL_mfma_i32_32x32x4i8
  : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x4i8">;
def ROCDL_mfma_i32_16x16x4i8
  : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x4i8">;
def ROCDL_mfma_i32_4x4x4i8
  : ROCDL_Mfma_IntrOp<"mfma.i32.4x4x4i8">;
def ROCDL_mfma_i32_32x32x8i8
  : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x8i8">;
def ROCDL_mfma_i32_16x16x16i8
  : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x16i8">;
def ROCDL_mfma_f32_32x32x2bf16
  : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x2bf16">;
def ROCDL_mfma_f32_16x16x2bf16
  : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x2bf16">;
def ROCDL_mfma_f32_4x4x2bf16
  : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x2bf16">;
def ROCDL_mfma_f32_32x32x4bf16
  : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x4bf16">;
def ROCDL_mfma_f32_16x16x8bf16
  : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x8bf16">;

// New in gfx90a.
def ROCDL_mfma_f32_32x32x4bf16_1k
  : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x4bf16.1k">;
def ROCDL_mfma_f32_16x16x4bf16_1k
  : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x4bf16.1k">;
def ROCDL_mfma_f32_4x4x4bf16_1k
  : ROCDL_Mfma_IntrOp<"mfma.f32.4x4x4bf16.1k">;
def ROCDL_mfma_f32_32x32x8bf16_1k
  : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x8bf16.1k">;
def ROCDL_mfma_f32_16x16x16bf16_1k
  : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x16bf16.1k">;

// Note: in gfx940, unlike in gfx90a, the f64 xdlops use the "blgp" argument as
// a NEG bitfield. See IntrinsicsAMDGPU.td for more info.
def ROCDL_mfma_f64_16x16x4f64
  : ROCDL_Mfma_IntrOp<"mfma.f64.16x16x4f64">;
def ROCDL_mfma_f64_4x4x4f64
  : ROCDL_Mfma_IntrOp<"mfma.f64.4x4x4f64">;

// New in gfx940.
def ROCDL_mfma_i32_16x16x32_i8
  : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x32.i8">;
def ROCDL_mfma_i32_32x32x16_i8
  : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x16.i8">;
def ROCDL_mfma_f32_16x16x8_xf32
  : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x8.xf32">;
def ROCDL_mfma_f32_32x32x4_xf32
  : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x4.xf32">;
def ROCDL_mfma_f32_16x16x32_bf8_bf8
  : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf8.bf8">;
def ROCDL_mfma_f32_16x16x32_bf8_fp8
  : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf8.fp8">;
def ROCDL_mfma_f32_16x16x32_fp8_bf8
  : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.fp8.bf8">;
def ROCDL_mfma_f32_16x16x32_fp8_fp8
  : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.fp8.fp8">;
def ROCDL_mfma_f32_32x32x16_bf8_bf8
  : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf8.bf8">;
def ROCDL_mfma_f32_32x32x16_bf8_fp8
  : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf8.fp8">;
def ROCDL_mfma_f32_32x32x16_fp8_bf8
  : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.fp8.bf8">;
def ROCDL_mfma_f32_32x32x16_fp8_fp8
  : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.fp8.fp8">;

// New in gfx950.
def ROCDL_mfma_f32_16x16x32_bf16
  : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.bf16">;
def ROCDL_mfma_i32_16x16x64_i8
  : ROCDL_Mfma_IntrOp<"mfma.i32.16x16x64.i8">;
def ROCDL_mfma_f32_16x16x32_f16
  : ROCDL_Mfma_IntrOp<"mfma.f32.16x16x32.f16">;
def ROCDL_mfma_f32_32x32x16_bf16
  : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.bf16">;
def ROCDL_mfma_i32_32x32x32_i8
  : ROCDL_Mfma_IntrOp<"mfma.i32.32x32x32.i8">;
def ROCDL_mfma_f32_32x32x16_f16
  : ROCDL_Mfma_IntrOp<"mfma.f32.32x32x16.f16">;
// The scaled variants are overloaded on operands 0 and 1.
def ROCDL_mfma_scale_f32_16x16x128_f8f6f4
  : ROCDL_Mfma_OO_IntrOp<"mfma.scale.f32.16x16x128.f8f6f4", [0,1]>;
def ROCDL_mfma_scale_f32_32x32x64_f8f6f4
  : ROCDL_Mfma_OO_IntrOp<"mfma.scale.f32.32x32x64.f8f6f4", [0,1]>;
411
// 2:4 Sparsity ops (GFX940)
def ROCDL_smfmac_f32_16x16x32_f16
  : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x32.f16">;
def ROCDL_smfmac_f32_32x32x16_f16
  : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x16.f16">;
def ROCDL_smfmac_f32_16x16x32_bf16
  : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x32.bf16">;
def ROCDL_smfmac_f32_32x32x16_bf16
  : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x16.bf16">;
def ROCDL_smfmac_i32_16x16x64_i8
  : ROCDL_Mfma_IntrOp<"smfmac.i32.16x16x64.i8">;
def ROCDL_smfmac_i32_32x32x32_i8
  : ROCDL_Mfma_IntrOp<"smfmac.i32.32x32x32.i8">;
def ROCDL_smfmac_f32_16x16x64_bf8_bf8
  : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x64.bf8.bf8">;
def ROCDL_smfmac_f32_16x16x64_bf8_fp8
  : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x64.bf8.fp8">;
def ROCDL_smfmac_f32_16x16x64_fp8_bf8
  : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x64.fp8.bf8">;
def ROCDL_smfmac_f32_16x16x64_fp8_fp8
  : ROCDL_Mfma_IntrOp<"smfmac.f32.16x16x64.fp8.fp8">;
def ROCDL_smfmac_f32_32x32x32_bf8_bf8
  : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x32.bf8.bf8">;
def ROCDL_smfmac_f32_32x32x32_bf8_fp8
  : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x32.bf8.fp8">;
def ROCDL_smfmac_f32_32x32x32_fp8_bf8
  : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x32.fp8.bf8">;
def ROCDL_smfmac_f32_32x32x32_fp8_fp8
  : ROCDL_Mfma_IntrOp<"smfmac.f32.32x32x32.fp8.fp8">;
427
428
429//===---------------------------------------------------------------------===//
430// WMMA intrinsics
// Base class for WMMA intrinsic ops: one result, overloaded on result 0 and on
// the operand positions in `overloadedOperands`; operands are a variadic list
// of LLVM-typed values printed with a functional type.
class ROCDL_Wmma_IntrOp<string mnemonic, list<int> overloadedOperands,
                        list<Trait> traits = []>
  : LLVM_IntrOpBase<ROCDL_Dialect, mnemonic,
                    "amdgcn_" # !subst(".","_", mnemonic),
                    [0], overloadedOperands, traits, 1>,
    Arguments<(ins Variadic<LLVM_Type>:$args)> {
  let assemblyFormat =
    "$args attr-dict `:` functional-type($args, $res)";
}
440
// Available from gfx11
def ROCDL_wmma_f32_16x16x16_f16
  : ROCDL_Wmma_IntrOp<"wmma.f32.16x16x16.f16", [0]>;
def ROCDL_wmma_f32_16x16x16_bf16
  : ROCDL_Wmma_IntrOp<"wmma.f32.16x16x16.bf16", [0]>;
def ROCDL_wmma_f16_16x16x16_f16
  : ROCDL_Wmma_IntrOp<"wmma.f16.16x16x16.f16", [0]>;
def ROCDL_wmma_bf16_16x16x16_bf16
  : ROCDL_Wmma_IntrOp<"wmma.bf16.16x16x16.bf16", [0]>;
def ROCDL_wmma_i32_16x16x16_iu8
  : ROCDL_Wmma_IntrOp<"wmma.i32.16x16x16.iu8", [1]>;
def ROCDL_wmma_i32_16x16x16_iu4
  : ROCDL_Wmma_IntrOp<"wmma.i32.16x16x16.iu4", [1]>;

// Available from gfx12
def ROCDL_wmma_f32_16x16x16_fp8
  : ROCDL_Wmma_IntrOp<"wmma.f32.16x16x16.fp8_fp8", [1]>;
def ROCDL_wmma_f32_16x16x16_bf8
  : ROCDL_Wmma_IntrOp<"wmma.f32.16x16x16.bf8_bf8", [1]>;
451
452//===---------------------------------------------------------------------===//
453// LDS transpose intrinsics (available in GFX950)
454
// Pointer type constraints: address space 1 and address space 3.
def ROCDLGlobalBuffer
  : LLVM_PointerInAddressSpace<1>;
def ROCDLBufferLDS
  : LLVM_PointerInAddressSpace<3>;
457
// Base class for the `ds.read.tr*` intrinsic ops: one result, a single
// address-space-3 pointer operand annotated with a memory-read effect.
class ROCDL_LDS_Read_Tr_IntrOp<string mnemonic>
  : ROCDL_IntrOp<mnemonic, [1], [], [], 1>,
    Arguments<(ins Arg<ROCDLBufferLDS, "", [MemRead]>:$ptr)> {
  let assemblyFormat = "$ptr attr-dict `:` type($ptr) `->` type($res)";
}
463
def ROCDL_ds_read_tr4_b64
  : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr4.b64">;
def ROCDL_ds_read_tr8_b64
  : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr8.b64">;
def ROCDL_ds_read_tr6_b96
  : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr6.b96">;
def ROCDL_ds_read_tr16_b64
  : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr16.b64">;
468
469//===---------------------------------------------------------------------===//
470// Global load to LDS intrinsic (available in GFX950)
471
// `rocdl.global.load.lds`: no results. `$globalPtr` (address space 1) carries a
// memory-read effect and `$ldsPtr` (address space 3) a memory-write effect;
// `$size`, `$offset` and `$aux` are i32 operands.
def ROCDL_GlobalLoadLDSOp
  : ROCDL_IntrOp<"global.load.lds", [], [], [], 0>,
    Arguments<(ins
      Arg<ROCDLGlobalBuffer, "", [MemRead]>:$globalPtr,
      Arg<ROCDLBufferLDS, "", [MemWrite]>:$ldsPtr,
      I32:$size,
      I32:$offset,
      I32:$aux)> {
  let assemblyFormat = "operands attr-dict";
}
481
482//===---------------------------------------------------------------------===//
483// Operations on raw buffer resources (stride of 0, bounds checks either off or in
484// raw buffer mode).
485//===---------------------------------------------------------------------===//
486
// Pointer type constraint for buffer resources: address space 8.
def ROCDLBufferRsrc
  : LLVM_PointerInAddressSpace<8>;
488
// `rocdl.make.buffer.rsrc`: pure intrinsic op, overloaded on operand 0 (the
// base pointer), producing an address-space-8 buffer resource pointer.
def ROCDL_MakeBufferRsrcOp
  : ROCDL_IntrOp<"make.buffer.rsrc", [], [0], [Pure], 1>,
    Arguments<(ins
      LLVM_AnyPointer:$base,
      I16:$stride,
      I32:$numRecords,
      I32:$flags)> {
  let results = (outs ROCDLBufferRsrc:$res);
  let assemblyFormat = "operands attr-dict `:` type($base) `to` type($res)";
}
498
// `rocdl.raw.ptr.buffer.load`: intrinsic op with one result (overloaded on the
// result type) and alias-analysis attributes enabled. `$rsrc` is the buffer
// resource pointer and carries the memory-read effect; `$offset`, `$soffset`
// and `$aux` are i32 operands.
def ROCDL_RawPtrBufferLoadOp :
  ROCDL_IntrOp<"raw.ptr.buffer.load", [0], [], [], 1, 0, 1> {
  dag args = (ins Arg<ROCDLBufferRsrc, "", [MemRead]>:$rsrc,
                  I32:$offset,
                  I32:$soffset,
                  I32:$aux);
  // Append the alias-analysis attributes provided by the base class.
  let arguments = !con(args, aliasAttrs);
  let assemblyFormat = "operands attr-dict `:` type($res)";
  // The accessed pointer is the buffer resource operand, not the loaded
  // value: report `$rsrc`, as the sibling raw.ptr.buffer store and atomic ops
  // do, so alias-analysis consumers see the pointer that is actually read.
  let extraClassDefinition = [{
    ::llvm::SmallVector<::mlir::Value> $cppClass::getAccessedOperands() {
      return {getRsrc()};
    }
  }];
}
513
// `rocdl.raw.ptr.buffer.store`: intrinsic op without results, overloaded on
// operand 0 (`$vdata`), with alias-analysis attributes enabled. `$rsrc`
// carries the memory-write effect and is the reported accessed operand.
def ROCDL_RawPtrBufferStoreOp
  : ROCDL_IntrOp<"raw.ptr.buffer.store", [], [0], [], 0, 0, 1> {
  dag args = (ins
    LLVM_Type:$vdata,
    Arg<ROCDLBufferRsrc, "", [MemWrite]>:$rsrc,
    I32:$offset,
    I32:$soffset,
    I32:$aux);
  let arguments = !con(args, aliasAttrs);
  let assemblyFormat = "operands attr-dict `:` type($vdata)";
  let extraClassDefinition = [{
    ::llvm::SmallVector<::mlir::Value> $cppClass::getAccessedOperands() {
      return {getRsrc()};
    }
  }];
}
530
// `rocdl.raw.ptr.buffer.atomic.cmpswap`: one result (overloaded on the result
// type) whose type must match `$src` and `$cmp`; alias-analysis attributes
// enabled. `$rsrc` carries read and write effects and is the reported
// accessed operand.
def ROCDL_RawPtrBufferAtomicCmpSwap
  : ROCDL_IntrOp<"raw.ptr.buffer.atomic.cmpswap",
                 [0], [], [AllTypesMatch<["res", "src", "cmp"]>], 1, 0, 1> {
  dag args = (ins
    LLVM_Type:$src,
    LLVM_Type:$cmp,
    Arg<ROCDLBufferRsrc, "", [MemRead, MemWrite]>:$rsrc,
    I32:$offset,
    I32:$soffset,
    I32:$aux);
  let arguments = !con(args, aliasAttrs);
  let assemblyFormat = "operands attr-dict `:` type($res)";
  let extraClassDefinition = [{
    ::llvm::SmallVector<::mlir::Value> $cppClass::getAccessedOperands() {
      return {getRsrc()};
    }
  }];
}
548
// Base class for `raw.ptr.buffer.atomic.<op>` intrinsic ops without results:
// overloaded on operand 0 (`$vdata`), alias-analysis attributes enabled.
// `$rsrc` carries read and write effects and is the reported accessed operand.
class ROCDL_RawPtrBufferAtomicNoRet<string op>
  : ROCDL_IntrOp<"raw.ptr.buffer.atomic." # op, [], [0], [], 0, 0, 1> {
  dag args = (ins
    LLVM_Type:$vdata,
    Arg<ROCDLBufferRsrc, "", [MemRead, MemWrite]>:$rsrc,
    I32:$offset,
    I32:$soffset,
    I32:$aux);
  let arguments = !con(args, aliasAttrs);
  let assemblyFormat = "operands attr-dict `:` type($vdata)";
  let extraClassDefinition = [{
    ::llvm::SmallVector<::mlir::Value> $cppClass::getAccessedOperands() {
      return {getRsrc()};
    }
  }];
}
564
def ROCDL_RawPtrBufferAtomicFmaxOp
  : ROCDL_RawPtrBufferAtomicNoRet<"fmax">;
def ROCDL_RawPtrBufferAtomicSmaxOp
  : ROCDL_RawPtrBufferAtomicNoRet<"smax">;
def ROCDL_RawPtrBufferAtomicUminOp
  : ROCDL_RawPtrBufferAtomicNoRet<"umin">;
// Note: not supported on all architectures
def ROCDL_RawPtrBufferAtomicFaddOp
  : ROCDL_RawPtrBufferAtomicNoRet<"fadd">;
570
571//===---------------------------------------------------------------------===//
572// Raw buffer load/store intrinsics
573
// `rocdl.raw.buffer.load`: translated to `amdgcn_raw_buffer_load`, overloaded
// on the converted result type. Parsing and printing are hand-written.
def ROCDL_RawBufferLoadOp
  : ROCDL_Op<"raw.buffer.load">,
    Results<(outs LLVM_Type:$res)>,
    Arguments<(ins
      LLVM_Type:$rsrc,
      LLVM_Type:$offset,
      LLVM_Type:$soffset,
      LLVM_Type:$aux)> {
  let hasCustomAssemblyFormat = 1;
  string llvmBuilder = [{
      $res = createIntrinsicCall(builder,
          llvm::Intrinsic::amdgcn_raw_buffer_load, {$rsrc, $offset,
          $soffset, $aux}, {$_resultType});
  }];
}
588
// `rocdl.raw.buffer.store`: translated to `amdgcn_raw_buffer_store`,
// overloaded on the converted type of `$vdata`. Parsing and printing are
// hand-written.
def ROCDL_RawBufferStoreOp
  : ROCDL_Op<"raw.buffer.store">,
    Arguments<(ins
      LLVM_Type:$vdata,
      LLVM_Type:$rsrc,
      LLVM_Type:$offset,
      LLVM_Type:$soffset,
      LLVM_Type:$aux)> {
  let hasCustomAssemblyFormat = 1;
  string llvmBuilder = [{
    auto vdataType = moduleTranslation.convertType(op.getVdata().getType());
    createIntrinsicCall(builder,
          llvm::Intrinsic::amdgcn_raw_buffer_store, {$vdata, $rsrc,
          $offset, $soffset, $aux}, {vdataType});
  }];
}
604
// `rocdl.raw.buffer.atomic.cmpswap`: translated to
// `amdgcn_raw_buffer_atomic_cmpswap`, overloaded on the converted result type.
// The result type must match the types of `$src` and `$cmp`.
def ROCDL_RawBufferAtomicCmpSwap
  : ROCDL_Op<"raw.buffer.atomic.cmpswap",
             [AllTypesMatch<["res", "src", "cmp"]>]>,
    Results<(outs LLVM_Type:$res)>,
    Arguments<(ins
      LLVM_Type:$src,
      LLVM_Type:$cmp,
      LLVM_Type:$rsrc,
      I32:$offset,
      I32:$soffset,
      I32:$aux)> {
  let assemblyFormat = [{
    attr-dict `(` operands `)` `:` type($res) `,` type($rsrc)
  }];
  string llvmBuilder = [{
      $res = createIntrinsicCall(builder,
          llvm::Intrinsic::amdgcn_raw_buffer_atomic_cmpswap, {$src, $cmp, $rsrc,
            $offset, $soffset, $aux}, {$_resultType});
  }];
}
623
624//===---------------------------------------------------------------------===//
625// MI-100 and MI-200 buffer atomic floating point add intrinsic
626
// `rocdl.raw.buffer.atomic.fadd`: translated to
// `amdgcn_raw_buffer_atomic_fadd`, overloaded on the converted type of
// `$vdata`. Parsing and printing are hand-written.
def ROCDL_RawBufferAtomicFAddOp
  : ROCDL_Op<"raw.buffer.atomic.fadd">,
    Arguments<(ins
      LLVM_Type:$vdata,
      LLVM_Type:$rsrc,
      LLVM_Type:$offset,
      LLVM_Type:$soffset,
      LLVM_Type:$aux)> {
  let hasCustomAssemblyFormat = 1;
  string llvmBuilder = [{
      auto vdataType = moduleTranslation.convertType(op.getVdata().getType());
      createIntrinsicCall(builder,
          llvm::Intrinsic::amdgcn_raw_buffer_atomic_fadd, {$vdata, $rsrc,
            $offset, $soffset, $aux}, {vdataType});
  }];
}
642
643//===---------------------------------------------------------------------===//
644// Buffer atomic floating point max intrinsic. GFX9 does not support fp32.
645
// `rocdl.raw.buffer.atomic.fmax`: translated to
// `amdgcn_raw_buffer_atomic_fmax`, overloaded on the converted type of
// `$vdata`. Parsing and printing are hand-written.
def ROCDL_RawBufferAtomicFMaxOp
  : ROCDL_Op<"raw.buffer.atomic.fmax">,
    Arguments<(ins
      LLVM_Type:$vdata,
      LLVM_Type:$rsrc,
      LLVM_Type:$offset,
      LLVM_Type:$soffset,
      LLVM_Type:$aux)> {
  let hasCustomAssemblyFormat = 1;
  string llvmBuilder = [{
      auto vdataType = moduleTranslation.convertType(op.getVdata().getType());
      createIntrinsicCall(builder,
          llvm::Intrinsic::amdgcn_raw_buffer_atomic_fmax, {$vdata, $rsrc,
            $offset, $soffset, $aux}, {vdataType});
  }];
}
661
662//===---------------------------------------------------------------------===//
663// Buffer atomic signed integer max intrinsic.
664
// `rocdl.raw.buffer.atomic.smax`: translated to
// `amdgcn_raw_buffer_atomic_smax`, overloaded on the converted type of
// `$vdata`. Parsing and printing are hand-written.
def ROCDL_RawBufferAtomicSMaxOp
  : ROCDL_Op<"raw.buffer.atomic.smax">,
    Arguments<(ins
      LLVM_Type:$vdata,
      LLVM_Type:$rsrc,
      LLVM_Type:$offset,
      LLVM_Type:$soffset,
      LLVM_Type:$aux)> {
  let hasCustomAssemblyFormat = 1;
  string llvmBuilder = [{
      auto vdataType = moduleTranslation.convertType(op.getVdata().getType());
      createIntrinsicCall(builder,
          llvm::Intrinsic::amdgcn_raw_buffer_atomic_smax, {$vdata, $rsrc,
            $offset, $soffset, $aux}, {vdataType});
  }];
}
680
681//===---------------------------------------------------------------------===//
682// Buffer atomic unsigned integer min intrinsic.
683
// `rocdl.raw.buffer.atomic.umin`: translated to
// `amdgcn_raw_buffer_atomic_umin`, overloaded on the converted type of
// `$vdata`. Parsing and printing are hand-written.
def ROCDL_RawBufferAtomicUMinOp
  : ROCDL_Op<"raw.buffer.atomic.umin">,
    Arguments<(ins
      LLVM_Type:$vdata,
      LLVM_Type:$rsrc,
      LLVM_Type:$offset,
      LLVM_Type:$soffset,
      LLVM_Type:$aux)> {
  let hasCustomAssemblyFormat = 1;
  string llvmBuilder = [{
      auto vdataType = moduleTranslation.convertType(op.getVdata().getType());
      createIntrinsicCall(builder,
          llvm::Intrinsic::amdgcn_raw_buffer_atomic_umin, {$vdata, $rsrc,
            $offset, $soffset, $aux}, {vdataType});
  }];
}
699
// DPP update intrinsic (`rocdl.update.dpp`). The result type must match the
// types of `$old` and `$src`. The explicit builder calls `amdgcn_update_dpp`,
// overloaded on the converted type of `$src`, passing the four attributes as
// i32/i1 constants after the two value operands.
def ROCDL_DPPUpdateOp
  : ROCDL_IntrOp<"update.dpp", [], [0],
                 [AllTypesMatch<["res", "src", "old"]>], 1>,
    Arguments<(ins
      LLVM_Type:$old,
      LLVM_Type:$src,
      I32Attr:$dppCtrl,
      I32Attr:$rowMask,
      I32Attr:$bankMask,
      I1Attr:$boundCtrl)> {
  let results = (outs LLVM_Type:$res);
  let assemblyFormat = [{
    attr-dict $old `,` $src `with` $dppCtrl `,` $rowMask `,` $bankMask `,` $boundCtrl `:` type($src)
  }];
  string llvmBuilder = [{
      auto vdataType = moduleTranslation.convertType(op.getSrc().getType());
      llvm::Value *args[] = {
        moduleTranslation.lookupValue(op.getOld()),
        moduleTranslation.lookupValue(op.getSrc()),
          builder.getInt32(op.getDppCtrl()),
          builder.getInt32(op.getRowMask()),
          builder.getInt32(op.getBankMask()),
          builder.getInt1(op.getBoundCtrl())
      };
      $res = createIntrinsicCall(builder,
        llvm::Intrinsic::amdgcn_update_dpp, args, {vdataType});
  }];
}
723
724//===---------------------------------------------------------------------===//
725// 16-bit float intrinsics
726//===---------------------------------------------------------------------===//
727def ROCDL_CvtPkRtz: // Op `rocdl.cvt.pkrtz`: packs two f32 operands into one vector<2xf16> result.
728    ROCDL_IntrOp<"cvt.pkrtz", [], [], [Pure], 1>, // Marked Pure, so it is side-effect free and may be CSE'd or hoisted.
729    Arguments<(ins F32:$srcA, F32:$srcB)> {
730  let summary = "Convert two f32 input into a vector<2xf16>";
731  let description = [{
732    Convert two f32 values into a packed vector<2xf16>.
733  }];
734  let assemblyFormat = [{
735    attr-dict $srcA `,` $srcB `:` type($res)
736  }];
737}
738
739//===---------------------------------------------------------------------===//
740// 8-bit float intrinsics
741//===---------------------------------------------------------------------===//
742def ROCDL_CvtF32Bf8Op : // Op `rocdl.cvt.f32.bf8`: extracts one bf8 element from a packed i32 and widens it to f32.
743    ROCDL_IntrOp<"cvt.f32.bf8", [], [], [Pure], 1>, // Marked Pure, so it is side-effect free.
744    Arguments<(ins I32:$srcA, I32:$byteSel)> { // byteSel is an i32 SSA operand selecting the source byte.
745  let summary = "Convert bf8 to f32";
746  let description = [{
747    Convert 8-bit bf8 value from the `byteSel`th byte of `srcA` to fp32.
748  }];
749  let assemblyFormat = [{
750    attr-dict $srcA `[` $byteSel `]` `:` type($res)
751  }];
752}
753
754def ROCDL_CvtF32Fp8Op : // Op `rocdl.cvt.f32.fp8`: extracts one fp8 element from a packed i32 and widens it to f32.
755    ROCDL_IntrOp<"cvt.f32.fp8", [], [], [Pure], 1>, // Marked Pure, so it is side-effect free.
756    Arguments<(ins I32:$srcA, I32:$byteSel)> { // byteSel is an i32 SSA operand selecting the source byte.
757  let summary = "Convert fp8 to f32";
758  let description = [{
759    Convert 8-bit fp8 value from the `byteSel`th byte of `srcA` to fp32.
760  }];
761  let assemblyFormat = [{
762    attr-dict $srcA `[` $byteSel `]` `:` type($res)
763  }];
764}
765
766def ROCDL_CvtPkBf8F32Op : // Op `rocdl.cvt.pk.bf8.f32`: packed conversion of two f32 values to bf8, merged into an existing i32.
767    ROCDL_IntrOp<"cvt.pk.bf8.f32", [], [], [Pure], 1>, // Marked Pure, so it is side-effect free.
768    Arguments<(ins F32:$srcA, F32:$srcB, I32:$old, I1:$wordSel)> { // wordSel is an i1 operand choosing which word of old is overwritten.
769  let summary = "Convert two f32's to bf8";
770  let description = [{
771    Convert `srcA` and `srcB` to bf8 and store into the low/high word of
772    `old`, preserving the other word.
773  }];
774  let assemblyFormat = [{
775    attr-dict $srcA `,` $srcB `->` $old `[` $wordSel `]` `:` type($res)
776  }];
777}
778
779def ROCDL_CvtPkFp8F32Op : // Op `rocdl.cvt.pk.fp8.f32`: packed conversion of two f32 values to fp8, merged into an existing i32.
780    ROCDL_IntrOp<"cvt.pk.fp8.f32", [], [], [Pure], 1>, // Marked Pure, so it is side-effect free.
781    Arguments<(ins F32:$srcA, F32:$srcB, I32:$old, I1:$wordSel)> { // wordSel is an i1 operand choosing which word of old is overwritten.
782  let summary = "Convert two f32's to fp8";
783  let description = [{
784    Convert `srcA` and `srcB` to fp8 and store into the low/high word of
785    `old`, preserving the other word.
786  }];
787  let assemblyFormat = [{
788    attr-dict $srcA `,` $srcB `->` $old `[` $wordSel `]` `:` type($res)
789  }];
790}
791
792def ROCDL_CvtSrBf8F32Op : // Op `rocdl.cvt.sr.bf8.f32`: f32 to bf8 conversion with stochastic rounding, merged into an existing i32.
793    ROCDL_IntrOp<"cvt.sr.bf8.f32", [], [], [Pure], 1>, // Marked Pure, so it is side-effect free.
794    Arguments<(ins F32:$srcA, I32:$srcB, I32:$old, I32:$byteSel)> { // srcB is an i32 rounding factor, not a second value to convert.
795  let summary = "Convert f32 to bf8, stochastic rounding";
796  let description = [{
797    Convert `srcA` to bf8, adding the rounding factor from `srcB`,
798    and store into the `byteSel`th byte of `old`, preserving the others.
799  }];
800  let assemblyFormat = [{
801    attr-dict $srcA `,` $srcB `->` $old `[` $byteSel `]` `:` type($res)
802  }];
803}
804
805def ROCDL_CvtSrFp8F32Op : // Op `rocdl.cvt.sr.fp8.f32`: f32 to fp8 conversion with stochastic rounding, merged into an existing i32.
806    ROCDL_IntrOp<"cvt.sr.fp8.f32", [], [], [Pure], 1>, // Marked Pure, so it is side-effect free.
807    Arguments<(ins F32:$srcA, I32:$srcB, I32:$old, I32:$byteSel)> { // srcB is an i32 rounding factor, not a second value to convert.
808  let summary = "Convert f32 to fp8, stochastic rounding";
809  let description = [{
810    Convert `srcA` to fp8, adding the rounding factor from `srcB`,
811    and store into the `byteSel`th byte of `old`, preserving the others.
812  }];
813  let assemblyFormat = [{
814    attr-dict $srcA `,` $srcB `->` $old `[` $byteSel `]` `:` type($res)
815  }];
816}
817
818//===----------------------------------------------------------------------===//
819// ROCDL target attribute.
820//===----------------------------------------------------------------------===//
821
822def ROCDL_TargetAttr : // Attribute `#rocdl.target`: compilation settings plus boolean queries over its optional `flags` dictionary.
823    ROCDL_Attr<"ROCDLTarget", "target"> {
824  let description = [{
825    ROCDL target attribute for controlling compilation of AMDGPU targets. All
826    parameters decay into default values if not present.
827
828    Examples:
829
830    1. Target with default values.
831    ```
832      gpu.module @mymodule [#rocdl.target] attributes {...} {
833        ...
834      }
835    ```
836
837    2. Target with `gfx90a` chip and fast math.
838    ```
839      gpu.module @mymodule [#rocdl.target<chip = "gfx90a", flags = {fast, no_wave64}>] {
840        ...
841      }
842    ```
843  }];
844  let parameters = (ins // Defaults here are repeated by hand in the AttrBuilder further down; keep both lists in sync.
845    DefaultValuedParameter<"int", "2", "Optimization level to apply.">:$O,
846    StringRefParameter<"Target triple.", "\"amdgcn-amd-amdhsa\"">:$triple,
847    StringRefParameter<"Target chip.", "\"gfx900\"">:$chip,
848    StringRefParameter<"Target chip features.", "\"\"">:$features,
849    // Also update the default builder below and rocdl-attach-target in
850    // Dialect/GPU/Transforms/Passes.td .
851    StringRefParameter<"ABI version.", "\"500\"">:$abi,
852    OptionalParameter<"DictionaryAttr", "Target specific flags.">:$flags,
853    OptionalParameter<"ArrayAttr", "Files to link to the LLVM module.">:$link
854  );
855  let assemblyFormat = [{
856    (`<` struct($O, $triple, $chip, $features, $abi, $flags, $link)^ `>`)?
857  }];
858  let builders = [ // Single builder; every argument has a default so `get(ctx)` yields the all-defaults target.
859    AttrBuilder<(ins CArg<"int", "2">:$optLevel,
860                     CArg<"StringRef", "\"amdgcn-amd-amdhsa\"">:$triple,
861                     CArg<"StringRef", "\"gfx900\"">:$chip,
862                     CArg<"StringRef", "\"\"">:$features,
863                     CArg<"StringRef", "\"500\"">:$abiVersion,
864                     CArg<"DictionaryAttr", "nullptr">:$targetFlags,
865                     CArg<"ArrayAttr", "nullptr">:$linkFiles), [{
866      return Base::get($_ctxt, optLevel, triple, chip, features, abiVersion,
867                       targetFlags, linkFiles);
868    }]>
869  ];
870  let skipDefaultBuilders = 1; // Only the builder listed above is generated.
871  let genVerifyDecl = 1; // Declares a verify hook; its definition is not in this file chunk.
872  let extraClassDeclaration = [{
873    bool hasFlag(StringRef flag) const; // True when `flags` is present and contains an entry named `flag`.
874    bool hasWave64() const;
875    bool hasFastMath() const;
876    bool hasDaz() const;
877    bool hasFiniteOnly() const;
878    bool hasUnsafeMath() const;
879    bool hasCorrectSqrt() const;
880  }];
881  let extraClassDefinition = [{
882    bool $cppClass::hasFlag(StringRef flag) const {
883      if (DictionaryAttr flags = getFlags()) // `flags` is optional, so it may be a null attribute.
884        return flags.get(flag) != nullptr; // Only key presence matters; the mapped value is ignored.
885      return false;
886    }
887    bool $cppClass::hasWave64() const { // On by default: false only if "no_wave64" is set and "wave64" is not.
888      return hasFlag("wave64") || !hasFlag("no_wave64");
889    }
890    bool $cppClass::hasFastMath() const {
891      return hasFlag("fast");
892    }
893    bool $cppClass::hasDaz() const {
894      return hasFlag("daz");
895    }
896    bool $cppClass::hasFiniteOnly() const {
897      return hasFlag("finite_only");
898    }
899    bool $cppClass::hasUnsafeMath() const {
900      return hasFlag("unsafe_math");
901    }
902    bool $cppClass::hasCorrectSqrt() const { // On by default: disabled only by the "unsafe_sqrt" flag.
903      return !hasFlag("unsafe_sqrt");
904    }
905  }];
906}
907#endif // ROCDLIR_OPS
908