Lines Matching +full:4 +full:- +full:pixel +full:- +full:align
1 //===- IntrinsicsAMDGPU.td - Defines AMDGPU intrinsics -----*- tablegen -*-===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines all of the AMDGPU-specific intrinsics.
11 //===----------------------------------------------------------------------===//
15 // The amdgpu-no-* attributes (ex amdgpu-no-workitem-id-z) typically inferred
16 // by the backend cause whole-program undefined behavior when violated, such as
18 // values. In non-entry-point functions, attempting to call a function that needs
20 // of the calling convention and also program-level UB. Outside of such IR-level UB,
21 // these preloaded registers are always set to a well-defined value and are thus `noundef`.
144 //===----------------------------------------------------------------------===//
146 //===----------------------------------------------------------------------===//
153 DefaultAttrsIntrinsic<[LLVMQualPointerType<4>], [],
154 [Align<RetIndex, 4>, NoUndef<RetIndex>, NonNull<RetIndex>, IntrNoMem, IntrSpeculatable]>;
158 DefaultAttrsIntrinsic<[LLVMQualPointerType<4>], [],
159 [Align<RetIndex, 4>, NoUndef<RetIndex>, NonNull<RetIndex>, IntrNoMem, IntrSpeculatable]>;
163 DefaultAttrsIntrinsic<[LLVMQualPointerType<4>], [],
164 [Align<RetIndex, 4>, NoUndef<RetIndex>, IntrNoMem, IntrSpeculatable]>;
168 DefaultAttrsIntrinsic<[LLVMQualPointerType<4>], [],
169 [Align<RetIndex, 4>, NoUndef<RetIndex>, IntrNoMem, IntrSpeculatable]>;
172 // This is no longer guaranteed to be a compile-time constant due to linking
188 DefaultAttrsIntrinsic<[LLVMQualPointerType<4>], [],
189 [Align<RetIndex, 4>, NoUndef<RetIndex>,
192 // Set EXEC to the 64-bit value given.
196 [llvm_i64_ty], // 64-bit literal constant
206 [llvm_i32_ty, // 32-bit SGPR input
221 //===----------------------------------------------------------------------===//
223 //===----------------------------------------------------------------------===//
288 // MASK = 0x0000 0001: ALL, non-memory, non-side-effect producing instructions may be
356 // Look Up 2.0 / pi src0 with segment select src1[4:0]
397 // Fused single-precision multiply-add with legacy behaviour for the multiply,
398 // which is that +/- 0.0 * anything (even NaN or infinity) is +0.0. This is
429 // out = 1.0 / sqrt(a) result clamped to +/- max_float.
537 // gfx10: bits 24-27 indicate the number of active threads/dwords
541 ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>,
565 // New-style image intrinsics
568 // Dimension-aware image intrinsics framework
658 // {offset} {bias} {z-compare}
712 // Helper class to capture the profile of a dimension-aware image intrinsic.
842 int LodArgIndex = !add(VAddrArgIndex, NumVAddrArgs, -1);
851 // All dimension-aware intrinsics are derived from this class.
855 P_.RetTypes, // vdata(VGPR) -- for load/atomic-with-return
857 !foreach(arg, P_.DataArgs, arg.Type), // vdata(VGPR) -- for store/atomic
867 // bit 4 = scc (gfx90a)
868 // gfx940: bit 0 = sc0, bit 1 = nt, bit 4 = sc1
869 // gfx12+: bits [0-2] = th, bits [3-4] = scope
1101 // bit 3 = swz, bit 4 = scc (gfx90a)
1102 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1103 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1117 // The versions of these intrinsics that take <4 x i32> arguments are deprecated
1131 // bit 3 = swz, bit 4 = scc (gfx90a)
1132 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1133 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1161 // bit 3 = swz, bit 4 = scc (gfx90a)
1162 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1163 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1193 // bit 3 = swz, bit 4 = scc (gfx90a)
1194 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1195 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1198 [IntrReadMem, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
1211 // bit 3 = swz, bit 4 = scc (gfx90a)
1212 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1213 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1217 ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
1230 // bit 3 = swz, bit 4 = scc (gfx90a)
1231 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1232 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1235 [IntrWriteMem, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
1248 // bit 3 = swz, bit 4 = scc (gfx90a)
1249 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1250 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1254 ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
1268 // bit 3 = swz, bit 4 = scc (gfx90a)
1269 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1270 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1287 // bit 3 = swz, bit 4 = scc (gfx90a)
1288 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1289 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1305 [ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
1341 ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
1461 // - raw and struct variants
1462 // - joint format field
1463 // - joint cachepolicy field
1469 llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
1472 // bit 3 = swz, bit 4 = scc (gfx90a)
1473 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1474 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1477 ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
1485 llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
1488 // bit 3 = swz, bit 4 = scc (gfx90a)
1489 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1490 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1494 ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
1503 llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
1506 // bit 3 = swz, bit 4 = scc (gfx90a)
1507 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1508 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1512 ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
1521 llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
1524 // bit 3 = swz, bit 4 = scc (gfx90a)
1525 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1526 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1530 ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
1539 llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
1542 // bit 3 = swz, bit 4 = scc (gfx90a)
1543 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1544 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1548 ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
1557 llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
1560 // bit 3 = swz, bit 4 = scc (gfx90a)
1561 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1562 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1566 ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
1576 llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
1579 // bit 3 = swz, bit 4 = scc (gfx90a)
1580 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1581 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1595 llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
1598 // bit 3 = swz, bit 4 = scc (gfx90a)
1599 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1600 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1611 llvm_i32_ty, // Data byte size: 1/2/4
1617 // bit 3 = swz, bit 4 = scc (gfx90a)
1618 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1619 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1630 llvm_i32_ty, // Data byte size: 1/2/4
1636 // bit 3 = swz, bit 4 = scc (gfx90a)
1637 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1638 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1652 llvm_i32_ty, // Data byte size: 1/2/4
1659 // bit 3 = swz, bit 4 = scc (gfx90a)
1660 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1661 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1672 llvm_i32_ty, // Data byte size: 1/2/4
1679 // bit 3 = swz, bit 4 = scc (gfx90a)
1680 // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1681 // gfx12+: bits [0-2] = th, bits [3-4] = scope,
1731 [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>>,
1807 // unsafe to change in non-strictfp functions. The register properties
1819 // not cross a 4Gb address boundary. Use for any other purpose may not
1822 // This intrinsic always returns PC sign-extended from 48 bits even if the
1823 // s_getpc_b64 instruction returns a zero-extended value.
1858 // high selects whether high or low 16-bits are loaded from LDS
1867 // high selects whether high or low 16-bits are loaded from LDS
1873 ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
1905 // high selects whether high or low 16-bits are used for p and p0 operands
1913 // high selects whether high or low 16-bits are used for p operand
1921 // gfx11+ fp16 interpolation intrinsic, with round-toward-zero rounding mode.
1922 // high selects whether high or low 16-bits are used for p and p0 operands
1930 // gfx11+ fp16 interpolation intrinsic, with round-toward-zero rounding mode.
1931 // high selects whether high or low 16-bits are used for p operand
2201 // Return true if at least one thread within the pixel quad passes true into
2224 // enabled, with a few exceptions: - Phi nodes which require WWM return an
2290 // non-zero.
2292 [IntrConvergent, IntrNoReturn, ImmArg<ArgIndex<4>>]>;
2295 //===----------------------------------------------------------------------===//
2297 //===----------------------------------------------------------------------===//
2307 //===----------------------------------------------------------------------===//
2309 //===----------------------------------------------------------------------===//
2318 ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]>;
2330 ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>, IntrNoCallback, IntrNoFree]>;
2362 //===----------------------------------------------------------------------===//
2364 //===----------------------------------------------------------------------===//
2372 llvm_i32_ty, // Data byte size: 1/2/4
2376 // bit 4 = scc))
2378 ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree],
2386 //===----------------------------------------------------------------------===//
2388 //===----------------------------------------------------------------------===//
2395 ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>, IntrNoCallback, IntrNoFree]>;
2402 ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>, IntrNoCallback, IntrNoFree]>;
2405 // <sel> is a 32-bit constant whose high 8 bits must be zero which selects
2437 //===----------------------------------------------------------------------===//
2439 //===----------------------------------------------------------------------===//
2475 // WMMA (Wave Matrix Multiply-Accumulate) intrinsics
2521 // The content of the other 16-bit half is preserved from the input.
2535 // The content of the other 16-bit half is undefined.
2541 //===----------------------------------------------------------------------===//
2543 //===----------------------------------------------------------------------===//
2550 ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]>;
2557 ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]>;
2559 // SWMMAC (Wave Matrix(sparse) Multiply-Accumulate) intrinsics
2593 // WMMA (Wave Matrix Multiply-Accumulate) intrinsics
2638 // <2 x i32> @llvm.amdgcn.global.load.tr.b64.v2i32(ptr addrspace(1)) -> global_load_tr_b64
2639 // <8 x i16> @llvm.amdgcn.global.load.tr.b128.v8i16(ptr addrspace(1)) -> global_load_tr_b128
2641 // i32 @llvm.amdgcn.global.load.tr.b64.i32(ptr addrspace(1)) -> global_load_tr_b64
2642 // <4 x i16> @llvm.amdgcn.global.load.tr.b128.v4i16(ptr addrspace(1)) -> global_load_tr_b128
2651 //===----------------------------------------------------------------------===//
2653 //===----------------------------------------------------------------------===//
2796 // %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
2812 // %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
2831 // %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
2867 //===----------------------------------------------------------------------===//
2869 //===----------------------------------------------------------------------===//
2880 ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2905 //===----------------------------------------------------------------------===//
2907 //===----------------------------------------------------------------------===//
2929 //===----------------------------------------------------------------------===//
2931 //===----------------------------------------------------------------------===//
2948 ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
3034 //===----------------------------------------------------------------------===//
3037 //===----------------------------------------------------------------------===//
3039 // Control-flow intrinsics in LLVM IR are convergent because they represent the
3041 // lock-step". But they exist during a small window in the lowering process,