xref: /llvm-project/llvm/test/CodeGen/DirectX/WaveActiveSum.ll (revision 4446a9849aaa7e33e0d544fa6501d3d851b25fd6)
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s
2*4446a984SAdam Yang
; Test that for scalar values, WaveActiveSum maps down to the DirectX op
4*4446a984SAdam Yang
; Scalar half input: the sum reduction intrinsic call is expected to be
; replaced by a dx.op.waveActiveOp.f16 call carrying i32 119 followed by the
; original operand and two i8 0 operands.
define noundef half @wave_active_sum_half(half noundef %expr) {
  %sum = call half @llvm.dx.wave.reduce.sum.f16(half %expr)
; CHECK: call half @dx.op.waveActiveOp.f16(i32 119, half %expr, i8 0, i8 0)
  ret half %sum
}
11*4446a984SAdam Yang
; Scalar float input: expects the f32 overload of dx.op.waveActiveOp with
; i32 119, the operand, and two trailing i8 0 operands.
define noundef float @wave_active_sum_float(float noundef %expr) {
  %sum = call float @llvm.dx.wave.reduce.sum.f32(float %expr)
; CHECK: call float @dx.op.waveActiveOp.f32(i32 119, float %expr, i8 0, i8 0)
  ret float %sum
}
18*4446a984SAdam Yang
; Scalar double input: expects the f64 overload of dx.op.waveActiveOp with
; i32 119, the operand, and two trailing i8 0 operands.
define noundef double @wave_active_sum_double(double noundef %expr) {
  %sum = call double @llvm.dx.wave.reduce.sum.f64(double %expr)
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr, i8 0, i8 0)
  ret double %sum
}
25*4446a984SAdam Yang
; Scalar i16 input through the "sum" intrinsic: expects the i16 overload of
; dx.op.waveActiveOp with a final i8 0 operand.
define noundef i16 @wave_active_sum_i16(i16 noundef %expr) {
  %sum = call i16 @llvm.dx.wave.reduce.sum.i16(i16 %expr)
; CHECK: call i16 @dx.op.waveActiveOp.i16(i32 119, i16 %expr, i8 0, i8 0)
  ret i16 %sum
}
32*4446a984SAdam Yang
; Scalar i32 input through the "sum" intrinsic: expects the i32 overload of
; dx.op.waveActiveOp with a final i8 0 operand.
define noundef i32 @wave_active_sum_i32(i32 noundef %expr) {
  %sum = call i32 @llvm.dx.wave.reduce.sum.i32(i32 %expr)
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr, i8 0, i8 0)
  ret i32 %sum
}
39*4446a984SAdam Yang
; Scalar i64 input through the "sum" intrinsic: expects the i64 overload of
; dx.op.waveActiveOp with a final i8 0 operand.
define noundef i64 @wave_active_sum_i64(i64 noundef %expr) {
  %sum = call i64 @llvm.dx.wave.reduce.sum.i64(i64 %expr)
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr, i8 0, i8 0)
  ret i64 %sum
}
46*4446a984SAdam Yang
; Scalar i16 input through the "usum" intrinsic: same dx.op.waveActiveOp.i16
; call shape as the "sum" variant, except the final operand is i8 1.
define noundef i16 @wave_active_usum_i16(i16 noundef %expr) {
  %usum = call i16 @llvm.dx.wave.reduce.usum.i16(i16 %expr)
; CHECK: call i16 @dx.op.waveActiveOp.i16(i32 119, i16 %expr, i8 0, i8 1)
  ret i16 %usum
}
53*4446a984SAdam Yang
; Scalar i32 input through the "usum" intrinsic: expects the i32 overload of
; dx.op.waveActiveOp with a final i8 1 operand.
define noundef i32 @wave_active_usum_i32(i32 noundef %expr) {
  %usum = call i32 @llvm.dx.wave.reduce.usum.i32(i32 %expr)
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr, i8 0, i8 1)
  ret i32 %usum
}
60*4446a984SAdam Yang
; Scalar i64 input through the "usum" intrinsic: expects the i64 overload of
; dx.op.waveActiveOp with a final i8 1 operand.
define noundef i64 @wave_active_usum_i64(i64 noundef %expr) {
  %usum = call i64 @llvm.dx.wave.reduce.usum.i64(i64 %expr)
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr, i8 0, i8 1)
  ret i64 %usum
}
67*4446a984SAdam Yang
; Scalar overloads of the wave reduction intrinsics called by the scalar tests.

; "usum" overloads (integer types only).
declare i16 @llvm.dx.wave.reduce.usum.i16(i16)
declare i32 @llvm.dx.wave.reduce.usum.i32(i32)
declare i64 @llvm.dx.wave.reduce.usum.i64(i64)

; "sum" overloads on integer types.
declare i16 @llvm.dx.wave.reduce.sum.i16(i16)
declare i32 @llvm.dx.wave.reduce.sum.i32(i32)
declare i64 @llvm.dx.wave.reduce.sum.i64(i64)

; "sum" overloads on floating-point types.
declare half @llvm.dx.wave.reduce.sum.f16(half)
declare float @llvm.dx.wave.reduce.sum.f32(float)
declare double @llvm.dx.wave.reduce.sum.f64(double)
79*4446a984SAdam Yang
; Test that for vector values, WaveActiveSum scalarizes and maps down to the
; DirectX op
82*4446a984SAdam Yang
; <2 x half> input: one scalar dx.op.waveActiveOp.f16 call is expected per
; element, operating on values named %expr.i0 and %expr.i1.
define noundef <2 x half> @wave_active_sum_v2half(<2 x half> noundef %expr) {
  %sum = call <2 x half> @llvm.dx.wave.reduce.sum.v2f16(<2 x half> %expr)
; CHECK: call half @dx.op.waveActiveOp.f16(i32 119, half %expr.i0, i8 0, i8 0)
; CHECK: call half @dx.op.waveActiveOp.f16(i32 119, half %expr.i1, i8 0, i8 0)
  ret <2 x half> %sum
}
90*4446a984SAdam Yang
; <3 x i32> input through the "sum" intrinsic: three scalar
; dx.op.waveActiveOp.i32 calls are expected, one per element
; (%expr.i0 .. %expr.i2), each ending in i8 0.
define noundef <3 x i32> @wave_active_sum_v3i32(<3 x i32> noundef %expr) {
  %sum = call <3 x i32> @llvm.dx.wave.reduce.sum.v3i32(<3 x i32> %expr)
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i0, i8 0, i8 0)
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i1, i8 0, i8 0)
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i2, i8 0, i8 0)
  ret <3 x i32> %sum
}
99*4446a984SAdam Yang
; <4 x double> input: four scalar dx.op.waveActiveOp.f64 calls are expected,
; one per element (%expr.i0 .. %expr.i3).
define noundef <4 x double> @wave_active_sum_v4f64(<4 x double> noundef %expr) {
entry:
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr.i0, i8 0, i8 0)
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr.i1, i8 0, i8 0)
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr.i2, i8 0, i8 0)
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr.i3, i8 0, i8 0)
  ; The overload suffix is spelled v4f64 so that it names the <4 x double>
  ; operand type (the previous spelling "v464" dropped the element-type letter).
  %ret = call <4 x double> @llvm.dx.wave.reduce.sum.v4f64(<4 x double> %expr)
  ret <4 x double> %ret
}
109*4446a984SAdam Yang
; Vector overloads of the "sum" wave reduction intrinsic.
declare <4 x double> @llvm.dx.wave.reduce.sum.v4f64(<4 x double>)
declare <3 x i32> @llvm.dx.wave.reduce.sum.v3i32(<3 x i32>)
declare <2 x half> @llvm.dx.wave.reduce.sum.v2f16(<2 x half>)
113*4446a984SAdam Yang
; <2 x i16> input through the "usum" intrinsic: two scalar
; dx.op.waveActiveOp.i16 calls are expected, one per element, each ending in
; i8 1.
define noundef <2 x i16> @wave_active_usum_v2i16(<2 x i16> noundef %expr) {
entry:
; CHECK: call i16 @dx.op.waveActiveOp.i16(i32 119, i16 %expr.i0, i8 0, i8 1)
; CHECK: call i16 @dx.op.waveActiveOp.i16(i32 119, i16 %expr.i1, i8 0, i8 1)
  ; The overload suffix is v2i16 to name the <2 x i16> operand type; the
  ; previous "v2f16" spelling described a half vector instead.
  %ret = call <2 x i16> @llvm.dx.wave.reduce.usum.v2i16(<2 x i16> %expr)
  ret <2 x i16> %ret
}
121*4446a984SAdam Yang
; <3 x i32> input through the "usum" intrinsic: three scalar
; dx.op.waveActiveOp.i32 calls are expected, one per element
; (%expr.i0 .. %expr.i2), each ending in i8 1.
define noundef <3 x i32> @wave_active_usum_v3i32(<3 x i32> noundef %expr) {
  %usum = call <3 x i32> @llvm.dx.wave.reduce.usum.v3i32(<3 x i32> %expr)
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i0, i8 0, i8 1)
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i1, i8 0, i8 1)
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i2, i8 0, i8 1)
  ret <3 x i32> %usum
}
130*4446a984SAdam Yang
; <4 x i64> input through the "usum" intrinsic: four scalar
; dx.op.waveActiveOp.i64 calls are expected, one per element
; (%expr.i0 .. %expr.i3), each ending in i8 1.
; NOTE(review): the function name says "v4f64" although the operand is
; <4 x i64>; the name is kept as-is to avoid renaming the test function.
define noundef <4 x i64> @wave_active_usum_v4f64(<4 x i64> noundef %expr) {
entry:
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr.i0, i8 0, i8 1)
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr.i1, i8 0, i8 1)
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr.i2, i8 0, i8 1)
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr.i3, i8 0, i8 1)
  ; The overload suffix is v4i64 to name the <4 x i64> operand type (the
  ; previous spelling "v464" dropped the element-type letter).
  %ret = call <4 x i64> @llvm.dx.wave.reduce.usum.v4i64(<4 x i64> %expr)
  ret <4 x i64> %ret
}
140*4446a984SAdam Yang
; Vector overloads of the "usum" wave reduction intrinsic. Each suffix names
; the integer vector type in the signature (v2i16, v3i32, v4i64); two of them
; were previously spelled with floating-point suffixes (v2f16, v4f64).
declare <2 x i16> @llvm.dx.wave.reduce.usum.v2i16(<2 x i16>)
declare <3 x i32> @llvm.dx.wave.reduce.usum.v3i32(<3 x i32>)
declare <4 x i64> @llvm.dx.wave.reduce.usum.v4i64(<4 x i64>)
144