; xref: /llvm-project/llvm/test/CodeGen/DirectX/WaveActiveMax.ll (revision aab25f20f6c06bab7aac6fb83d54705ec4cdfadd)
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s

; Test that for scalar values, WaveActiveMax maps down to the DirectX op

; Scalar half: the @llvm.dx.wave.reduce.max.f16 call is expected to become one
; dx.op.waveActiveOp.f16 call with operands (i32 119, %expr, i8 3, i8 0).
; The label directive below keeps the match inside this function.
; CHECK-LABEL: @wave_active_max_half(
define noundef half @wave_active_max_half(half noundef %expr) {
entry:
; CHECK: call half @dx.op.waveActiveOp.f16(i32 119, half %expr, i8 3, i8 0){{$}}
  %ret = call half @llvm.dx.wave.reduce.max.f16(half %expr)
  ret half %ret
}

; Scalar float: the @llvm.dx.wave.reduce.max.f32 call is expected to become one
; dx.op.waveActiveOp.f32 call with operands (i32 119, %expr, i8 3, i8 0).
; The label directive below keeps the match inside this function.
; CHECK-LABEL: @wave_active_max_float(
define noundef float @wave_active_max_float(float noundef %expr) {
entry:
; CHECK: call float @dx.op.waveActiveOp.f32(i32 119, float %expr, i8 3, i8 0){{$}}
  %ret = call float @llvm.dx.wave.reduce.max.f32(float %expr)
  ret float %ret
}

; Scalar double: the @llvm.dx.wave.reduce.max.f64 call is expected to become
; one dx.op.waveActiveOp.f64 call with operands (i32 119, %expr, i8 3, i8 0).
; The label directive below keeps the match inside this function.
; CHECK-LABEL: @wave_active_max_double(
define noundef double @wave_active_max_double(double noundef %expr) {
entry:
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr, i8 3, i8 0){{$}}
  %ret = call double @llvm.dx.wave.reduce.max.f64(double %expr)
  ret double %ret
}

; Scalar i16, max variant: the @llvm.dx.wave.reduce.max.i16 call is expected to
; become one dx.op.waveActiveOp.i16 call with operands
; (i32 119, %expr, i8 3, i8 0).
; The label directive below keeps the match inside this function.
; CHECK-LABEL: @wave_active_max_i16(
define noundef i16 @wave_active_max_i16(i16 noundef %expr) {
entry:
; CHECK: call i16 @dx.op.waveActiveOp.i16(i32 119, i16 %expr, i8 3, i8 0){{$}}
  %ret = call i16 @llvm.dx.wave.reduce.max.i16(i16 %expr)
  ret i16 %ret
}

; Scalar i32, max variant: the @llvm.dx.wave.reduce.max.i32 call is expected to
; become one dx.op.waveActiveOp.i32 call with operands
; (i32 119, %expr, i8 3, i8 0).
; The label directive below keeps the match inside this function.
; CHECK-LABEL: @wave_active_max_i32(
define noundef i32 @wave_active_max_i32(i32 noundef %expr) {
entry:
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr, i8 3, i8 0){{$}}
  %ret = call i32 @llvm.dx.wave.reduce.max.i32(i32 %expr)
  ret i32 %ret
}

; Scalar i64, max variant: the @llvm.dx.wave.reduce.max.i64 call is expected to
; become one dx.op.waveActiveOp.i64 call with operands
; (i32 119, %expr, i8 3, i8 0).
; The label directive below keeps the match inside this function.
; CHECK-LABEL: @wave_active_max_i64(
define noundef i64 @wave_active_max_i64(i64 noundef %expr) {
entry:
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr, i8 3, i8 0){{$}}
  %ret = call i64 @llvm.dx.wave.reduce.max.i64(i64 %expr)
  ret i64 %ret
}

; Scalar i16, umax variant: the @llvm.dx.wave.reduce.umax.i16 call is expected
; to become one dx.op.waveActiveOp.i16 call with operands
; (i32 119, %expr, i8 3, i8 1). The trailing i8 is 1 here, whereas the max
; variants in this file expect 0.
; The label directive below keeps the match inside this function.
; CHECK-LABEL: @wave_active_umax_i16(
define noundef i16 @wave_active_umax_i16(i16 noundef %expr) {
entry:
; CHECK: call i16 @dx.op.waveActiveOp.i16(i32 119, i16 %expr, i8 3, i8 1){{$}}
  %ret = call i16 @llvm.dx.wave.reduce.umax.i16(i16 %expr)
  ret i16 %ret
}

; Scalar i32, umax variant: the @llvm.dx.wave.reduce.umax.i32 call is expected
; to become one dx.op.waveActiveOp.i32 call with operands
; (i32 119, %expr, i8 3, i8 1). The trailing i8 is 1 here, whereas the max
; variants in this file expect 0.
; The label directive below keeps the match inside this function.
; CHECK-LABEL: @wave_active_umax_i32(
define noundef i32 @wave_active_umax_i32(i32 noundef %expr) {
entry:
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr, i8 3, i8 1){{$}}
  %ret = call i32 @llvm.dx.wave.reduce.umax.i32(i32 %expr)
  ret i32 %ret
}

; Scalar i64, umax variant: the @llvm.dx.wave.reduce.umax.i64 call is expected
; to become one dx.op.waveActiveOp.i64 call with operands
; (i32 119, %expr, i8 3, i8 1). The trailing i8 is 1 here, whereas the max
; variants in this file expect 0.
; The label directive below keeps the match inside this function.
; CHECK-LABEL: @wave_active_umax_i64(
define noundef i64 @wave_active_umax_i64(i64 noundef %expr) {
entry:
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr, i8 3, i8 1){{$}}
  %ret = call i64 @llvm.dx.wave.reduce.umax.i64(i64 %expr)
  ret i64 %ret
}

; Scalar floating-point overloads of the max intrinsic called by the
; half/float/double tests in this file.
declare half @llvm.dx.wave.reduce.max.f16(half)
declare float @llvm.dx.wave.reduce.max.f32(float)
declare double @llvm.dx.wave.reduce.max.f64(double)

; Scalar integer overloads of the max intrinsic called by the
; i16/i32/i64 max tests in this file.
declare i16 @llvm.dx.wave.reduce.max.i16(i16)
declare i32 @llvm.dx.wave.reduce.max.i32(i32)
declare i64 @llvm.dx.wave.reduce.max.i64(i64)

; Scalar integer overloads of the umax intrinsic called by the
; i16/i32/i64 umax tests in this file.
declare i16 @llvm.dx.wave.reduce.umax.i16(i16)
declare i32 @llvm.dx.wave.reduce.umax.i32(i32)
declare i64 @llvm.dx.wave.reduce.umax.i64(i64)

; Test that for vector values, WaveActiveMax scalarizes and maps down to the
; DirectX op

; <2 x half>: one dx.op.waveActiveOp.f16 call is expected per element
; (%expr.i0, %expr.i1), each with operands (i32 119, <element>, i8 3, i8 0).
; The label directive below keeps the matches inside this function.
; CHECK-LABEL: @wave_active_max_v2half(
define noundef <2 x half> @wave_active_max_v2half(<2 x half> noundef %expr) {
entry:
; CHECK: call half @dx.op.waveActiveOp.f16(i32 119, half %expr.i0, i8 3, i8 0){{$}}
; CHECK: call half @dx.op.waveActiveOp.f16(i32 119, half %expr.i1, i8 3, i8 0){{$}}
  %ret = call <2 x half> @llvm.dx.wave.reduce.max.v2f16(<2 x half> %expr)
  ret <2 x half> %ret
}

; <3 x i32>, max variant: one dx.op.waveActiveOp.i32 call is expected per
; element (%expr.i0 .. %expr.i2), each with operands
; (i32 119, <element>, i8 3, i8 0).
; The label directive below keeps the matches inside this function.
; CHECK-LABEL: @wave_active_max_v3i32(
define noundef <3 x i32> @wave_active_max_v3i32(<3 x i32> noundef %expr) {
entry:
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i0, i8 3, i8 0){{$}}
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i1, i8 3, i8 0){{$}}
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i2, i8 3, i8 0){{$}}
  %ret = call <3 x i32> @llvm.dx.wave.reduce.max.v3i32(<3 x i32> %expr)
  ret <3 x i32> %ret
}

; <4 x double>: one dx.op.waveActiveOp.f64 call is expected per element
; (%expr.i0 .. %expr.i3), each with operands (i32 119, <element>, i8 3, i8 0).
; The label directive below keeps the matches inside this function.
; CHECK-LABEL: @wave_active_max_v4f64(
define noundef <4 x double> @wave_active_max_v4f64(<4 x double> noundef %expr) {
entry:
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr.i0, i8 3, i8 0){{$}}
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr.i1, i8 3, i8 0){{$}}
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr.i2, i8 3, i8 0){{$}}
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr.i3, i8 3, i8 0){{$}}
  %ret = call <4 x double> @llvm.dx.wave.reduce.max.v4f64(<4 x double> %expr)
  ret <4 x double> %ret
}

; Vector overloads of the max intrinsic called by the
; <2 x half>, <3 x i32> and <4 x double> max tests in this file.
declare <2 x half> @llvm.dx.wave.reduce.max.v2f16(<2 x half>)
declare <3 x i32> @llvm.dx.wave.reduce.max.v3i32(<3 x i32>)
declare <4 x double> @llvm.dx.wave.reduce.max.v4f64(<4 x double>)

; Vector umax tests. Each function expects one dx.op.waveActiveOp call per
; element, with operands (i32 119, <element>, i8 3, i8 1). The trailing i8 is 1
; for umax, whereas the max variants in this file expect 0.
;
; The overload suffix of every intrinsic name below matches its operand type
; (v2i16, v3i32, v4i64), and each call site agrees with its declaration at the
; end of this section.

; <2 x i16>: elements %expr.i0 and %expr.i1.
; CHECK-LABEL: @wave_active_umax_v2i16(
define noundef <2 x i16> @wave_active_umax_v2i16(<2 x i16> noundef %expr) {
entry:
; CHECK: call i16 @dx.op.waveActiveOp.i16(i32 119, i16 %expr.i0, i8 3, i8 1){{$}}
; CHECK: call i16 @dx.op.waveActiveOp.i16(i32 119, i16 %expr.i1, i8 3, i8 1){{$}}
  %ret = call <2 x i16> @llvm.dx.wave.reduce.umax.v2i16(<2 x i16> %expr)
  ret <2 x i16> %ret
}

; <3 x i32>: elements %expr.i0 through %expr.i2.
; CHECK-LABEL: @wave_active_umax_v3i32(
define noundef <3 x i32> @wave_active_umax_v3i32(<3 x i32> noundef %expr) {
entry:
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i0, i8 3, i8 1){{$}}
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i1, i8 3, i8 1){{$}}
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i2, i8 3, i8 1){{$}}
  %ret = call <3 x i32> @llvm.dx.wave.reduce.umax.v3i32(<3 x i32> %expr)
  ret <3 x i32> %ret
}

; <4 x i64>: elements %expr.i0 through %expr.i3.
; NOTE(review): the function keeps its existing name (..._v4f64) even though it
; operates on <4 x i64>; consider renaming it to ..._v4i64.
; CHECK-LABEL: @wave_active_umax_v4f64(
define noundef <4 x i64> @wave_active_umax_v4f64(<4 x i64> noundef %expr) {
entry:
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr.i0, i8 3, i8 1){{$}}
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr.i1, i8 3, i8 1){{$}}
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr.i2, i8 3, i8 1){{$}}
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr.i3, i8 3, i8 1){{$}}
  %ret = call <4 x i64> @llvm.dx.wave.reduce.umax.v4i64(<4 x i64> %expr)
  ret <4 x i64> %ret
}

; Vector overloads of the umax intrinsic used by the three functions above.
declare <2 x i16> @llvm.dx.wave.reduce.umax.v2i16(<2 x i16>)
declare <3 x i32> @llvm.dx.wave.reduce.umax.v3i32(<3 x i32>)
declare <4 x i64> @llvm.dx.wave.reduce.umax.v4i64(<4 x i64>)