xref: /llvm-project/llvm/test/CodeGen/DirectX/WaveActiveMax.ll (revision aab25f20f6c06bab7aac6fb83d54705ec4cdfadd)
1; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s
2
; Test that, for scalar operands, the wave.reduce.max and wave.reduce.umax
; intrinsics are lowered to the DirectX waveActiveOp operation.
4
; Scalar half operand: the llvm.dx.wave.reduce.max.f16 call below is expected
; to come out as a dx.op.waveActiveOp.f16 call with the operands spelled out
; in the FileCheck line.
define noundef half @wave_active_max_half(half noundef %expr) {
entry:
; CHECK: call half @dx.op.waveActiveOp.f16(i32 119, half %expr, i8 3, i8 0){{$}}
  %wave_max = call half @llvm.dx.wave.reduce.max.f16(half %expr)
  ret half %wave_max
}
11
; Scalar float operand: expects the f32 overload of dx.op.waveActiveOp with
; the same trailing i8 operands (3, 0) as the other max reductions.
define noundef float @wave_active_max_float(float noundef %expr) {
entry:
; CHECK: call float @dx.op.waveActiveOp.f32(i32 119, float %expr, i8 3, i8 0){{$}}
  %wave_max = call float @llvm.dx.wave.reduce.max.f32(float %expr)
  ret float %wave_max
}
18
; Scalar double operand: expects the f64 overload of dx.op.waveActiveOp.
define noundef double @wave_active_max_double(double noundef %expr) {
entry:
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr, i8 3, i8 0){{$}}
  %wave_max = call double @llvm.dx.wave.reduce.max.f64(double %expr)
  ret double %wave_max
}
25
; Scalar i16 operand through the "max" intrinsic: expects the i16 overload of
; dx.op.waveActiveOp with a final i8 operand of 0.
define noundef i16 @wave_active_max_i16(i16 noundef %expr) {
entry:
; CHECK: call i16 @dx.op.waveActiveOp.i16(i32 119, i16 %expr, i8 3, i8 0){{$}}
  %wave_max = call i16 @llvm.dx.wave.reduce.max.i16(i16 %expr)
  ret i16 %wave_max
}
32
; Scalar i32 operand through the "max" intrinsic: expects the i32 overload of
; dx.op.waveActiveOp with a final i8 operand of 0.
define noundef i32 @wave_active_max_i32(i32 noundef %expr) {
entry:
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr, i8 3, i8 0){{$}}
  %wave_max = call i32 @llvm.dx.wave.reduce.max.i32(i32 %expr)
  ret i32 %wave_max
}
39
; Scalar i64 operand through the "max" intrinsic: expects the i64 overload of
; dx.op.waveActiveOp with a final i8 operand of 0.
define noundef i64 @wave_active_max_i64(i64 noundef %expr) {
entry:
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr, i8 3, i8 0){{$}}
  %wave_max = call i64 @llvm.dx.wave.reduce.max.i64(i64 %expr)
  ret i64 %wave_max
}
46
; Scalar i16 operand through the "umax" intrinsic: same DirectX op as the
; "max" case, but the final i8 operand is expected to be 1 instead of 0.
define noundef i16 @wave_active_umax_i16(i16 noundef %expr) {
entry:
; CHECK: call i16 @dx.op.waveActiveOp.i16(i32 119, i16 %expr, i8 3, i8 1){{$}}
  %wave_umax = call i16 @llvm.dx.wave.reduce.umax.i16(i16 %expr)
  ret i16 %wave_umax
}
53
; Scalar i32 operand through the "umax" intrinsic: the final i8 operand of
; the lowered call is expected to be 1.
define noundef i32 @wave_active_umax_i32(i32 noundef %expr) {
entry:
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr, i8 3, i8 1){{$}}
  %wave_umax = call i32 @llvm.dx.wave.reduce.umax.i32(i32 %expr)
  ret i32 %wave_umax
}
60
; Scalar i64 operand through the "umax" intrinsic: the final i8 operand of
; the lowered call is expected to be 1.
define noundef i64 @wave_active_umax_i64(i64 noundef %expr) {
entry:
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr, i8 3, i8 1){{$}}
  %wave_umax = call i64 @llvm.dx.wave.reduce.umax.i64(i64 %expr)
  ret i64 %wave_umax
}
67
; Declarations of the scalar reduction intrinsics called by the tests above,
; grouped by intrinsic and operand class.

; "max" on integer operands.
declare i16 @llvm.dx.wave.reduce.max.i16(i16)
declare i32 @llvm.dx.wave.reduce.max.i32(i32)
declare i64 @llvm.dx.wave.reduce.max.i64(i64)

; "umax" on integer operands.
declare i16 @llvm.dx.wave.reduce.umax.i16(i16)
declare i32 @llvm.dx.wave.reduce.umax.i32(i32)
declare i64 @llvm.dx.wave.reduce.umax.i64(i64)

; "max" on floating-point operands.
declare half @llvm.dx.wave.reduce.max.f16(half)
declare float @llvm.dx.wave.reduce.max.f32(float)
declare double @llvm.dx.wave.reduce.max.f64(double)
79
; Test that, for vector operands, the wave.reduce.max and wave.reduce.umax
; intrinsics are first scalarized and each element is then lowered to the
; DirectX waveActiveOp operation.
82
; <2 x half> operand: one dx.op.waveActiveOp.f16 call is expected per vector
; element, taking the extracted elements %expr.i0 and %expr.i1 in order.
define noundef <2 x half> @wave_active_max_v2half(<2 x half> noundef %expr) {
entry:
; CHECK: call half @dx.op.waveActiveOp.f16(i32 119, half %expr.i0, i8 3, i8 0){{$}}
; CHECK: call half @dx.op.waveActiveOp.f16(i32 119, half %expr.i1, i8 3, i8 0){{$}}
  %wave_max = call <2 x half> @llvm.dx.wave.reduce.max.v2f16(<2 x half> %expr)
  ret <2 x half> %wave_max
}
90
; <3 x i32> operand: three per-element dx.op.waveActiveOp.i32 calls are
; expected, on %expr.i0 through %expr.i2.
define noundef <3 x i32> @wave_active_max_v3i32(<3 x i32> noundef %expr) {
entry:
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i0, i8 3, i8 0){{$}}
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i1, i8 3, i8 0){{$}}
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i2, i8 3, i8 0){{$}}
  %wave_max = call <3 x i32> @llvm.dx.wave.reduce.max.v3i32(<3 x i32> %expr)
  ret <3 x i32> %wave_max
}
99
; <4 x double> operand: four per-element dx.op.waveActiveOp.f64 calls are
; expected, on %expr.i0 through %expr.i3.
define noundef <4 x double> @wave_active_max_v4f64(<4 x double> noundef %expr) {
entry:
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr.i0, i8 3, i8 0){{$}}
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr.i1, i8 3, i8 0){{$}}
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr.i2, i8 3, i8 0){{$}}
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr.i3, i8 3, i8 0){{$}}
  %wave_max = call <4 x double> @llvm.dx.wave.reduce.max.v4f64(<4 x double> %expr)
  ret <4 x double> %wave_max
}
109
; Declarations of the vector "max" reduction intrinsics called by the three
; vector tests above.
declare <4 x double> @llvm.dx.wave.reduce.max.v4f64(<4 x double>)
declare <3 x i32> @llvm.dx.wave.reduce.max.v3i32(<3 x i32>)
declare <2 x half> @llvm.dx.wave.reduce.max.v2f16(<2 x half>)
113
; Test that, for vector operands, the "umax" reduction is scalarized and each
; element is lowered to the DirectX op with a final i8 operand of 1.
;
; The intrinsic name suffix must spell the overloaded operand type, so the
; integer vector overloads are written as v2i16 / v3i32 / v4i64 (not with
; floating-point suffixes such as v2f16 or v4f64).

; <2 x i16> operand: one dx.op.waveActiveOp.i16 call per element.
define noundef <2 x i16> @wave_active_umax_v2i16(<2 x i16> noundef %expr) {
entry:
; CHECK: call i16 @dx.op.waveActiveOp.i16(i32 119, i16 %expr.i0, i8 3, i8 1){{$}}
; CHECK: call i16 @dx.op.waveActiveOp.i16(i32 119, i16 %expr.i1, i8 3, i8 1){{$}}
  %ret = call <2 x i16> @llvm.dx.wave.reduce.umax.v2i16(<2 x i16> %expr)
  ret <2 x i16> %ret
}

; <3 x i32> operand: one dx.op.waveActiveOp.i32 call per element.
define noundef <3 x i32> @wave_active_umax_v3i32(<3 x i32> noundef %expr) {
entry:
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i0, i8 3, i8 1){{$}}
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i1, i8 3, i8 1){{$}}
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i2, i8 3, i8 1){{$}}
  %ret = call <3 x i32> @llvm.dx.wave.reduce.umax.v3i32(<3 x i32> %expr)
  ret <3 x i32> %ret
}

; <4 x i64> operand: one dx.op.waveActiveOp.i64 call per element.
; NOTE(review): the function is named "..._v4f64" although it operates on
; <4 x i64>; the symbol name is kept as-is, only the intrinsic suffix is fixed.
define noundef <4 x i64> @wave_active_umax_v4f64(<4 x i64> noundef %expr) {
entry:
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr.i0, i8 3, i8 1){{$}}
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr.i1, i8 3, i8 1){{$}}
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr.i2, i8 3, i8 1){{$}}
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr.i3, i8 3, i8 1){{$}}
  %ret = call <4 x i64> @llvm.dx.wave.reduce.umax.v4i64(<4 x i64> %expr)
  ret <4 x i64> %ret
}

; Declarations of the vector "umax" reduction intrinsics used above; each
; suffix matches the declared operand type.
declare <2 x i16> @llvm.dx.wave.reduce.umax.v2i16(<2 x i16>)
declare <3 x i32> @llvm.dx.wave.reduce.umax.v3i32(<3 x i32>)
declare <4 x i64> @llvm.dx.wave.reduce.umax.v4i64(<4 x i64>)
144