; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s

; Checks the lowering of the llvm.dx.wave.reduce.sum / llvm.dx.wave.reduce.usum
; intrinsics to calls of @dx.op.waveActiveOp.<ty>. Every expected call passes
; i32 119 as its first operand and i8 0 as its third operand; the last i8
; operand is expected to be 0 for the "sum" intrinsics and 1 for the "usum"
; intrinsics.
;
; Each function is anchored with a CHECK-LABEL so that an expected call can
; only be matched inside the function that is supposed to produce it.

; Test that for scalar values, WaveActiveSum maps down to the DirectX op

; CHECK-LABEL: @wave_active_sum_half(
define noundef half @wave_active_sum_half(half noundef %expr) {
entry:
; CHECK: call half @dx.op.waveActiveOp.f16(i32 119, half %expr, i8 0, i8 0)
  %ret = call half @llvm.dx.wave.reduce.sum.f16(half %expr)
  ret half %ret
}

; CHECK-LABEL: @wave_active_sum_float(
define noundef float @wave_active_sum_float(float noundef %expr) {
entry:
; CHECK: call float @dx.op.waveActiveOp.f32(i32 119, float %expr, i8 0, i8 0)
  %ret = call float @llvm.dx.wave.reduce.sum.f32(float %expr)
  ret float %ret
}

; CHECK-LABEL: @wave_active_sum_double(
define noundef double @wave_active_sum_double(double noundef %expr) {
entry:
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr, i8 0, i8 0)
  %ret = call double @llvm.dx.wave.reduce.sum.f64(double %expr)
  ret double %ret
}

; CHECK-LABEL: @wave_active_sum_i16(
define noundef i16 @wave_active_sum_i16(i16 noundef %expr) {
entry:
; CHECK: call i16 @dx.op.waveActiveOp.i16(i32 119, i16 %expr, i8 0, i8 0)
  %ret = call i16 @llvm.dx.wave.reduce.sum.i16(i16 %expr)
  ret i16 %ret
}

; CHECK-LABEL: @wave_active_sum_i32(
define noundef i32 @wave_active_sum_i32(i32 noundef %expr) {
entry:
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr, i8 0, i8 0)
  %ret = call i32 @llvm.dx.wave.reduce.sum.i32(i32 %expr)
  ret i32 %ret
}

; CHECK-LABEL: @wave_active_sum_i64(
define noundef i64 @wave_active_sum_i64(i64 noundef %expr) {
entry:
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr, i8 0, i8 0)
  %ret = call i64 @llvm.dx.wave.reduce.sum.i64(i64 %expr)
  ret i64 %ret
}

; CHECK-LABEL: @wave_active_usum_i16(
define noundef i16 @wave_active_usum_i16(i16 noundef %expr) {
entry:
; CHECK: call i16 @dx.op.waveActiveOp.i16(i32 119, i16 %expr, i8 0, i8 1)
  %ret = call i16 @llvm.dx.wave.reduce.usum.i16(i16 %expr)
  ret i16 %ret
}

; CHECK-LABEL: @wave_active_usum_i32(
define noundef i32 @wave_active_usum_i32(i32 noundef %expr) {
entry:
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr, i8 0, i8 1)
  %ret = call i32 @llvm.dx.wave.reduce.usum.i32(i32 %expr)
  ret i32 %ret
}

; CHECK-LABEL: @wave_active_usum_i64(
define noundef i64 @wave_active_usum_i64(i64 noundef %expr) {
entry:
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr, i8 0, i8 1)
  %ret = call i64 @llvm.dx.wave.reduce.usum.i64(i64 %expr)
  ret i64 %ret
}

declare half @llvm.dx.wave.reduce.sum.f16(half)
declare float @llvm.dx.wave.reduce.sum.f32(float)
declare double @llvm.dx.wave.reduce.sum.f64(double)

declare i16 @llvm.dx.wave.reduce.sum.i16(i16)
declare i32 @llvm.dx.wave.reduce.sum.i32(i32)
declare i64 @llvm.dx.wave.reduce.sum.i64(i64)

declare i16 @llvm.dx.wave.reduce.usum.i16(i16)
declare i32 @llvm.dx.wave.reduce.usum.i32(i32)
declare i64 @llvm.dx.wave.reduce.usum.i64(i64)

; Test that for vector values, WaveActiveSum scalarizes and maps down to the
; DirectX op. The expected operands are the per-element values %expr.i0,
; %expr.i1, ... named in the CHECK lines, one call per vector element.

; CHECK-LABEL: @wave_active_sum_v2half(
define noundef <2 x half> @wave_active_sum_v2half(<2 x half> noundef %expr) {
entry:
; CHECK: call half @dx.op.waveActiveOp.f16(i32 119, half %expr.i0, i8 0, i8 0)
; CHECK: call half @dx.op.waveActiveOp.f16(i32 119, half %expr.i1, i8 0, i8 0)
  %ret = call <2 x half> @llvm.dx.wave.reduce.sum.v2f16(<2 x half> %expr)
  ret <2 x half> %ret
}

; CHECK-LABEL: @wave_active_sum_v3i32(
define noundef <3 x i32> @wave_active_sum_v3i32(<3 x i32> noundef %expr) {
entry:
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i0, i8 0, i8 0)
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i1, i8 0, i8 0)
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i2, i8 0, i8 0)
  %ret = call <3 x i32> @llvm.dx.wave.reduce.sum.v3i32(<3 x i32> %expr)
  ret <3 x i32> %ret
}

; CHECK-LABEL: @wave_active_sum_v4f64(
define noundef <4 x double> @wave_active_sum_v4f64(<4 x double> noundef %expr) {
entry:
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr.i0, i8 0, i8 0)
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr.i1, i8 0, i8 0)
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr.i2, i8 0, i8 0)
; CHECK: call double @dx.op.waveActiveOp.f64(i32 119, double %expr.i3, i8 0, i8 0)
  %ret = call <4 x double> @llvm.dx.wave.reduce.sum.v4f64(<4 x double> %expr)
  ret <4 x double> %ret
}

declare <2 x half> @llvm.dx.wave.reduce.sum.v2f16(<2 x half>)
declare <3 x i32> @llvm.dx.wave.reduce.sum.v3i32(<3 x i32>)
declare <4 x double> @llvm.dx.wave.reduce.sum.v4f64(<4 x double>)

; CHECK-LABEL: @wave_active_usum_v2i16(
define noundef <2 x i16> @wave_active_usum_v2i16(<2 x i16> noundef %expr) {
entry:
; CHECK: call i16 @dx.op.waveActiveOp.i16(i32 119, i16 %expr.i0, i8 0, i8 1)
; CHECK: call i16 @dx.op.waveActiveOp.i16(i32 119, i16 %expr.i1, i8 0, i8 1)
  %ret = call <2 x i16> @llvm.dx.wave.reduce.usum.v2i16(<2 x i16> %expr)
  ret <2 x i16> %ret
}

; CHECK-LABEL: @wave_active_usum_v3i32(
define noundef <3 x i32> @wave_active_usum_v3i32(<3 x i32> noundef %expr) {
entry:
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i0, i8 0, i8 1)
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i1, i8 0, i8 1)
; CHECK: call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %expr.i2, i8 0, i8 1)
  %ret = call <3 x i32> @llvm.dx.wave.reduce.usum.v3i32(<3 x i32> %expr)
  ret <3 x i32> %ret
}

; NOTE: despite the "v4f64" in its name, this function operates on <4 x i64>.
; CHECK-LABEL: @wave_active_usum_v4f64(
define noundef <4 x i64> @wave_active_usum_v4f64(<4 x i64> noundef %expr) {
entry:
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr.i0, i8 0, i8 1)
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr.i1, i8 0, i8 1)
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr.i2, i8 0, i8 1)
; CHECK: call i64 @dx.op.waveActiveOp.i64(i32 119, i64 %expr.i3, i8 0, i8 1)
  %ret = call <4 x i64> @llvm.dx.wave.reduce.usum.v4i64(<4 x i64> %expr)
  ret <4 x i64> %ret
}

declare <2 x i16> @llvm.dx.wave.reduce.usum.v2i16(<2 x i16>)
declare <3 x i32> @llvm.dx.wave.reduce.usum.v3i32(<3 x i32>)
declare <4 x i64> @llvm.dx.wave.reduce.usum.v4i64(<4 x i64>)