; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

; Positive cases: every shuffle below reads each lane of %x exactly once, and
; the expected output applies the reduction directly to %x with the shuffle
; removed.

; Identity mask taken from the first operand.
define i32 @reduce_add(<4 x i32> %x) {
; CHECK-LABEL: @reduce_add(
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT:    ret i32 [[RES]]
;
  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %shuf)
  ret i32 %res
}

; Reversal taken entirely from the second operand (mask indices 4-7).
define i32 @reduce_or(<4 x i32> %x) {
; CHECK-LABEL: @reduce_or(
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT:    ret i32 [[RES]]
;
  %shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
  %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %shuf)
  ret i32 %res
}

; Permutation of the first operand that swaps the two middle lanes.
define i32 @reduce_and(<4 x i32> %x) {
; CHECK-LABEL: @reduce_and(
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT:    ret i32 [[RES]]
;
  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %shuf)
  ret i32 %res
}

; Rotation taken entirely from the second operand (mask indices 4-7).
define i32 @reduce_xor(<4 x i32> %x) {
; CHECK-LABEL: @reduce_xor(
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT:    ret i32 [[RES]]
;
  %shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> <i32 5, i32 6, i32 7, i32 4>
  %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %shuf)
  ret i32 %res
}

; Permutation <2, 1, 3, 0> of the first operand.
define i32 @reduce_umax(<4 x i32> %x) {
; CHECK-LABEL: @reduce_umax(
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT:    ret i32 [[RES]]
;
  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
  %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %shuf)
  ret i32 %res
}

; Permutation <2, 3, 0, 1> of the first operand (the two halves swapped).
define i32 @reduce_umin(<4 x i32> %x) {
; CHECK-LABEL: @reduce_umin(
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT:    ret i32 [[RES]]
;
  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %shuf)
  ret i32 %res
}

; Permutation <2, 0, 3, 1> of the first operand.
define i32 @reduce_smax(<4 x i32> %x) {
; CHECK-LABEL: @reduce_smax(
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT:    ret i32 [[RES]]
;
  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
  %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %shuf)
  ret i32 %res
}

; Permutation <0, 3, 1, 2> of the first operand.
define i32 @reduce_smin(<4 x i32> %x) {
; CHECK-LABEL: @reduce_smin(
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT:    ret i32 [[RES]]
;
  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
  %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %shuf)
  ret i32 %res
}

; Permutation <2, 0, 3, 1>; the nnan/nsz flags on the call are expected to be
; kept on the simplified reduction.
define float @reduce_fmax(<4 x float> %x) {
; CHECK-LABEL: @reduce_fmax(
; CHECK-NEXT:    [[RES:%.*]] = call nnan nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[X:%.*]])
; CHECK-NEXT:    ret float [[RES]]
;
  %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
  %res = call nsz nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf)
  ret float %res
}

; Permutation <0, 3, 1, 2>; the call carries no fast-math flags.
define float @reduce_fmin(<4 x float> %x) {
; CHECK-LABEL: @reduce_fmin(
; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[X:%.*]])
; CHECK-NEXT:    ret float [[RES]]
;
  %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
  %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %shuf)
  ret float %res
}

; Both shuffle operands are %x, but the mask only reads the first one. The
; reduction is marked reassoc.
define float @reduce_fadd(float %a, <4 x float> %x) {
; CHECK-LABEL: @reduce_fadd(
; CHECK-NEXT:    [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
; CHECK-NEXT:    ret float [[RES]]
;
  %shuf = shufflevector <4 x float> %x, <4 x float> %x, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
  %res = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %a, <4 x float> %shuf)
  ret float %res
}

; The second shuffle operand is zeroinitializer, but the mask never selects
; from it. The reduction is marked reassoc.
define float @reduce_fmul(float %a, <4 x float> %x) {
; CHECK-LABEL: @reduce_fmul(
; CHECK-NEXT:    [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
; CHECK-NEXT:    ret float [[RES]]
;
  %shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
  %res = call reassoc float @llvm.vector.reduce.fmul.v4f32(float %a, <4 x float> %shuf)
  ret float %res
}

; Failed cases
; TODO: simplify the reductions for shuffles resulting in undef/poison elements.

; Negative cases: the expected output for every function below still contains
; the shufflevector feeding the reduction.

; Mask index 4 is lane 0 of the second operand, which is also %x, so lane 0 is
; read twice and lane 3 is never read.
define i32 @reduce_add_failed(<4 x i32> %x) {
; CHECK-LABEL: @reduce_add_failed(
; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[SHUF]])
; CHECK-NEXT:    ret i32 [[RES]]
;
  %shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %shuf)
  ret i32 %res
}

; The last result lane comes from lane 0 of the zeroinitializer operand, so the
; shuffle mixes two different sources.
define i32 @reduce_or_failed(<4 x i32> %x) {
; CHECK-LABEL: @reduce_or_failed(
; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, <4 x i32> <i32 3, i32 2, i32 1, i32 4>
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[SHUF]])
; CHECK-NEXT:    ret i32 [[RES]]
;
  %shuf = shufflevector <4 x i32> %x, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 1, i32 4>
  %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %shuf)
  ret i32 %res
}

; Lane 0 is read twice and lane 3 is never read.
define i32 @reduce_and_failed(<4 x i32> %x) {
; CHECK-LABEL: @reduce_and_failed(
; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 0>
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[SHUF]])
; CHECK-NEXT:    ret i32 [[RES]]
;
  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 0>
  %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %shuf)
  ret i32 %res
}

; The last mask element is poison, so lane 0 of %x is never read.
define i32 @reduce_xor_failed(<4 x i32> %x) {
; CHECK-LABEL: @reduce_xor_failed(
; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 poison>
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[SHUF]])
; CHECK-NEXT:    ret i32 [[RES]]
;
  %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 poison>
  %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %shuf)
  ret i32 %res
}

; The shuffle interleaves two distinct <2 x i32> sources into a <4 x i32>.
define i32 @reduce_umax_failed(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @reduce_umax_failed(
; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <4 x i32> <i32 2, i32 1, i32 3, i32 0>
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[SHUF]])
; CHECK-NEXT:    ret i32 [[RES]]
;
  %shuf = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
  %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %shuf)
  ret i32 %res
}

; The source is only <2 x i32>; mask indices 2 and 3 select lanes of the poison
; operand, so two result lanes are poison.
define i32 @reduce_umin_failed(<2 x i32> %x) {
; CHECK-LABEL: @reduce_umin_failed(
; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[SHUF]])
; CHECK-NEXT:    ret i32 [[RES]]
;
  %shuf = shufflevector <2 x i32> %x, <2 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %shuf)
  ret i32 %res
}

; The source is <8 x i32> and the shuffle extracts only four of its lanes.
define i32 @reduce_smax_failed(<8 x i32> %x) {
; CHECK-LABEL: @reduce_smax_failed(
; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SHUF]])
; CHECK-NEXT:    ret i32 [[RES]]
;
  %shuf = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
  %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %shuf)
  ret i32 %res
}

; Same narrowing from <8 x i32>, with %x passed as both shuffle operands; the
; mask only reads the first one.
define i32 @reduce_smin_failed(<8 x i32> %x) {
; CHECK-LABEL: @reduce_smin_failed(
; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SHUF]])
; CHECK-NEXT:    ret i32 [[RES]]
;
  %shuf = shufflevector <8 x i32> %x, <8 x i32> %x, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
  %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %shuf)
  ret i32 %res
}

; Lane 2 is read twice and lane 0 is never read.
define float @reduce_fmax_failed(<4 x float> %x) {
; CHECK-LABEL: @reduce_fmax_failed(
; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 1>
; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[SHUF]])
; CHECK-NEXT:    ret float [[RES]]
;
  %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 1>
  %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf)
  ret float %res
}

; The first mask element is poison, so lane 0 of %x is never read.
define float @reduce_fmin_failed(<4 x float> %x) {
; CHECK-LABEL: @reduce_fmin_failed(
; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 3, i32 1, i32 2>
; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[SHUF]])
; CHECK-NEXT:    ret float [[RES]]
;
  %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 poison, i32 3, i32 1, i32 2>
  %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %shuf)
  ret float %res
}

; The mask is a full permutation of %x, but the fadd reduction carries no
; reassoc flag.
define float @reduce_fadd_failed(float %a, <4 x float> %x) {
; CHECK-LABEL: @reduce_fadd_failed(
; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
; CHECK-NEXT:    ret float [[RES]]
;
  %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
  %res = call float @llvm.vector.reduce.fadd.v4f32(float %a, <4 x float> %shuf)
  ret float %res
}

; The source is only <2 x float>; mask indices 3 and 2 select lanes of the
; poison operand. The fmul reduction also carries no reassoc flag.
define float @reduce_fmul_failed(float %a, <2 x float> %x) {
; CHECK-LABEL: @reduce_fmul_failed(
; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
; CHECK-NEXT:    ret float [[RES]]
;
  %shuf = shufflevector <2 x float> %x, <2 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
  %res = call float @llvm.vector.reduce.fmul.v4f32(float %a, <4 x float> %shuf)
  ret float %res
}

; Declarations of the reduction intrinsics called by the functions in this file.
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a)
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a)
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a)
declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a)
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
declare float @llvm.vector.reduce.fadd.v4f32(float %a, <4 x float> %b)
declare float @llvm.vector.reduce.fmul.v4f32(float %a, <4 x float> %b)