; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -mtriple=x86_64-- -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -passes=slp-vectorizer -mtriple=x86_64-- -mattr=avx512vl -S | FileCheck %s --check-prefixes=CHECK,AVX

; This file exercises the SLP vectorizer on chains of "logical" and/or
; operations spelled as selects (select a, b, false / select a, true, b)
; whose operands are scalar compares of extracted vector lanes. Where the
; chain is vectorized, the expected output is a vector compare, a freeze of
; the i1 vector, and an llvm.vector.reduce.and/or call.

; External sink used to give a compare or a select an additional use.
declare void @use1(i1)

; Logical-and chain over four 'icmp slt lane, 0' compares of %x.
; Expected: one <4 x i32> compare, freeze, and a 4-wide and-reduction.
define i1 @logical_and_icmp(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    ret i1 [[TMP3]]
;
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %c0 = icmp slt i32 %x0, 0
  %c1 = icmp slt i32 %x1, 0
  %c2 = icmp slt i32 %x2, 0
  %c3 = icmp slt i32 %x3, 0
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  %s3 = select i1 %s2, i1 %c3, i1 false
  ret i1 %s3
}

; Logical-or chain over four 'icmp slt' compares of matching lanes of %x and %y.
; Expected: one <4 x i32> compare, freeze, and a 4-wide or-reduction.
define i1 @logical_or_icmp(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @logical_or_icmp(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    ret i1 [[TMP3]]
;
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %y0 = extractelement <4 x i32> %y, i32 0
  %y1 = extractelement <4 x i32> %y, i32 1
  %y2 = extractelement <4 x i32> %y, i32 2
  %y3 = extractelement <4 x i32> %y, i32 3
  %c0 = icmp slt i32 %x0, %y0
  %c1 = icmp slt i32 %x1, %y1
  %c2 = icmp slt i32 %x2, %y2
  %c3 = icmp slt i32 %x3, %y3
  %s1 = select i1 %c0, i1 true, i1 %c1
  %s2 = select i1 %s1, i1 true, i1 %c2
  %s3 = select i1 %s2, i1 true, i1 %c3
  ret i1 %s3
}

; Floating-point variant of the logical-and chain: four 'fcmp olt lane, 0.0'.
; Expected: one <4 x float> compare, freeze, and a 4-wide and-reduction.
define i1 @logical_and_fcmp(<4 x float> %x) {
; CHECK-LABEL: @logical_and_fcmp(
; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt <4 x float> [[X:%.*]], zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    ret i1 [[TMP3]]
;
  %x0 = extractelement <4 x float> %x, i32 0
  %x1 = extractelement <4 x float> %x, i32 1
  %x2 = extractelement <4 x float> %x, i32 2
  %x3 = extractelement <4 x float> %x, i32 3
  %c0 = fcmp olt float %x0, 0.0
  %c1 = fcmp olt float %x1, 0.0
  %c2 = fcmp olt float %x2, 0.0
  %c3 = fcmp olt float %x3, 0.0
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  %s3 = select i1 %s2, i1 %c3, i1 false
  ret i1 %s3
}

; Floating-point variant of the logical-or chain: four 'fcmp olt lane, 0.0'.
; Expected: one <4 x float> compare, freeze, and a 4-wide or-reduction.
define i1 @logical_or_fcmp(<4 x float> %x) {
; CHECK-LABEL: @logical_or_fcmp(
; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt <4 x float> [[X:%.*]], zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    ret i1 [[TMP3]]
;
  %x0 = extractelement <4 x float> %x, i32 0
  %x1 = extractelement <4 x float> %x, i32 1
  %x2 = extractelement <4 x float> %x, i32 2
  %x3 = extractelement <4 x float> %x, i32 3
  %c0 = fcmp olt float %x0, 0.0
  %c1 = fcmp olt float %x1, 0.0
  %c2 = fcmp olt float %x2, 0.0
  %c3 = fcmp olt float %x3, 0.0
  %s1 = select i1 %c0, i1 true, i1 %c1
  %s2 = select i1 %s1, i1 true, i1 %c2
  %s3 = select i1 %s2, i1 true, i1 %c3
  ret i1 %s3
}

; The four compares use different predicates (ult, slt, sgt, slt).
; The default run (no -mattr) expects two vector compares blended by a shuffle
; and then reduced; the avx512vl run expects the scalar code to be left as is.
define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
; SSE-LABEL: @logical_and_icmp_diff_preds(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 1, i32 3, i32 6, i32 0>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
; SSE-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP1]], [[TMP2]]
; SSE-NEXT:    [[TMP4:%.*]] = icmp ult <4 x i32> [[TMP1]], [[TMP2]]
; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; SSE-NEXT:    [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
; SSE-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
; SSE-NEXT:    ret i1 [[TMP7]]
;
; AVX-LABEL: @logical_and_icmp_diff_preds(
; AVX-NEXT:    [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
; AVX-NEXT:    [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
; AVX-NEXT:    [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
; AVX-NEXT:    [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
; AVX-NEXT:    [[C0:%.*]] = icmp ult i32 [[X0]], 0
; AVX-NEXT:    [[C1:%.*]] = icmp slt i32 [[X1]], 0
; AVX-NEXT:    [[C2:%.*]] = icmp sgt i32 [[X2]], 0
; AVX-NEXT:    [[C3:%.*]] = icmp slt i32 [[X3]], 0
; AVX-NEXT:    [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
; AVX-NEXT:    [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
; AVX-NEXT:    [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
; AVX-NEXT:    ret i1 [[S3]]
;
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %c0 = icmp ult i32 %x0, 0
  %c1 = icmp slt i32 %x1, 0
  %c2 = icmp sgt i32 %x2, 0
  %c3 = icmp slt i32 %x3, 0
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  %s3 = select i1 %s2, i1 %c3, i1 false
  ret i1 %s3
}

; Same predicate (sgt) on every lane but a different constant per lane.
; Expected: one vector compare against <0, 1, 2, 3> and an and-reduction.
define i1 @logical_and_icmp_diff_const(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_diff_const(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    ret i1 [[TMP3]]
;
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %c0 = icmp sgt i32 %x0, 0
  %c1 = icmp sgt i32 %x1, 1
  %c2 = icmp sgt i32 %x2, 2
  %c3 = icmp sgt i32 %x3, 3
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  %s3 = select i1 %s2, i1 %c3, i1 false
  ret i1 %s3
}

; The select chain mixes logical-and (%s1, %s3) with logical-or (%s2).
; Expected: only the compares are vectorized; the selects stay scalar and
; consume lanes extracted from the vector compare (no reduction intrinsic).
define i1 @mixed_logical_icmp(<4 x i32> %x) {
; CHECK-LABEL: @mixed_logical_icmp(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
; CHECK-NEXT:    [[S1:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 false
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
; CHECK-NEXT:    [[S2:%.*]] = select i1 [[S1]], i1 true, i1 [[TMP4]]
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
; CHECK-NEXT:    [[S3:%.*]] = select i1 [[S2]], i1 [[TMP5]], i1 false
; CHECK-NEXT:    ret i1 [[S3]]
;
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %c0 = icmp sgt i32 %x0, 0
  %c1 = icmp sgt i32 %x1, 0
  %c2 = icmp sgt i32 %x2, 0
  %c3 = icmp sgt i32 %x3, 0
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 true, i1 %c2
  %s3 = select i1 %s2, i1 %c3, i1 false
  ret i1 %s3
}

; Only three of the four lanes of %x take part in the chain.
; Expected: a 2-wide compare for lanes 0 and 1, a scalar compare for lane 2,
; and scalar selects (no reduction intrinsic).
define i1 @logical_and_icmp_subvec(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_subvec(
; CHECK-NEXT:    [[X2:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[C2:%.*]] = icmp slt i32 [[X2]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; CHECK-NEXT:    [[S1:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false
; CHECK-NEXT:    [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
; CHECK-NEXT:    ret i1 [[S2]]
;
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %c0 = icmp slt i32 %x0, 0
  %c1 = icmp slt i32 %x1, 0
  %c2 = icmp slt i32 %x2, 0
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  ret i1 %s2
}

; TODO: This is better than all-scalar and still safe,
;       but we want this to be 2 reductions with glue
;       logic...or a wide reduction?

; Each lane of %x is tested twice (slt 42 and sgt 17) in one logical-and chain.
; NOTE(review): the assertions below already expect a single 8-wide
; and-reduction; confirm whether the TODO above is still current.
define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_clamp(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
; CHECK-NEXT:    ret i1 [[TMP6]]
;
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %c0 = icmp slt i32 %x0, 42
  %c1 = icmp slt i32 %x1, 42
  %c2 = icmp slt i32 %x2, 42
  %c3 = icmp slt i32 %x3, 42
  %d0 = icmp sgt i32 %x0, 17
  %d1 = icmp sgt i32 %x1, 17
  %d2 = icmp sgt i32 %x2, 17
  %d3 = icmp sgt i32 %x3, 17
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  %s3 = select i1 %s2, i1 %c3, i1 false
  %s4 = select i1 %s3, i1 %d0, i1 false
  %s5 = select i1 %s4, i1 %d1, i1 false
  %s6 = select i1 %s5, i1 %d2, i1 false
  %s7 = select i1 %s6, i1 %d3, i1 false
  ret i1 %s7
}

; Clamp pattern where one compare (%c2) has an extra use via @use1.
; Expected: the 8-wide reduction is still formed, and the extra use is fed by
; lane 6 extracted from the blended compare vector.
define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_cmp(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i32 6
; CHECK-NEXT:    call void @use1(i1 [[TMP5]])
; CHECK-NEXT:    [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]]
; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP6]])
; CHECK-NEXT:    ret i1 [[TMP7]]
;
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %c0 = icmp slt i32 %x0, 42
  %c1 = icmp slt i32 %x1, 42
  %c2 = icmp slt i32 %x2, 42
  call void @use1(i1 %c2)
  %c3 = icmp slt i32 %x3, 42
  %d0 = icmp sgt i32 %x0, 17
  %d1 = icmp sgt i32 %x1, 17
  %d2 = icmp sgt i32 %x2, 17
  %d3 = icmp sgt i32 %x3, 17
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  %s3 = select i1 %s2, i1 %c3, i1 false
  %s4 = select i1 %s3, i1 %d0, i1 false
  %s5 = select i1 %s4, i1 %d1, i1 false
  %s6 = select i1 %s5, i1 %d2, i1 false
  %s7 = select i1 %s6, i1 %d3, i1 false
  ret i1 %s7
}

; Clamp pattern where an intermediate select (%s2) has an extra use via @use1.
; Expected: %s1/%s2 stay scalar selects on extracted lanes, only the 'sgt 17'
; compares are reduced, and the pieces are recombined with selects.
define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_select(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], splat (i32 42)
; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17)
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
; CHECK-NEXT:    [[S1:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
; CHECK-NEXT:    [[S2:%.*]] = select i1 [[S1]], i1 [[TMP5]], i1 false
; CHECK-NEXT:    call void @use1(i1 [[S2]])
; CHECK-NEXT:    [[TMP6:%.*]] = freeze <4 x i1> [[TMP2]]
; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 [[TMP8]], i1 false
; CHECK-NEXT:    [[OP_RDX1:%.*]] = select i1 [[S2]], i1 [[OP_RDX]], i1 false
; CHECK-NEXT:    ret i1 [[OP_RDX1]]
;
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %c0 = icmp slt i32 %x0, 42
  %c1 = icmp slt i32 %x1, 42
  %c2 = icmp slt i32 %x2, 42
  %c3 = icmp slt i32 %x3, 42
  %d0 = icmp sgt i32 %x0, 17
  %d1 = icmp sgt i32 %x1, 17
  %d2 = icmp sgt i32 %x2, 17
  %d3 = icmp sgt i32 %x3, 17
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  call void @use1(i1 %s2)
  %s3 = select i1 %s2, i1 %c3, i1 false
  %s4 = select i1 %s3, i1 %d0, i1 false
  %s5 = select i1 %s4, i1 %d1, i1 false
  %s6 = select i1 %s5, i1 %d2, i1 false
  %s7 = select i1 %s6, i1 %d3, i1 false
  ret i1 %s7
}

; 8-wide inputs of which only lanes 0-3 are used: each used lane of %x is
; compared (slt) against 42 and against the matching lane of %y.
; Expected: one 8-wide compare and an 8-wide and-reduction.
define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: @logical_and_icmp_clamp_v8i32(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[Y:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 poison, i32 poison, i32 poison, i32 poison>, <4 x i32> [[TMP2]], i64 4)
; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
; CHECK-NEXT:    ret i1 [[TMP6]]
;
  %x0 = extractelement <8 x i32> %x, i32 0
  %x1 = extractelement <8 x i32> %x, i32 1
  %x2 = extractelement <8 x i32> %x, i32 2
  %x3 = extractelement <8 x i32> %x, i32 3
  %y0 = extractelement <8 x i32> %y, i32 0
  %y1 = extractelement <8 x i32> %y, i32 1
  %y2 = extractelement <8 x i32> %y, i32 2
  %y3 = extractelement <8 x i32> %y, i32 3
  %c0 = icmp slt i32 %x0, 42
  %c1 = icmp slt i32 %x1, 42
  %c2 = icmp slt i32 %x2, 42
  %c3 = icmp slt i32 %x3, 42
  %d0 = icmp slt i32 %x0, %y0
  %d1 = icmp slt i32 %x1, %y1
  %d2 = icmp slt i32 %x2, %y2
  %d3 = icmp slt i32 %x3, %y3
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  %s3 = select i1 %s2, i1 %c3, i1 false
  %s4 = select i1 %s3, i1 %d0, i1 false
  %s5 = select i1 %s4, i1 %d1, i1 false
  %s6 = select i1 %s5, i1 %d2, i1 false
  %s7 = select i1 %s6, i1 %d3, i1 false
  ret i1 %s7
}

; Clamp pattern with the 'slt 42' compare of lane 3 (and its select) removed,
; so only three 'slt 42' compares remain alongside four 'sgt 17' compares.
; Expected: a 4-wide and-reduction of the 'sgt 17' compares, with the
; remaining 'slt 42' results combined through selects with freezes.
define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_clamp_partial(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], splat (i32 42)
; CHECK-NEXT:    [[C2:%.*]] = icmp slt i32 [[TMP1]], 42
; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17)
; CHECK-NEXT:    [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 [[TMP6]], i1 false
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = freeze i1 [[TMP8]]
; CHECK-NEXT:    [[OP_RDX1:%.*]] = select i1 [[TMP9]], i1 [[C2]], i1 false
; CHECK-NEXT:    [[TMP10:%.*]] = freeze i1 [[OP_RDX]]
; CHECK-NEXT:    [[OP_RDX2:%.*]] = select i1 [[TMP10]], i1 [[OP_RDX1]], i1 false
; CHECK-NEXT:    ret i1 [[OP_RDX2]]
;
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %c0 = icmp slt i32 %x0, 42
  %c1 = icmp slt i32 %x1, 42
  %c2 = icmp slt i32 %x2, 42
  ; remove an element from the previous test
  %d0 = icmp sgt i32 %x0, 17
  %d1 = icmp sgt i32 %x1, 17
  %d2 = icmp sgt i32 %x2, 17
  %d3 = icmp sgt i32 %x3, 17
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  ; remove an element from the previous test
  %s4 = select i1 %s2, i1 %d0, i1 false
  %s5 = select i1 %s4, i1 %d1, i1 false
  %s6 = select i1 %s5, i1 %d2, i1 false
  %s7 = select i1 %s6, i1 %d3, i1 false
  ret i1 %s7
}

; Clamp pattern where one upper-bound compare (%c3) uses ult instead of slt.
; Expected: two 8-wide compares (sgt and ult) over rearranged operands,
; blended by a shuffle so only the last lane takes the ult result, then an
; 8-wide and-reduction.
define i1 @logical_and_icmp_clamp_pred_diff(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_clamp_pred_diff(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 42, i32 42, i32 42, i32 poison>, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 3>
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP2]], <4 x i32> [[X]], i64 0)
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> <i32 17, i32 17, i32 17, i32 17, i32 poison, i32 poison, i32 poison, i32 42>, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 15>
; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt <8 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult <8 x i32> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i1> [[TMP5]], <8 x i1> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
; CHECK-NEXT:    [[TMP8:%.*]] = freeze <8 x i1> [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP8]])
; CHECK-NEXT:    ret i1 [[TMP9]]
;
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %c0 = icmp slt i32 %x0, 42
  %c1 = icmp slt i32 %x1, 42
  %c2 = icmp slt i32 %x2, 42
  %c3 = icmp ult i32 %x3, 42 ; predicate changed
  %d0 = icmp sgt i32 %x0, 17
  %d1 = icmp sgt i32 %x1, 17
  %d2 = icmp sgt i32 %x2, 17
  %d3 = icmp sgt i32 %x3, 17
  %s1 = select i1 %c0, i1 %c1, i1 false
  %s2 = select i1 %s1, i1 %c2, i1 false
  %s3 = select i1 %s2, i1 %c3, i1 false
  %s4 = select i1 %s3, i1 %d0, i1 false
  %s5 = select i1 %s4, i1 %d1, i1 false
  %s6 = select i1 %s5, i1 %d2, i1 false
  %s7 = select i1 %s6, i1 %d3, i1 false
  ret i1 %s7
}

; Logical-and chain that starts from a scalar i1 argument (%c and-ed with
; itself) before the four lane compares.
; Expected: the compares are reduced and the result is combined with %c by a
; single select, with %c as the condition.
define i1 @logical_and_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
; CHECK-LABEL: @logical_and_icmp_extra_op(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[C:%.*]], i1 [[TMP3]], i1 false
; CHECK-NEXT:    ret i1 [[OP_RDX]]
;
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %y0 = extractelement <4 x i32> %y, i32 0
  %y1 = extractelement <4 x i32> %y, i32 1
  %y2 = extractelement <4 x i32> %y, i32 2
  %y3 = extractelement <4 x i32> %y, i32 3
  %d0 = icmp slt i32 %x0, %y0
  %d1 = icmp slt i32 %x1, %y1
  %d2 = icmp slt i32 %x2, %y2
  %d3 = icmp slt i32 %x3, %y3
  %s3 = select i1 %c, i1 %c, i1 false
  %s4 = select i1 %s3, i1 %d0, i1 false
  %s5 = select i1 %s4, i1 %d1, i1 false
  %s6 = select i1 %s5, i1 %d2, i1 false
  %s7 = select i1 %s6, i1 %d3, i1 false
  ret i1 %s7
}

; Logical-or counterpart of the previous case: the chain starts from the
; scalar i1 argument %c (or-ed with itself).
; Expected: an or-reduction of the compares combined with %c by one select.
define i1 @logical_or_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
; CHECK-LABEL: @logical_or_icmp_extra_op(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[C:%.*]], i1 true, i1 [[TMP3]]
; CHECK-NEXT:    ret i1 [[OP_RDX]]
;
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %y0 = extractelement <4 x i32> %y, i32 0
  %y1 = extractelement <4 x i32> %y, i32 1
  %y2 = extractelement <4 x i32> %y, i32 2
  %y3 = extractelement <4 x i32> %y, i32 3
  %d0 = icmp slt i32 %x0, %y0
  %d1 = icmp slt i32 %x1, %y1
  %d2 = icmp slt i32 %x2, %y2
  %d3 = icmp slt i32 %x3, %y3
  %s3 = select i1 %c, i1 true, i1 %c
  %s4 = select i1 %s3, i1 true, i1 %d0
  %s5 = select i1 %s4, i1 true, i1 %d1
  %s6 = select i1 %s5, i1 true, i1 %d2
  %s7 = select i1 %s6, i1 true, i1 %d3
  ret i1 %s7
}

; Scalar i1 arguments (%c0, %c1, %c2) are interleaved with the lane compares
; inside one logical-and chain.
; Expected: the four 'sgt 17' compares are reduced as a vector, while the
; scalar arguments are folded in with selects and freezes.
define i1 @logical_and_icmp_extra_args(<4 x i32> %x, i1 %c0, i1 %c1, i1 %c2) {
; CHECK-LABEL: @logical_and_icmp_extra_args(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], splat (i32 17)
; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 [[C0:%.*]], i1 false
; CHECK-NEXT:    [[TMP4:%.*]] = freeze i1 [[C1:%.*]]
; CHECK-NEXT:    [[OP_RDX1:%.*]] = select i1 [[TMP4]], i1 [[C2:%.*]], i1 false
; CHECK-NEXT:    [[TMP5:%.*]] = freeze i1 [[OP_RDX]]
; CHECK-NEXT:    [[OP_RDX2:%.*]] = select i1 [[TMP5]], i1 [[OP_RDX1]], i1 false
; CHECK-NEXT:    ret i1 [[OP_RDX2]]
;
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x2 = extractelement <4 x i32> %x, i32 2
  %x3 = extractelement <4 x i32> %x, i32 3
  %d0 = icmp sgt i32 %x0, 17
  %d1 = icmp sgt i32 %x1, 17
  %d2 = icmp sgt i32 %x2, 17
  %d3 = icmp sgt i32 %x3, 17
  %s1 = select i1 %d0, i1 %c0, i1 false ; <- d0, d1, d2, d3 gets reduced.
  %s2 = select i1 %s1, i1 %c1, i1 false ; <- c0, c1, c2 remain scalar.
  %s3 = select i1 %s2, i1 %c2, i1 false
  %s5 = select i1 %s3, i1 %d1, i1 false
  %s6 = select i1 %s5, i1 %d2, i1 false
  %s7 = select i1 %s6, i1 %d3, i1 false
  ret i1 %s7
}
