; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s

; Exercises SLP re-vectorization (-slp-revec): the scalar "elements" being
; bundled by the vectorizer are themselves fixed-width vector values.

; Four adjacent <4 x i32> load/add/store groups (element offsets 0, 4, 8, 12)
; are expected to merge into a single <16 x i32> load/add/store.
define void @test1(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[B:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = add <16 x i32> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    store <16 x i32> [[TMP2]], ptr [[C:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 4
  %arrayidx7 = getelementptr inbounds i32, ptr %a, i64 8
  %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 12
  %0 = load <4 x i32>, ptr %a, align 4
  %1 = load <4 x i32>, ptr %arrayidx3, align 4
  %2 = load <4 x i32>, ptr %arrayidx7, align 4
  %3 = load <4 x i32>, ptr %arrayidx11, align 4
  %arrayidx19 = getelementptr inbounds i32, ptr %b, i64 4
  %arrayidx23 = getelementptr inbounds i32, ptr %b, i64 8
  %arrayidx27 = getelementptr inbounds i32, ptr %b, i64 12
  %4 = load <4 x i32>, ptr %b, align 4
  %5 = load <4 x i32>, ptr %arrayidx19, align 4
  %6 = load <4 x i32>, ptr %arrayidx23, align 4
  %7 = load <4 x i32>, ptr %arrayidx27, align 4
  %add.i = add <4 x i32> %4, %0
  %add.i63 = add <4 x i32> %5, %1
  %add.i64 = add <4 x i32> %6, %2
  %add.i65 = add <4 x i32> %7, %3
  %arrayidx36 = getelementptr inbounds i32, ptr %c, i64 4
  %arrayidx39 = getelementptr inbounds i32, ptr %c, i64 8
  %arrayidx42 = getelementptr inbounds i32, ptr %c, i64 12
  store <4 x i32> %add.i, ptr %c, align 4
  store <4 x i32> %add.i63, ptr %arrayidx36, align 4
  store <4 x i32> %add.i64, ptr %arrayidx39, align 4
  store <4 x i32> %add.i65, ptr %arrayidx42, align 4
  ret void
}

; Two adjacent <8 x i16> llvm.sadd.sat calls are expected to merge into one
; <16 x i16> call.
define void @test2(ptr %in, ptr %out) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i16>, ptr [[IN:%.*]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> [[TMP0]], <16 x i16> [[TMP0]])
; CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[OUT:%.*]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %0 = getelementptr i16, ptr %in, i64 8
  %1 = load <8 x i16>, ptr %in, align 2
  %2 = load <8 x i16>, ptr %0, align 2
  %3 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %1, <8 x i16> %1)
  %4 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %2, <8 x i16> %2)
  %5 = getelementptr i16, ptr %out, i64 8
  store <8 x i16> %3, ptr %out, align 2
  store <8 x i16> %4, ptr %5, align 2
  ret void
}

; Vector selects driven by scalar i1 conditions: the two conditions are packed
; into <2 x i1> and widened to <8 x i1> by a shuffle in the expected output.
define void @test3(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x ptr> poison, ptr [[X:%.*]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> [[TMP0]], ptr [[Y:%.*]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x ptr> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[X]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[Y]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP3]], <8 x i32> [[TMP4]]
; CHECK-NEXT:    store <8 x i32> [[TMP6]], ptr [[Z:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %0 = getelementptr inbounds i32, ptr %x, i64 4
  %1 = getelementptr inbounds i32, ptr %y, i64 4
  %2 = load <4 x i32>, ptr %x, align 4
  %3 = load <4 x i32>, ptr %0, align 4
  %4 = load <4 x i32>, ptr %y, align 4
  %5 = load <4 x i32>, ptr %1, align 4
  %6 = icmp eq ptr %x, null
  %7 = icmp eq ptr %y, null
  %8 = select i1 %6, <4 x i32> %2, <4 x i32> %4
  %9 = select i1 %7, <4 x i32> %3, <4 x i32> %5
  %10 = getelementptr inbounds i32, ptr %z, i64 4
  store <4 x i32> %8, ptr %z, align 4
  store <4 x i32> %9, ptr %10, align 4
  ret void
}

; fmul/fadd/fcmp chains over <8 x float> with zeroinitializer operands; the
; <16 x i1> compare result is split back into two <8 x i1> stores.
define void @test4(ptr %in, ptr %out) {
; CHECK-LABEL: @test4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x float>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[TMP0]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 0)
; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP4]], <8 x float> zeroinitializer, i64 8)
; CHECK-NEXT:    [[TMP6:%.*]] = fmul <16 x float> [[TMP3]], [[TMP5]]
; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 8)
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT:    [[TMP10:%.*]] = fadd <16 x float> [[TMP9]], [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = fcmp ogt <16 x float> [[TMP10]], [[TMP5]]
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i1, ptr [[OUT:%.*]], i64 8
; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i1> @llvm.vector.extract.v8i1.v16i1(<16 x i1> [[TMP11]], i64 8)
; CHECK-NEXT:    store <8 x i1> [[TMP13]], ptr [[OUT]], align 1
; CHECK-NEXT:    [[TMP14:%.*]] = call <8 x i1> @llvm.vector.extract.v8i1.v16i1(<16 x i1> [[TMP11]], i64 0)
; CHECK-NEXT:    store <8 x i1> [[TMP14]], ptr [[TMP12]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <8 x float>, ptr %in, align 4
  %1 = fmul <8 x float> %0, zeroinitializer
  %2 = fmul <8 x float> %0, zeroinitializer
  %3 = fadd <8 x float> zeroinitializer, %1
  %4 = fadd <8 x float> %0, %2
  %5 = fcmp ogt <8 x float> %3, zeroinitializer
  %6 = fcmp ogt <8 x float> %4, zeroinitializer
  %7 = getelementptr i1, ptr %out, i64 8
  store <8 x i1> %5, ptr %out, align 1
  store <8 x i1> %6, ptr %7, align 1
  ret void
}

; icmp on <4 x ptr> values built with insertelement; the expected output is
; identical to the input.
define void @test5(ptr %ptr0, ptr %ptr1) {
; CHECK-LABEL: @test5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GETELEMENTPTR0:%.*]] = getelementptr i8, ptr null, i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x ptr> <ptr null, ptr null, ptr undef, ptr undef>, ptr [[GETELEMENTPTR0]], i32 2
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x ptr> [[TMP0]], ptr null, i32 3
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x ptr> zeroinitializer, [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x ptr> <ptr poison, ptr null, ptr null, ptr null>, ptr [[PTR0:%.*]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[PTR1:%.*]], i32 3
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult <4 x ptr> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    ret void
;
entry:
  %getelementptr0 = getelementptr i8, ptr null, i64 0
  %0 = insertelement <4 x ptr> <ptr null, ptr null, ptr undef, ptr undef>, ptr %getelementptr0, i32 2
  %1 = insertelement <4 x ptr> %0, ptr null, i32 3
  %2 = icmp ult <4 x ptr> zeroinitializer, %1
  %3 = insertelement <4 x ptr> <ptr poison, ptr null, ptr null, ptr null>, ptr %ptr0, i32 0
  %4 = insertelement <4 x ptr> %1, ptr %ptr1, i32 3
  %5 = icmp ult <4 x ptr> %3, %4
  ret void
}

; An or-chain over <4 x i1> and/icmp results; the expected output computes the
; chain with four llvm.vector.reduce.or calls over a <16 x i1> value.
define <4 x i1> @test6(ptr %in1, ptr %in2) {
; CHECK-LABEL: @test6(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[IN1:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[IN2:%.*]], align 2
; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> poison, <4 x i32> [[TMP0]], i64 0)
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> poison, <4 x i32> zeroinitializer, i64 0)
; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP7]], <4 x i32> zeroinitializer, i64 4)
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP8]], <4 x i32> zeroinitializer, i64 8)
; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP9]], <4 x i32> zeroinitializer, i64 12)
; CHECK-NEXT:    [[TMP11:%.*]] = icmp ugt <16 x i32> [[TMP6]], [[TMP10]]
; CHECK-NEXT:    [[TMP15:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> [[TMP1]], i64 0)
; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <16 x i16> [[TMP15]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP17:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> zeroinitializer, i64 0)
; CHECK-NEXT:    [[TMP18:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP17]], <4 x i16> zeroinitializer, i64 4)
; CHECK-NEXT:    [[TMP19:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP18]], <4 x i16> zeroinitializer, i64 8)
; CHECK-NEXT:    [[TMP20:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP19]], <4 x i16> zeroinitializer, i64 12)
; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq <16 x i16> [[TMP16]], [[TMP20]]
; CHECK-NEXT:    [[TMP22:%.*]] = and <16 x i1> [[TMP11]], [[TMP21]]
; CHECK-NEXT:    [[TMP23:%.*]] = icmp ugt <16 x i32> [[TMP6]], [[TMP10]]
; CHECK-NEXT:    [[TMP24:%.*]] = and <16 x i1> [[TMP22]], [[TMP23]]
; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <16 x i1> [[TMP24]], <16 x i1> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-NEXT:    [[TMP26:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP25]])
; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i1> poison, i1 [[TMP26]], i64 0
; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <16 x i1> [[TMP24]], <16 x i1> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
; CHECK-NEXT:    [[TMP29:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP28]])
; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <4 x i1> [[TMP27]], i1 [[TMP29]], i64 1
; CHECK-NEXT:    [[TMP31:%.*]] = shufflevector <16 x i1> [[TMP24]], <16 x i1> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
; CHECK-NEXT:    [[TMP32:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP31]])
; CHECK-NEXT:    [[TMP33:%.*]] = insertelement <4 x i1> [[TMP30]], i1 [[TMP32]], i64 2
; CHECK-NEXT:    [[TMP34:%.*]] = shufflevector <16 x i1> [[TMP24]], <16 x i1> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; CHECK-NEXT:    [[TMP35:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP34]])
; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i1> [[TMP33]], i1 [[TMP35]], i64 3
; CHECK-NEXT:    [[VBSL:%.*]] = select <4 x i1> [[TMP36]], <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>
; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <4 x i32> [[VBSL]], <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT:    ret <4 x i1> [[CMP]]
;
entry:
  %0 = load <4 x i32>, ptr %in1, align 4
  %1 = load <4 x i16>, ptr %in2, align 2
  %cmp000 = icmp ugt <4 x i32> %0, zeroinitializer
  %cmp001 = icmp ugt <4 x i32> %0, zeroinitializer
  %cmp002 = icmp ugt <4 x i32> %0, zeroinitializer
  %cmp003 = icmp ugt <4 x i32> %0, zeroinitializer
  %cmp100 = icmp eq <4 x i16> %1, zeroinitializer
  %cmp101 = icmp eq <4 x i16> %1, zeroinitializer
  %cmp102 = icmp eq <4 x i16> %1, zeroinitializer
  %cmp103 = icmp eq <4 x i16> %1, zeroinitializer
  %and.cmp0 = and <4 x i1> %cmp000, %cmp100
  %and.cmp1 = and <4 x i1> %cmp001, %cmp101
  %and.cmp2 = and <4 x i1> %cmp002, %cmp102
  %and.cmp3 = and <4 x i1> %cmp003, %cmp103
  %cmp004 = icmp ugt <4 x i32> %0, zeroinitializer
  %cmp005 = icmp ugt <4 x i32> %0, zeroinitializer
  %cmp006 = icmp ugt <4 x i32> %0, zeroinitializer
  %cmp007 = icmp ugt <4 x i32> %0, zeroinitializer
  %and.cmp4 = and <4 x i1> %and.cmp0, %cmp004
  %and.cmp5 = and <4 x i1> %and.cmp1, %cmp005
  %and.cmp6 = and <4 x i1> %and.cmp2, %cmp006
  %and.cmp7 = and <4 x i1> %and.cmp3, %cmp007
  %or0 = or <4 x i1> %and.cmp5, %and.cmp4
  %or1 = or <4 x i1> %or0, %and.cmp6
  %or2 = or <4 x i1> %or1, %and.cmp7
  %vbsl = select <4 x i1> %or2, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>
  %cmp = icmp ugt <4 x i32> %vbsl, <i32 2, i32 3, i32 4, i32 5>
  ret <4 x i1> %cmp
}

; Two trunc <8 x i64> -> <8 x i16> of zeroinitializer stored 16 bytes apart.
; The function body has no explicit entry label.
define void @test7() {
; CHECK-LABEL: @test7(
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v8i64(<16 x i64> poison, <8 x i64> zeroinitializer, i64 0)
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v8i64(<16 x i64> [[TMP1]], <8 x i64> zeroinitializer, i64 8)
; CHECK-NEXT:    [[TMP3:%.*]] = trunc <16 x i64> [[TMP2]] to <16 x i16>
; CHECK-NEXT:    store <16 x i16> [[TMP3]], ptr null, align 2
; CHECK-NEXT:    ret void
;
  %1 = getelementptr i8, ptr null, i64 16
  %2 = trunc <8 x i64> zeroinitializer to <8 x i16>
  store <8 x i16> %2, ptr %1, align 2
  %3 = trunc <8 x i64> zeroinitializer to <8 x i16>
  store <8 x i16> %3, ptr null, align 2
  ret void
}

; <2 x float> phis fed from a self-looping block; the expected output widens
; them to <8 x float> and <4 x float> phis.
define void @test8() {
; CHECK-LABEL: @test8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> poison, <2 x float> zeroinitializer, i64 0)
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP0]], <2 x float> zeroinitializer, i64 2)
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP1]], <2 x float> zeroinitializer, i64 4)
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP2]], <2 x float> zeroinitializer, i64 6)
; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0)
; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP4]], <2 x float> zeroinitializer, i64 2)
; CHECK-NEXT:    br i1 false, label [[FOR0:%.*]], label [[FOR_BODY:%.*]]
; CHECK:       for0:
; CHECK-NEXT:    [[TMP6:%.*]] = phi <8 x float> [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret void
; CHECK:       for.body:
; CHECK-NEXT:    [[TMP7:%.*]] = phi <4 x float> [ [[TMP7]], [[FOR_BODY]] ], [ [[TMP5]], [[ENTRY]] ]
; CHECK-NEXT:    [[TMP8]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    br i1 false, label [[FOR0]], label [[FOR_BODY]]
;
entry:
  br i1 false, label %for0, label %for.body

for0:
  %0 = phi <2 x float> [ zeroinitializer, %entry ], [ %4, %for.body ]
  %1 = phi <2 x float> [ zeroinitializer, %entry ], [ %5, %for.body ]
  %2 = phi <2 x float> [ zeroinitializer, %entry ], [ %4, %for.body ]
  %3 = phi <2 x float> [ zeroinitializer, %entry ], [ %5, %for.body ]
  ret void

for.body:
  %4 = phi <2 x float> [ %4, %for.body ], [ zeroinitializer, %entry ]
  %5 = phi <2 x float> [ %5, %for.body ], [ zeroinitializer, %entry ]
  br i1 false, label %for0, label %for.body
}

; Two sext <4 x i16> -> <4 x i32> stores inside an infinite loop; the expected
; output narrows through trunc to <8 x i1> followed by zext to <8 x i32>.
define void @test9() {
; CHECK-LABEL: @test9(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 0)
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP0]], <4 x i16> zeroinitializer, i64 4)
; CHECK-NEXT:    br label [[FOR_BODY13:%.*]]
; CHECK:       for.body13:
; CHECK-NEXT:    [[TMP2:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = zext <8 x i1> [[TMP2]] to <8 x i32>
; CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr null, align 4
; CHECK-NEXT:    br label [[FOR_BODY13]]
;
entry:
  br label %for.body13

for.body13:                                       ; preds = %for.body13, %entry
  %vmovl.i111 = sext <4 x i16> zeroinitializer to <4 x i32>
  %vmovl.i110 = sext <4 x i16> zeroinitializer to <4 x i32>
  store <4 x i32> %vmovl.i111, ptr null, align 4
  %add.ptr29 = getelementptr i8, ptr null, i64 16
  store <4 x i32> %vmovl.i110, ptr %add.ptr29, align 4
  br label %for.body13
}

; Two levels of half-extracting shufflevector + sext split a <16 x i8> load
; into four <4 x i32> stores at byte offsets 0, 16, 32 and 48.
define void @test10() {
; CHECK-LABEL: @test10(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr null, align 1
; CHECK-NEXT:    [[TMP2:%.*]] = call <32 x i8> @llvm.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> [[TMP0]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP5:%.*]] = sext <16 x i8> [[TMP4]] to <16 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP8:%.*]] = trunc <16 x i16> [[TMP7]] to <16 x i8>
; CHECK-NEXT:    [[TMP9:%.*]] = sext <16 x i8> [[TMP8]] to <16 x i32>
; CHECK-NEXT:    store <16 x i32> [[TMP9]], ptr null, align 4
; CHECK-NEXT:    ret void
;
entry:
  %0 = load <16 x i8>, ptr null, align 1
  %shuffle.i = shufflevector <16 x i8> %0, <16 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %shuffle.i107 = shufflevector <16 x i8> %0, <16 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vmovl.i106 = sext <8 x i8> %shuffle.i to <8 x i16>
  %vmovl.i = sext <8 x i8> %shuffle.i107 to <8 x i16>
  %shuffle.i113 = shufflevector <8 x i16> %vmovl.i106, <8 x i16> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shuffle.i115 = shufflevector <8 x i16> %vmovl.i106, <8 x i16> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i112 = shufflevector <8 x i16> %vmovl.i, <8 x i16> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shuffle.i114 = shufflevector <8 x i16> %vmovl.i, <8 x i16> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %vmovl.i111 = sext <4 x i16> %shuffle.i113 to <4 x i32>
  %vmovl.i110 = sext <4 x i16> %shuffle.i115 to <4 x i32>
  %vmovl.i109 = sext <4 x i16> %shuffle.i112 to <4 x i32>
  %vmovl.i108 = sext <4 x i16> %shuffle.i114 to <4 x i32>
  %add.ptr29 = getelementptr i8, ptr null, i64 16
  %add.ptr32 = getelementptr i8, ptr null, i64 32
  %add.ptr35 = getelementptr i8, ptr null, i64 48
  store <4 x i32> %vmovl.i111, ptr null, align 4
  store <4 x i32> %vmovl.i110, ptr %add.ptr29, align 4
  store <4 x i32> %vmovl.i109, ptr %add.ptr32, align 4
  store <4 x i32> %vmovl.i108, ptr %add.ptr35, align 4
  ret void
}

; trunc/urem/icmp chains on <2 x i8> whose results are unused; the truncated
; inputs come from an insertelement and an add on <2 x i64>.
define void @test11(<2 x i64> %0, i64 %1, <2 x i64> %2) {
; CHECK-LABEL: @test11(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i64> <i64 5, i64 0>, [[TMP2:%.*]]
; CHECK-NEXT:    [[TMP5:%.*]] = trunc <2 x i64> [[TMP4]] to <2 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i16> @llvm.vector.insert.v4i16.v2i16(<4 x i16> poison, <2 x i16> [[TMP5]], i64 0)
; CHECK-NEXT:    [[TMP7:%.*]] = trunc <2 x i64> [[TMP3]] to <2 x i16>
; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i16> @llvm.vector.insert.v4i16.v2i16(<4 x i16> [[TMP6]], <2 x i16> [[TMP7]], i64 2)
; CHECK-NEXT:    [[TMP9:%.*]] = trunc <4 x i16> [[TMP8]] to <4 x i8>
; CHECK-NEXT:    [[TMP10:%.*]] = call <4 x i8> @llvm.vector.insert.v4i8.v2i8(<4 x i8> poison, <2 x i8> zeroinitializer, i64 0)
; CHECK-NEXT:    [[TMP11:%.*]] = call <4 x i8> @llvm.vector.insert.v4i8.v2i8(<4 x i8> [[TMP10]], <2 x i8> zeroinitializer, i64 2)
; CHECK-NEXT:    [[TMP12:%.*]] = urem <4 x i8> [[TMP9]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne <4 x i8> [[TMP12]], [[TMP11]]
; CHECK-NEXT:    ret void
;
entry:
  %3 = insertelement <2 x i64> %0, i64 %1, i32 1
  %4 = add <2 x i64> <i64 5, i64 0>, %2
  %5 = trunc <2 x i64> %3 to <2 x i8>
  %6 = trunc <2 x i64> %4 to <2 x i8>
  %7 = urem <2 x i8> %5, zeroinitializer
  %8 = urem <2 x i8> %6, zeroinitializer
  %9 = icmp ne <2 x i8> %7, zeroinitializer
  %10 = icmp ne <2 x i8> %8, zeroinitializer
  ret void
}

; Four <8 x float> loads at float offsets 33, 41, 50 and 75 (only the first
; two are adjacent) feeding fpext/fadd/fptrunc/fcmp chains with unused results.
define void @test12() {
; CHECK-LABEL: @test12(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr null, i64 33
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr null, i64 50
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr null, i64 75
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x float>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> [[TMP4]], i64 0)
; CHECK-NEXT:    [[TMP7:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP6]], <8 x float> [[TMP3]], i64 8)
; CHECK-NEXT:    [[TMP8:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v16f32(<32 x float> [[TMP7]], <16 x float> [[TMP5]], i64 16)
; CHECK-NEXT:    [[TMP9:%.*]] = fpext <32 x float> [[TMP8]] to <32 x double>
; CHECK-NEXT:    [[TMP10:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> poison, <8 x double> zeroinitializer, i64 0)
; CHECK-NEXT:    [[TMP11:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP10]], <8 x double> zeroinitializer, i64 8)
; CHECK-NEXT:    [[TMP12:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP11]], <8 x double> zeroinitializer, i64 16)
; CHECK-NEXT:    [[TMP13:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP12]], <8 x double> zeroinitializer, i64 24)
; CHECK-NEXT:    [[TMP14:%.*]] = fadd <32 x double> [[TMP13]], [[TMP9]]
; CHECK-NEXT:    [[TMP15:%.*]] = fptrunc <32 x double> [[TMP14]] to <32 x float>
; CHECK-NEXT:    [[TMP16:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> zeroinitializer, i64 0)
; CHECK-NEXT:    [[TMP17:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP16]], <8 x float> zeroinitializer, i64 8)
; CHECK-NEXT:    [[TMP18:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP17]], <8 x float> zeroinitializer, i64 16)
; CHECK-NEXT:    [[TMP19:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP18]], <8 x float> zeroinitializer, i64 24)
; CHECK-NEXT:    [[TMP20:%.*]] = fcmp ogt <32 x float> [[TMP19]], [[TMP15]]
; CHECK-NEXT:    ret void
;
entry:
  %0 = getelementptr float, ptr null, i64 33
  %1 = getelementptr float, ptr null, i64 41
  %2 = getelementptr float, ptr null, i64 50
  %3 = getelementptr float, ptr null, i64 75
  %4 = load <8 x float>, ptr %0, align 4
  %5 = load <8 x float>, ptr %1, align 4
  %6 = load <8 x float>, ptr %2, align 4
  %7 = load <8 x float>, ptr %3, align 4
  %8 = fpext <8 x float> %4 to <8 x double>
  %9 = fpext <8 x float> %5 to <8 x double>
  %10 = fpext <8 x float> %6 to <8 x double>
  %11 = fpext <8 x float> %7 to <8 x double>
  %12 = fadd <8 x double> zeroinitializer, %8
  %13 = fadd <8 x double> zeroinitializer, %9
  %14 = fadd <8 x double> zeroinitializer, %10
  %15 = fadd <8 x double> zeroinitializer, %11
  %16 = fptrunc <8 x double> %12 to <8 x float>
  %17 = fptrunc <8 x double> %13 to <8 x float>
  %18 = fptrunc <8 x double> %14 to <8 x float>
  %19 = fptrunc <8 x double> %15 to <8 x float>
  %20 = fcmp ogt <8 x float> zeroinitializer, %16
  %21 = fcmp ogt <8 x float> zeroinitializer, %17
  %22 = fcmp ogt <8 x float> zeroinitializer, %18
  %23 = fcmp ogt <8 x float> zeroinitializer, %19
  ret void
}

; Half-extracting shufflevectors of an <8 x i32> argument feed single-entry
; phis; one phi and two of the shuffles are then stored.
define void @test13(<8 x i32> %0, ptr %out0, ptr %out1, ptr %out2) {
; CHECK-LABEL: @test13(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> poison, <8 x i32> [[TMP0:%.*]], i64 0)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT:%.*]]
; CHECK:       for.end.loopexit:
; CHECK-NEXT:    [[TMP5:%.*]] = phi <16 x i32> [ [[TMP4]], [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP5]], i64 12)
; CHECK-NEXT:    [[OR0:%.*]] = or <4 x i32> [[TMP6]], zeroinitializer
; CHECK-NEXT:    store <4 x i32> [[OR0]], ptr [[OUT0:%.*]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP4]], i64 0)
; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr [[OUT1:%.*]], align 4
; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP4]], i64 8)
; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr [[OUT2:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %1 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  br label %for.end.loopexit

for.end.loopexit:
  %phi0 = phi <4 x i32> [ %1, %entry ]
  %phi1 = phi <4 x i32> [ %2, %entry ]
  %phi2 = phi <4 x i32> [ %3, %entry ]
  %phi3 = phi <4 x i32> [ %4, %entry ]
  %or0 = or <4 x i32> %phi1, zeroinitializer
  store <4 x i32> %or0, ptr %out0, align 4
  store <4 x i32> %1, ptr %out1, align 4
  store <4 x i32> %4, ptr %out2, align 4
  ret void
}

; Same phi/shuffle shape as test13, but the shuffled values are
; sext <8 x i1> -> <8 x i16> results and nothing is stored.
define void @test14(<8 x i1> %0) {
; CHECK-LABEL: @test14(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v8i1(<16 x i1> poison, <8 x i1> [[TMP0:%.*]], i64 0)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i1> [[TMP1]], <16 x i1> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = sext <16 x i1> [[TMP2]] to <16 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    br label [[FOR_END_LOOPEXIT:%.*]]
; CHECK:       for.end.loopexit:
; CHECK-NEXT:    [[TMP7:%.*]] = phi <16 x i16> [ [[TMP6]], [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i16> @llvm.vector.extract.v4i16.v16i16(<16 x i16> [[TMP7]], i64 12)
; CHECK-NEXT:    [[OR0:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
; CHECK-NEXT:    ret void
;
entry:
  %sext0 = sext <8 x i1> %0 to <8 x i16>
  %sext1 = sext <8 x i1> %0 to <8 x i16>
  %1 = shufflevector <8 x i16> %sext0, <8 x i16> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = shufflevector <8 x i16> %sext0, <8 x i16> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = shufflevector <8 x i16> %sext1, <8 x i16> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = shufflevector <8 x i16> %sext1, <8 x i16> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  br label %for.end.loopexit

for.end.loopexit:
  %phi0 = phi <4 x i16> [ %1, %entry ]
  %phi1 = phi <4 x i16> [ %2, %entry ]
  %phi2 = phi <4 x i16> [ %3, %entry ]
  %phi3 = phi <4 x i16> [ %4, %entry ]
  %or0 = or <4 x i16> %phi1, zeroinitializer
  ret void
}