163081dc6SVolkan Keles; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2d6e7e477SRoman Lebedev; RUN: opt -o - -S -passes=load-store-vectorizer,dce %s | FileCheck %s 363081dc6SVolkan Keles 463081dc6SVolkan Keles; Make sure LoadStoreVectorizer vectorizes the loads below. 563081dc6SVolkan Keles; In order to prove that the vectorization is safe, it tries to 663081dc6SVolkan Keles; match nested adds and find an expression that adds a constant 763081dc6SVolkan Keles; value to an existing index and the result doesn't overflow. 863081dc6SVolkan Keles 963081dc6SVolkan Kelestarget triple = "x86_64--" 1063081dc6SVolkan Keles 11*ba1759c4SNikita Popovdefine void @ld_v4i8_add_nsw(i32 %v0, i32 %v1, ptr %src, ptr %dst) { 1263081dc6SVolkan Keles; CHECK-LABEL: @ld_v4i8_add_nsw( 1363081dc6SVolkan Keles; CHECK-NEXT: bb: 1463081dc6SVolkan Keles; CHECK-NEXT: [[TMP:%.*]] = add nsw i32 [[V0:%.*]], -1 1563081dc6SVolkan Keles; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[V1:%.*]], [[TMP]] 1663081dc6SVolkan Keles; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 17*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP2]] 18*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 1963081dc6SVolkan Keles; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0 2063081dc6SVolkan Keles; CHECK-NEXT: [[TMP82:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1 2163081dc6SVolkan Keles; CHECK-NEXT: [[TMP133:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2 2263081dc6SVolkan Keles; CHECK-NEXT: [[TMP184:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3 2363081dc6SVolkan Keles; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP41]], i32 0 2463081dc6SVolkan Keles; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP82]], i32 1 2563081dc6SVolkan Keles; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP133]], i32 2 2663081dc6SVolkan Keles; 
CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP184]], i32 3 27*ba1759c4SNikita Popov; CHECK-NEXT: store <4 x i8> [[TMP22]], ptr [[DST:%.*]] 2863081dc6SVolkan Keles; CHECK-NEXT: ret void 2963081dc6SVolkan Keles; 3063081dc6SVolkan Kelesbb: 3163081dc6SVolkan Keles %tmp = add nsw i32 %v0, -1 3263081dc6SVolkan Keles %tmp1 = add nsw i32 %v1, %tmp 3363081dc6SVolkan Keles %tmp2 = sext i32 %tmp1 to i64 34*ba1759c4SNikita Popov %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2 35*ba1759c4SNikita Popov %tmp4 = load i8, ptr %tmp3, align 1 3663081dc6SVolkan Keles %tmp5 = add nsw i32 %v1, %v0 3763081dc6SVolkan Keles %tmp6 = sext i32 %tmp5 to i64 38*ba1759c4SNikita Popov %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6 39*ba1759c4SNikita Popov %tmp8 = load i8, ptr %tmp7, align 1 4063081dc6SVolkan Keles %tmp9 = add nsw i32 %v0, 1 4163081dc6SVolkan Keles %tmp10 = add nsw i32 %v1, %tmp9 4263081dc6SVolkan Keles %tmp11 = sext i32 %tmp10 to i64 43*ba1759c4SNikita Popov %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11 44*ba1759c4SNikita Popov %tmp13 = load i8, ptr %tmp12, align 1 4563081dc6SVolkan Keles %tmp14 = add nsw i32 %v0, 2 4663081dc6SVolkan Keles %tmp15 = add nsw i32 %v1, %tmp14 4763081dc6SVolkan Keles %tmp16 = sext i32 %tmp15 to i64 48*ba1759c4SNikita Popov %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16 49*ba1759c4SNikita Popov %tmp18 = load i8, ptr %tmp17, align 1 5063081dc6SVolkan Keles %tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0 5163081dc6SVolkan Keles %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1 5263081dc6SVolkan Keles %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2 5363081dc6SVolkan Keles %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3 54*ba1759c4SNikita Popov store <4 x i8> %tmp22, ptr %dst 5563081dc6SVolkan Keles ret void 5663081dc6SVolkan Keles} 5763081dc6SVolkan Keles 5811996586SSlava Nikolaev; Apply different operand orders for the nested add sequences 59*ba1759c4SNikita 
Popovdefine void @ld_v4i8_add_nsw_operand_orders(i32 %v0, i32 %v1, ptr %src, ptr %dst) { 6011996586SSlava Nikolaev; CHECK-LABEL: @ld_v4i8_add_nsw_operand_orders( 6111996586SSlava Nikolaev; CHECK-NEXT: bb: 6211996586SSlava Nikolaev; CHECK-NEXT: [[TMP:%.*]] = add nsw i32 [[V0:%.*]], -1 6311996586SSlava Nikolaev; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[V1:%.*]], [[TMP]] 6411996586SSlava Nikolaev; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 65*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP2]] 66*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 6711996586SSlava Nikolaev; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0 6811996586SSlava Nikolaev; CHECK-NEXT: [[TMP82:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1 6911996586SSlava Nikolaev; CHECK-NEXT: [[TMP133:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2 7011996586SSlava Nikolaev; CHECK-NEXT: [[TMP184:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3 7111996586SSlava Nikolaev; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP41]], i32 0 7211996586SSlava Nikolaev; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP82]], i32 1 7311996586SSlava Nikolaev; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP133]], i32 2 7411996586SSlava Nikolaev; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP184]], i32 3 75*ba1759c4SNikita Popov; CHECK-NEXT: store <4 x i8> [[TMP22]], ptr [[DST:%.*]] 7611996586SSlava Nikolaev; CHECK-NEXT: ret void 7711996586SSlava Nikolaev; 7811996586SSlava Nikolaevbb: 7911996586SSlava Nikolaev %tmp = add nsw i32 %v0, -1 8011996586SSlava Nikolaev %tmp1 = add nsw i32 %v1, %tmp 8111996586SSlava Nikolaev %tmp2 = sext i32 %tmp1 to i64 82*ba1759c4SNikita Popov %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2 83*ba1759c4SNikita Popov %tmp4 = load i8, ptr %tmp3, align 1 8411996586SSlava Nikolaev %tmp5 = add nsw 
i32 %v0, %v1 8511996586SSlava Nikolaev %tmp6 = sext i32 %tmp5 to i64 86*ba1759c4SNikita Popov %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6 87*ba1759c4SNikita Popov %tmp8 = load i8, ptr %tmp7, align 1 8811996586SSlava Nikolaev %tmp9 = add nsw i32 %v0, 1 8911996586SSlava Nikolaev %tmp10 = add nsw i32 %tmp9, %v1 9011996586SSlava Nikolaev %tmp11 = sext i32 %tmp10 to i64 91*ba1759c4SNikita Popov %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11 92*ba1759c4SNikita Popov %tmp13 = load i8, ptr %tmp12, align 1 9311996586SSlava Nikolaev %tmp14 = add nsw i32 %v0, 2 9411996586SSlava Nikolaev %tmp15 = add nsw i32 %v1, %tmp14 9511996586SSlava Nikolaev %tmp16 = sext i32 %tmp15 to i64 96*ba1759c4SNikita Popov %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16 97*ba1759c4SNikita Popov %tmp18 = load i8, ptr %tmp17, align 1 9811996586SSlava Nikolaev %tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0 9911996586SSlava Nikolaev %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1 10011996586SSlava Nikolaev %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2 10111996586SSlava Nikolaev %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3 102*ba1759c4SNikita Popov store <4 x i8> %tmp22, ptr %dst 10311996586SSlava Nikolaev ret void 10411996586SSlava Nikolaev} 10511996586SSlava Nikolaev 106*ba1759c4SNikita Popovdefine void @ld_v4i8_add_known_bits(i32 %ind0, i32 %ind1, ptr %src, ptr %dst) { 10795427210SJustin Bogner; CHECK-LABEL: @ld_v4i8_add_known_bits( 10895427210SJustin Bogner; CHECK-NEXT: bb: 10995427210SJustin Bogner; CHECK-NEXT: [[V0:%.*]] = mul i32 [[IND0:%.*]], 4 11095427210SJustin Bogner; CHECK-NEXT: [[V1:%.*]] = mul i32 [[IND1:%.*]], 4 11195427210SJustin Bogner; CHECK-NEXT: [[TMP:%.*]] = add i32 [[V0]], -1 11295427210SJustin Bogner; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[V1]], [[TMP]] 11395427210SJustin Bogner; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 114*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr 
[[SRC:%.*]], i64 [[TMP2]] 115*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1 11695427210SJustin Bogner; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[V1]], [[V0]] 11795427210SJustin Bogner; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 118*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP6]] 119*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP1:%.*]] = load <3 x i8>, ptr [[TMP7]], align 1 12095427210SJustin Bogner; CHECK-NEXT: [[TMP81:%.*]] = extractelement <3 x i8> [[TMP1]], i32 0 12195427210SJustin Bogner; CHECK-NEXT: [[TMP132:%.*]] = extractelement <3 x i8> [[TMP1]], i32 1 12295427210SJustin Bogner; CHECK-NEXT: [[TMP183:%.*]] = extractelement <3 x i8> [[TMP1]], i32 2 12395427210SJustin Bogner; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP4]], i32 0 12495427210SJustin Bogner; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP81]], i32 1 12595427210SJustin Bogner; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP132]], i32 2 12695427210SJustin Bogner; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP183]], i32 3 127*ba1759c4SNikita Popov; CHECK-NEXT: store <4 x i8> [[TMP22]], ptr [[DST:%.*]] 12895427210SJustin Bogner; CHECK-NEXT: ret void 12995427210SJustin Bogner; 13095427210SJustin Bognerbb: 13195427210SJustin Bogner %v0 = mul i32 %ind0, 4 13295427210SJustin Bogner %v1 = mul i32 %ind1, 4 13395427210SJustin Bogner %tmp = add i32 %v0, -1 13495427210SJustin Bogner %tmp1 = add i32 %v1, %tmp 13595427210SJustin Bogner %tmp2 = sext i32 %tmp1 to i64 136*ba1759c4SNikita Popov %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2 137*ba1759c4SNikita Popov %tmp4 = load i8, ptr %tmp3, align 1 13895427210SJustin Bogner %tmp5 = add i32 %v1, %v0 13995427210SJustin Bogner %tmp6 = sext i32 %tmp5 to i64 140*ba1759c4SNikita Popov %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6 141*ba1759c4SNikita Popov %tmp8 = load i8, 
ptr %tmp7, align 1 14295427210SJustin Bogner %tmp9 = add i32 %v0, 1 14395427210SJustin Bogner %tmp10 = add i32 %v1, %tmp9 14495427210SJustin Bogner %tmp11 = sext i32 %tmp10 to i64 145*ba1759c4SNikita Popov %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11 146*ba1759c4SNikita Popov %tmp13 = load i8, ptr %tmp12, align 1 14795427210SJustin Bogner %tmp14 = add i32 %v0, 2 14895427210SJustin Bogner %tmp15 = add i32 %v1, %tmp14 14995427210SJustin Bogner %tmp16 = sext i32 %tmp15 to i64 150*ba1759c4SNikita Popov %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16 151*ba1759c4SNikita Popov %tmp18 = load i8, ptr %tmp17, align 1 15295427210SJustin Bogner %tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0 15395427210SJustin Bogner %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1 15495427210SJustin Bogner %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2 15595427210SJustin Bogner %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3 156*ba1759c4SNikita Popov store <4 x i8> %tmp22, ptr %dst 15795427210SJustin Bogner ret void 15895427210SJustin Bogner} 15995427210SJustin Bogner 160*ba1759c4SNikita Popovdefine void @ld_v4i8_add_known_bits1(i32 %ind0, i32 %ind1, ptr %src, ptr %dst) { 16195427210SJustin Bogner; CHECK-LABEL: @ld_v4i8_add_known_bits1( 16295427210SJustin Bogner; CHECK-NEXT: bb: 16395427210SJustin Bogner; CHECK-NEXT: [[V0:%.*]] = mul i32 [[IND0:%.*]], 4 16495427210SJustin Bogner; CHECK-NEXT: [[V1:%.*]] = mul i32 [[IND1:%.*]], 4 16595427210SJustin Bogner; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[V1]], [[V0]] 16695427210SJustin Bogner; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 167*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP6]] 168*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1 16995427210SJustin Bogner; CHECK-NEXT: [[TMP81:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0 17095427210SJustin Bogner; CHECK-NEXT: [[TMP132:%.*]] = 
extractelement <4 x i8> [[TMP1]], i32 1 17195427210SJustin Bogner; CHECK-NEXT: [[TMP183:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2 17295427210SJustin Bogner; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3 17395427210SJustin Bogner; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP44]], i32 0 17495427210SJustin Bogner; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP81]], i32 1 17595427210SJustin Bogner; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP132]], i32 2 17695427210SJustin Bogner; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP183]], i32 3 177*ba1759c4SNikita Popov; CHECK-NEXT: store <4 x i8> [[TMP22]], ptr [[DST:%.*]] 17895427210SJustin Bogner; CHECK-NEXT: ret void 17995427210SJustin Bogner; 18095427210SJustin Bognerbb: 18195427210SJustin Bogner %v0 = mul i32 %ind0, 4 18295427210SJustin Bogner %v1 = mul i32 %ind1, 4 18395427210SJustin Bogner %tmp = add i32 %v0, 3 18495427210SJustin Bogner %tmp1 = add i32 %v1, %tmp 18595427210SJustin Bogner %tmp2 = sext i32 %tmp1 to i64 186*ba1759c4SNikita Popov %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2 187*ba1759c4SNikita Popov %tmp4 = load i8, ptr %tmp3, align 1 18895427210SJustin Bogner %tmp5 = add i32 %v1, %v0 18995427210SJustin Bogner %tmp6 = sext i32 %tmp5 to i64 190*ba1759c4SNikita Popov %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6 191*ba1759c4SNikita Popov %tmp8 = load i8, ptr %tmp7, align 1 19295427210SJustin Bogner %tmp9 = add i32 %v0, 1 19395427210SJustin Bogner %tmp10 = add i32 %v1, %tmp9 19495427210SJustin Bogner %tmp11 = sext i32 %tmp10 to i64 195*ba1759c4SNikita Popov %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11 196*ba1759c4SNikita Popov %tmp13 = load i8, ptr %tmp12, align 1 19795427210SJustin Bogner %tmp14 = add i32 %v0, 2 19895427210SJustin Bogner %tmp15 = add i32 %v1, %tmp14 19995427210SJustin Bogner %tmp16 = sext i32 %tmp15 to i64 200*ba1759c4SNikita Popov 
%tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16 201*ba1759c4SNikita Popov %tmp18 = load i8, ptr %tmp17, align 1 20295427210SJustin Bogner %tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0 20395427210SJustin Bogner %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1 20495427210SJustin Bogner %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2 20595427210SJustin Bogner %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3 206*ba1759c4SNikita Popov store <4 x i8> %tmp22, ptr %dst 20795427210SJustin Bogner ret void 20895427210SJustin Bogner} 20995427210SJustin Bogner 210*ba1759c4SNikita Popovdefine void @ld_v4i8_add_known_bits_by_assume(i32 %ind0, i32 %ind1, ptr %src, ptr %dst) { 21195427210SJustin Bogner; CHECK-LABEL: @ld_v4i8_add_known_bits_by_assume( 21295427210SJustin Bogner; CHECK-NEXT: bb: 21395427210SJustin Bogner; CHECK-NEXT: [[V0:%.*]] = mul i32 [[IND0:%.*]], 3 21495427210SJustin Bogner; CHECK-NEXT: [[V1:%.*]] = mul i32 [[IND1:%.*]], 3 21595427210SJustin Bogner; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[V0]], 3 21695427210SJustin Bogner; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[AND_I]], 0 21795427210SJustin Bogner; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[V1]], 3 21895427210SJustin Bogner; CHECK-NEXT: [[CMP_I_1:%.*]] = icmp eq i32 [[AND_I_1]], 0 21995427210SJustin Bogner; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_I]]) 22095427210SJustin Bogner; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_I_1]]) 22195427210SJustin Bogner; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[V1]], [[V0]] 22295427210SJustin Bogner; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 223*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP6]] 224*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1 22595427210SJustin Bogner; CHECK-NEXT: [[TMP81:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0 22695427210SJustin Bogner; CHECK-NEXT: [[TMP132:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1 
22795427210SJustin Bogner; CHECK-NEXT: [[TMP183:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2 22895427210SJustin Bogner; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3 22995427210SJustin Bogner; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP44]], i32 0 23095427210SJustin Bogner; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP81]], i32 1 23195427210SJustin Bogner; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP132]], i32 2 23295427210SJustin Bogner; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP183]], i32 3 233*ba1759c4SNikita Popov; CHECK-NEXT: store <4 x i8> [[TMP22]], ptr [[DST:%.*]] 23495427210SJustin Bogner; CHECK-NEXT: ret void 23595427210SJustin Bogner; 23695427210SJustin Bognerbb: 23795427210SJustin Bogner %v0 = mul i32 %ind0, 3 23895427210SJustin Bogner %v1 = mul i32 %ind1, 3 23995427210SJustin Bogner %and.i = and i32 %v0, 3 24095427210SJustin Bogner %cmp.i = icmp eq i32 %and.i, 0 24195427210SJustin Bogner %and.i.1 = and i32 %v1, 3 24295427210SJustin Bogner %cmp.i.1 = icmp eq i32 %and.i.1, 0 24395427210SJustin Bogner call void @llvm.assume(i1 %cmp.i) 24495427210SJustin Bogner call void @llvm.assume(i1 %cmp.i.1) 24595427210SJustin Bogner %tmp = add i32 %v0, 3 24695427210SJustin Bogner %tmp1 = add i32 %v1, %tmp 24795427210SJustin Bogner %tmp2 = sext i32 %tmp1 to i64 248*ba1759c4SNikita Popov %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2 249*ba1759c4SNikita Popov %tmp4 = load i8, ptr %tmp3, align 1 25095427210SJustin Bogner %tmp5 = add i32 %v1, %v0 25195427210SJustin Bogner %tmp6 = sext i32 %tmp5 to i64 252*ba1759c4SNikita Popov %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6 253*ba1759c4SNikita Popov %tmp8 = load i8, ptr %tmp7, align 1 25495427210SJustin Bogner %tmp9 = add i32 %v0, 1 25595427210SJustin Bogner %tmp10 = add i32 %v1, %tmp9 25695427210SJustin Bogner %tmp11 = sext i32 %tmp10 to i64 257*ba1759c4SNikita Popov %tmp12 = 
getelementptr inbounds i8, ptr %src, i64 %tmp11 258*ba1759c4SNikita Popov %tmp13 = load i8, ptr %tmp12, align 1 25995427210SJustin Bogner %tmp14 = add i32 %v0, 2 26095427210SJustin Bogner %tmp15 = add i32 %v1, %tmp14 26195427210SJustin Bogner %tmp16 = sext i32 %tmp15 to i64 262*ba1759c4SNikita Popov %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16 263*ba1759c4SNikita Popov %tmp18 = load i8, ptr %tmp17, align 1 26495427210SJustin Bogner %tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0 26595427210SJustin Bogner %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1 26695427210SJustin Bogner %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2 26795427210SJustin Bogner %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3 268*ba1759c4SNikita Popov store <4 x i8> %tmp22, ptr %dst 26995427210SJustin Bogner ret void 27095427210SJustin Bogner} 27195427210SJustin Bogner 272e7d26aceSJustin Bognerdeclare void @llvm.assume(i1) 273e7d26aceSJustin Bogner 274*ba1759c4SNikita Popovdefine void @ld_v4i8_add_assume_on_arg(i32 %v0, i32 %v1, ptr %src, ptr %dst) { 275e7d26aceSJustin Bogner; CHECK-LABEL: @ld_v4i8_add_assume_on_arg( 276e7d26aceSJustin Bogner; CHECK-NEXT: bb: 277e7d26aceSJustin Bogner; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[V0:%.*]], 3 278e7d26aceSJustin Bogner; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[AND_I]], 0 279e7d26aceSJustin Bogner; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[V1:%.*]], 3 280e7d26aceSJustin Bogner; CHECK-NEXT: [[CMP_I_1:%.*]] = icmp eq i32 [[AND_I_1]], 0 281e7d26aceSJustin Bogner; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_I]]) 282e7d26aceSJustin Bogner; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_I_1]]) 283e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP:%.*]] = add nsw i32 [[V0]], -1 284e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[V1]], [[TMP]] 285e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 286*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], 
i64 [[TMP2]] 287*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1 288e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[V1]], [[V0]] 289e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 290*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP6]] 291*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP1:%.*]] = load <3 x i8>, ptr [[TMP7]], align 1 292e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP81:%.*]] = extractelement <3 x i8> [[TMP1]], i32 0 293e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP132:%.*]] = extractelement <3 x i8> [[TMP1]], i32 1 294e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP183:%.*]] = extractelement <3 x i8> [[TMP1]], i32 2 295e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP4]], i32 0 296e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP81]], i32 1 297e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP132]], i32 2 298e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP183]], i32 3 299*ba1759c4SNikita Popov; CHECK-NEXT: store <4 x i8> [[TMP22]], ptr [[DST:%.*]] 300e7d26aceSJustin Bogner; CHECK-NEXT: ret void 301e7d26aceSJustin Bogner; 302e7d26aceSJustin Bognerbb: 303e7d26aceSJustin Bogner %and.i = and i32 %v0, 3 304e7d26aceSJustin Bogner %cmp.i = icmp eq i32 %and.i, 0 305e7d26aceSJustin Bogner %and.i.1 = and i32 %v1, 3 306e7d26aceSJustin Bogner %cmp.i.1 = icmp eq i32 %and.i.1, 0 307e7d26aceSJustin Bogner call void @llvm.assume(i1 %cmp.i) 308e7d26aceSJustin Bogner call void @llvm.assume(i1 %cmp.i.1) 309e7d26aceSJustin Bogner %tmp = add nsw i32 %v0, -1 310e7d26aceSJustin Bogner %tmp1 = add i32 %v1, %tmp 311e7d26aceSJustin Bogner %tmp2 = sext i32 %tmp1 to i64 312*ba1759c4SNikita Popov %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2 313*ba1759c4SNikita Popov %tmp4 = load i8, ptr %tmp3, 
align 1 314e7d26aceSJustin Bogner %tmp5 = add i32 %v1, %v0 315e7d26aceSJustin Bogner %tmp6 = sext i32 %tmp5 to i64 316*ba1759c4SNikita Popov %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6 317*ba1759c4SNikita Popov %tmp8 = load i8, ptr %tmp7, align 1 318e7d26aceSJustin Bogner %tmp9 = add nsw i32 %v0, 1 319e7d26aceSJustin Bogner %tmp10 = add i32 %v1, %tmp9 320e7d26aceSJustin Bogner %tmp11 = sext i32 %tmp10 to i64 321*ba1759c4SNikita Popov %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11 322*ba1759c4SNikita Popov %tmp13 = load i8, ptr %tmp12, align 1 323e7d26aceSJustin Bogner %tmp14 = add nsw i32 %v0, 2 324e7d26aceSJustin Bogner %tmp15 = add i32 %v1, %tmp14 325e7d26aceSJustin Bogner %tmp16 = sext i32 %tmp15 to i64 326*ba1759c4SNikita Popov %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16 327*ba1759c4SNikita Popov %tmp18 = load i8, ptr %tmp17, align 1 328e7d26aceSJustin Bogner %tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0 329e7d26aceSJustin Bogner %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1 330e7d26aceSJustin Bogner %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2 331e7d26aceSJustin Bogner %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3 332*ba1759c4SNikita Popov store <4 x i8> %tmp22, ptr %dst 333e7d26aceSJustin Bogner ret void 334e7d26aceSJustin Bogner} 335e7d26aceSJustin Bogner 336*ba1759c4SNikita Popovdefine void @ld_v4i8_add_assume_on_arg1(i32 %v0, i32 %v1, ptr %src, ptr %dst) { 337e7d26aceSJustin Bogner; CHECK-LABEL: @ld_v4i8_add_assume_on_arg1( 338e7d26aceSJustin Bogner; CHECK-NEXT: bb: 339e7d26aceSJustin Bogner; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[V0:%.*]], 3 340e7d26aceSJustin Bogner; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[AND_I]], 0 341e7d26aceSJustin Bogner; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[V1:%.*]], 3 342e7d26aceSJustin Bogner; CHECK-NEXT: [[CMP_I_1:%.*]] = icmp eq i32 [[AND_I_1]], 0 343e7d26aceSJustin Bogner; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_I]]) 344e7d26aceSJustin Bogner; 
CHECK-NEXT: call void @llvm.assume(i1 [[CMP_I_1]]) 345e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[V1]], [[V0]] 346e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 347*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP6]] 348*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1 349e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP81:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0 350e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP132:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1 351e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP183:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2 352e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3 353e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP44]], i32 0 354e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP81]], i32 1 355e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP132]], i32 2 356e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP183]], i32 3 357*ba1759c4SNikita Popov; CHECK-NEXT: store <4 x i8> [[TMP22]], ptr [[DST:%.*]] 358e7d26aceSJustin Bogner; CHECK-NEXT: ret void 359e7d26aceSJustin Bogner; 360e7d26aceSJustin Bognerbb: 361e7d26aceSJustin Bogner %and.i = and i32 %v0, 3 362e7d26aceSJustin Bogner %cmp.i = icmp eq i32 %and.i, 0 363e7d26aceSJustin Bogner %and.i.1 = and i32 %v1, 3 364e7d26aceSJustin Bogner %cmp.i.1 = icmp eq i32 %and.i.1, 0 365e7d26aceSJustin Bogner call void @llvm.assume(i1 %cmp.i) 366e7d26aceSJustin Bogner call void @llvm.assume(i1 %cmp.i.1) 367e7d26aceSJustin Bogner %tmp = add nsw i32 %v0, 3 368e7d26aceSJustin Bogner %tmp1 = add i32 %v1, %tmp 369e7d26aceSJustin Bogner %tmp2 = sext i32 %tmp1 to i64 370*ba1759c4SNikita Popov %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2 
371*ba1759c4SNikita Popov %tmp4 = load i8, ptr %tmp3, align 1 372e7d26aceSJustin Bogner %tmp5 = add i32 %v1, %v0 373e7d26aceSJustin Bogner %tmp6 = sext i32 %tmp5 to i64 374*ba1759c4SNikita Popov %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6 375*ba1759c4SNikita Popov %tmp8 = load i8, ptr %tmp7, align 1 376e7d26aceSJustin Bogner %tmp9 = add nsw i32 %v0, 1 377e7d26aceSJustin Bogner %tmp10 = add i32 %v1, %tmp9 378e7d26aceSJustin Bogner %tmp11 = sext i32 %tmp10 to i64 379*ba1759c4SNikita Popov %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11 380*ba1759c4SNikita Popov %tmp13 = load i8, ptr %tmp12, align 1 381e7d26aceSJustin Bogner %tmp14 = add nsw i32 %v0, 2 382e7d26aceSJustin Bogner %tmp15 = add i32 %v1, %tmp14 383e7d26aceSJustin Bogner %tmp16 = sext i32 %tmp15 to i64 384*ba1759c4SNikita Popov %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16 385*ba1759c4SNikita Popov %tmp18 = load i8, ptr %tmp17, align 1 386e7d26aceSJustin Bogner %tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0 387e7d26aceSJustin Bogner %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1 388e7d26aceSJustin Bogner %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2 389e7d26aceSJustin Bogner %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3 390*ba1759c4SNikita Popov store <4 x i8> %tmp22, ptr %dst 391e7d26aceSJustin Bogner ret void 392e7d26aceSJustin Bogner} 393e7d26aceSJustin Bogner 394e7d26aceSJustin Bogner; Address computations are partly separated by control flow and with llvm.assume placed 395e7d26aceSJustin Bogner; in the second basic block 396e7d26aceSJustin Bogner 397*ba1759c4SNikita Popovdefine void @ld_v2i8_add_different_contexts(i32 %ind0, i32 %ind1, ptr %src, ptr %dst) { 398e7d26aceSJustin Bogner; CHECK-LABEL: @ld_v2i8_add_different_contexts( 399e7d26aceSJustin Bogner; CHECK-NEXT: bb: 400e7d26aceSJustin Bogner; CHECK-NEXT: [[V0:%.*]] = mul i32 [[IND0:%.*]], 4 401e7d26aceSJustin Bogner; CHECK-NEXT: [[V1:%.*]] = mul i32 [[IND1:%.*]], 3 
402e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[V1]], [[V0]] 403e7d26aceSJustin Bogner; CHECK-NEXT: [[BIT_COND:%.*]] = icmp eq i32 [[V1]], 0 404e7d26aceSJustin Bogner; CHECK-NEXT: br i1 [[BIT_COND]], label [[BB_LOADS:%.*]], label [[BB_SKIP:%.*]] 405e7d26aceSJustin Bogner; CHECK: bb.loads: 406e7d26aceSJustin Bogner; CHECK-NEXT: call void @llvm.assume(i1 [[BIT_COND]]) 407e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 408*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP6]] 409*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[TMP7]], align 1 410e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP81:%.*]] = extractelement <2 x i8> [[TMP1]], i32 0 411e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i8> [[TMP1]], i32 1 412e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i8> undef, i8 [[TMP42]], i32 0 413e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i8> [[TMP19]], i8 [[TMP81]], i32 1 414*ba1759c4SNikita Popov; CHECK-NEXT: store <2 x i8> [[TMP20]], ptr [[DST:%.*]] 415e7d26aceSJustin Bogner; CHECK-NEXT: br label [[BB_SKIP]] 416e7d26aceSJustin Bogner; CHECK: bb.skip: 417e7d26aceSJustin Bogner; CHECK-NEXT: ret void 418e7d26aceSJustin Bogner; 419e7d26aceSJustin Bognerbb: 420e7d26aceSJustin Bogner %v0 = mul i32 %ind0, 4 421e7d26aceSJustin Bogner %v1 = mul i32 %ind1, 3 422e7d26aceSJustin Bogner %tmp5 = add i32 %v1, %v0 423e7d26aceSJustin Bogner %bit_cond = icmp eq i32 %v1, 0 424e7d26aceSJustin Bogner br i1 %bit_cond, label %bb.loads, label %bb.skip 425e7d26aceSJustin Bogner 426e7d26aceSJustin Bognerbb.loads: 427e7d26aceSJustin Bogner call void @llvm.assume(i1 %bit_cond) 428e7d26aceSJustin Bogner %tmp = add nsw i32 %v0, 1 429e7d26aceSJustin Bogner %tmp1 = add i32 %v1, %tmp 430e7d26aceSJustin Bogner %tmp2 = sext i32 %tmp1 to i64 431*ba1759c4SNikita Popov %tmp3 = getelementptr inbounds i8, ptr %src, i64 
%tmp2 432*ba1759c4SNikita Popov %tmp4 = load i8, ptr %tmp3, align 1 433e7d26aceSJustin Bogner %tmp6 = sext i32 %tmp5 to i64 434*ba1759c4SNikita Popov %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6 435*ba1759c4SNikita Popov %tmp8 = load i8, ptr %tmp7, align 1 436e7d26aceSJustin Bogner %tmp19 = insertelement <2 x i8> undef, i8 %tmp4, i32 0 437e7d26aceSJustin Bogner %tmp20 = insertelement <2 x i8> %tmp19, i8 %tmp8, i32 1 438*ba1759c4SNikita Popov store <2 x i8> %tmp20, ptr %dst 439e7d26aceSJustin Bogner br label %bb.skip 440e7d26aceSJustin Bogner 441e7d26aceSJustin Bognerbb.skip: 442e7d26aceSJustin Bogner ret void 443e7d26aceSJustin Bogner} 444e7d26aceSJustin Bogner 445e7d26aceSJustin Bogner; Same as ld_v2i8_add_different_contexts but with llvm.assume placed between loads 446e7d26aceSJustin Bogner 447*ba1759c4SNikita Popovdefine void @ld_v2i8_add_different_contexts1(i32 %ind0, i32 %ind1, ptr %src, ptr %dst) { 448e7d26aceSJustin Bogner; CHECK-LABEL: @ld_v2i8_add_different_contexts1( 449e7d26aceSJustin Bogner; CHECK-NEXT: bb: 450e7d26aceSJustin Bogner; CHECK-NEXT: [[V0:%.*]] = mul i32 [[IND0:%.*]], 4 451e7d26aceSJustin Bogner; CHECK-NEXT: [[V1:%.*]] = mul i32 [[IND1:%.*]], 3 452e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[V1]], [[V0]] 453e7d26aceSJustin Bogner; CHECK-NEXT: [[BIT_COND:%.*]] = icmp eq i32 [[V1]], 0 454e7d26aceSJustin Bogner; CHECK-NEXT: br i1 [[BIT_COND]], label [[BB_LOADS:%.*]], label [[BB_SKIP:%.*]] 455e7d26aceSJustin Bogner; CHECK: bb.loads: 456e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 457*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP6]] 458*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[TMP7]], align 1 459e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP81:%.*]] = extractelement <2 x i8> [[TMP1]], i32 0 460e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i8> [[TMP1]], i32 1 461e7d26aceSJustin 
Bogner; CHECK-NEXT: call void @llvm.assume(i1 [[BIT_COND]]) 462e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i8> undef, i8 [[TMP42]], i32 0 463e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i8> [[TMP19]], i8 [[TMP81]], i32 1 464*ba1759c4SNikita Popov; CHECK-NEXT: store <2 x i8> [[TMP20]], ptr [[DST:%.*]] 465e7d26aceSJustin Bogner; CHECK-NEXT: br label [[BB_SKIP]] 466e7d26aceSJustin Bogner; CHECK: bb.skip: 467e7d26aceSJustin Bogner; CHECK-NEXT: ret void 468e7d26aceSJustin Bogner; 469e7d26aceSJustin Bognerbb: 470e7d26aceSJustin Bogner %v0 = mul i32 %ind0, 4 471e7d26aceSJustin Bogner %v1 = mul i32 %ind1, 3 472e7d26aceSJustin Bogner %tmp5 = add i32 %v1, %v0 473e7d26aceSJustin Bogner %bit_cond = icmp eq i32 %v1, 0 474e7d26aceSJustin Bogner br i1 %bit_cond, label %bb.loads, label %bb.skip 475e7d26aceSJustin Bogner 476e7d26aceSJustin Bognerbb.loads: 477e7d26aceSJustin Bogner %tmp6 = sext i32 %tmp5 to i64 478*ba1759c4SNikita Popov %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6 479*ba1759c4SNikita Popov %tmp8 = load i8, ptr %tmp7, align 1 480e7d26aceSJustin Bogner call void @llvm.assume(i1 %bit_cond) 481e7d26aceSJustin Bogner %tmp = add nsw i32 %v0, 1 482e7d26aceSJustin Bogner %tmp1 = add i32 %v1, %tmp 483e7d26aceSJustin Bogner %tmp2 = sext i32 %tmp1 to i64 484*ba1759c4SNikita Popov %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2 485*ba1759c4SNikita Popov %tmp4 = load i8, ptr %tmp3, align 1 486e7d26aceSJustin Bogner %tmp19 = insertelement <2 x i8> undef, i8 %tmp4, i32 0 487e7d26aceSJustin Bogner %tmp20 = insertelement <2 x i8> %tmp19, i8 %tmp8, i32 1 488*ba1759c4SNikita Popov store <2 x i8> %tmp20, ptr %dst 489e7d26aceSJustin Bogner br label %bb.skip 490e7d26aceSJustin Bogner 491e7d26aceSJustin Bognerbb.skip: 492e7d26aceSJustin Bogner ret void 493e7d26aceSJustin Bogner} 494e7d26aceSJustin Bogner 495e7d26aceSJustin Bogner; llvm.assume is placed between loads in a single basic block 496e7d26aceSJustin Bogner 
497*ba1759c4SNikita Popovdefine void @ld_v2i8_add_context(i32 %ind0, i32 %ind1, ptr %src, ptr %dst) {
498e7d26aceSJustin Bogner; CHECK-LABEL: @ld_v2i8_add_context(
499e7d26aceSJustin Bogner; CHECK-NEXT: bb:
500e7d26aceSJustin Bogner; CHECK-NEXT: [[V0:%.*]] = mul i32 [[IND0:%.*]], 4
501e7d26aceSJustin Bogner; CHECK-NEXT: [[V1:%.*]] = mul i32 [[IND1:%.*]], 3
502e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
503e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
504*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP6]]
505*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[TMP7]], align 1
506e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP81:%.*]] = extractelement <2 x i8> [[TMP1]], i32 0
507e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i8> [[TMP1]], i32 1
508e7d26aceSJustin Bogner; CHECK-NEXT: [[BIT_COND:%.*]] = icmp eq i32 [[TMP5]], 0
509e7d26aceSJustin Bogner; CHECK-NEXT: call void @llvm.assume(i1 [[BIT_COND]])
510e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i8> undef, i8 [[TMP42]], i32 0
511e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i8> [[TMP19]], i8 [[TMP81]], i32 1
512*ba1759c4SNikita Popov; CHECK-NEXT: store <2 x i8> [[TMP20]], ptr [[DST:%.*]]
513e7d26aceSJustin Bogner; CHECK-NEXT: ret void
514e7d26aceSJustin Bogner;
515e7d26aceSJustin Bognerbb:
; Everything lives in one basic block: two scalar i8 loads with an llvm.assume
; between them. The assertions above expect one <2 x i8> load from %tmp7, with
; the icmp and the assume call emitted after the two extractelements.
516e7d26aceSJustin Bogner %v0 = mul i32 %ind0, 4
517e7d26aceSJustin Bogner %v1 = mul i32 %ind1, 3
; The sum %v1 + %v0 has no nsw flag.
518e7d26aceSJustin Bogner %tmp5 = add i32 %v1, %v0
; First scalar load: the byte at %src + sext(%v1 + %v0).
519e7d26aceSJustin Bogner %tmp6 = sext i32 %tmp5 to i64
520*ba1759c4SNikita Popov %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
521*ba1759c4SNikita Popov %tmp8 = load i8, ptr %tmp7, align 1
; Assume that the sum %tmp5 is zero; this sits between the two loads.
522e7d26aceSJustin Bogner %bit_cond = icmp eq i32 %tmp5, 0
523e7d26aceSJustin Bogner call void @llvm.assume(i1 %bit_cond)
; Second scalar load: the byte at %src + sext(%v1 + (%v0 + 1)).
; Only the inner add (%v0 + 1) carries nsw.
524e7d26aceSJustin Bogner %tmp = add nsw i32 %v0, 1
525e7d26aceSJustin Bogner %tmp1 = add i32 %v1, %tmp
526e7d26aceSJustin Bogner %tmp2 = sext i32 %tmp1 to i64
527*ba1759c4SNikita Popov %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
528*ba1759c4SNikita Popov %tmp4 = load i8, ptr %tmp3, align 1
; The second load (%tmp4) goes to lane 0 and the first load (%tmp8) to lane 1.
529e7d26aceSJustin Bogner %tmp19 = insertelement <2 x i8> undef, i8 %tmp4, i32 0
530e7d26aceSJustin Bogner %tmp20 = insertelement <2 x i8> %tmp19, i8 %tmp8, i32 1
531*ba1759c4SNikita Popov store <2 x i8> %tmp20, ptr %dst
532e7d26aceSJustin Bogner ret void
533e7d26aceSJustin Bogner}
534e7d26aceSJustin Bogner
535e7d26aceSJustin Bogner; Placing llvm.assume after all the loads and stores in the basic block still works
; Here "works" means the assertions below still expect a single <2 x i8> load;
; the icmp and the assume call stay after the vector store in the expected output.
536e7d26aceSJustin Bogner
537*ba1759c4SNikita Popovdefine void @ld_v2i8_add_context1(i32 %ind0, i32 %ind1, ptr %src, ptr %dst) {
538e7d26aceSJustin Bogner; CHECK-LABEL: @ld_v2i8_add_context1(
539e7d26aceSJustin Bogner; CHECK-NEXT: bb:
540e7d26aceSJustin Bogner; CHECK-NEXT: [[V0:%.*]] = mul i32 [[IND0:%.*]], 4
541e7d26aceSJustin Bogner; CHECK-NEXT: [[V1:%.*]] = mul i32 [[IND1:%.*]], 3
542e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
543e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
544*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP6]]
545*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[TMP7]], align 1
546e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP81:%.*]] = extractelement <2 x i8> [[TMP1]], i32 0
547e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i8> [[TMP1]], i32 1
548e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i8> undef, i8 [[TMP42]], i32 0
549e7d26aceSJustin Bogner; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i8> [[TMP19]], i8 [[TMP81]], i32 1
550*ba1759c4SNikita Popov; CHECK-NEXT: store <2 x i8> [[TMP20]], ptr [[DST:%.*]]
551e7d26aceSJustin Bogner; CHECK-NEXT: [[BIT_COND:%.*]] = icmp eq i32 [[TMP5]], 0
552e7d26aceSJustin Bogner; CHECK-NEXT: call void @llvm.assume(i1 [[BIT_COND]])
553e7d26aceSJustin Bogner; CHECK-NEXT: ret void
554e7d26aceSJustin Bogner;
555e7d26aceSJustin Bognerbb:
556e7d26aceSJustin Bogner %v0 = mul i32 %ind0, 4
557e7d26aceSJustin Bogner %v1 = mul i32 %ind1, 3
; The sum %v1 + %v0 has no nsw flag.
558e7d26aceSJustin Bogner %tmp5 = add i32 %v1, %v0
; First scalar load: the byte at %src + sext(%v1 + %v0).
559e7d26aceSJustin Bogner %tmp6 = sext i32 %tmp5 to i64
560*ba1759c4SNikita Popov %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
561*ba1759c4SNikita Popov %tmp8 = load i8, ptr %tmp7, align 1
; Second scalar load: the byte at %src + sext(%v1 + (%v0 + 1)).
; Only the inner add (%v0 + 1) carries nsw.
562e7d26aceSJustin Bogner %tmp = add nsw i32 %v0, 1
563e7d26aceSJustin Bogner %tmp1 = add i32 %v1, %tmp
564e7d26aceSJustin Bogner %tmp2 = sext i32 %tmp1 to i64
565*ba1759c4SNikita Popov %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
566*ba1759c4SNikita Popov %tmp4 = load i8, ptr %tmp3, align 1
; The second load (%tmp4) goes to lane 0 and the first load (%tmp8) to lane 1.
567e7d26aceSJustin Bogner %tmp19 = insertelement <2 x i8> undef, i8 %tmp4, i32 0
568e7d26aceSJustin Bogner %tmp20 = insertelement <2 x i8> %tmp19, i8 %tmp8, i32 1
569*ba1759c4SNikita Popov store <2 x i8> %tmp20, ptr %dst
; The icmp/assume pair on %tmp5 comes last, after both loads and the store.
570e7d26aceSJustin Bogner %bit_cond = icmp eq i32 %tmp5, 0
571e7d26aceSJustin Bogner call void @llvm.assume(i1 %bit_cond)
572e7d26aceSJustin Bogner ret void
573e7d26aceSJustin Bogner}
574e7d26aceSJustin Bogner
57563081dc6SVolkan Keles; Make sure we don't vectorize the loads below because the source of
57695427210SJustin Bogner; sext instructions doesn't have the nsw flag or known bits allowing
57795427210SJustin Bogner; to apply the vectorization.
57863081dc6SVolkan Keles
; Negative test: the autogenerated assertions below expect all four scalar i8
; loads to remain in the output (no <4 x i8> load is expected).
; Compare @ld_v4i8_add_nsw at the top of the file, where the adds involving %v1
; carry nsw and a single <4 x i8> load is expected instead.
579*ba1759c4SNikita Popovdefine void @ld_v4i8_add_not_safe(i32 %v0, i32 %v1, ptr %src, ptr %dst) {
58063081dc6SVolkan Keles; CHECK-LABEL: @ld_v4i8_add_not_safe(
58163081dc6SVolkan Keles; CHECK-NEXT: bb:
58263081dc6SVolkan Keles; CHECK-NEXT: [[TMP:%.*]] = add nsw i32 [[V0:%.*]], -1
58363081dc6SVolkan Keles; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[V1:%.*]], [[TMP]]
58463081dc6SVolkan Keles; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
585*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP2]]
586*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
58763081dc6SVolkan Keles; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
58863081dc6SVolkan Keles; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
589*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP6]]
590*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 1
59163081dc6SVolkan Keles; CHECK-NEXT: [[TMP9:%.*]] = add nsw i32 [[V0]], 1
59263081dc6SVolkan Keles; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[V1]], [[TMP9]]
59363081dc6SVolkan Keles; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
594*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP11]]
595*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
59663081dc6SVolkan Keles; CHECK-NEXT: [[TMP14:%.*]] = add nsw i32 [[V0]], 2
59763081dc6SVolkan Keles; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[V1]], [[TMP14]]
59863081dc6SVolkan Keles; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
599*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP16]]
600*ba1759c4SNikita Popov; CHECK-NEXT: [[TMP18:%.*]] = load i8, ptr [[TMP17]], align 1
60163081dc6SVolkan Keles; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP4]], i32 0
60263081dc6SVolkan Keles; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP8]], i32 1
60363081dc6SVolkan Keles; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP13]], i32 2
60463081dc6SVolkan Keles; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP18]], i32 3
605*ba1759c4SNikita Popov; CHECK-NEXT: store <4 x i8> [[TMP22]], ptr [[DST:%.*]]
60663081dc6SVolkan Keles; CHECK-NEXT: ret void
60763081dc6SVolkan Keles;
60863081dc6SVolkan Kelesbb:
; Four byte loads whose 32-bit indices are %v1 + (%v0 - 1), %v1 + %v0,
; %v1 + (%v0 + 1) and %v1 + (%v0 + 2), each sign-extended to i64.
; The adds of a constant to %v0 carry nsw, but every add involving %v1 is a
; plain add, and there is no llvm.assume in this function.
; Load 0: index %v1 + (%v0 - 1).
60963081dc6SVolkan Keles %tmp = add nsw i32 %v0, -1
61063081dc6SVolkan Keles %tmp1 = add i32 %v1, %tmp
61163081dc6SVolkan Keles %tmp2 = sext i32 %tmp1 to i64
612*ba1759c4SNikita Popov %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
613*ba1759c4SNikita Popov %tmp4 = load i8, ptr %tmp3, align 1
; Load 1: index %v1 + %v0.
61463081dc6SVolkan Keles %tmp5 = add i32 %v1, %v0
61563081dc6SVolkan Keles %tmp6 = sext i32 %tmp5 to i64
616*ba1759c4SNikita Popov %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
617*ba1759c4SNikita Popov %tmp8 = load i8, ptr %tmp7, align 1
; Load 2: index %v1 + (%v0 + 1).
61863081dc6SVolkan Keles %tmp9 = add nsw i32 %v0, 1
61963081dc6SVolkan Keles %tmp10 = add i32 %v1, %tmp9
62063081dc6SVolkan Keles %tmp11 = sext i32 %tmp10 to i64
621*ba1759c4SNikita Popov %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11
622*ba1759c4SNikita Popov %tmp13 = load i8, ptr %tmp12, align 1
; Load 3: index %v1 + (%v0 + 2).
62363081dc6SVolkan Keles %tmp14 = add nsw i32 %v0, 2
62463081dc6SVolkan Keles %tmp15 = add i32 %v1, %tmp14
62563081dc6SVolkan Keles %tmp16 = sext i32 %tmp15 to i64
626*ba1759c4SNikita Popov %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16
627*ba1759c4SNikita Popov %tmp18 = load i8, ptr %tmp17, align 1
; The four bytes are packed in load order into lanes 0..3 and stored to %dst.
62863081dc6SVolkan Keles %tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0
62963081dc6SVolkan Keles %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
63063081dc6SVolkan Keles %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
63163081dc6SVolkan Keles %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
632*ba1759c4SNikita Popov store <4 x i8> %tmp22, ptr %dst
63363081dc6SVolkan Keles ret void
63463081dc6SVolkan Keles}
635