xref: /llvm-project/llvm/test/Transforms/LoadStoreVectorizer/X86/vectorize-i8-nested-add.ll (revision 2be0abb7fe72ed4537b3eabcd3102d48ea845717)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -o - -S -passes=load-store-vectorizer,dce %s | FileCheck %s

; Make sure LoadStoreVectorizer vectorizes the loads below.
; In order to prove that the vectorization is safe, it tries to
; match nested adds and find an expression that adds a constant
; value to an existing index and the result doesn't overflow.

target triple = "x86_64--"

; All four i8 loads are adjacent (offsets -1, 0, 1, 2 from v0+v1) and the
; nsw adds prove no overflow, so they fold into one <4 x i8> load.
define void @ld_v4i8_add_nsw(i32 %v0, i32 %v1, ptr %src, ptr %dst) {
; CHECK-LABEL: @ld_v4i8_add_nsw(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = add nsw i32 [[V0:%.*]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[V1:%.*]], [[TMP]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP82:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
; CHECK-NEXT:    [[TMP133:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
; CHECK-NEXT:    [[TMP184:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP41]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP82]], i32 1
; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP133]], i32 2
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP184]], i32 3
; CHECK-NEXT:    store <4 x i8> [[TMP22]], ptr [[DST:%.*]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = add nsw i32 %v0, -1
  %tmp1 = add nsw i32 %v1, %tmp
  %tmp2 = sext i32 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
  %tmp4 = load i8, ptr %tmp3, align 1
  %tmp5 = add nsw i32 %v1, %v0
  %tmp6 = sext i32 %tmp5 to i64
  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
  %tmp8 = load i8, ptr %tmp7, align 1
  %tmp9 = add nsw i32 %v0, 1
  %tmp10 = add nsw i32 %v1, %tmp9
  %tmp11 = sext i32 %tmp10 to i64
  %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11
  %tmp13 = load i8, ptr %tmp12, align 1
  %tmp14 = add nsw i32 %v0, 2
  %tmp15 = add nsw i32 %v1, %tmp14
  %tmp16 = sext i32 %tmp15 to i64
  %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16
  %tmp18 = load i8, ptr %tmp17, align 1
  %tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0
  %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
  %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
  %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
  store <4 x i8> %tmp22, ptr %dst
  ret void
}

; Apply different operand orders for the nested add sequences
; The pass should match the commuted add operands and still emit one <4 x i8> load.
define void @ld_v4i8_add_nsw_operand_orders(i32 %v0, i32 %v1, ptr %src, ptr %dst) {
; CHECK-LABEL: @ld_v4i8_add_nsw_operand_orders(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = add nsw i32 [[V0:%.*]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[V1:%.*]], [[TMP]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP82:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
; CHECK-NEXT:    [[TMP133:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
; CHECK-NEXT:    [[TMP184:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP41]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP82]], i32 1
; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP133]], i32 2
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP184]], i32 3
; CHECK-NEXT:    store <4 x i8> [[TMP22]], ptr [[DST:%.*]]
; CHECK-NEXT:    ret void
;
bb:
  %tmp = add nsw i32 %v0, -1
  %tmp1 = add nsw i32 %v1, %tmp
  %tmp2 = sext i32 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
  %tmp4 = load i8, ptr %tmp3, align 1
  %tmp5 = add nsw i32 %v0, %v1
  %tmp6 = sext i32 %tmp5 to i64
  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
  %tmp8 = load i8, ptr %tmp7, align 1
  %tmp9 = add nsw i32 %v0, 1
  %tmp10 = add nsw i32 %tmp9, %v1
  %tmp11 = sext i32 %tmp10 to i64
  %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11
  %tmp13 = load i8, ptr %tmp12, align 1
  %tmp14 = add nsw i32 %v0, 2
  %tmp15 = add nsw i32 %v1, %tmp14
  %tmp16 = sext i32 %tmp15 to i64
  %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16
  %tmp18 = load i8, ptr %tmp17, align 1
  %tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0
  %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
  %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
  %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
  store <4 x i8> %tmp22, ptr %dst
  ret void
}

; Plain (non-nsw) adds: v0 and v1 are multiples of 4, so known low bits prove
; contiguity only for the offsets 0/1/2 group — the -1 load stays scalar and
; the other three merge into a <3 x i8> load.
define void @ld_v4i8_add_known_bits(i32 %ind0, i32 %ind1, ptr %src, ptr %dst) {
; CHECK-LABEL: @ld_v4i8_add_known_bits(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[V0:%.*]] = mul i32 [[IND0:%.*]], 4
; CHECK-NEXT:    [[V1:%.*]] = mul i32 [[IND1:%.*]], 4
; CHECK-NEXT:    [[TMP:%.*]] = add i32 [[V0]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[V1]], [[TMP]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP1:%.*]] = load <3 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT:    [[TMP81:%.*]] = extractelement <3 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP132:%.*]] = extractelement <3 x i8> [[TMP1]], i32 1
; CHECK-NEXT:    [[TMP183:%.*]] = extractelement <3 x i8> [[TMP1]], i32 2
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP81]], i32 1
; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP132]], i32 2
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP183]], i32 3
; CHECK-NEXT:    store <4 x i8> [[TMP22]], ptr [[DST:%.*]]
; CHECK-NEXT:    ret void
;
bb:
  %v0 = mul i32 %ind0, 4
  %v1 = mul i32 %ind1, 4
  %tmp = add i32 %v0, -1
  %tmp1 = add i32 %v1, %tmp
  %tmp2 = sext i32 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
  %tmp4 = load i8, ptr %tmp3, align 1
  %tmp5 = add i32 %v1, %v0
  %tmp6 = sext i32 %tmp5 to i64
  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
  %tmp8 = load i8, ptr %tmp7, align 1
  %tmp9 = add i32 %v0, 1
  %tmp10 = add i32 %v1, %tmp9
  %tmp11 = sext i32 %tmp10 to i64
  %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11
  %tmp13 = load i8, ptr %tmp12, align 1
  %tmp14 = add i32 %v0, 2
  %tmp15 = add i32 %v1, %tmp14
  %tmp16 = sext i32 %tmp15 to i64
  %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16
  %tmp18 = load i8, ptr %tmp17, align 1
  %tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0
  %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
  %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
  %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
  store <4 x i8> %tmp22, ptr %dst
  ret void
}

; Offsets are 3/0/1/2 with v0 and v1 multiples of 4, so all four loads merge
; into one <4 x i8> load; the element from offset 3 is inserted at lane 0.
define void @ld_v4i8_add_known_bits1(i32 %ind0, i32 %ind1, ptr %src, ptr %dst) {
; CHECK-LABEL: @ld_v4i8_add_known_bits1(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[V0:%.*]] = mul i32 [[IND0:%.*]], 4
; CHECK-NEXT:    [[V1:%.*]] = mul i32 [[IND1:%.*]], 4
; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT:    [[TMP81:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP132:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
; CHECK-NEXT:    [[TMP183:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP44]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP81]], i32 1
; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP132]], i32 2
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP183]], i32 3
; CHECK-NEXT:    store <4 x i8> [[TMP22]], ptr [[DST:%.*]]
; CHECK-NEXT:    ret void
;
bb:
  %v0 = mul i32 %ind0, 4
  %v1 = mul i32 %ind1, 4
  %tmp = add i32 %v0, 3
  %tmp1 = add i32 %v1, %tmp
  %tmp2 = sext i32 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
  %tmp4 = load i8, ptr %tmp3, align 1
  %tmp5 = add i32 %v1, %v0
  %tmp6 = sext i32 %tmp5 to i64
  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
  %tmp8 = load i8, ptr %tmp7, align 1
  %tmp9 = add i32 %v0, 1
  %tmp10 = add i32 %v1, %tmp9
  %tmp11 = sext i32 %tmp10 to i64
  %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11
  %tmp13 = load i8, ptr %tmp12, align 1
  %tmp14 = add i32 %v0, 2
  %tmp15 = add i32 %v1, %tmp14
  %tmp16 = sext i32 %tmp15 to i64
  %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16
  %tmp18 = load i8, ptr %tmp17, align 1
  %tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0
  %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
  %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
  %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
  store <4 x i8> %tmp22, ptr %dst
  ret void
}

; The multiplies by 3 give no useful low bits; the llvm.assume calls establish
; (v0 & 3) == 0 and (v1 & 3) == 0, which enables the single <4 x i8> load.
define void @ld_v4i8_add_known_bits_by_assume(i32 %ind0, i32 %ind1, ptr %src, ptr %dst) {
; CHECK-LABEL: @ld_v4i8_add_known_bits_by_assume(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[V0:%.*]] = mul i32 [[IND0:%.*]], 3
; CHECK-NEXT:    [[V1:%.*]] = mul i32 [[IND1:%.*]], 3
; CHECK-NEXT:    [[AND_I:%.*]] = and i32 [[V0]], 3
; CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[AND_I]], 0
; CHECK-NEXT:    [[AND_I_1:%.*]] = and i32 [[V1]], 3
; CHECK-NEXT:    [[CMP_I_1:%.*]] = icmp eq i32 [[AND_I_1]], 0
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_I]])
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_I_1]])
; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT:    [[TMP81:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP132:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
; CHECK-NEXT:    [[TMP183:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP44]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP81]], i32 1
; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP132]], i32 2
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP183]], i32 3
; CHECK-NEXT:    store <4 x i8> [[TMP22]], ptr [[DST:%.*]]
; CHECK-NEXT:    ret void
;
bb:
  %v0 = mul i32 %ind0, 3
  %v1 = mul i32 %ind1, 3
  %and.i = and i32 %v0, 3
  %cmp.i = icmp eq i32 %and.i, 0
  %and.i.1 = and i32 %v1, 3
  %cmp.i.1 = icmp eq i32 %and.i.1, 0
  call void @llvm.assume(i1 %cmp.i)
  call void @llvm.assume(i1 %cmp.i.1)
  %tmp = add i32 %v0, 3
  %tmp1 = add i32 %v1, %tmp
  %tmp2 = sext i32 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
  %tmp4 = load i8, ptr %tmp3, align 1
  %tmp5 = add i32 %v1, %v0
  %tmp6 = sext i32 %tmp5 to i64
  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
  %tmp8 = load i8, ptr %tmp7, align 1
  %tmp9 = add i32 %v0, 1
  %tmp10 = add i32 %v1, %tmp9
  %tmp11 = sext i32 %tmp10 to i64
  %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11
  %tmp13 = load i8, ptr %tmp12, align 1
  %tmp14 = add i32 %v0, 2
  %tmp15 = add i32 %v1, %tmp14
  %tmp16 = sext i32 %tmp15 to i64
  %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16
  %tmp18 = load i8, ptr %tmp17, align 1
  %tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0
  %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
  %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
  %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
  store <4 x i8> %tmp22, ptr %dst
  ret void
}

declare void @llvm.assume(i1)

; Assumes directly on the arguments establish (v0 & 3) == 0 and (v1 & 3) == 0.
; The -1 offset load still cannot join the chain, so the result is a scalar
; load plus a <3 x i8> load for offsets 0/1/2.
define void @ld_v4i8_add_assume_on_arg(i32 %v0, i32 %v1, ptr %src, ptr %dst) {
; CHECK-LABEL: @ld_v4i8_add_assume_on_arg(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[AND_I:%.*]] = and i32 [[V0:%.*]], 3
; CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[AND_I]], 0
; CHECK-NEXT:    [[AND_I_1:%.*]] = and i32 [[V1:%.*]], 3
; CHECK-NEXT:    [[CMP_I_1:%.*]] = icmp eq i32 [[AND_I_1]], 0
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_I]])
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_I_1]])
; CHECK-NEXT:    [[TMP:%.*]] = add nsw i32 [[V0]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[V1]], [[TMP]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP1:%.*]] = load <3 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT:    [[TMP81:%.*]] = extractelement <3 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP132:%.*]] = extractelement <3 x i8> [[TMP1]], i32 1
; CHECK-NEXT:    [[TMP183:%.*]] = extractelement <3 x i8> [[TMP1]], i32 2
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP81]], i32 1
; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP132]], i32 2
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP183]], i32 3
; CHECK-NEXT:    store <4 x i8> [[TMP22]], ptr [[DST:%.*]]
; CHECK-NEXT:    ret void
;
bb:
  %and.i = and i32 %v0, 3
  %cmp.i = icmp eq i32 %and.i, 0
  %and.i.1 = and i32 %v1, 3
  %cmp.i.1 = icmp eq i32 %and.i.1, 0
  call void @llvm.assume(i1 %cmp.i)
  call void @llvm.assume(i1 %cmp.i.1)
  %tmp = add nsw i32 %v0, -1
  %tmp1 = add i32 %v1, %tmp
  %tmp2 = sext i32 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
  %tmp4 = load i8, ptr %tmp3, align 1
  %tmp5 = add i32 %v1, %v0
  %tmp6 = sext i32 %tmp5 to i64
  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
  %tmp8 = load i8, ptr %tmp7, align 1
  %tmp9 = add nsw i32 %v0, 1
  %tmp10 = add i32 %v1, %tmp9
  %tmp11 = sext i32 %tmp10 to i64
  %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11
  %tmp13 = load i8, ptr %tmp12, align 1
  %tmp14 = add nsw i32 %v0, 2
  %tmp15 = add i32 %v1, %tmp14
  %tmp16 = sext i32 %tmp15 to i64
  %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16
  %tmp18 = load i8, ptr %tmp17, align 1
  %tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0
  %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
  %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
  %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
  store <4 x i8> %tmp22, ptr %dst
  ret void
}

; Same assume-derived known bits, but with offsets 3/0/1/2 — expect a single
; <4 x i8> load covering all four accesses.
define void @ld_v4i8_add_assume_on_arg1(i32 %v0, i32 %v1, ptr %src, ptr %dst) {
; CHECK-LABEL: @ld_v4i8_add_assume_on_arg1(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[AND_I:%.*]] = and i32 [[V0:%.*]], 3
; CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[AND_I]], 0
; CHECK-NEXT:    [[AND_I_1:%.*]] = and i32 [[V1:%.*]], 3
; CHECK-NEXT:    [[CMP_I_1:%.*]] = icmp eq i32 [[AND_I_1]], 0
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_I]])
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_I_1]])
; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT:    [[TMP81:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP132:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
; CHECK-NEXT:    [[TMP183:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP44]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP81]], i32 1
; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP132]], i32 2
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP183]], i32 3
; CHECK-NEXT:    store <4 x i8> [[TMP22]], ptr [[DST:%.*]]
; CHECK-NEXT:    ret void
;
bb:
  %and.i = and i32 %v0, 3
  %cmp.i = icmp eq i32 %and.i, 0
  %and.i.1 = and i32 %v1, 3
  %cmp.i.1 = icmp eq i32 %and.i.1, 0
  call void @llvm.assume(i1 %cmp.i)
  call void @llvm.assume(i1 %cmp.i.1)
  %tmp = add nsw i32 %v0, 3
  %tmp1 = add i32 %v1, %tmp
  %tmp2 = sext i32 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
  %tmp4 = load i8, ptr %tmp3, align 1
  %tmp5 = add i32 %v1, %v0
  %tmp6 = sext i32 %tmp5 to i64
  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
  %tmp8 = load i8, ptr %tmp7, align 1
  %tmp9 = add nsw i32 %v0, 1
  %tmp10 = add i32 %v1, %tmp9
  %tmp11 = sext i32 %tmp10 to i64
  %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11
  %tmp13 = load i8, ptr %tmp12, align 1
  %tmp14 = add nsw i32 %v0, 2
  %tmp15 = add i32 %v1, %tmp14
  %tmp16 = sext i32 %tmp15 to i64
  %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16
  %tmp18 = load i8, ptr %tmp17, align 1
  %tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0
  %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
  %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
  %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
  store <4 x i8> %tmp22, ptr %dst
  ret void
}

; Address computations are partly separated by control flow and with llvm.assume placed
; in the second basic block

; Expect the two loads in bb.loads to merge into a single <2 x i8> load.
define void @ld_v2i8_add_different_contexts(i32 %ind0, i32 %ind1, ptr %src, ptr %dst) {
; CHECK-LABEL: @ld_v2i8_add_different_contexts(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[V0:%.*]] = mul i32 [[IND0:%.*]], 4
; CHECK-NEXT:    [[V1:%.*]] = mul i32 [[IND1:%.*]], 3
; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
; CHECK-NEXT:    [[BIT_COND:%.*]] = icmp eq i32 [[V1]], 0
; CHECK-NEXT:    br i1 [[BIT_COND]], label [[BB_LOADS:%.*]], label [[BB_SKIP:%.*]]
; CHECK:       bb.loads:
; CHECK-NEXT:    call void @llvm.assume(i1 [[BIT_COND]])
; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT:    [[TMP81:%.*]] = extractelement <2 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <2 x i8> [[TMP1]], i32 1
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x i8> undef, i8 [[TMP42]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <2 x i8> [[TMP19]], i8 [[TMP81]], i32 1
; CHECK-NEXT:    store <2 x i8> [[TMP20]], ptr [[DST:%.*]]
; CHECK-NEXT:    br label [[BB_SKIP]]
; CHECK:       bb.skip:
; CHECK-NEXT:    ret void
;
bb:
  %v0 = mul i32 %ind0, 4
  %v1 = mul i32 %ind1, 3
  %tmp5 = add i32 %v1, %v0
  %bit_cond = icmp eq i32 %v1, 0
  br i1 %bit_cond, label %bb.loads, label %bb.skip

bb.loads:
  call void @llvm.assume(i1 %bit_cond)
  %tmp = add nsw i32 %v0, 1
  %tmp1 = add i32 %v1, %tmp
  %tmp2 = sext i32 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
  %tmp4 = load i8, ptr %tmp3, align 1
  %tmp6 = sext i32 %tmp5 to i64
  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
  %tmp8 = load i8, ptr %tmp7, align 1
  %tmp19 = insertelement <2 x i8> undef, i8 %tmp4, i32 0
  %tmp20 = insertelement <2 x i8> %tmp19, i8 %tmp8, i32 1
  store <2 x i8> %tmp20, ptr %dst
  br label %bb.skip

bb.skip:
  ret void
}

; Same as ld_v2i8_add_different_contexts but with llvm.assume placed between loads

447*ba1759c4SNikita Popovdefine void @ld_v2i8_add_different_contexts1(i32 %ind0, i32 %ind1, ptr %src, ptr %dst) {
448e7d26aceSJustin Bogner; CHECK-LABEL: @ld_v2i8_add_different_contexts1(
449e7d26aceSJustin Bogner; CHECK-NEXT:  bb:
450e7d26aceSJustin Bogner; CHECK-NEXT:    [[V0:%.*]] = mul i32 [[IND0:%.*]], 4
451e7d26aceSJustin Bogner; CHECK-NEXT:    [[V1:%.*]] = mul i32 [[IND1:%.*]], 3
452e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
453e7d26aceSJustin Bogner; CHECK-NEXT:    [[BIT_COND:%.*]] = icmp eq i32 [[V1]], 0
454e7d26aceSJustin Bogner; CHECK-NEXT:    br i1 [[BIT_COND]], label [[BB_LOADS:%.*]], label [[BB_SKIP:%.*]]
455e7d26aceSJustin Bogner; CHECK:       bb.loads:
456e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
457*ba1759c4SNikita Popov; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP6]]
458*ba1759c4SNikita Popov; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr [[TMP7]], align 1
459e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP81:%.*]] = extractelement <2 x i8> [[TMP1]], i32 0
460e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <2 x i8> [[TMP1]], i32 1
461e7d26aceSJustin Bogner; CHECK-NEXT:    call void @llvm.assume(i1 [[BIT_COND]])
462e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x i8> undef, i8 [[TMP42]], i32 0
463e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <2 x i8> [[TMP19]], i8 [[TMP81]], i32 1
464*ba1759c4SNikita Popov; CHECK-NEXT:    store <2 x i8> [[TMP20]], ptr [[DST:%.*]]
465e7d26aceSJustin Bogner; CHECK-NEXT:    br label [[BB_SKIP]]
466e7d26aceSJustin Bogner; CHECK:       bb.skip:
467e7d26aceSJustin Bogner; CHECK-NEXT:    ret void
468e7d26aceSJustin Bogner;
; Input IR: two i8 loads from %src at indices (%v1 + %v0) and (%v1 + %v0 + 1),
; with the llvm.assume call placed between them. The CHECK lines above expect
; the pair to be merged into a single <2 x i8> load despite the intervening
; assume.
469e7d26aceSJustin Bognerbb:
470e7d26aceSJustin Bogner  %v0 = mul i32 %ind0, 4
471e7d26aceSJustin Bogner  %v1 = mul i32 %ind1, 3
472e7d26aceSJustin Bogner  %tmp5 = add i32 %v1, %v0
473e7d26aceSJustin Bogner  %bit_cond = icmp eq i32 %v1, 0
474e7d26aceSJustin Bogner  br i1 %bit_cond, label %bb.loads, label %bb.skip
475e7d26aceSJustin Bogner
476e7d26aceSJustin Bognerbb.loads:
; First load: %src[%v1 + %v0].
477e7d26aceSJustin Bogner  %tmp6 = sext i32 %tmp5 to i64
478*ba1759c4SNikita Popov  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
479*ba1759c4SNikita Popov  %tmp8 = load i8, ptr %tmp7, align 1
; The assume between the two loads must not block vectorization.
480e7d26aceSJustin Bogner  call void @llvm.assume(i1 %bit_cond)
; Second load: %src[%v1 + (%v0 + 1)] — the byte right after the first load.
481e7d26aceSJustin Bogner  %tmp = add nsw i32 %v0, 1
482e7d26aceSJustin Bogner  %tmp1 = add i32 %v1, %tmp
483e7d26aceSJustin Bogner  %tmp2 = sext i32 %tmp1 to i64
484*ba1759c4SNikita Popov  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
485*ba1759c4SNikita Popov  %tmp4 = load i8, ptr %tmp3, align 1
486e7d26aceSJustin Bogner  %tmp19 = insertelement <2 x i8> undef, i8 %tmp4, i32 0
487e7d26aceSJustin Bogner  %tmp20 = insertelement <2 x i8> %tmp19, i8 %tmp8, i32 1
488*ba1759c4SNikita Popov  store <2 x i8> %tmp20, ptr %dst
489e7d26aceSJustin Bogner  br label %bb.skip
490e7d26aceSJustin Bogner
491e7d26aceSJustin Bognerbb.skip:
492e7d26aceSJustin Bogner  ret void
493e7d26aceSJustin Bogner}
494e7d26aceSJustin Bogner
495e7d26aceSJustin Bogner; llvm.assume is placed between loads in a single basic block
496e7d26aceSJustin Bogner
497*ba1759c4SNikita Popovdefine void @ld_v2i8_add_context(i32 %ind0, i32 %ind1, ptr %src, ptr %dst) {
498e7d26aceSJustin Bogner; CHECK-LABEL: @ld_v2i8_add_context(
499e7d26aceSJustin Bogner; CHECK-NEXT:  bb:
500e7d26aceSJustin Bogner; CHECK-NEXT:    [[V0:%.*]] = mul i32 [[IND0:%.*]], 4
501e7d26aceSJustin Bogner; CHECK-NEXT:    [[V1:%.*]] = mul i32 [[IND1:%.*]], 3
502e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
503e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
504*ba1759c4SNikita Popov; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP6]]
505*ba1759c4SNikita Popov; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr [[TMP7]], align 1
506e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP81:%.*]] = extractelement <2 x i8> [[TMP1]], i32 0
507e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <2 x i8> [[TMP1]], i32 1
508e7d26aceSJustin Bogner; CHECK-NEXT:    [[BIT_COND:%.*]] = icmp eq i32 [[TMP5]], 0
509e7d26aceSJustin Bogner; CHECK-NEXT:    call void @llvm.assume(i1 [[BIT_COND]])
510e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x i8> undef, i8 [[TMP42]], i32 0
511e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <2 x i8> [[TMP19]], i8 [[TMP81]], i32 1
512*ba1759c4SNikita Popov; CHECK-NEXT:    store <2 x i8> [[TMP20]], ptr [[DST:%.*]]
513e7d26aceSJustin Bogner; CHECK-NEXT:    ret void
514e7d26aceSJustin Bogner;
; Input IR: a single basic block with two i8 loads at indices (%v1 + %v0) and
; (%v1 + %v0 + 1) and an llvm.assume between them. The CHECK lines above
; expect the two loads to be merged into one <2 x i8> load.
515e7d26aceSJustin Bognerbb:
516e7d26aceSJustin Bogner  %v0 = mul i32 %ind0, 4
517e7d26aceSJustin Bogner  %v1 = mul i32 %ind1, 3
518e7d26aceSJustin Bogner  %tmp5 = add i32 %v1, %v0
; First load: %src[%v1 + %v0].
519e7d26aceSJustin Bogner  %tmp6 = sext i32 %tmp5 to i64
520*ba1759c4SNikita Popov  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
521*ba1759c4SNikita Popov  %tmp8 = load i8, ptr %tmp7, align 1
; assume placed between the two loads, inside the same basic block.
522e7d26aceSJustin Bogner  %bit_cond = icmp eq i32 %tmp5, 0
523e7d26aceSJustin Bogner  call void @llvm.assume(i1 %bit_cond)
; Second load: %src[%v1 + (%v0 + 1)].
524e7d26aceSJustin Bogner  %tmp = add nsw i32 %v0, 1
525e7d26aceSJustin Bogner  %tmp1 = add i32 %v1, %tmp
526e7d26aceSJustin Bogner  %tmp2 = sext i32 %tmp1 to i64
527*ba1759c4SNikita Popov  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
528*ba1759c4SNikita Popov  %tmp4 = load i8, ptr %tmp3, align 1
529e7d26aceSJustin Bogner  %tmp19 = insertelement <2 x i8> undef, i8 %tmp4, i32 0
530e7d26aceSJustin Bogner  %tmp20 = insertelement <2 x i8> %tmp19, i8 %tmp8, i32 1
531*ba1759c4SNikita Popov  store <2 x i8> %tmp20, ptr %dst
532e7d26aceSJustin Bogner  ret void
533e7d26aceSJustin Bogner}
534e7d26aceSJustin Bogner
535e7d26aceSJustin Bogner; Placing llvm.assume after all the loads and stores in the basic block still works
536e7d26aceSJustin Bogner
537*ba1759c4SNikita Popovdefine void @ld_v2i8_add_context1(i32 %ind0, i32 %ind1, ptr %src, ptr %dst) {
538e7d26aceSJustin Bogner; CHECK-LABEL: @ld_v2i8_add_context1(
539e7d26aceSJustin Bogner; CHECK-NEXT:  bb:
540e7d26aceSJustin Bogner; CHECK-NEXT:    [[V0:%.*]] = mul i32 [[IND0:%.*]], 4
541e7d26aceSJustin Bogner; CHECK-NEXT:    [[V1:%.*]] = mul i32 [[IND1:%.*]], 3
542e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
543e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
544*ba1759c4SNikita Popov; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP6]]
545*ba1759c4SNikita Popov; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr [[TMP7]], align 1
546e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP81:%.*]] = extractelement <2 x i8> [[TMP1]], i32 0
547e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <2 x i8> [[TMP1]], i32 1
548e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x i8> undef, i8 [[TMP42]], i32 0
549e7d26aceSJustin Bogner; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <2 x i8> [[TMP19]], i8 [[TMP81]], i32 1
550*ba1759c4SNikita Popov; CHECK-NEXT:    store <2 x i8> [[TMP20]], ptr [[DST:%.*]]
551e7d26aceSJustin Bogner; CHECK-NEXT:    [[BIT_COND:%.*]] = icmp eq i32 [[TMP5]], 0
552e7d26aceSJustin Bogner; CHECK-NEXT:    call void @llvm.assume(i1 [[BIT_COND]])
553e7d26aceSJustin Bogner; CHECK-NEXT:    ret void
554e7d26aceSJustin Bogner;
; Input IR: two adjacent i8 loads at indices (%v1 + %v0) and (%v1 + %v0 + 1),
; with the llvm.assume emitted only after all the loads and the store. The
; CHECK lines above expect a single merged <2 x i8> load.
555e7d26aceSJustin Bognerbb:
556e7d26aceSJustin Bogner  %v0 = mul i32 %ind0, 4
557e7d26aceSJustin Bogner  %v1 = mul i32 %ind1, 3
558e7d26aceSJustin Bogner  %tmp5 = add i32 %v1, %v0
; First load: %src[%v1 + %v0].
559e7d26aceSJustin Bogner  %tmp6 = sext i32 %tmp5 to i64
560*ba1759c4SNikita Popov  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
561*ba1759c4SNikita Popov  %tmp8 = load i8, ptr %tmp7, align 1
; Second load: %src[%v1 + (%v0 + 1)].
562e7d26aceSJustin Bogner  %tmp = add nsw i32 %v0, 1
563e7d26aceSJustin Bogner  %tmp1 = add i32 %v1, %tmp
564e7d26aceSJustin Bogner  %tmp2 = sext i32 %tmp1 to i64
565*ba1759c4SNikita Popov  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
566*ba1759c4SNikita Popov  %tmp4 = load i8, ptr %tmp3, align 1
567e7d26aceSJustin Bogner  %tmp19 = insertelement <2 x i8> undef, i8 %tmp4, i32 0
568e7d26aceSJustin Bogner  %tmp20 = insertelement <2 x i8> %tmp19, i8 %tmp8, i32 1
569*ba1759c4SNikita Popov  store <2 x i8> %tmp20, ptr %dst
; assume after every memory access in the block.
570e7d26aceSJustin Bogner  %bit_cond = icmp eq i32 %tmp5, 0
571e7d26aceSJustin Bogner  call void @llvm.assume(i1 %bit_cond)
572e7d26aceSJustin Bogner  ret void
573e7d26aceSJustin Bogner}
574e7d26aceSJustin Bogner
57563081dc6SVolkan Keles; Make sure we don't vectorize the loads below: the adds feeding the
57695427210SJustin Bogner; sext instructions lack the nsw flag, and no known bits prove the i32
57795427210SJustin Bogner; indices cannot wrap, so the vectorization would be unsafe.
57863081dc6SVolkan Keles
579*ba1759c4SNikita Popovdefine void @ld_v4i8_add_not_safe(i32 %v0, i32 %v1, ptr %src, ptr %dst) {
58063081dc6SVolkan Keles; CHECK-LABEL: @ld_v4i8_add_not_safe(
58163081dc6SVolkan Keles; CHECK-NEXT:  bb:
58263081dc6SVolkan Keles; CHECK-NEXT:    [[TMP:%.*]] = add nsw i32 [[V0:%.*]], -1
58363081dc6SVolkan Keles; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[V1:%.*]], [[TMP]]
58463081dc6SVolkan Keles; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
585*ba1759c4SNikita Popov; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP2]]
586*ba1759c4SNikita Popov; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
58763081dc6SVolkan Keles; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
58863081dc6SVolkan Keles; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
589*ba1759c4SNikita Popov; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP6]]
590*ba1759c4SNikita Popov; CHECK-NEXT:    [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 1
59163081dc6SVolkan Keles; CHECK-NEXT:    [[TMP9:%.*]] = add nsw i32 [[V0]], 1
59263081dc6SVolkan Keles; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[V1]], [[TMP9]]
59363081dc6SVolkan Keles; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
594*ba1759c4SNikita Popov; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP11]]
595*ba1759c4SNikita Popov; CHECK-NEXT:    [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
59663081dc6SVolkan Keles; CHECK-NEXT:    [[TMP14:%.*]] = add nsw i32 [[V0]], 2
59763081dc6SVolkan Keles; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[V1]], [[TMP14]]
59863081dc6SVolkan Keles; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
599*ba1759c4SNikita Popov; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP16]]
600*ba1759c4SNikita Popov; CHECK-NEXT:    [[TMP18:%.*]] = load i8, ptr [[TMP17]], align 1
60163081dc6SVolkan Keles; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> undef, i8 [[TMP4]], i32 0
60263081dc6SVolkan Keles; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP8]], i32 1
60363081dc6SVolkan Keles; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP13]], i32 2
60463081dc6SVolkan Keles; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP18]], i32 3
605*ba1759c4SNikita Popov; CHECK-NEXT:    store <4 x i8> [[TMP22]], ptr [[DST:%.*]]
60663081dc6SVolkan Keles; CHECK-NEXT:    ret void
60763081dc6SVolkan Keles;
; Input IR: four i8 loads at indices %v1 + (%v0 + c) for c = -1, 0, 1, 2.
; The inner adds carry nsw, but the outer adds (%v1 + ...) do not, so the
; pass cannot prove the i32 indices form a contiguous run without wrapping
; before the sext; the CHECK lines above expect all four loads to stay scalar.
60863081dc6SVolkan Kelesbb:
; Load 0: %src[%v1 + (%v0 - 1)] — note the outer add has no nsw.
60963081dc6SVolkan Keles  %tmp = add nsw i32 %v0, -1
61063081dc6SVolkan Keles  %tmp1 = add i32 %v1, %tmp
61163081dc6SVolkan Keles  %tmp2 = sext i32 %tmp1 to i64
612*ba1759c4SNikita Popov  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
613*ba1759c4SNikita Popov  %tmp4 = load i8, ptr %tmp3, align 1
; Load 1: %src[%v1 + %v0].
61463081dc6SVolkan Keles  %tmp5 = add i32 %v1, %v0
61563081dc6SVolkan Keles  %tmp6 = sext i32 %tmp5 to i64
616*ba1759c4SNikita Popov  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
617*ba1759c4SNikita Popov  %tmp8 = load i8, ptr %tmp7, align 1
; Load 2: %src[%v1 + (%v0 + 1)].
61863081dc6SVolkan Keles  %tmp9 = add nsw i32 %v0, 1
61963081dc6SVolkan Keles  %tmp10 = add i32 %v1, %tmp9
62063081dc6SVolkan Keles  %tmp11 = sext i32 %tmp10 to i64
621*ba1759c4SNikita Popov  %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11
622*ba1759c4SNikita Popov  %tmp13 = load i8, ptr %tmp12, align 1
; Load 3: %src[%v1 + (%v0 + 2)].
62363081dc6SVolkan Keles  %tmp14 = add nsw i32 %v0, 2
62463081dc6SVolkan Keles  %tmp15 = add i32 %v1, %tmp14
62563081dc6SVolkan Keles  %tmp16 = sext i32 %tmp15 to i64
626*ba1759c4SNikita Popov  %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16
627*ba1759c4SNikita Popov  %tmp18 = load i8, ptr %tmp17, align 1
62863081dc6SVolkan Keles  %tmp19 = insertelement <4 x i8> undef, i8 %tmp4, i32 0
62963081dc6SVolkan Keles  %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
63063081dc6SVolkan Keles  %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
63163081dc6SVolkan Keles  %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
632*ba1759c4SNikita Popov  store <4 x i8> %tmp22, ptr %dst
63363081dc6SVolkan Keles  ret void
63463081dc6SVolkan Keles}
635