; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -o - -S -passes=load-store-vectorizer,dce %s | FileCheck %s

; Make sure LoadStoreVectorizer vectorizes the loads below.
; In order to prove that the vectorization is safe, it tries to
; match nested adds and find an expression that adds a constant
; value to an existing index and the result doesn't overflow.

target triple = "x86_64--"

; Positive case: four i8 loads from %src at index sext(%v1 + (%v0 + C)) for
; C = -1, 0, 1, 2, where every add carries nsw. The checks expect one
; <4 x i8> load through the pointer computed for C = -1, with the scalar
; values recovered via extractelement.
define void @ld_v4i8_add_nsw(i32 %v0, i32 %v1, ptr %src, ptr %dst) {
; CHECK-LABEL: @ld_v4i8_add_nsw(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = add nsw i32 [[V0:%.*]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[V1:%.*]], [[TMP]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <4 x i8> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP82:%.*]] = extractelement <4 x i8> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP133:%.*]] = extractelement <4 x i8> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP184:%.*]] = extractelement <4 x i8> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> poison, i8 [[TMP41]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP82]], i32 1
; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP133]], i32 2
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP184]], i32 3
; CHECK-NEXT:    store <4 x i8> [[TMP22]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
bb:
  ; Lane 0: src[sext(v1 + (v0 - 1))]
  %tmp = add nsw i32 %v0, -1
  %tmp1 = add nsw i32 %v1, %tmp
  %tmp2 = sext i32 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
  %tmp4 = load i8, ptr %tmp3, align 1
  ; Lane 1: src[sext(v1 + v0)]
  %tmp5 = add nsw i32 %v1, %v0
  %tmp6 = sext i32 %tmp5 to i64
  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
  %tmp8 = load i8, ptr %tmp7, align 1
  ; Lane 2: src[sext(v1 + (v0 + 1))]
  %tmp9 = add nsw i32 %v0, 1
  %tmp10 = add nsw i32 %v1, %tmp9
  %tmp11 = sext i32 %tmp10 to i64
  %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11
  %tmp13 = load i8, ptr %tmp12, align 1
  ; Lane 3: src[sext(v1 + (v0 + 2))]
  %tmp14 = add nsw i32 %v0, 2
  %tmp15 = add nsw i32 %v1, %tmp14
  %tmp16 = sext i32 %tmp15 to i64
  %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16
  %tmp18 = load i8, ptr %tmp17, align 1
  ; Pack the four scalars into one vector and store it to %dst.
  %tmp19 = insertelement <4 x i8> poison, i8 %tmp4, i32 0
  %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
  %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
  %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
  store <4 x i8> %tmp22, ptr %dst
  ret void
}

; Make sure we don't vectorize the loads below: the add that feeds each
; sext instruction doesn't have the nsw flag.

; Negative case: same address pattern as a nested add of %v1 and (%v0 + C)
; for C = -1, 0, 1, 2, but the add whose result is sign-extended has no nsw
; flag. The checks expect all four scalar i8 loads to remain as written.
define void @ld_v4i8_add_not_safe(i32 %v0, i32 %v1, ptr %src, ptr %dst) {
; CHECK-LABEL: @ld_v4i8_add_not_safe(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = add nsw i32 [[V0:%.*]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[V1:%.*]], [[TMP]]
; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1
; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[V1]], [[V0]]
; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 1
; CHECK-NEXT:    [[TMP9:%.*]] = add nsw i32 [[V0]], 1
; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[V1]], [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = load i8, ptr [[TMP12]], align 1
; CHECK-NEXT:    [[TMP14:%.*]] = add nsw i32 [[V0]], 2
; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[V1]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP16]]
; CHECK-NEXT:    [[TMP18:%.*]] = load i8, ptr [[TMP17]], align 1
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> poison, i8 [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP19]], i8 [[TMP8]], i32 1
; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP13]], i32 2
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP18]], i32 3
; CHECK-NEXT:    store <4 x i8> [[TMP22]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    ret void
;
bb:
  ; Lane 0: inner add keeps nsw, outer add (the sext operand) does not.
  %tmp = add nsw i32 %v0, -1
  %tmp1 = add i32 %v1, %tmp
  %tmp2 = sext i32 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %src, i64 %tmp2
  %tmp4 = load i8, ptr %tmp3, align 1
  ; Lane 1: single add of %v1 and %v0, without nsw.
  %tmp5 = add i32 %v1, %v0
  %tmp6 = sext i32 %tmp5 to i64
  %tmp7 = getelementptr inbounds i8, ptr %src, i64 %tmp6
  %tmp8 = load i8, ptr %tmp7, align 1
  ; Lane 2: same shape as lane 0 with constant 1.
  %tmp9 = add nsw i32 %v0, 1
  %tmp10 = add i32 %v1, %tmp9
  %tmp11 = sext i32 %tmp10 to i64
  %tmp12 = getelementptr inbounds i8, ptr %src, i64 %tmp11
  %tmp13 = load i8, ptr %tmp12, align 1
  ; Lane 3: same shape as lane 0 with constant 2.
  %tmp14 = add nsw i32 %v0, 2
  %tmp15 = add i32 %v1, %tmp14
  %tmp16 = sext i32 %tmp15 to i64
  %tmp17 = getelementptr inbounds i8, ptr %src, i64 %tmp16
  %tmp18 = load i8, ptr %tmp17, align 1
  ; Pack the four scalars into one vector and store it to %dst.
  %tmp19 = insertelement <4 x i8> poison, i8 %tmp4, i32 0
  %tmp20 = insertelement <4 x i8> %tmp19, i8 %tmp8, i32 1
  %tmp21 = insertelement <4 x i8> %tmp20, i8 %tmp13, i32 2
  %tmp22 = insertelement <4 x i8> %tmp21, i8 %tmp18, i32 3
  store <4 x i8> %tmp22, ptr %dst
  ret void
}
117