; xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2   -slp-threshold=-1 | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx    | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2   | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw,+avx512vl | FileCheck %s

; store_i32: the four adjacent i32 elements starting at %0 are each rewritten
; in place as  p[i] = umin((p[i] * %1) >> 15, 255).  The shift is logical and
; the clamp is spelled as icmp ult + select. Argument %2 is unused.
; Expected output (assertions below): the four scalar chains become a single
; <4 x i32> load / mul-by-splat / lshr / llvm.umin / store sequence that keeps
; the TBAA tag on both memory accesses.
define void @store_i32(ptr nocapture %0, i32 %1, i32 %2) {
; CHECK-LABEL: @store_i32(
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i64 0
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = mul <4 x i32> [[TMP4]], [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = lshr <4 x i32> [[TMP7]], splat (i32 15)
; CHECK-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP8]], <4 x i32> splat (i32 255))
; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT:    ret void
;
  ; Lane 0: element at %0.
  %4 = load i32, ptr %0, align 4, !tbaa !2
  %5 = mul i32 %4, %1
  %6 = lshr i32 %5, 15
  %7 = icmp ult i32 %6, 255
  %8 = select i1 %7, i32 %6, i32 255
  store i32 %8, ptr %0, align 4, !tbaa !2
  ; Lane 1: element at %0 + 1.
  %9 = getelementptr inbounds i32, ptr %0, i64 1
  %10 = load i32, ptr %9, align 4, !tbaa !2
  %11 = mul i32 %10, %1
  %12 = lshr i32 %11, 15
  %13 = icmp ult i32 %12, 255
  %14 = select i1 %13, i32 %12, i32 255
  store i32 %14, ptr %9, align 4, !tbaa !2
  ; Lane 2: element at %0 + 2.
  %15 = getelementptr inbounds i32, ptr %0, i64 2
  %16 = load i32, ptr %15, align 4, !tbaa !2
  %17 = mul i32 %16, %1
  %18 = lshr i32 %17, 15
  %19 = icmp ult i32 %18, 255
  %20 = select i1 %19, i32 %18, i32 255
  store i32 %20, ptr %15, align 4, !tbaa !2
  ; Lane 3: element at %0 + 3.
  %21 = getelementptr inbounds i32, ptr %0, i64 3
  %22 = load i32, ptr %21, align 4, !tbaa !2
  %23 = mul i32 %22, %1
  %24 = lshr i32 %23, 15
  %25 = icmp ult i32 %24, 255
  %26 = select i1 %25, i32 %24, i32 255
  store i32 %26, ptr %21, align 4, !tbaa !2
  ret void
}

; store_i8: the four adjacent bytes starting at %0 are each zero-extended to
; i32, multiplied by %1, logically shifted right by 15, clamped to at most 255
; (icmp ult + select), truncated back to i8 and stored in place.
; Argument %2 is unused.
; Expected output (assertions below): one <4 x i8> load widened with zext to
; <4 x i32>, the arithmetic done on <4 x i32> with llvm.umin as the clamp, then
; a `trunc nuw` back to <4 x i8> and a single vector store.
define void @store_i8(ptr nocapture %0, i32 %1, i32 %2) {
; CHECK-LABEL: @store_i8(
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i8>, ptr [[TMP0:%.*]], align 1, !tbaa [[TBAA4:![0-9]+]]
; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i64 0
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i32> [[TMP7]], [[TMP5]]
; CHECK-NEXT:    [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], splat (i32 15)
; CHECK-NEXT:    [[TMP10:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP9]], <4 x i32> splat (i32 255))
; CHECK-NEXT:    [[TMP11:%.*]] = trunc nuw <4 x i32> [[TMP10]] to <4 x i8>
; CHECK-NEXT:    store <4 x i8> [[TMP11]], ptr [[TMP0]], align 1, !tbaa [[TBAA4]]
; CHECK-NEXT:    ret void
;
  ; Lane 0: byte at %0.
  %4 = load i8, ptr %0, align 1, !tbaa !6
  %5 = zext i8 %4 to i32
  %6 = mul i32 %5, %1
  %7 = lshr i32 %6, 15
  %8 = icmp ult i32 %7, 255
  %9 = select i1 %8, i32 %7, i32 255
  %10 = trunc i32 %9 to i8
  store i8 %10, ptr %0, align 1, !tbaa !6
  ; Lane 1: byte at %0 + 1.
  %11 = getelementptr inbounds i8, ptr %0, i64 1
  %12 = load i8, ptr %11, align 1, !tbaa !6
  %13 = zext i8 %12 to i32
  %14 = mul i32 %13, %1
  %15 = lshr i32 %14, 15
  %16 = icmp ult i32 %15, 255
  %17 = select i1 %16, i32 %15, i32 255
  %18 = trunc i32 %17 to i8
  store i8 %18, ptr %11, align 1, !tbaa !6
  ; Lane 2: byte at %0 + 2.
  %19 = getelementptr inbounds i8, ptr %0, i64 2
  %20 = load i8, ptr %19, align 1, !tbaa !6
  %21 = zext i8 %20 to i32
  %22 = mul i32 %21, %1
  %23 = lshr i32 %22, 15
  %24 = icmp ult i32 %23, 255
  %25 = select i1 %24, i32 %23, i32 255
  %26 = trunc i32 %25 to i8
  store i8 %26, ptr %19, align 1, !tbaa !6
  ; Lane 3: byte at %0 + 3.
  %27 = getelementptr inbounds i8, ptr %0, i64 3
  %28 = load i8, ptr %27, align 1, !tbaa !6
  %29 = zext i8 %28 to i32
  %30 = mul i32 %29, %1
  %31 = lshr i32 %30, 15
  %32 = icmp ult i32 %31, 255
  %33 = select i1 %32, i32 %31, i32 255
  %34 = trunc i32 %33 to i8
  store i8 %34, ptr %27, align 1, !tbaa !6
  ret void
}

; store_i64: %1 is zero-extended to i64 once, then each of the four adjacent
; i64 elements starting at %0 is multiplied by it and logically shifted right
; by 15. The comparison against 255 is done on the value truncated to i32,
; while the stored value is either the shifted result masked to its low 32
; bits or the constant 255. Argument %2 is unused.
; Expected output (assertions below): a single <4 x i64> load/mul/lshr chain
; with the compare on the truncated <4 x i32> vector; the icmp + select pair
; is kept as-is (no llvm.umin call is expected here) before one vector store.
define void @store_i64(ptr nocapture %0, i32 %1, i32 %2) {
; CHECK-LABEL: @store_i64(
; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP1:%.*]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA5:![0-9]+]]
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i64 0
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i64> [[TMP5]], [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = lshr <4 x i64> [[TMP8]], splat (i64 15)
; CHECK-NEXT:    [[TMP10:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32>
; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult <4 x i32> [[TMP10]], splat (i32 255)
; CHECK-NEXT:    [[TMP12:%.*]] = and <4 x i64> [[TMP9]], splat (i64 4294967295)
; CHECK-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i64> [[TMP12]], <4 x i64> splat (i64 255)
; CHECK-NEXT:    store <4 x i64> [[TMP13]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]]
; CHECK-NEXT:    ret void
;
  ; Multiplier shared by all four lanes.
  %4 = zext i32 %1 to i64
  ; Lane 0: element at %0.
  %5 = load i64, ptr %0, align 8, !tbaa !7
  %6 = mul i64 %5, %4
  %7 = lshr i64 %6, 15
  %8 = trunc i64 %7 to i32
  %9 = icmp ult i32 %8, 255
  %10 = and i64 %7, 4294967295
  %11 = select i1 %9, i64 %10, i64 255
  store i64 %11, ptr %0, align 8, !tbaa !7
  ; Lane 1: element at %0 + 1.
  %12 = getelementptr inbounds i64, ptr %0, i64 1
  %13 = load i64, ptr %12, align 8, !tbaa !7
  %14 = mul i64 %13, %4
  %15 = lshr i64 %14, 15
  %16 = trunc i64 %15 to i32
  %17 = icmp ult i32 %16, 255
  %18 = and i64 %15, 4294967295
  %19 = select i1 %17, i64 %18, i64 255
  store i64 %19, ptr %12, align 8, !tbaa !7
  ; Lane 2: element at %0 + 2.
  %20 = getelementptr inbounds i64, ptr %0, i64 2
  %21 = load i64, ptr %20, align 8, !tbaa !7
  %22 = mul i64 %21, %4
  %23 = lshr i64 %22, 15
  %24 = trunc i64 %23 to i32
  %25 = icmp ult i32 %24, 255
  %26 = and i64 %23, 4294967295
  %27 = select i1 %25, i64 %26, i64 255
  store i64 %27, ptr %20, align 8, !tbaa !7
  ; Lane 3: element at %0 + 3.
  %28 = getelementptr inbounds i64, ptr %0, i64 3
  %29 = load i64, ptr %28, align 8, !tbaa !7
  %30 = mul i64 %29, %4
  %31 = lshr i64 %30, 15
  %32 = trunc i64 %31 to i32
  %33 = icmp ult i32 %32, 255
  %34 = and i64 %31, 4294967295
  %35 = select i1 %33, i64 %34, i64 255
  store i64 %35, ptr %28, align 8, !tbaa !7
  ret void
}

; TBAA type graph referenced by the !tbaa attachments in this file.
; !2, !6 and !7 are the access tags used by @store_i32, @store_i8 and
; @store_i64 respectively; each names the same node as base and access type
; at offset 0.
; Access tag for "int".
!2 = !{!3, !3, i64 0}
; Type node "int", parent !4.
!3 = !{!"int", !4, i64 0}
; Type node "omnipotent char", parent is the root !5.
!4 = !{!"omnipotent char", !5, i64 0}
; Root node of the graph.
!5 = !{!"Simple C++ TBAA"}
; Access tag for "omnipotent char".
!6 = !{!4, !4, i64 0}
; Access tag for "long".
!7 = !{!8, !8, i64 0}
; Type node "long", parent !4.
!8 = !{!"long", !4, i64 0}
