xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll (revision 580210a0c938531ef9fd79f9ffedb93eeb2e66c2)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=slp-vectorizer -S -mcpu=core-i7 | FileCheck %s
3; RUN: opt < %s -passes=slp-vectorizer -slp-threshold=-100 -S -mcpu=core-i7 | FileCheck %s --check-prefix=FORCE_SLP
4
; @test: loads two i8 values from %x and from %x plus one byte, then inserts
; them into lanes 0 and 1 of the incoming <4 x i8> vector %v. The partially
; built vector %v0 (lane 0 only) is added to the vector %v1 (lanes 0 and 1).
; The assertions for both RUN configurations (default, and with
; -slp-threshold=-100) match the input instruction-for-instruction, i.e. the
; function is expected to come out of the pass unchanged.
; NOTE(review): the file name suggests this is a regression test for PR52275 -
; confirm against the bug report.
5define  <4 x i8> @test(<4 x i8> %v, ptr %x) {
6; CHECK-LABEL: @test(
7; CHECK-NEXT:    [[X0:%.*]] = load i8, ptr [[X:%.*]], align 4
8; CHECK-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 1
9; CHECK-NEXT:    [[X1:%.*]] = load i8, ptr [[G1]], align 4
10; CHECK-NEXT:    [[V0:%.*]] = insertelement <4 x i8> [[V:%.*]], i8 [[X0]], i64 0
11; CHECK-NEXT:    [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i64 1
12; CHECK-NEXT:    [[V2:%.*]] = add <4 x i8> [[V0]], [[V1]]
13; CHECK-NEXT:    ret <4 x i8> [[V2]]
14;
15; FORCE_SLP-LABEL: @test(
16; FORCE_SLP-NEXT:    [[X0:%.*]] = load i8, ptr [[X:%.*]], align 4
17; FORCE_SLP-NEXT:    [[G1:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 1
18; FORCE_SLP-NEXT:    [[X1:%.*]] = load i8, ptr [[G1]], align 4
19; FORCE_SLP-NEXT:    [[V0:%.*]] = insertelement <4 x i8> [[V:%.*]], i8 [[X0]], i64 0
20; FORCE_SLP-NEXT:    [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i64 1
21; FORCE_SLP-NEXT:    [[V2:%.*]] = add <4 x i8> [[V0]], [[V1]]
22; FORCE_SLP-NEXT:    ret <4 x i8> [[V2]]
23;
24  %x0 = load i8, ptr %x, align 4
25  %g1 = getelementptr inbounds i8, ptr %x, i64 1
26  %x1 = load i8, ptr %g1, align 4
27  %v0 = insertelement <4 x i8> %v, i8 %x0, i64 0
28  %v1 = insertelement <4 x i8> %v0, i8 %x1, i64 1
  ; %v0 has two users: the second insertelement above and this add.
29  %v2 = add <4 x i8> %v0, %v1
30  ret <4 x i8> %v2
31}
32
; @test2: loads two i32 values from %t1 and the following i32 slot, truncates
; each to i8, and inserts them into lanes 0 and 1 of the incoming <2 x i8>
; vector %t6, so both lanes of the vector are overwritten. The intermediate
; vector %t8 (lane 0 only) is added to the fully built vector %t10.
; The assertions for both RUN configurations match the input
; instruction-for-instruction, i.e. the function is expected to come out of
; the pass unchanged.
33define  <2 x i8> @test2(<2 x i8> %t6, ptr %t1) {
34; CHECK-LABEL: @test2(
35; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
36; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
37; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
38; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
39; CHECK-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 0
40; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
41; CHECK-NEXT:    [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 1
42; CHECK-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
43; CHECK-NEXT:    ret <2 x i8> [[T11]]
44;
45; FORCE_SLP-LABEL: @test2(
46; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
47; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
48; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
49; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
50; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 0
51; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
52; FORCE_SLP-NEXT:    [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 1
53; FORCE_SLP-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
54; FORCE_SLP-NEXT:    ret <2 x i8> [[T11]]
55;
56  %t3 = load i32, ptr %t1, align 4
57  %t4 = getelementptr inbounds i32, ptr %t1, i64 1
58  %t5 = load i32, ptr %t4, align 4
59  %t7 = trunc i32 %t3 to i8
60  %t8 = insertelement <2 x i8> %t6, i8 %t7, i64 0
61  %t9 = trunc i32 %t5 to i8
62  %t10 = insertelement <2 x i8> %t8, i8 %t9, i64 1
  ; %t8 has two users: the second insertelement above and this add.
63  %t11 = add <2 x i8> %t10, %t8
64  ret <2 x i8> %t11
65}
66
; @test_reorder: same shape as a two-lane build from truncated i32 loads, but
; the lanes are written in reverse order: the value loaded from %t1 goes into
; lane 1 first, then the value loaded from the following i32 slot goes into
; lane 0. The intermediate vector %t8 (lane 1 only) is added to the fully
; built vector %t10.
; The assertions for both RUN configurations match the input
; instruction-for-instruction, i.e. the function is expected to come out of
; the pass unchanged.
67define  <2 x i8> @test_reorder(<2 x i8> %t6, ptr %t1) {
68; CHECK-LABEL: @test_reorder(
69; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
70; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
71; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
72; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
73; CHECK-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 1
74; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
75; CHECK-NEXT:    [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 0
76; CHECK-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
77; CHECK-NEXT:    ret <2 x i8> [[T11]]
78;
79; FORCE_SLP-LABEL: @test_reorder(
80; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
81; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
82; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
83; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
84; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <2 x i8> [[T6:%.*]], i8 [[T7]], i64 1
85; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
86; FORCE_SLP-NEXT:    [[T10:%.*]] = insertelement <2 x i8> [[T8]], i8 [[T9]], i64 0
87; FORCE_SLP-NEXT:    [[T11:%.*]] = add <2 x i8> [[T10]], [[T8]]
88; FORCE_SLP-NEXT:    ret <2 x i8> [[T11]]
89;
90  %t3 = load i32, ptr %t1, align 4
91  %t4 = getelementptr inbounds i32, ptr %t1, i64 1
92  %t5 = load i32, ptr %t4, align 4
93  %t7 = trunc i32 %t3 to i8
  ; Lane 1 is filled before lane 0 (reverse of memory order).
94  %t8 = insertelement <2 x i8> %t6, i8 %t7, i64 1
95  %t9 = trunc i32 %t5 to i8
96  %t10 = insertelement <2 x i8> %t8, i8 %t9, i64 0
  ; %t8 has two users: the second insertelement above and this add.
97  %t11 = add <2 x i8> %t10, %t8
98  ret <2 x i8> %t11
99}
100
; @test_subvector: loads two i32 values from %t1 and the following i32 slot,
; truncates each to i8, and inserts them into lanes 0 and 1 of the incoming
; <4 x i8> vector %t6. Only two of the four lanes are written; lanes 2 and 3
; keep the values from %t6. The intermediate vector %t8 (lane 0 only) is added
; to %t10 (lanes 0 and 1).
; The assertions for both RUN configurations match the input
; instruction-for-instruction, i.e. the function is expected to come out of
; the pass unchanged.
101define  <4 x i8> @test_subvector(<4 x i8> %t6, ptr %t1) {
102; CHECK-LABEL: @test_subvector(
103; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
104; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
105; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
106; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
107; CHECK-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 0
108; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
109; CHECK-NEXT:    [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 1
110; CHECK-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
111; CHECK-NEXT:    ret <4 x i8> [[T11]]
112;
113; FORCE_SLP-LABEL: @test_subvector(
114; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
115; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
116; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
117; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
118; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 0
119; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
120; FORCE_SLP-NEXT:    [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 1
121; FORCE_SLP-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
122; FORCE_SLP-NEXT:    ret <4 x i8> [[T11]]
123;
124  %t3 = load i32, ptr %t1, align 4
125  %t4 = getelementptr inbounds i32, ptr %t1, i64 1
126  %t5 = load i32, ptr %t4, align 4
127  %t7 = trunc i32 %t3 to i8
128  %t8 = insertelement <4 x i8> %t6, i8 %t7, i64 0
129  %t9 = trunc i32 %t5 to i8
130  %t10 = insertelement <4 x i8> %t8, i8 %t9, i64 1
  ; %t8 has two users: the second insertelement above and this add.
131  %t11 = add <4 x i8> %t10, %t8
132  ret <4 x i8> %t11
133}
134
; @test_subvector_reorder: loads two i32 values from %t1 and the following i32
; slot, truncates each to i8, and inserts them into the upper lanes of the
; incoming <4 x i8> vector %t6 in reverse order: the first loaded value goes
; into lane 3, the second into lane 2. Lanes 0 and 1 keep the values from %t6.
; The intermediate vector %t8 (lane 3 only) is added to %t10 (lanes 3 and 2).
; The assertions for both RUN configurations match the input
; instruction-for-instruction, i.e. the function is expected to come out of
; the pass unchanged.
135define  <4 x i8> @test_subvector_reorder(<4 x i8> %t6, ptr %t1) {
136; CHECK-LABEL: @test_subvector_reorder(
137; CHECK-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
138; CHECK-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
139; CHECK-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
140; CHECK-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
141; CHECK-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 3
142; CHECK-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
143; CHECK-NEXT:    [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 2
144; CHECK-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
145; CHECK-NEXT:    ret <4 x i8> [[T11]]
146;
147; FORCE_SLP-LABEL: @test_subvector_reorder(
148; FORCE_SLP-NEXT:    [[T3:%.*]] = load i32, ptr [[T1:%.*]], align 4
149; FORCE_SLP-NEXT:    [[T4:%.*]] = getelementptr inbounds i32, ptr [[T1]], i64 1
150; FORCE_SLP-NEXT:    [[T5:%.*]] = load i32, ptr [[T4]], align 4
151; FORCE_SLP-NEXT:    [[T7:%.*]] = trunc i32 [[T3]] to i8
152; FORCE_SLP-NEXT:    [[T8:%.*]] = insertelement <4 x i8> [[T6:%.*]], i8 [[T7]], i64 3
153; FORCE_SLP-NEXT:    [[T9:%.*]] = trunc i32 [[T5]] to i8
154; FORCE_SLP-NEXT:    [[T10:%.*]] = insertelement <4 x i8> [[T8]], i8 [[T9]], i64 2
155; FORCE_SLP-NEXT:    [[T11:%.*]] = add <4 x i8> [[T10]], [[T8]]
156; FORCE_SLP-NEXT:    ret <4 x i8> [[T11]]
157;
158  %t3 = load i32, ptr %t1, align 4
159  %t4 = getelementptr inbounds i32, ptr %t1, i64 1
160  %t5 = load i32, ptr %t4, align 4
161  %t7 = trunc i32 %t3 to i8
  ; Lane 3 is filled before lane 2 (reverse of memory order).
162  %t8 = insertelement <4 x i8> %t6, i8 %t7, i64 3
163  %t9 = trunc i32 %t5 to i8
164  %t10 = insertelement <4 x i8> %t8, i8 %t9, i64 2
  ; %t8 has two users: the second insertelement above and this add.
165  %t11 = add <4 x i8> %t10, %t8
166  ret <4 x i8> %t11
167}
168