xref: /llvm-project/llvm/test/Transforms/LoadStoreVectorizer/X86/massive_indirection.ll (revision 9184c42869b87a59839cafdb8a3679e7ec2faeb1)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2; RUN: opt %s -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -mcpu=skx -S -o - | FileCheck %s
3
4; This test verifies that the vectorizer can handle an extended sequence of
5; getelementptr instructions and generate longer vectors. With special handling,
6; some elements can still be vectorized even if they require looking up the
7; common underlying object deeper than 6 levels from the original pointer.
8
9; The tests below are a simplified version of an actual performance-oriented
10; workload; the offsets in the getelementptr instructions are kept similar or
11; identical to keep the tests simple.
12
; Four zero stores of mixed width (half, <4 x half>, <2 x half>, half) that
; together cover 16 contiguous bytes starting at %a6. The expected output
; below replaces them with a single `store <8 x half> zeroinitializer` to %a6.
; The "levels_6_7_8_8" suffix presumably refers to how many getelementptr
; steps separate each store pointer from %arg1 (%a6 = 6, %b7 = 7, %c8 = 8,
; %d8 = 8).
; NOTE(review): the "v1_v2_v4_v1" part of the name does not match the order of
; the stores in the body (v1, v4, v2, v1) - confirm whether this is intended.
13define void @v1_v2_v4_v1_to_v8_levels_6_7_8_8(i32 %arg0, ptr align 16 %arg1) {
14; CHECK-LABEL: define void @v1_v2_v4_v1_to_v8_levels_6_7_8_8(
15; CHECK-SAME: i32 [[ARG0:%.*]], ptr align 16 [[ARG1:%.*]]) #[[ATTR0:[0-9]+]] {
16; CHECK-NEXT:    [[LEVEL1:%.*]] = getelementptr i8, ptr [[ARG1]], i32 917504
17; CHECK-NEXT:    [[LEVEL2:%.*]] = getelementptr i8, ptr [[LEVEL1]], i32 [[ARG0]]
18; CHECK-NEXT:    [[LEVEL3:%.*]] = getelementptr i8, ptr [[LEVEL2]], i32 32768
19; CHECK-NEXT:    [[LEVEL4:%.*]] = getelementptr i8, ptr [[LEVEL3]], i32 [[ARG0]]
20; CHECK-NEXT:    [[LEVEL5:%.*]] = getelementptr i8, ptr [[LEVEL4]], i32 [[ARG0]]
21; CHECK-NEXT:    [[A6:%.*]] = getelementptr i8, ptr [[LEVEL5]], i32 [[ARG0]]
22; CHECK-NEXT:    store <8 x half> zeroinitializer, ptr [[A6]], align 16
23; CHECK-NEXT:    ret void
24;
25
  ; Shared prefix: five chained byte-offset GEPs from %arg1, mixing constant
  ; offsets (917504, 32768) with the variable offset %arg0.
26  %level1 = getelementptr i8, ptr %arg1, i32 917504
27  %level2 = getelementptr i8, ptr %level1, i32 %arg0
28  %level3 = getelementptr i8, ptr %level2, i32 32768
29  %level4 = getelementptr i8, ptr %level3, i32 %arg0
30  %level5 = getelementptr i8, ptr %level4, i32 %arg0
31
  ; Store addresses relative to %a6: %b7 = %a6 + 2, %c8 = %a6 + 10 (via %b7),
  ; %d8 = %a6 + 14 (via %b7).
32  %a6 = getelementptr i8, ptr %level5, i32 %arg0
33  %b7 = getelementptr i8, ptr %a6, i32 2
34  %c8 = getelementptr i8, ptr %b7, i32 8
35  %d8 = getelementptr i8, ptr %b7, i32 12
36
  ; Byte ranges written relative to %a6: [0,2), [2,10), [10,14), [14,16).
  ; Only the first store carries align 16; the rest are align 2.
37  store half 0xH0000, ptr %a6, align 16
38  store <4 x half> zeroinitializer, ptr %b7, align 2
39  store <2 x half> zeroinitializer, ptr %c8, align 2
40  store half 0xH0000, ptr %d8, align 2
41  ret void
42}
43
; Eight scalar half stores to consecutive 2-byte slots starting at %a6, where
; each store pointer is derived from the previous one by a +2 byte GEP. The
; expected output below replaces them with a single
; `store <8 x half> zeroinitializer` to %a6 with align 16.
; The "levels_6_..._13" suffix presumably refers to the number of
; getelementptr steps between each store pointer and %arg1.
44define void @v1x8_levels_6_7_8_9_10_11_12_13(i32 %arg0, ptr align 16 %arg1) {
45; CHECK-LABEL: define void @v1x8_levels_6_7_8_9_10_11_12_13(
46; CHECK-SAME: i32 [[ARG0:%.*]], ptr align 16 [[ARG1:%.*]]) #[[ATTR0]] {
47; CHECK-NEXT:    [[LEVEL1:%.*]] = getelementptr i8, ptr [[ARG1]], i32 917504
48; CHECK-NEXT:    [[LEVEL2:%.*]] = getelementptr i8, ptr [[LEVEL1]], i32 [[ARG0]]
49; CHECK-NEXT:    [[LEVEL3:%.*]] = getelementptr i8, ptr [[LEVEL2]], i32 32768
50; CHECK-NEXT:    [[LEVEL4:%.*]] = getelementptr i8, ptr [[LEVEL3]], i32 [[ARG0]]
51; CHECK-NEXT:    [[LEVEL5:%.*]] = getelementptr i8, ptr [[LEVEL4]], i32 [[ARG0]]
52; CHECK-NEXT:    [[A6:%.*]] = getelementptr i8, ptr [[LEVEL5]], i32 [[ARG0]]
53; CHECK-NEXT:    store <8 x half> zeroinitializer, ptr [[A6]], align 16
54; CHECK-NEXT:    ret void
55;
56
  ; Shared prefix: five chained byte-offset GEPs from %arg1, mixing constant
  ; offsets (917504, 32768) with the variable offset %arg0.
57  %level1 = getelementptr i8, ptr %arg1, i32 917504
58  %level2 = getelementptr i8, ptr %level1, i32 %arg0
59  %level3 = getelementptr i8, ptr %level2, i32 32768
60  %level4 = getelementptr i8, ptr %level3, i32 %arg0
61  %level5 = getelementptr i8, ptr %level4, i32 %arg0
62
  ; A linear chain: every pointer after %a6 is the previous pointer plus 2
  ; bytes, so %h13 ends up at %a6 + 14.
63  %a6 = getelementptr i8, ptr %level5, i32 %arg0
64  %b7 = getelementptr i8, ptr %a6, i32 2
65  %c8 = getelementptr i8, ptr %b7, i32 2
66  %d9 = getelementptr i8, ptr %c8, i32 2
67  %e10 = getelementptr i8, ptr %d9, i32 2
68  %f11 = getelementptr i8, ptr %e10, i32 2
69  %g12 = getelementptr i8, ptr %f11, i32 2
70  %h13 = getelementptr i8, ptr %g12, i32 2
71
  ; The stores are align 2 except the first (align 16) and the one to %e10
  ; (align 8, at %a6 + 8).
72  store half 0xH0000, ptr %a6, align 16
73  store half 0xH0000, ptr %b7, align 2
74  store half 0xH0000, ptr %c8, align 2
75  store half 0xH0000, ptr %d9, align 2
76  store half 0xH0000, ptr %e10, align 8
77  store half 0xH0000, ptr %f11, align 2
78  store half 0xH0000, ptr %g12, align 2
79  store half 0xH0000, ptr %h13, align 2
80  ret void
81}
82
; Same idea as the tests above, but in addrspace(3), with non-constant stored
; values, and with the GEP chain spread over several basic blocks. The four
; stores in %.lr.ph (half, <4 x half>, <2 x half>, half) cover 16 contiguous
; bytes starting at %dst; the expected output below assembles one <8 x half>
; value with insertelement/extractelement and stores it to %dst with align 2.
; NOTE(review): the "v1_4_4_4_2_1_to_v8_8" part of the name does not match the
; stores visible in the body (v1, v4, v2, v1 merged into one v8) - confirm
; whether the name is left over from an earlier version of the test.
83define void @v1_4_4_4_2_1_to_v8_8_levels_6_7(i32 %arg0, ptr addrspace(3) align 16 %arg1_ptr, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, half %arg6_half, half %arg7_half, <2 x half> %arg8_2xhalf) {
84; CHECK-LABEL: define void @v1_4_4_4_2_1_to_v8_8_levels_6_7(
85; CHECK-SAME: i32 [[ARG0:%.*]], ptr addrspace(3) align 16 [[ARG1_PTR:%.*]], i32 [[ARG2:%.*]], i32 [[ARG3:%.*]], i32 [[ARG4:%.*]], i32 [[ARG5:%.*]], half [[ARG6_HALF:%.*]], half [[ARG7_HALF:%.*]], <2 x half> [[ARG8_2XHALF:%.*]]) #[[ATTR0]] {
86; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[ARG1_PTR]], i32 458752
87; CHECK-NEXT:    br [[DOTPREHEADER11_PREHEADER:label %.*]]
88; CHECK:       [[_PREHEADER11_PREHEADER:.*:]]
89; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i32 [[ARG0]], 6
90; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP1]], i32 [[TMP2]]
91; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[ARG2]]
92; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[ARG3]]
93; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[ARG0]], 2
94; CHECK-NEXT:    br i1 [[CMP]], [[DOTLR_PH:label %.*]], [[DOTEXIT_POINT:label %.*]]
95; CHECK:       [[_LR_PH:.*:]]
96; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP5]], i32 [[ARG4]]
97; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[GEP]], i32 [[ARG5]]
98; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x half> poison, half [[ARG6_HALF]], i32 0
99; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x half> [[TMP7]], half 0xH0000, i32 1
100; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x half> [[TMP8]], half 0xH0000, i32 2
101; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <8 x half> [[TMP9]], half 0xH0000, i32 3
102; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x half> [[TMP10]], half 0xH0000, i32 4
103; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x half> [[ARG8_2XHALF]], i32 0
104; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <8 x half> [[TMP11]], half [[TMP12]], i32 5
105; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x half> [[ARG8_2XHALF]], i32 1
106; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <8 x half> [[TMP13]], half [[TMP14]], i32 6
107; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <8 x half> [[TMP15]], half [[ARG7_HALF]], i32 7
108; CHECK-NEXT:    store <8 x half> [[TMP16]], ptr addrspace(3) [[TMP6]], align 2
109; CHECK-NEXT:    br [[DOTEXIT_POINT]]
110; CHECK:       [[_EXIT_POINT:.*:]]
111; CHECK-NEXT:    ret void
112;
  ; Entry block: the first GEP of the chain, a constant byte offset from
  ; %arg1_ptr.
113  %base1 = getelementptr inbounds i8, ptr addrspace(3) %arg1_ptr, i32 458752
114  br label %.preheader11.preheader
115
116.preheader11.preheader:
  ; Three more chained GEPs with variable offsets (%arg0 << 6, %arg2, %arg3).
117  %base2 = shl nuw nsw i32 %arg0, 6
118  %base3 = getelementptr inbounds i8, ptr addrspace(3) %base1, i32 %base2
119
120  %base4 = getelementptr inbounds i8, ptr addrspace(3) %base3, i32 %arg2
121  %base5 = getelementptr inbounds i8, ptr addrspace(3) %base4, i32 %arg3
122
  ; The stores are reached only when %arg0 > 2 (signed compare).
123  %cmp = icmp sgt i32 %arg0, 2
124  br i1 %cmp, label %.lr.ph, label %.exit_point
125
126.lr.ph:
127  %gep = getelementptr inbounds i8, ptr addrspace(3) %base5, i32 %arg4
128
  ; %dst is the sixth GEP from %arg1_ptr; the three %dst_off* pointers are
  ; each one further GEP from %dst, at byte offsets 2, 10 and 14.
129  %dst = getelementptr inbounds i8, ptr addrspace(3) %gep, i32 %arg5
130  %dst_off2 = getelementptr inbounds i8, ptr addrspace(3) %dst, i32 2
131  %dst_off10 = getelementptr inbounds i8, ptr addrspace(3) %dst, i32 10
132  %dst_off14 = getelementptr inbounds i8, ptr addrspace(3) %dst, i32 14
133
  ; Byte ranges written relative to %dst: [0,2), [2,10), [10,14), [14,16).
  ; Every store is align 2.
134  store half %arg6_half, ptr addrspace(3) %dst, align 2
135  store <4 x half> zeroinitializer, ptr addrspace(3) %dst_off2, align 2
136  store <2 x half> %arg8_2xhalf, ptr addrspace(3) %dst_off10, align 2
137  store half %arg7_half, ptr addrspace(3) %dst_off14, align 2
138  br label %.exit_point
139
140.exit_point:
141  ret void
142}
143
144; Regression test for merging equivalence classes. It is reduced and adapted
145; for LSV from llvm/test/CodeGen/NVPTX/variadics-backend.ll, which failed
146; post-commit checks under the memory sanitizer after the initial attempt to
147; implement the merging of equivalence classes.
148define void @variadics1(ptr %vlist) {
149; CHECK-LABEL: define void @variadics1(
150; CHECK-SAME: ptr [[VLIST:%.*]]) #[[ATTR0]] {
151; CHECK-NEXT:    [[ARGP_CUR7_ALIGNED2:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[VLIST]], i64 0)
152; CHECK-NEXT:    [[ARGP_NEXT8:%.*]] = getelementptr i8, ptr [[ARGP_CUR7_ALIGNED2]], i64 8
153; CHECK-NEXT:    [[X0:%.*]] = getelementptr i8, ptr [[ARGP_NEXT8]], i32 7
154; CHECK-NEXT:    [[ARGP_CUR11_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[X0]], i64 0)
155; CHECK-NEXT:    [[ARGP_NEXT12:%.*]] = getelementptr i8, ptr [[ARGP_CUR11_ALIGNED]], i64 8
156; CHECK-NEXT:    [[X2:%.*]] = getelementptr i8, ptr [[ARGP_NEXT12]], i32 7
157; CHECK-NEXT:    [[ARGP_CUR16_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[X2]], i64 0)
158; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARGP_CUR16_ALIGNED]], align 4294967296
159; CHECK-NEXT:    [[X31:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
160; CHECK-NEXT:    [[X42:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
161; CHECK-NEXT:    [[X5:%.*]] = fadd double [[X42]], [[X31]]
162; CHECK-NEXT:    store double [[X5]], ptr null, align 8
163; CHECK-NEXT:    ret void
164;
  ; Pointer chain: three rounds of llvm.ptrmask (each with mask 0) separated
  ; by +8 and +7 byte GEPs. The expected output above leaves this chain
  ; unchanged.
165  %argp.cur7.aligned2 = call ptr @llvm.ptrmask.p0.i64(ptr %vlist, i64 0)
166  %argp.next8 = getelementptr i8, ptr %argp.cur7.aligned2, i64 8
167  %x0 = getelementptr i8, ptr %argp.next8, i32 7
168  %argp.cur11.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %x0, i64 0)
169  %argp.next12 = getelementptr i8, ptr %argp.cur11.aligned, i64 8
170  %x2 = getelementptr i8, ptr %argp.next12, i32 7
171  %argp.cur16.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %x2, i64 0)
  ; Two adjacent double loads (offsets 0 and 8 from %argp.cur16.aligned, both
  ; align 8). The expected output merges them into one <2 x double> load with
  ; align 4294967296 followed by two extractelements.
  ; NOTE(review): the very large alignment presumably follows from the zero
  ; ptrmask mask - confirm.
172  %x3 = load double, ptr %argp.cur16.aligned, align 8
173  %argp.cur16.aligned_off8 = getelementptr i8, ptr %argp.cur16.aligned, i32 8
174  %x4 = load double, ptr %argp.cur16.aligned_off8, align 8
  ; Use both loaded values; the sum is stored to a null pointer.
175  %x5 = fadd double %x4, %x3
176  store double %x5, ptr null, align 8
177  ret void
178}
179
; Declaration of the pointer-masking intrinsic called by @variadics1.
180declare ptr @llvm.ptrmask.p0.i64(ptr, i64)
181