xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget.ll (revision 8813c1087e5e548f64905c057a2e7e1f48a5cce5)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=slp-vectorizer -S  -slp-schedule-budget=16 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix LOBUDGET %s
3; RUN: opt < %s -passes=slp-vectorizer -S  -slp-schedule-budget=32 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix HIBUDGET %s
4
5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6target triple = "x86_64-apple-macosx10.9.0"
7
8; Test that the budget for the scheduling region size is honored.
9; With a reduced budget of 16 the loads from %a must stay scalar; with a budget of 32 they are vectorized.
10
; External function with no definition in this file. @test calls it repeatedly
; as filler between its loads and its stores.
11declare void @unknown()
12
; @test is laid out in four parts:
;   1. four consecutive float loads from %a, each added to itself;
;   2. 28 calls to @unknown;
;   3. four consecutive float stores of those sums to %b;
;   4. a 4-element float copy from %c to %d.
; In the run with -slp-schedule-budget=16 the checks expect parts 1 and 3 to
; stay scalar; in the run with -slp-schedule-budget=32 they expect a single
; <4 x float> load, fadd and store. Both runs expect part 4 to be vectorized.
13define void @test(ptr %a, ptr %b, ptr %c, ptr %d) {
14; LOBUDGET-LABEL: @test(
15; LOBUDGET-NEXT:  entry:
16; LOBUDGET-NEXT:    [[L0:%.*]] = load float, ptr [[A:%.*]], align 4
17; LOBUDGET-NEXT:    [[A1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
18; LOBUDGET-NEXT:    [[L1:%.*]] = load float, ptr [[A1]], align 4
19; LOBUDGET-NEXT:    [[A2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 2
20; LOBUDGET-NEXT:    [[L2:%.*]] = load float, ptr [[A2]], align 4
21; LOBUDGET-NEXT:    [[A3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 3
22; LOBUDGET-NEXT:    [[L3:%.*]] = load float, ptr [[A3]], align 4
23; LOBUDGET-NEXT:    [[L00:%.*]] = fadd float [[L0]], [[L0]]
24; LOBUDGET-NEXT:    [[L10:%.*]] = fadd float [[L1]], [[L1]]
25; LOBUDGET-NEXT:    [[L20:%.*]] = fadd float [[L2]], [[L2]]
26; LOBUDGET-NEXT:    [[L30:%.*]] = fadd float [[L3]], [[L3]]
27; LOBUDGET-NEXT:    call void @unknown()
28; LOBUDGET-NEXT:    call void @unknown()
29; LOBUDGET-NEXT:    call void @unknown()
30; LOBUDGET-NEXT:    call void @unknown()
31; LOBUDGET-NEXT:    call void @unknown()
32; LOBUDGET-NEXT:    call void @unknown()
33; LOBUDGET-NEXT:    call void @unknown()
34; LOBUDGET-NEXT:    call void @unknown()
35; LOBUDGET-NEXT:    call void @unknown()
36; LOBUDGET-NEXT:    call void @unknown()
37; LOBUDGET-NEXT:    call void @unknown()
38; LOBUDGET-NEXT:    call void @unknown()
39; LOBUDGET-NEXT:    call void @unknown()
40; LOBUDGET-NEXT:    call void @unknown()
41; LOBUDGET-NEXT:    call void @unknown()
42; LOBUDGET-NEXT:    call void @unknown()
43; LOBUDGET-NEXT:    call void @unknown()
44; LOBUDGET-NEXT:    call void @unknown()
45; LOBUDGET-NEXT:    call void @unknown()
46; LOBUDGET-NEXT:    call void @unknown()
47; LOBUDGET-NEXT:    call void @unknown()
48; LOBUDGET-NEXT:    call void @unknown()
49; LOBUDGET-NEXT:    call void @unknown()
50; LOBUDGET-NEXT:    call void @unknown()
51; LOBUDGET-NEXT:    call void @unknown()
52; LOBUDGET-NEXT:    call void @unknown()
53; LOBUDGET-NEXT:    call void @unknown()
54; LOBUDGET-NEXT:    call void @unknown()
55; LOBUDGET-NEXT:    store float [[L00]], ptr [[B:%.*]], align 4
56; LOBUDGET-NEXT:    [[B1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 1
57; LOBUDGET-NEXT:    store float [[L10]], ptr [[B1]], align 4
58; LOBUDGET-NEXT:    [[B2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 2
59; LOBUDGET-NEXT:    store float [[L20]], ptr [[B2]], align 4
60; LOBUDGET-NEXT:    [[B3:%.*]] = getelementptr inbounds float, ptr [[B]], i64 3
61; LOBUDGET-NEXT:    store float [[L30]], ptr [[B3]], align 4
62; LOBUDGET-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4
63; LOBUDGET-NEXT:    store <4 x float> [[TMP0]], ptr [[D:%.*]], align 4
64; LOBUDGET-NEXT:    ret void
65;
66; HIBUDGET-LABEL: @test(
67; HIBUDGET-NEXT:  entry:
68; HIBUDGET-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
69; HIBUDGET-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[TMP0]], [[TMP0]]
70; HIBUDGET-NEXT:    call void @unknown()
71; HIBUDGET-NEXT:    call void @unknown()
72; HIBUDGET-NEXT:    call void @unknown()
73; HIBUDGET-NEXT:    call void @unknown()
74; HIBUDGET-NEXT:    call void @unknown()
75; HIBUDGET-NEXT:    call void @unknown()
76; HIBUDGET-NEXT:    call void @unknown()
77; HIBUDGET-NEXT:    call void @unknown()
78; HIBUDGET-NEXT:    call void @unknown()
79; HIBUDGET-NEXT:    call void @unknown()
80; HIBUDGET-NEXT:    call void @unknown()
81; HIBUDGET-NEXT:    call void @unknown()
82; HIBUDGET-NEXT:    call void @unknown()
83; HIBUDGET-NEXT:    call void @unknown()
84; HIBUDGET-NEXT:    call void @unknown()
85; HIBUDGET-NEXT:    call void @unknown()
86; HIBUDGET-NEXT:    call void @unknown()
87; HIBUDGET-NEXT:    call void @unknown()
88; HIBUDGET-NEXT:    call void @unknown()
89; HIBUDGET-NEXT:    call void @unknown()
90; HIBUDGET-NEXT:    call void @unknown()
91; HIBUDGET-NEXT:    call void @unknown()
92; HIBUDGET-NEXT:    call void @unknown()
93; HIBUDGET-NEXT:    call void @unknown()
94; HIBUDGET-NEXT:    call void @unknown()
95; HIBUDGET-NEXT:    call void @unknown()
96; HIBUDGET-NEXT:    call void @unknown()
97; HIBUDGET-NEXT:    call void @unknown()
98; HIBUDGET-NEXT:    store <4 x float> [[TMP1]], ptr [[B:%.*]], align 4
99; HIBUDGET-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4
100; HIBUDGET-NEXT:    store <4 x float> [[TMP2]], ptr [[D:%.*]], align 4
101; HIBUDGET-NEXT:    ret void
102;
103entry:
104  ; Don't vectorize these loads (with the reduced budget).
  ; With the budget of 32 the expected output replaces them, and the four
  ; fadds below, with one <4 x float> load and one <4 x float> fadd.
105  %l0 = load float, ptr %a
106  %a1 = getelementptr inbounds float, ptr %a, i64 1
107  %l1 = load float, ptr %a1
108  %a2 = getelementptr inbounds float, ptr %a, i64 2
109  %l2 = load float, ptr %a2
110  %a3 = getelementptr inbounds float, ptr %a, i64 3
111  %l3 = load float, ptr %a3
112  %l00 = fadd float %l0, %l0
113  %l10 = fadd float %l1, %l1
114  %l20 = fadd float %l2, %l2
115  %l30 = fadd float %l3, %l3
116
117  ; some unrelated instructions in between to enlarge the scheduling region
  ; (28 calls; both expected outputs keep all of them, in place).
118  call void @unknown()
119  call void @unknown()
120  call void @unknown()
121  call void @unknown()
122  call void @unknown()
123  call void @unknown()
124  call void @unknown()
125  call void @unknown()
126  call void @unknown()
127  call void @unknown()
128  call void @unknown()
129  call void @unknown()
130  call void @unknown()
131  call void @unknown()
132  call void @unknown()
133  call void @unknown()
134  call void @unknown()
135  call void @unknown()
136  call void @unknown()
137  call void @unknown()
138  call void @unknown()
139  call void @unknown()
140  call void @unknown()
141  call void @unknown()
142  call void @unknown()
143  call void @unknown()
144  call void @unknown()
145  call void @unknown()
146
147  ; Don't vectorize these stores because their operands are too far away (with
148  ; the reduced budget).
  ; With the budget of 32 the expected output is a single <4 x float> store
  ; to %b.
149  store float %l00, ptr %b
150  %b1 = getelementptr inbounds float, ptr %b, i64 1
151  store float %l10, ptr %b1
152  %b2 = getelementptr inbounds float, ptr %b, i64 2
153  store float %l20, ptr %b2
154  %b3 = getelementptr inbounds float, ptr %b, i64 3
155  store float %l30, ptr %b3
156
157  ; But still vectorize the following instructions, because even if the budget
158  ; is exceeded there is a minimum region size.
  ; Both expected outputs turn this copy into one <4 x float> load from %c
  ; and one <4 x float> store to %d.
159  %l4 = load float, ptr %c
160  %c1 = getelementptr inbounds float, ptr %c, i64 1
161  %l5 = load float, ptr %c1
162  %c2 = getelementptr inbounds float, ptr %c, i64 2
163  %l6 = load float, ptr %c2
164  %c3 = getelementptr inbounds float, ptr %c, i64 3
165  %l7 = load float, ptr %c3
166
167  store float %l4, ptr %d
168  %d1 = getelementptr inbounds float, ptr %d, i64 1
169  store float %l5, ptr %d1
170  %d2 = getelementptr inbounds float, ptr %d, i64 2
171  store float %l6, ptr %d2
172  %d3 = getelementptr inbounds float, ptr %d, i64 3
173  store float %l7, ptr %d3
174
175  ret void
176}
177
178