xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' < %s -mtriple=aarch64-apple-ios -S | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefixes=OPT,NONSTRESS
3; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefixes=OPTALL,OPT,STRESS
4; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefixes=OPTALL,DISABLE
5
6; CodeGenPrepare should move the zext into the block with the load
7; so that SelectionDAG can select it with the load.
8define void @foo(ptr %p, ptr %q) {
9; OPTALL-LABEL: define void @foo(
10; OPTALL-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
11; OPTALL-NEXT:  [[ENTRY:.*:]]
12; OPTALL-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
13; OPTALL-NEXT:    [[S:%.*]] = zext i8 [[T]] to i32
14; OPTALL-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
15; OPTALL-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
16; OPTALL:       [[TRUE]]:
17; OPTALL-NEXT:    store i32 [[S]], ptr [[Q]], align 4
18; OPTALL-NEXT:    ret void
19; OPTALL:       [[FALSE]]:
20; OPTALL-NEXT:    ret void
21;
22entry:
23  %t = load i8, ptr %p
  ; Second user of %t besides the zext below.
24  %a = icmp slt i8 %t, 20
25  br i1 %a, label %true, label %false
26true:
  ; The zext starts out in a different block than the load it extends; all
  ; three run lines expect it directly after the load in the entry block.
27  %s = zext i8 %t to i32
28  store i32 %s, ptr %q
29  ret void
30false:
31  ret void
32}
33
34; Check that we manage to form a zextload if an operation with only one
35; argument to explicitly extend is in the way.
36; Make sure the operation is not promoted when the promotion pass is disabled.
37define void @promoteOneArg(ptr %p, ptr %q) {
38; OPT-LABEL: define void @promoteOneArg(
39; OPT-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
40; OPT-NEXT:  [[ENTRY:.*:]]
41; OPT-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
42; OPT-NEXT:    [[PROMOTED:%.*]] = zext i8 [[T]] to i32
43; OPT-NEXT:    [[ADD:%.*]] = add nuw i32 [[PROMOTED]], 2
44; OPT-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
45; OPT-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
46; OPT:       [[TRUE]]:
47; OPT-NEXT:    store i32 [[ADD]], ptr [[Q]], align 4
48; OPT-NEXT:    ret void
49; OPT:       [[FALSE]]:
50; OPT-NEXT:    ret void
51;
52; DISABLE-LABEL: define void @promoteOneArg(
53; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
54; DISABLE-NEXT:  [[ENTRY:.*:]]
55; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
56; DISABLE-NEXT:    [[ADD:%.*]] = add nuw i8 [[T]], 2
57; DISABLE-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
58; DISABLE-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
59; DISABLE:       [[TRUE]]:
60; DISABLE-NEXT:    [[S:%.*]] = zext i8 [[ADD]] to i32
61; DISABLE-NEXT:    store i32 [[S]], ptr [[Q]], align 4
62; DISABLE-NEXT:    ret void
63; DISABLE:       [[FALSE]]:
64; DISABLE-NEXT:    ret void
65;
66entry:
67  %t = load i8, ptr %p
  ; The operation between the load and the zext: its only non-constant
  ; operand is the loaded value.
68  %add = add nuw i8 %t, 2
69  %a = icmp slt i8 %t, 20
70  br i1 %a, label %true, label %false
71true:
  ; With promotion enabled the checks expect a zext of %t in the entry block
  ; and the add performed in i32; with -disable-cgp-ext-ld-promotion this zext
  ; is expected to stay here unchanged.
72  %s = zext i8 %add to i32
73  store i32 %s, ptr %q
74  ret void
75false:
76  ret void
77}
78
79; Check that we manage to form a sextload if an operation with only one
80; argument to explicitly extend is in the way.
81; Version with sext.
82define void @promoteOneArgSExt(ptr %p, ptr %q) {
83; OPT-LABEL: define void @promoteOneArgSExt(
84; OPT-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
85; OPT-NEXT:  [[ENTRY:.*:]]
86; OPT-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
87; OPT-NEXT:    [[PROMOTED:%.*]] = sext i8 [[T]] to i32
88; OPT-NEXT:    [[ADD:%.*]] = add nsw i32 [[PROMOTED]], 2
89; OPT-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
90; OPT-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
91; OPT:       [[TRUE]]:
92; OPT-NEXT:    store i32 [[ADD]], ptr [[Q]], align 4
93; OPT-NEXT:    ret void
94; OPT:       [[FALSE]]:
95; OPT-NEXT:    ret void
96;
97; DISABLE-LABEL: define void @promoteOneArgSExt(
98; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) {
99; DISABLE-NEXT:  [[ENTRY:.*:]]
100; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
101; DISABLE-NEXT:    [[ADD:%.*]] = add nsw i8 [[T]], 2
102; DISABLE-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
103; DISABLE-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
104; DISABLE:       [[TRUE]]:
105; DISABLE-NEXT:    [[S:%.*]] = sext i8 [[ADD]] to i32
106; DISABLE-NEXT:    store i32 [[S]], ptr [[Q]], align 4
107; DISABLE-NEXT:    ret void
108; DISABLE:       [[FALSE]]:
109; DISABLE-NEXT:    ret void
110;
111entry:
112  %t = load i8, ptr %p
  ; Same shape as @promoteOneArg, but the add carries nsw instead of nuw and
  ; the extension below is a sext instead of a zext.
113  %add = add nsw i8 %t, 2
114  %a = icmp slt i8 %t, 20
115  br i1 %a, label %true, label %false
116true:
  ; With promotion enabled the checks expect a sext of %t in the entry block
  ; feeding an i32 add; with promotion disabled this sext is expected to stay.
117  %s = sext i8 %add to i32
118  store i32 %s, ptr %q
119  ret void
120false:
121  ret void
122}
123
124; Check that we manage to form a zextload if an operation with two
125; arguments to explicitly extend is in the way.
126; Extending %add will create two extensions:
127; 1. One for %b.
128; 2. One for %t.
129; #1 will not be removed as we do not know anything about %b.
130; #2 may not be merged with the load because %t is used in a comparison.
131; Since two extensions may be emitted in the end instead of one before the
132; transformation, the regular heuristic does not apply the optimization.
133define void @promoteTwoArgZext(ptr %p, ptr %q, i8 %b) {
134; NONSTRESS-LABEL: define void @promoteTwoArgZext(
135; NONSTRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) {
136; NONSTRESS-NEXT:  [[ENTRY:.*:]]
137; NONSTRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
138; NONSTRESS-NEXT:    [[ADD:%.*]] = add nuw i8 [[T]], [[B]]
139; NONSTRESS-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
140; NONSTRESS-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
141; NONSTRESS:       [[TRUE]]:
142; NONSTRESS-NEXT:    [[S:%.*]] = zext i8 [[ADD]] to i32
143; NONSTRESS-NEXT:    store i32 [[S]], ptr [[Q]], align 4
144; NONSTRESS-NEXT:    ret void
145; NONSTRESS:       [[FALSE]]:
146; NONSTRESS-NEXT:    ret void
147;
148; STRESS-LABEL: define void @promoteTwoArgZext(
149; STRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) {
150; STRESS-NEXT:  [[ENTRY:.*:]]
151; STRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
152; STRESS-NEXT:    [[PROMOTED:%.*]] = zext i8 [[T]] to i32
153; STRESS-NEXT:    [[PROMOTED1:%.*]] = zext i8 [[B]] to i32
154; STRESS-NEXT:    [[ADD:%.*]] = add nuw i32 [[PROMOTED]], [[PROMOTED1]]
155; STRESS-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
156; STRESS-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
157; STRESS:       [[TRUE]]:
158; STRESS-NEXT:    store i32 [[ADD]], ptr [[Q]], align 4
159; STRESS-NEXT:    ret void
160; STRESS:       [[FALSE]]:
161; STRESS-NEXT:    ret void
162;
163; DISABLE-LABEL: define void @promoteTwoArgZext(
164; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) {
165; DISABLE-NEXT:  [[ENTRY:.*:]]
166; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
167; DISABLE-NEXT:    [[ADD:%.*]] = add nuw i8 [[T]], [[B]]
168; DISABLE-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
169; DISABLE-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
170; DISABLE:       [[TRUE]]:
171; DISABLE-NEXT:    [[S:%.*]] = zext i8 [[ADD]] to i32
172; DISABLE-NEXT:    store i32 [[S]], ptr [[Q]], align 4
173; DISABLE-NEXT:    ret void
174; DISABLE:       [[FALSE]]:
175; DISABLE-NEXT:    ret void
176;
177entry:
178  %t = load i8, ptr %p
  ; Both operands are non-constant: widening this add needs an extension of
  ; the argument %b in addition to the one of the loaded value %t.
179  %add = add nuw i8 %t, %b
  ; The comparison is the extra i8 use of %t.
180  %a = icmp slt i8 %t, 20
181  br i1 %a, label %true, label %false
182true:
  ; Only the -stress-cgp-ext-ld-promotion run is expected to replace this zext
  ; with an i32 add of two zexts; the other two runs expect it to stay here.
183  %s = zext i8 %add to i32
184  store i32 %s, ptr %q
185  ret void
186false:
187  ret void
188}
189
190; Check that we manage to form a sextload if an operation with two
191; arguments to explicitly extend is in the way.
192; Version with sext.
193define void @promoteTwoArgSExt(ptr %p, ptr %q, i8 %b) {
194; NONSTRESS-LABEL: define void @promoteTwoArgSExt(
195; NONSTRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) {
196; NONSTRESS-NEXT:  [[ENTRY:.*:]]
197; NONSTRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
198; NONSTRESS-NEXT:    [[ADD:%.*]] = add nsw i8 [[T]], [[B]]
199; NONSTRESS-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
200; NONSTRESS-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
201; NONSTRESS:       [[TRUE]]:
202; NONSTRESS-NEXT:    [[S:%.*]] = sext i8 [[ADD]] to i32
203; NONSTRESS-NEXT:    store i32 [[S]], ptr [[Q]], align 4
204; NONSTRESS-NEXT:    ret void
205; NONSTRESS:       [[FALSE]]:
206; NONSTRESS-NEXT:    ret void
207;
208; STRESS-LABEL: define void @promoteTwoArgSExt(
209; STRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) {
210; STRESS-NEXT:  [[ENTRY:.*:]]
211; STRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
212; STRESS-NEXT:    [[PROMOTED:%.*]] = sext i8 [[T]] to i32
213; STRESS-NEXT:    [[PROMOTED1:%.*]] = sext i8 [[B]] to i32
214; STRESS-NEXT:    [[ADD:%.*]] = add nsw i32 [[PROMOTED]], [[PROMOTED1]]
215; STRESS-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
216; STRESS-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
217; STRESS:       [[TRUE]]:
218; STRESS-NEXT:    store i32 [[ADD]], ptr [[Q]], align 4
219; STRESS-NEXT:    ret void
220; STRESS:       [[FALSE]]:
221; STRESS-NEXT:    ret void
222;
223; DISABLE-LABEL: define void @promoteTwoArgSExt(
224; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) {
225; DISABLE-NEXT:  [[ENTRY:.*:]]
226; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
227; DISABLE-NEXT:    [[ADD:%.*]] = add nsw i8 [[T]], [[B]]
228; DISABLE-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
229; DISABLE-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
230; DISABLE:       [[TRUE]]:
231; DISABLE-NEXT:    [[S:%.*]] = sext i8 [[ADD]] to i32
232; DISABLE-NEXT:    store i32 [[S]], ptr [[Q]], align 4
233; DISABLE-NEXT:    ret void
234; DISABLE:       [[FALSE]]:
235; DISABLE-NEXT:    ret void
236;
237entry:
238  %t = load i8, ptr %p
  ; Same shape as @promoteTwoArgZext with nsw/sext in place of nuw/zext:
  ; widening this add needs a sext of %b as well as one of %t.
239  %add = add nsw i8 %t, %b
240  %a = icmp slt i8 %t, 20
241  br i1 %a, label %true, label %false
242true:
  ; Only the -stress-cgp-ext-ld-promotion run is expected to replace this sext
  ; with an i32 add of two sexts; the other two runs expect it to stay here.
243  %s = sext i8 %add to i32
244  store i32 %s, ptr %q
245  ret void
246false:
247  ret void
248}
249
250; Check that we do not form a zextload if we need to introduce more than
251; one additional extension.
252define void @promoteThreeArgZext(ptr %p, ptr %q, i8 %b, i8 %c) {
253; NONSTRESS-LABEL: define void @promoteThreeArgZext(
254; NONSTRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
255; NONSTRESS-NEXT:  [[ENTRY:.*:]]
256; NONSTRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
257; NONSTRESS-NEXT:    [[TMP:%.*]] = add nuw i8 [[T]], [[B]]
258; NONSTRESS-NEXT:    [[ADD:%.*]] = add nuw i8 [[TMP]], [[C]]
259; NONSTRESS-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
260; NONSTRESS-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
261; NONSTRESS:       [[TRUE]]:
262; NONSTRESS-NEXT:    [[S:%.*]] = zext i8 [[ADD]] to i32
263; NONSTRESS-NEXT:    store i32 [[S]], ptr [[Q]], align 4
264; NONSTRESS-NEXT:    ret void
265; NONSTRESS:       [[FALSE]]:
266; NONSTRESS-NEXT:    ret void
267;
268; STRESS-LABEL: define void @promoteThreeArgZext(
269; STRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
270; STRESS-NEXT:  [[ENTRY:.*:]]
271; STRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
272; STRESS-NEXT:    [[PROMOTED2:%.*]] = zext i8 [[T]] to i32
273; STRESS-NEXT:    [[PROMOTED3:%.*]] = zext i8 [[B]] to i32
274; STRESS-NEXT:    [[TMP:%.*]] = add nuw i32 [[PROMOTED2]], [[PROMOTED3]]
275; STRESS-NEXT:    [[PROMOTED1:%.*]] = zext i8 [[C]] to i32
276; STRESS-NEXT:    [[ADD:%.*]] = add nuw i32 [[TMP]], [[PROMOTED1]]
277; STRESS-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
278; STRESS-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
279; STRESS:       [[TRUE]]:
280; STRESS-NEXT:    store i32 [[ADD]], ptr [[Q]], align 4
281; STRESS-NEXT:    ret void
282; STRESS:       [[FALSE]]:
283; STRESS-NEXT:    ret void
284;
285; DISABLE-LABEL: define void @promoteThreeArgZext(
286; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
287; DISABLE-NEXT:  [[ENTRY:.*:]]
288; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
289; DISABLE-NEXT:    [[TMP:%.*]] = add nuw i8 [[T]], [[B]]
290; DISABLE-NEXT:    [[ADD:%.*]] = add nuw i8 [[TMP]], [[C]]
291; DISABLE-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
292; DISABLE-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
293; DISABLE:       [[TRUE]]:
294; DISABLE-NEXT:    [[S:%.*]] = zext i8 [[ADD]] to i32
295; DISABLE-NEXT:    store i32 [[S]], ptr [[Q]], align 4
296; DISABLE-NEXT:    ret void
297; DISABLE:       [[FALSE]]:
298; DISABLE-NEXT:    ret void
299;
300entry:
301  %t = load i8, ptr %p
  ; Chain of two adds between the load and the zext. Widening the chain needs
  ; extensions of %t, %b and %c, as the -stress-cgp-ext-ld-promotion checks show.
302  %tmp = add nuw i8 %t, %b
303  %add = add nuw i8 %tmp, %c
304  %a = icmp slt i8 %t, 20
305  br i1 %a, label %true, label %false
306true:
  ; Expected to stay in place except in the -stress-cgp-ext-ld-promotion run.
307  %s = zext i8 %add to i32
308  store i32 %s, ptr %q
309  ret void
310false:
311  ret void
312}
313
314; Check that we manage to form a zextload after promoting and merging
315; two extensions.
316define void @promoteMergeExtArgZExt(ptr %p, ptr %q, i16 %b) {
317; NONSTRESS-LABEL: define void @promoteMergeExtArgZExt(
318; NONSTRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) {
319; NONSTRESS-NEXT:  [[ENTRY:.*:]]
320; NONSTRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
321; NONSTRESS-NEXT:    [[EXT:%.*]] = zext i8 [[T]] to i16
322; NONSTRESS-NEXT:    [[ADD:%.*]] = add nuw i16 [[EXT]], [[B]]
323; NONSTRESS-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
324; NONSTRESS-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
325; NONSTRESS:       [[TRUE]]:
326; NONSTRESS-NEXT:    [[S:%.*]] = zext i16 [[ADD]] to i32
327; NONSTRESS-NEXT:    store i32 [[S]], ptr [[Q]], align 4
328; NONSTRESS-NEXT:    ret void
329; NONSTRESS:       [[FALSE]]:
330; NONSTRESS-NEXT:    ret void
331;
332; STRESS-LABEL: define void @promoteMergeExtArgZExt(
333; STRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) {
334; STRESS-NEXT:  [[ENTRY:.*:]]
335; STRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
336; STRESS-NEXT:    [[PROMOTED2:%.*]] = zext i8 [[T]] to i32
337; STRESS-NEXT:    [[PROMOTED1:%.*]] = zext i16 [[B]] to i32
338; STRESS-NEXT:    [[ADD:%.*]] = add nuw i32 [[PROMOTED2]], [[PROMOTED1]]
339; STRESS-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
340; STRESS-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
341; STRESS:       [[TRUE]]:
342; STRESS-NEXT:    store i32 [[ADD]], ptr [[Q]], align 4
343; STRESS-NEXT:    ret void
344; STRESS:       [[FALSE]]:
345; STRESS-NEXT:    ret void
346;
347; DISABLE-LABEL: define void @promoteMergeExtArgZExt(
348; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) {
349; DISABLE-NEXT:  [[ENTRY:.*:]]
350; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
351; DISABLE-NEXT:    [[EXT:%.*]] = zext i8 [[T]] to i16
352; DISABLE-NEXT:    [[ADD:%.*]] = add nuw i16 [[EXT]], [[B]]
353; DISABLE-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
354; DISABLE-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
355; DISABLE:       [[TRUE]]:
356; DISABLE-NEXT:    [[S:%.*]] = zext i16 [[ADD]] to i32
357; DISABLE-NEXT:    store i32 [[S]], ptr [[Q]], align 4
358; DISABLE-NEXT:    ret void
359; DISABLE:       [[FALSE]]:
360; DISABLE-NEXT:    ret void
361;
362entry:
363  %t = load i8, ptr %p
  ; First extension (i8 -> i16), applied directly to the loaded value.
364  %ext = zext i8 %t to i16
365  %add = add nuw i16 %ext, %b
366  %a = icmp slt i8 %t, 20
367  br i1 %a, label %true, label %false
368true:
  ; Second extension (i16 -> i32). In the -stress-cgp-ext-ld-promotion run the
  ; checks expect both to merge into a single zext i8 -> i32 of %t; the other
  ; two runs expect the input to be left as is.
369  %s = zext i16 %add to i32
370  store i32 %s, ptr %q
371  ret void
372false:
373  ret void
374}
375
376; Check that we manage to form an extending load after promoting and merging
377; two extensions.
378; Version with sext as the outer extension (the load is still zero-extended).
379define void @promoteMergeExtArgSExt(ptr %p, ptr %q, i16 %b) {
380; NONSTRESS-LABEL: define void @promoteMergeExtArgSExt(
381; NONSTRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) {
382; NONSTRESS-NEXT:  [[ENTRY:.*:]]
383; NONSTRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
384; NONSTRESS-NEXT:    [[EXT:%.*]] = zext i8 [[T]] to i16
385; NONSTRESS-NEXT:    [[ADD:%.*]] = add nsw i16 [[EXT]], [[B]]
386; NONSTRESS-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
387; NONSTRESS-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
388; NONSTRESS:       [[TRUE]]:
389; NONSTRESS-NEXT:    [[S:%.*]] = sext i16 [[ADD]] to i32
390; NONSTRESS-NEXT:    store i32 [[S]], ptr [[Q]], align 4
391; NONSTRESS-NEXT:    ret void
392; NONSTRESS:       [[FALSE]]:
393; NONSTRESS-NEXT:    ret void
394;
395; STRESS-LABEL: define void @promoteMergeExtArgSExt(
396; STRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) {
397; STRESS-NEXT:  [[ENTRY:.*:]]
398; STRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
399; STRESS-NEXT:    [[PROMOTED2:%.*]] = zext i8 [[T]] to i32
400; STRESS-NEXT:    [[PROMOTED1:%.*]] = sext i16 [[B]] to i32
401; STRESS-NEXT:    [[ADD:%.*]] = add nsw i32 [[PROMOTED2]], [[PROMOTED1]]
402; STRESS-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
403; STRESS-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
404; STRESS:       [[TRUE]]:
405; STRESS-NEXT:    store i32 [[ADD]], ptr [[Q]], align 4
406; STRESS-NEXT:    ret void
407; STRESS:       [[FALSE]]:
408; STRESS-NEXT:    ret void
409;
410; DISABLE-LABEL: define void @promoteMergeExtArgSExt(
411; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) {
412; DISABLE-NEXT:  [[ENTRY:.*:]]
413; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
414; DISABLE-NEXT:    [[EXT:%.*]] = zext i8 [[T]] to i16
415; DISABLE-NEXT:    [[ADD:%.*]] = add nsw i16 [[EXT]], [[B]]
416; DISABLE-NEXT:    [[A:%.*]] = icmp slt i8 [[T]], 20
417; DISABLE-NEXT:    br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]]
418; DISABLE:       [[TRUE]]:
419; DISABLE-NEXT:    [[S:%.*]] = sext i16 [[ADD]] to i32
420; DISABLE-NEXT:    store i32 [[S]], ptr [[Q]], align 4
421; DISABLE-NEXT:    ret void
422; DISABLE:       [[FALSE]]:
423; DISABLE-NEXT:    ret void
424;
425entry:
426  %t = load i8, ptr %p
  ; Inner extension is a zext (i8 -> i16) of the loaded value.
427  %ext = zext i8 %t to i16
428  %add = add nsw i16 %ext, %b
429  %a = icmp slt i8 %t, 20
430  br i1 %a, label %true, label %false
431true:
  ; Outer extension is a sext (i16 -> i32). In the -stress-cgp-ext-ld-promotion
  ; run the checks expect %t to be zero-extended straight to i32 and %b to be
  ; sign-extended; the other two runs expect the input to be left as is.
432  %s = sext i16 %add to i32
433  store i32 %s, ptr %q
434  ret void
435false:
436  ret void
437}
438
439; Check that we manage to catch all the extload opportunities that are exposed
440; by the different iterations of codegen prepare.
441; Moreover, check that we do not promote more than we need to.
442; Here is what is happening in this test (not necessarily in this order):
443; 1. We try to promote the operand of %sextadd.
444;    a. This creates one sext of %ld2 and one of %zextld
445;    b. The sext of %ld2 can be combined with %ld2, so we remove one sext but
446;       introduce one. This is fine with the current heuristic: neutral.
447;    => We have one zext of %zextld left and we created one sext of %ld2.
448; 2. We try to promote the operand of %sextaddza.
449;    a. This creates one sext of %zexta and one of %zextld
450;    b. The sext of %zexta can be combined with the zext of %a.
451;    c. The sext of %zextld leads to %ld and can be combined with it. This is
452;       done by promoting %zextld. This is fine with the current heuristic:
453;       neutral.
454;    => We have created a new zext of %ld and we created one sext of %zexta.
455; 3. We try to promote the operand of %sextaddb.
456;    a. This creates one sext of %b and one of %zextld
457;    b. The sext of %b is a dead-end, nothing to be done.
458;    c. Same thing as 2.c. happens.
459;    => We have created a new zext of %ld and we created one sext of %b.
460; 4. We try to promote the operand of the zext of %zextld introduced in #1.
461;    a. Same thing as 2.c. happens.
462;    b. %zextld does not have any other uses. It is removed as dead code.
463;    => We have created a new zext of %ld and we removed a zext of %zextld and
464;       a zext of %ld.
465; Currently we do not try to reuse existing extensions, so in the end we have
466; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
467define void @severalPromotions(ptr %addr1, ptr %addr2, i8 %a, i32 %b) {
468; OPT-LABEL: define void @severalPromotions(
469; OPT-SAME: ptr [[ADDR1:%.*]], ptr [[ADDR2:%.*]], i8 [[A:%.*]], i32 [[B:%.*]]) {
470; OPT-NEXT:    [[LD:%.*]] = load i8, ptr [[ADDR1]], align 1
471; OPT-NEXT:    [[PROMOTED9:%.*]] = zext i8 [[LD]] to i64
472; OPT-NEXT:    [[PROMOTED6:%.*]] = zext i8 [[LD]] to i64
473; OPT-NEXT:    [[LD2:%.*]] = load i32, ptr [[ADDR2]], align 4
474; OPT-NEXT:    [[PROMOTED:%.*]] = sext i32 [[LD2]] to i64
475; OPT-NEXT:    [[PROMOTED2:%.*]] = zext i8 [[LD]] to i64
476; OPT-NEXT:    [[ADD:%.*]] = add nsw i64 [[PROMOTED]], [[PROMOTED2]]
477; OPT-NEXT:    [[PROMOTED5:%.*]] = zext i8 [[A]] to i64
478; OPT-NEXT:    [[ADDZA:%.*]] = add nsw i64 [[PROMOTED5]], [[PROMOTED6]]
479; OPT-NEXT:    [[PROMOTED7:%.*]] = sext i32 [[B]] to i64
480; OPT-NEXT:    [[ADDB:%.*]] = add nsw i64 [[PROMOTED7]], [[PROMOTED9]]
481; OPT-NEXT:    call void @dummy(i64 [[ADD]], i64 [[ADDZA]], i64 [[ADDB]])
482; OPT-NEXT:    ret void
483;
484; DISABLE-LABEL: define void @severalPromotions(
485; DISABLE-SAME: ptr [[ADDR1:%.*]], ptr [[ADDR2:%.*]], i8 [[A:%.*]], i32 [[B:%.*]]) {
486; DISABLE-NEXT:    [[LD:%.*]] = load i8, ptr [[ADDR1]], align 1
487; DISABLE-NEXT:    [[ZEXTLD:%.*]] = zext i8 [[LD]] to i32
488; DISABLE-NEXT:    [[LD2:%.*]] = load i32, ptr [[ADDR2]], align 4
489; DISABLE-NEXT:    [[ADD:%.*]] = add nsw i32 [[LD2]], [[ZEXTLD]]
490; DISABLE-NEXT:    [[SEXTADD:%.*]] = sext i32 [[ADD]] to i64
491; DISABLE-NEXT:    [[ZEXTA:%.*]] = zext i8 [[A]] to i32
492; DISABLE-NEXT:    [[ADDZA:%.*]] = add nsw i32 [[ZEXTA]], [[ZEXTLD]]
493; DISABLE-NEXT:    [[SEXTADDZA:%.*]] = sext i32 [[ADDZA]] to i64
494; DISABLE-NEXT:    [[ADDB:%.*]] = add nsw i32 [[B]], [[ZEXTLD]]
495; DISABLE-NEXT:    [[SEXTADDB:%.*]] = sext i32 [[ADDB]] to i64
496; DISABLE-NEXT:    call void @dummy(i64 [[SEXTADD]], i64 [[SEXTADDZA]], i64 [[SEXTADDB]])
497; DISABLE-NEXT:    ret void
498;
499  %ld = load i8, ptr %addr1
  ; %zextld is shared by all three adds below.
500  %zextld = zext i8 %ld to i32
501  %ld2 = load i32, ptr %addr2
  ; First promotion candidate: other operand is a loaded i32.
502  %add = add nsw i32 %ld2, %zextld
503  %sextadd = sext i32 %add to i64
  ; Second promotion candidate: other operand is a zext of the argument %a.
504  %zexta = zext i8 %a to i32
505  %addza = add nsw i32 %zexta, %zextld
506  %sextaddza = sext i32 %addza to i64
  ; Third promotion candidate: other operand is the plain i32 argument %b.
507  %addb = add nsw i32 %b, %zextld
508  %sextaddb = sext i32 %addb to i64
  ; The call uses all three extended sums. With promotion enabled the checks
  ; expect i64 adds and three separate zexts of %ld to i64.
509  call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
510  ret void
511}
512
513declare void @dummy(i64, i64, i64)
514
515; Make sure we do not try to promote vector types since the type promotion
516; helper does not support them for now.
517define void @vectorPromotion() {
518; OPTALL-LABEL: define void @vectorPromotion() {
519; OPTALL-NEXT:  [[ENTRY:.*:]]
520; OPTALL-NEXT:    [[A:%.*]] = shl nuw nsw <2 x i32> zeroinitializer, splat (i32 8)
521; OPTALL-NEXT:    [[B:%.*]] = zext <2 x i32> [[A]] to <2 x i64>
522; OPTALL-NEXT:    ret void
523;
524entry:
  ; The shift operates on constant vector operands only.
525  %a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
  ; Vector zext whose result is unused. All three run lines expect both
  ; instructions to be kept with their original <2 x i32> / <2 x i64> types.
526  %b = zext <2 x i32> %a to <2 x i64>
527  ret void
528}
529
530@a = common global i32 0, align 4
531@c = common global [2 x i32] zeroinitializer, align 4
532
533; Make sure we support promotion of operands that produces a Value as opposed
534; to an instruction.
535; This used to cause a crash.
536define i32 @promotionOfArgEndsUpInValue(ptr %addr) {
537; OPT-LABEL: define i32 @promotionOfArgEndsUpInValue(
538; OPT-SAME: ptr [[ADDR:%.*]]) {
539; OPT-NEXT:  [[ENTRY:.*:]]
540; OPT-NEXT:    [[VAL:%.*]] = load i16, ptr [[ADDR]], align 2
541; OPT-NEXT:    [[PROMOTED:%.*]] = sext i16 [[VAL]] to i32
542; OPT-NEXT:    [[CMP:%.*]] = icmp ne ptr getelementptr inbounds ([2 x i32], ptr @c, i64 0, i64 1), @a
543; OPT-NEXT:    [[PROMOTED2:%.*]] = zext i1 [[CMP]] to i32
544; OPT-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[PROMOTED]], [[PROMOTED2]]
545; OPT-NEXT:    ret i32 [[ADD]]
546;
547; DISABLE-LABEL: define i32 @promotionOfArgEndsUpInValue(
548; DISABLE-SAME: ptr [[ADDR:%.*]]) {
549; DISABLE-NEXT:  [[ENTRY:.*:]]
550; DISABLE-NEXT:    [[VAL:%.*]] = load i16, ptr [[ADDR]], align 2
551; DISABLE-NEXT:    [[CMP:%.*]] = icmp ne ptr getelementptr inbounds ([2 x i32], ptr @c, i64 0, i64 1), @a
552; DISABLE-NEXT:    [[EXT:%.*]] = zext i1 [[CMP]] to i16
553; DISABLE-NEXT:    [[ADD:%.*]] = add nuw nsw i16 [[VAL]], [[EXT]]
554; DISABLE-NEXT:    [[CONV3:%.*]] = sext i16 [[ADD]] to i32
555; DISABLE-NEXT:    ret i32 [[CONV3]]
556;
557entry:
558  %val = load i16, ptr %addr
  ; Both icmp operands are constant expressions over the globals @c and @a.
559  %cmp = icmp ne ptr getelementptr inbounds ([2 x i32], ptr @c, i64 0, i64 1), @a
  ; Second operand of the add is itself an extension (i1 -> i16).
560  %ext = zext i1 %cmp to i16
561  %add = add nuw nsw i16 %val, %ext
  ; With promotion enabled the checks expect this sext to move onto %val, the
  ; add to be done in i32, and %ext to become a zext i1 -> i32.
562  %conv3 = sext i16 %add to i32
563  ret i32 %conv3
564}
565
566; Check that we see that one zext can be derived from the other for free.
567define void @promoteTwoArgZextWithSourceExtendedTwice(ptr %p, ptr %q, i32 %b, ptr %addr) {
568; OPT-LABEL: define void @promoteTwoArgZextWithSourceExtendedTwice(
569; OPT-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) {
570; OPT-NEXT:  [[ENTRY:.*:]]
571; OPT-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
572; OPT-NEXT:    [[PROMOTED1:%.*]] = zext i8 [[T]] to i64
573; OPT-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
574; OPT-NEXT:    [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]]
575; OPT-NEXT:    [[ADD2:%.*]] = add nuw i64 [[PROMOTED1]], 12
576; OPT-NEXT:    store i32 [[ADD]], ptr [[ADDR]], align 4
577; OPT-NEXT:    store i64 [[ADD2]], ptr [[Q]], align 8
578; OPT-NEXT:    ret void
579;
580; DISABLE-LABEL: define void @promoteTwoArgZextWithSourceExtendedTwice(
581; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) {
582; DISABLE-NEXT:  [[ENTRY:.*:]]
583; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
584; DISABLE-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
585; DISABLE-NEXT:    [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]]
586; DISABLE-NEXT:    [[ADD2:%.*]] = add nuw i32 [[ZEXTT]], 12
587; DISABLE-NEXT:    store i32 [[ADD]], ptr [[ADDR]], align 4
588; DISABLE-NEXT:    [[S:%.*]] = zext i32 [[ADD2]] to i64
589; DISABLE-NEXT:    store i64 [[S]], ptr [[Q]], align 8
590; DISABLE-NEXT:    ret void
591;
592entry:
593  %t = load i8, ptr %p
  ; %zextt has two users: %add (stored as i32) and %add2 (extended below).
594  %zextt = zext i8 %t to i32
595  %add = add nuw i32 %zextt, %b
596  %add2 = add nuw i32 %zextt, 12
597  store i32 %add, ptr %addr
  ; With promotion enabled the checks expect %add2 to be computed in i64 from
  ; a second zext of %t (i8 -> i64), while the i32 zext is kept for %add.
598  %s = zext i32 %add2 to i64
599  store i64 %s, ptr %q
600  ret void
601}
602
603; Check that we do not increase the cost of the code.
604; The input has one free zext and one free sext. If we had promoted
605; all the way through the load we would end up with a free zext and a
606; non-free sext (of %b).
607define void @doNotPromoteFreeSExtFromAddrMode(ptr %p, i32 %b, ptr %addr) {
608; NONSTRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode(
609; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) {
610; NONSTRESS-NEXT:  [[ENTRY:.*:]]
611; NONSTRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
612; NONSTRESS-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
613; NONSTRESS-NEXT:    [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
614; NONSTRESS-NEXT:    [[IDX64:%.*]] = sext i32 [[ADD]] to i64
615; NONSTRESS-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[IDX64]]
616; NONSTRESS-NEXT:    store i32 [[ADD]], ptr [[STADDR]], align 4
617; NONSTRESS-NEXT:    ret void
618;
619; STRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode(
620; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) {
621; STRESS-NEXT:  [[ENTRY:.*:]]
622; STRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
623; STRESS-NEXT:    [[PROMOTED3:%.*]] = zext i8 [[T]] to i64
624; STRESS-NEXT:    [[PROMOTED2:%.*]] = sext i32 [[B]] to i64
625; STRESS-NEXT:    [[ADD:%.*]] = add nsw i64 [[PROMOTED3]], [[PROMOTED2]]
626; STRESS-NEXT:    [[PROMOTED:%.*]] = trunc i64 [[ADD]] to i32
627; STRESS-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[ADD]]
628; STRESS-NEXT:    store i32 [[PROMOTED]], ptr [[STADDR]], align 4
629; STRESS-NEXT:    ret void
630;
631; DISABLE-LABEL: define void @doNotPromoteFreeSExtFromAddrMode(
632; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) {
633; DISABLE-NEXT:  [[ENTRY:.*:]]
634; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
635; DISABLE-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
636; DISABLE-NEXT:    [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
637; DISABLE-NEXT:    [[IDX64:%.*]] = sext i32 [[ADD]] to i64
638; DISABLE-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[IDX64]]
639; DISABLE-NEXT:    store i32 [[ADD]], ptr [[STADDR]], align 4
640; DISABLE-NEXT:    ret void
641;
642entry:
643  %t = load i8, ptr %p
644  %zextt = zext i8 %t to i32
645  %add = add nsw i32 %zextt, %b
  ; This sext only feeds the index of the getelementptr below.
646  %idx64 = sext i32 %add to i64
647  %staddr = getelementptr inbounds i32, ptr %addr, i64 %idx64
  ; %add is also stored as an i32 value, which is why the
  ; -stress-cgp-ext-ld-promotion checks contain a trunc of the widened add.
648  store i32 %add, ptr %staddr
649  ret void
650}
651
652; Check that we do not increase the cost of the code.
653; The input has one free zext and one free sext. If we had promoted
654; all the way through the load we would end up with a free zext and a
655; non-free sext (of %b).
656define void @doNotPromoteFreeSExtFromAddrMode64(ptr %p, i32 %b, ptr %addr, i64 %stuff) {
657; NONSTRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode64(
658; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i64 [[STUFF:%.*]]) {
659; NONSTRESS-NEXT:  [[ENTRY:.*:]]
660; NONSTRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
661; NONSTRESS-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
662; NONSTRESS-NEXT:    [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
663; NONSTRESS-NEXT:    [[IDX64:%.*]] = sext i32 [[ADD]] to i64
664; NONSTRESS-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i64, ptr [[ADDR]], i64 [[IDX64]]
665; NONSTRESS-NEXT:    store i64 [[STUFF]], ptr [[STADDR]], align 8
666; NONSTRESS-NEXT:    ret void
667;
668; STRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode64(
669; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i64 [[STUFF:%.*]]) {
670; STRESS-NEXT:  [[ENTRY:.*:]]
671; STRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
672; STRESS-NEXT:    [[PROMOTED2:%.*]] = zext i8 [[T]] to i64
673; STRESS-NEXT:    [[PROMOTED1:%.*]] = sext i32 [[B]] to i64
674; STRESS-NEXT:    [[ADD:%.*]] = add nsw i64 [[PROMOTED2]], [[PROMOTED1]]
675; STRESS-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i64, ptr [[ADDR]], i64 [[ADD]]
676; STRESS-NEXT:    store i64 [[STUFF]], ptr [[STADDR]], align 8
677; STRESS-NEXT:    ret void
678;
679; DISABLE-LABEL: define void @doNotPromoteFreeSExtFromAddrMode64(
680; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i64 [[STUFF:%.*]]) {
681; DISABLE-NEXT:  [[ENTRY:.*:]]
682; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
683; DISABLE-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
684; DISABLE-NEXT:    [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
685; DISABLE-NEXT:    [[IDX64:%.*]] = sext i32 [[ADD]] to i64
686; DISABLE-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i64, ptr [[ADDR]], i64 [[IDX64]]
687; DISABLE-NEXT:    store i64 [[STUFF]], ptr [[STADDR]], align 8
688; DISABLE-NEXT:    ret void
689;
690entry:
691  %t = load i8, ptr %p
692  %zextt = zext i8 %t to i32
693  %add = add nsw i32 %zextt, %b
  ; This sext only feeds the index of the getelementptr below, which here
  ; addresses i64 elements.
694  %idx64 = sext i32 %add to i64
695  %staddr = getelementptr inbounds i64, ptr %addr, i64 %idx64
  ; The stored value is the unrelated argument %stuff; %add has no other user
  ; than the sext, so the -stress-cgp-ext-ld-promotion checks need no trunc.
696  store i64 %stuff, ptr %staddr
697  ret void
698}
699
700; Check that we do not increase the cost of the code.
701; The input has one free zext and one free sext. If we had promoted
702; all the way through the load we would end up with a free zext and a
703; non-free sext (of %b).
704define void @doNotPromoteFreeSExtFromAddrMode128(ptr %p, i32 %b, ptr %addr, i128 %stuff) {
705; NONSTRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode128(
706; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i128 [[STUFF:%.*]]) {
707; NONSTRESS-NEXT:  [[ENTRY:.*:]]
708; NONSTRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
709; NONSTRESS-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
710; NONSTRESS-NEXT:    [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
711; NONSTRESS-NEXT:    [[IDX64:%.*]] = sext i32 [[ADD]] to i64
712; NONSTRESS-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i128, ptr [[ADDR]], i64 [[IDX64]]
713; NONSTRESS-NEXT:    store i128 [[STUFF]], ptr [[STADDR]], align 16
714; NONSTRESS-NEXT:    ret void
715;
716; STRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode128(
717; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i128 [[STUFF:%.*]]) {
718; STRESS-NEXT:  [[ENTRY:.*:]]
719; STRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
720; STRESS-NEXT:    [[PROMOTED2:%.*]] = zext i8 [[T]] to i64
721; STRESS-NEXT:    [[PROMOTED1:%.*]] = sext i32 [[B]] to i64
722; STRESS-NEXT:    [[ADD:%.*]] = add nsw i64 [[PROMOTED2]], [[PROMOTED1]]
723; STRESS-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i128, ptr [[ADDR]], i64 [[ADD]]
724; STRESS-NEXT:    store i128 [[STUFF]], ptr [[STADDR]], align 16
725; STRESS-NEXT:    ret void
726;
727; DISABLE-LABEL: define void @doNotPromoteFreeSExtFromAddrMode128(
728; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i128 [[STUFF:%.*]]) {
729; DISABLE-NEXT:  [[ENTRY:.*:]]
730; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
731; DISABLE-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
732; DISABLE-NEXT:    [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
733; DISABLE-NEXT:    [[IDX64:%.*]] = sext i32 [[ADD]] to i64
734; DISABLE-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i128, ptr [[ADDR]], i64 [[IDX64]]
735; DISABLE-NEXT:    store i128 [[STUFF]], ptr [[STADDR]], align 16
736; DISABLE-NEXT:    ret void
737;
; Input IR: an i8 load is zero-extended to i32, added (nsw) to %b, and the
; sum is sign-extended to i64 only to index an i128 GEP for the store.
; Per the checks above, the default and disabled runs keep this shape, while
; the stress run widens the add to i64 by extending %t and %b separately.
738entry:
739  %t = load i8, ptr %p
740  %zextt = zext i8 %t to i32
741  %add = add nsw i32 %zextt, %b
742  %idx64 = sext i32 %add to i64
743  %staddr = getelementptr inbounds i128, ptr %addr, i64 %idx64
744  store i128 %stuff, ptr %staddr
745  ret void
746}
747
748
749; Check that the sext is promoted when the address is an i256 GEP.
750; The input has the same zext/add/sext shape as the i64 and i128 tests above,
751; but here the checks expect the add to be widened to i64 in both the default
752; and stress runs (presumably the sext is not free for this access size).
753define void @promoteSExtFromAddrMode256(ptr %p, i32 %b, ptr %addr, i256 %stuff) {
754; OPT-LABEL: define void @promoteSExtFromAddrMode256(
755; OPT-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i256 [[STUFF:%.*]]) {
756; OPT-NEXT:  [[ENTRY:.*:]]
757; OPT-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
758; OPT-NEXT:    [[PROMOTED2:%.*]] = zext i8 [[T]] to i64
759; OPT-NEXT:    [[PROMOTED1:%.*]] = sext i32 [[B]] to i64
760; OPT-NEXT:    [[ADD:%.*]] = add nsw i64 [[PROMOTED2]], [[PROMOTED1]]
761; OPT-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i256, ptr [[ADDR]], i64 [[ADD]]
762; OPT-NEXT:    store i256 [[STUFF]], ptr [[STADDR]], align 16
763; OPT-NEXT:    ret void
764;
765; DISABLE-LABEL: define void @promoteSExtFromAddrMode256(
766; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i256 [[STUFF:%.*]]) {
767; DISABLE-NEXT:  [[ENTRY:.*:]]
768; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
769; DISABLE-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
770; DISABLE-NEXT:    [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
771; DISABLE-NEXT:    [[IDX64:%.*]] = sext i32 [[ADD]] to i64
772; DISABLE-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i256, ptr [[ADDR]], i64 [[IDX64]]
773; DISABLE-NEXT:    store i256 [[STUFF]], ptr [[STADDR]], align 16
774; DISABLE-NEXT:    ret void
775;
; Input IR: an i8 load is zero-extended to i32, added (nsw) to %b, and the
; sum is sign-extended to i64 only to index an i256 GEP for the store.
; Per the checks above, the default and stress runs both widen the add to
; i64 (zext of %t, sext of %b); only the disabled run keeps the i32 add
; followed by the sext.
776entry:
777  %t = load i8, ptr %p
778  %zextt = zext i8 %t to i32
779  %add = add nsw i32 %zextt, %b
780  %idx64 = sext i32 %add to i64
781  %staddr = getelementptr inbounds i256, ptr %addr, i64 %idx64
782  store i256 %stuff, ptr %staddr
783  ret void
784}
785
786; Check that we do not increase the cost of the code.
787; The input has two free zexts.
788; When we promote all the way through the load, we end up with
789; a free zext and a non-free zext (of %b).
790; The current target lowering says zext i32 to i64 is free, so the
791; promotion would leave the cost unchanged and may expose more
792; opportunities.
793; The checks below expect it only in stress mode, so this looks fixed.
794;
795; This transformation should really happen only for stress mode.
796define void @doNotPromoteFreeZExtFromAddrMode(ptr %p, i32 %b, ptr %addr) {
797; NONSTRESS-LABEL: define void @doNotPromoteFreeZExtFromAddrMode(
798; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) {
799; NONSTRESS-NEXT:  [[ENTRY:.*:]]
800; NONSTRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
801; NONSTRESS-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
802; NONSTRESS-NEXT:    [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]]
803; NONSTRESS-NEXT:    [[IDX64:%.*]] = zext i32 [[ADD]] to i64
804; NONSTRESS-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[IDX64]]
805; NONSTRESS-NEXT:    store i32 [[ADD]], ptr [[STADDR]], align 4
806; NONSTRESS-NEXT:    ret void
807;
808; STRESS-LABEL: define void @doNotPromoteFreeZExtFromAddrMode(
809; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) {
810; STRESS-NEXT:  [[ENTRY:.*:]]
811; STRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
812; STRESS-NEXT:    [[PROMOTED3:%.*]] = zext i8 [[T]] to i64
813; STRESS-NEXT:    [[PROMOTED2:%.*]] = zext i32 [[B]] to i64
814; STRESS-NEXT:    [[ADD:%.*]] = add nuw i64 [[PROMOTED3]], [[PROMOTED2]]
815; STRESS-NEXT:    [[PROMOTED:%.*]] = trunc i64 [[ADD]] to i32
816; STRESS-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[ADD]]
817; STRESS-NEXT:    store i32 [[PROMOTED]], ptr [[STADDR]], align 4
818; STRESS-NEXT:    ret void
819;
820; DISABLE-LABEL: define void @doNotPromoteFreeZExtFromAddrMode(
821; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) {
822; DISABLE-NEXT:  [[ENTRY:.*:]]
823; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
824; DISABLE-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
825; DISABLE-NEXT:    [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]]
826; DISABLE-NEXT:    [[IDX64:%.*]] = zext i32 [[ADD]] to i64
827; DISABLE-NEXT:    [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[IDX64]]
828; DISABLE-NEXT:    store i32 [[ADD]], ptr [[STADDR]], align 4
829; DISABLE-NEXT:    ret void
830;
; Input IR: %add (a nuw i32 add of the zero-extended load and %b) is used
; twice, as the zero-extended index of an i32 GEP and as the stored value.
; Per the checks above, the default and disabled runs leave the IR as is;
; the stress run widens the add to i64, uses it directly as the GEP index,
; and truncates it back to i32 for the store.
831entry:
832  %t = load i8, ptr %p
833  %zextt = zext i8 %t to i32
834  %add = add nuw i32 %zextt, %b
835  %idx64 = zext i32 %add to i64
836  %staddr = getelementptr inbounds i32, ptr %addr, i64 %idx64
837  store i32 %add, ptr %staddr
838  ret void
839}
840
841define i64 @doNotPromoteFreeSExtFromShift(ptr %p, i32 %b) {
842; NONSTRESS-LABEL: define i64 @doNotPromoteFreeSExtFromShift(
843; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) {
844; NONSTRESS-NEXT:  [[ENTRY:.*:]]
845; NONSTRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
846; NONSTRESS-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
847; NONSTRESS-NEXT:    [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
848; NONSTRESS-NEXT:    [[IDX64:%.*]] = sext i32 [[ADD]] to i64
849; NONSTRESS-NEXT:    [[STADDR:%.*]] = shl i64 [[IDX64]], 12
850; NONSTRESS-NEXT:    ret i64 [[STADDR]]
851;
852; STRESS-LABEL: define i64 @doNotPromoteFreeSExtFromShift(
853; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) {
854; STRESS-NEXT:  [[ENTRY:.*:]]
855; STRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
856; STRESS-NEXT:    [[PROMOTED2:%.*]] = zext i8 [[T]] to i64
857; STRESS-NEXT:    [[PROMOTED1:%.*]] = sext i32 [[B]] to i64
858; STRESS-NEXT:    [[ADD:%.*]] = add nsw i64 [[PROMOTED2]], [[PROMOTED1]]
859; STRESS-NEXT:    [[STADDR:%.*]] = shl i64 [[ADD]], 12
860; STRESS-NEXT:    ret i64 [[STADDR]]
861;
862; DISABLE-LABEL: define i64 @doNotPromoteFreeSExtFromShift(
863; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) {
864; DISABLE-NEXT:  [[ENTRY:.*:]]
865; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
866; DISABLE-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
867; DISABLE-NEXT:    [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]]
868; DISABLE-NEXT:    [[IDX64:%.*]] = sext i32 [[ADD]] to i64
869; DISABLE-NEXT:    [[STADDR:%.*]] = shl i64 [[IDX64]], 12
870; DISABLE-NEXT:    ret i64 [[STADDR]]
871;
; Input IR: zext of the i8 load, nsw add with %b in i32, sext of the sum to
; i64, then a left shift by 12 whose result is returned.
; Per the checks above, the default and disabled runs keep this shape; the
; stress run widens the add to i64 (zext of %t, sext of %b) and shifts that
; result directly.
872entry:
873  %t = load i8, ptr %p
874  %zextt = zext i8 %t to i32
875  %add = add nsw i32 %zextt, %b
876  %idx64 = sext i32 %add to i64
877  %staddr = shl i64 %idx64, 12
878  ret i64 %staddr
879}
880
881; Same comment as doNotPromoteFreeZExtFromAddrMode.
882;
883; This transformation should really happen only for stress mode.
884define i64 @doNotPromoteFreeZExtFromShift(ptr %p, i32 %b) {
885; NONSTRESS-LABEL: define i64 @doNotPromoteFreeZExtFromShift(
886; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) {
887; NONSTRESS-NEXT:  [[ENTRY:.*:]]
888; NONSTRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
889; NONSTRESS-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
890; NONSTRESS-NEXT:    [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]]
891; NONSTRESS-NEXT:    [[IDX64:%.*]] = zext i32 [[ADD]] to i64
892; NONSTRESS-NEXT:    [[STADDR:%.*]] = shl i64 [[IDX64]], 12
893; NONSTRESS-NEXT:    ret i64 [[STADDR]]
894;
895; STRESS-LABEL: define i64 @doNotPromoteFreeZExtFromShift(
896; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) {
897; STRESS-NEXT:  [[ENTRY:.*:]]
898; STRESS-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
899; STRESS-NEXT:    [[PROMOTED2:%.*]] = zext i8 [[T]] to i64
900; STRESS-NEXT:    [[PROMOTED1:%.*]] = zext i32 [[B]] to i64
901; STRESS-NEXT:    [[ADD:%.*]] = add nuw i64 [[PROMOTED2]], [[PROMOTED1]]
902; STRESS-NEXT:    [[STADDR:%.*]] = shl i64 [[ADD]], 12
903; STRESS-NEXT:    ret i64 [[STADDR]]
904;
905; DISABLE-LABEL: define i64 @doNotPromoteFreeZExtFromShift(
906; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) {
907; DISABLE-NEXT:  [[ENTRY:.*:]]
908; DISABLE-NEXT:    [[T:%.*]] = load i8, ptr [[P]], align 1
909; DISABLE-NEXT:    [[ZEXTT:%.*]] = zext i8 [[T]] to i32
910; DISABLE-NEXT:    [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]]
911; DISABLE-NEXT:    [[IDX64:%.*]] = zext i32 [[ADD]] to i64
912; DISABLE-NEXT:    [[STADDR:%.*]] = shl i64 [[IDX64]], 12
913; DISABLE-NEXT:    ret i64 [[STADDR]]
914;
; Input IR: zext of the i8 load, nuw add with %b in i32, zext of the sum to
; i64, then a left shift by 12 whose result is returned.
; Per the checks above, the default and disabled runs keep this shape; the
; stress run widens the add to i64 (zext of both %t and %b) and shifts that
; result directly.
915entry:
916  %t = load i8, ptr %p
917  %zextt = zext i8 %t to i32
918  %add = add nuw i32 %zextt, %b
919  %idx64 = zext i32 %add to i64
920  %staddr = shl i64 %idx64, 12
921  ret i64 %staddr
922}
923
924; The input has one free zext and one non-free sext.
925; When we promote all the way through to the load, we end up with
926; a free zext, a free sext (%ld1), and a non-free sext (of %cst).
927; However, when we generate a load pair, the free sext (of %ld1) becomes
928; non-free. So technically, we trade one non-free sext for two non-free
929; sexts.
930; This would need to be fixed at some point.
931;
932; This transformation should really happen only for stress mode.
933define i64 @doNotPromoteBecauseOfPairedLoad(ptr %p, i32 %cst) {
934; OPT-LABEL: define i64 @doNotPromoteBecauseOfPairedLoad(
935; OPT-SAME: ptr [[P:%.*]], i32 [[CST:%.*]]) {
936; OPT-NEXT:    [[LD0:%.*]] = load i32, ptr [[P]], align 4
937; OPT-NEXT:    [[IDXLD1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
938; OPT-NEXT:    [[LD1:%.*]] = load i32, ptr [[IDXLD1]], align 4
939; OPT-NEXT:    [[PROMOTED:%.*]] = sext i32 [[LD1]] to i64
940; OPT-NEXT:    [[PROMOTED1:%.*]] = sext i32 [[CST]] to i64
941; OPT-NEXT:    [[RES:%.*]] = add nsw i64 [[PROMOTED]], [[PROMOTED1]]
942; OPT-NEXT:    [[ZEXTLD0:%.*]] = zext i32 [[LD0]] to i64
943; OPT-NEXT:    [[FINAL:%.*]] = add i64 [[RES]], [[ZEXTLD0]]
944; OPT-NEXT:    ret i64 [[FINAL]]
945;
946; DISABLE-LABEL: define i64 @doNotPromoteBecauseOfPairedLoad(
947; DISABLE-SAME: ptr [[P:%.*]], i32 [[CST:%.*]]) {
948; DISABLE-NEXT:    [[LD0:%.*]] = load i32, ptr [[P]], align 4
949; DISABLE-NEXT:    [[IDXLD1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
950; DISABLE-NEXT:    [[LD1:%.*]] = load i32, ptr [[IDXLD1]], align 4
951; DISABLE-NEXT:    [[RES:%.*]] = add nsw i32 [[LD1]], [[CST]]
952; DISABLE-NEXT:    [[SEXTRES:%.*]] = sext i32 [[RES]] to i64
953; DISABLE-NEXT:    [[ZEXTLD0:%.*]] = zext i32 [[LD0]] to i64
954; DISABLE-NEXT:    [[FINAL:%.*]] = add i64 [[SEXTRES]], [[ZEXTLD0]]
955; DISABLE-NEXT:    ret i64 [[FINAL]]
956;
; Input IR: %ld0 and %ld1 are loaded from adjacent i32 slots (%p and %p + 1).
; %ld1 + %cst (nsw, i32) is sign-extended to i64 and added to the
; zero-extended %ld0.
; Per the checks above, the default and stress runs both replace the sext of
; the sum with separate sexts of %ld1 and %cst feeding an i64 add; the
; disabled run keeps the i32 add followed by the sext.
957  %ld0 = load i32, ptr %p
958  %idxLd1 = getelementptr inbounds i32, ptr %p, i64 1
959  %ld1 = load i32, ptr %idxLd1
960  %res = add nsw i32 %ld1, %cst
961  %sextres = sext i32 %res to i64
962  %zextLd0 = zext i32 %ld0 to i64
963  %final = add i64 %sextres, %zextLd0
964  ret i64 %final
965}
966
967define i64 @promoteZextShl(i1 %c, ptr %P) {
968; OPT-LABEL: define i64 @promoteZextShl(
969; OPT-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) {
970; OPT-NEXT:  [[ENTRY:.*:]]
971; OPT-NEXT:    [[LD:%.*]] = load i16, ptr [[P]], align 2
972; OPT-NEXT:    [[PROMOTED1:%.*]] = zext i16 [[LD]] to i64
973; OPT-NEXT:    br i1 [[C]], label %[[END:.*]], label %[[IF_THEN:.*]]
974; OPT:       [[IF_THEN]]:
975; OPT-NEXT:    [[SHL2:%.*]] = shl nsw i64 [[PROMOTED1]], 1
976; OPT-NEXT:    ret i64 [[SHL2]]
977; OPT:       [[END]]:
978; OPT-NEXT:    ret i64 0
979;
980; DISABLE-LABEL: define i64 @promoteZextShl(
981; DISABLE-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) {
982; DISABLE-NEXT:  [[ENTRY:.*:]]
983; DISABLE-NEXT:    [[LD:%.*]] = load i16, ptr [[P]], align 2
984; DISABLE-NEXT:    [[Z:%.*]] = zext i16 [[LD]] to i32
985; DISABLE-NEXT:    br i1 [[C]], label %[[END:.*]], label %[[IF_THEN:.*]]
986; DISABLE:       [[IF_THEN]]:
987; DISABLE-NEXT:    [[SHL2:%.*]] = shl nsw i32 [[Z]], 1
988; DISABLE-NEXT:    [[R:%.*]] = sext i32 [[SHL2]] to i64
989; DISABLE-NEXT:    ret i64 [[R]]
990; DISABLE:       [[END]]:
991; DISABLE-NEXT:    ret i64 0
992;
; Input IR: the i16 load sits in the entry block, while its zext to i32, the
; shl nsw by 1 and the final sext to i64 are all in if.then.
; Per the checks above, the default and stress runs emit a single zext from
; i16 to i64 next to the load and shift in i64, leaving no sext; the disabled
; run only moves the i32 zext next to the load and keeps the shl and sext in
; if.then.
993entry:
994  %ld = load i16, ptr %P
995  br i1 %c, label %end, label %if.then
996if.then:
997  %z = zext i16 %ld to i32
998  %shl2 = shl nsw i32 %z, 1
999  %r = sext i32 %shl2 to i64
1000  ret i64 %r
1001end:
1002  ret i64 0
1003}
1004