xref: /llvm-project/llvm/test/Transforms/LoopUnrollAndJam/pragma-explicit.ll (revision 055fb7795aa219a3d274d280ec9129784f169f56)
; Test driver: both invocations below run opt's loop-unroll-and-jam pass with
; -unroll-runtime and -unroll-partial-threshold=60 and pipe the printed IR into
; FileCheck. The second invocation additionally names tbaa,basic-aa as the
; alias-analysis pipeline.
1; RUN: opt -passes=loop-unroll-and-jam -unroll-runtime -unroll-partial-threshold=60 < %s -S | FileCheck %s
2; RUN: opt -aa-pipeline=tbaa,basic-aa -passes='loop-unroll-and-jam' -unroll-runtime -unroll-partial-threshold=60 < %s -S | FileCheck %s
3
; Module-level data layout and target triple (x86_64-unknown-linux-gnu).
4target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5target triple = "x86_64-unknown-linux-gnu"
6
7; CHECK-LABEL: function
8; The explicit metadata here should force this loop to be unrolled and jammed by a factor of 4 (hence the %.pre60.3)
9; CHECK: %.pre = phi i8 [ %.pre60.3, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %.pre.pre, %for.cond1.preheader.us.preheader.new ]
10; CHECK: %indvars.iv.3 = phi i64 [ 0, %for.cond1.preheader.us ], [ %indvars.iv.next.3, %for.body4.us ]
; @function: a two-level loop nest over a byte image.
;   - Outer loop: header %for.cond1.preheader.us, latch
;     %for.cond1.for.cond.cleanup3_crit_edge.us; counts %y.055.us from 0 until
;     it equals %height. Its latch branch carries !llvm.loop !5.
;   - Inner loop: %for.body4.us; counts %indvars.iv from 0 until the incremented
;     value equals zext(%width).
; For inner index x the body stores to the current dst row
;     trunc(((cur[x]*A + 32) + cur[x+1]*B + next[x]*C + next[x+1]*D) >> 6)
; where `cur` is the current src row pointer and `next` is cur + sext(%src_stride).
; After each outer iteration the dst row pointer advances by sext(%dst_stride)
; and `next` becomes the new `cur`.
; Both loops are skipped entirely unless %height > 0 and %width > 0 (signed).
11define void @function(ptr noalias nocapture %dst, i32 %dst_stride, ptr noalias nocapture readonly %src, i32 %src_stride, i32 %A, i32 %B, i32 %C, i32 %D, i32 %width, i32 %height) {
12entry:
13  %idxprom = sext i32 %src_stride to i64        ; src row stride widened for pointer arithmetic
14  %cmp52 = icmp sgt i32 %height, 0              ; guard: any rows at all?
15  br i1 %cmp52, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup
16
17for.cond1.preheader.lr.ph:                        ; preds = %entry
18  %cmp249 = icmp sgt i32 %width, 0               ; guard: any columns at all?
19  %idx.ext = sext i32 %dst_stride to i64         ; dst row stride widened for pointer arithmetic
20  br i1 %cmp249, label %for.cond1.preheader.us.preheader, label %for.cond.cleanup
21
22for.cond1.preheader.us.preheader:                 ; preds = %for.cond1.preheader.lr.ph
23  %.pre.pre = load i8, ptr %src, align 1         ; first byte of the first src row (seeds %.pre)
24  %wide.trip.count = zext i32 %width to i64      ; inner-loop bound
25  br label %for.cond1.preheader.us
26
  ; Outer loop header: one iteration per output row.
27for.cond1.preheader.us:                           ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.cond1.preheader.us.preheader
28  %.pre = phi i8 [ %.pre60, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %.pre.pre, %for.cond1.preheader.us.preheader ]
29  %srcp.056.us.pn = phi ptr [ %srcp.056.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %src, %for.cond1.preheader.us.preheader ]
30  %y.055.us = phi i32 [ %inc30.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ]
31  %dst.addr.054.us = phi ptr [ %add.ptr.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %dst, %for.cond1.preheader.us.preheader ]
32  %srcp.056.us = getelementptr inbounds i8, ptr %srcp.056.us.pn, i64 %idxprom   ; next src row = current row + stride
33  %.pre60 = load i8, ptr %srcp.056.us, align 1   ; first byte of the next src row; reused as %.pre on the following outer iteration
34  br label %for.body4.us
35
  ; Inner loop: one iteration per output column. %1/%0 carry the bytes loaded
  ; as %2/%3 on the previous inner iteration, so each byte is loaded only once
  ; per row pass.
36for.body4.us:                                     ; preds = %for.body4.us, %for.cond1.preheader.us
37  %0 = phi i8 [ %.pre60, %for.cond1.preheader.us ], [ %3, %for.body4.us ]
38  %1 = phi i8 [ %.pre, %for.cond1.preheader.us ], [ %2, %for.body4.us ]
39  %indvars.iv = phi i64 [ 0, %for.cond1.preheader.us ], [ %indvars.iv.next, %for.body4.us ]
40  %conv.us = zext i8 %1 to i32
41  %mul.us = mul nsw i32 %conv.us, %A             ; cur[x] * A
42  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
43  %arrayidx8.us = getelementptr inbounds i8, ptr %srcp.056.us.pn, i64 %indvars.iv.next
44  %2 = load i8, ptr %arrayidx8.us, align 1       ; cur[x+1]
45  %conv9.us = zext i8 %2 to i32
46  %mul10.us = mul nsw i32 %conv9.us, %B          ; cur[x+1] * B
47  %conv14.us = zext i8 %0 to i32
48  %mul15.us = mul nsw i32 %conv14.us, %C         ; next[x] * C
49  %arrayidx19.us = getelementptr inbounds i8, ptr %srcp.056.us, i64 %indvars.iv.next
50  %3 = load i8, ptr %arrayidx19.us, align 1      ; next[x+1]
51  %conv20.us = zext i8 %3 to i32
52  %mul21.us = mul nsw i32 %conv20.us, %D         ; next[x+1] * D
53  %add11.us = add i32 %mul.us, 32                ; constant 32 added before the shift by 6
54  %add16.us = add i32 %add11.us, %mul10.us
55  %add22.us = add i32 %add16.us, %mul15.us
56  %add23.us = add i32 %add22.us, %mul21.us
57  %4 = lshr i32 %add23.us, 6
58  %conv24.us = trunc i32 %4 to i8
59  %arrayidx26.us = getelementptr inbounds i8, ptr %dst.addr.054.us, i64 %indvars.iv
60  store i8 %conv24.us, ptr %arrayidx26.us, align 1   ; dst_row[x] = result byte
61  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
62  br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us
63
  ; Outer loop latch: advance the dst row and the row counter.
64for.cond1.for.cond.cleanup3_crit_edge.us:         ; preds = %for.body4.us
65  %add.ptr.us = getelementptr inbounds i8, ptr %dst.addr.054.us, i64 %idx.ext
66  %inc30.us = add nuw nsw i32 %y.055.us, 1
67  %exitcond58 = icmp eq i32 %inc30.us, %height
68  br i1 %exitcond58, label %for.cond.cleanup, label %for.cond1.preheader.us, !llvm.loop !5   ; !5 carries the explicit unroll_and_jam count
69
70for.cond.cleanup:                                 ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.cond1.preheader.lr.ph, %entry
71  ret void
72}
73
74; CHECK-LABEL: function2
75; The explicit metadata here should force this loop to be unrolled and jammed,
76; but the count is left to the thresholds; in this case it is 2 (hence %.pre60.1).
77; CHECK: %.pre = phi i8 [ %.pre60.1, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %.pre.pre, %for.cond1.preheader.us.preheader.new ]
78; CHECK: %indvars.iv.1 = phi i64 [ 0, %for.cond1.preheader.us ], [ %indvars.iv.next.1, %for.body4.us ]
; @function2: the same two-level loop nest as @function (the instructions are
; identical); the only difference is that the outer latch branch carries
; !llvm.loop !7 instead of !5.
;   - Outer loop: header %for.cond1.preheader.us, latch
;     %for.cond1.for.cond.cleanup3_crit_edge.us; counts %y.055.us from 0 until
;     it equals %height.
;   - Inner loop: %for.body4.us; counts %indvars.iv from 0 until the incremented
;     value equals zext(%width).
; For inner index x the body stores to the current dst row
;     trunc(((cur[x]*A + 32) + cur[x+1]*B + next[x]*C + next[x+1]*D) >> 6)
; where `cur` is the current src row pointer and `next` is cur + sext(%src_stride).
; Both loops are skipped entirely unless %height > 0 and %width > 0 (signed).
79define void @function2(ptr noalias nocapture %dst, i32 %dst_stride, ptr noalias nocapture readonly %src, i32 %src_stride, i32 %A, i32 %B, i32 %C, i32 %D, i32 %width, i32 %height) {
80entry:
81  %idxprom = sext i32 %src_stride to i64        ; src row stride widened for pointer arithmetic
82  %cmp52 = icmp sgt i32 %height, 0              ; guard: any rows at all?
83  br i1 %cmp52, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup
84
85for.cond1.preheader.lr.ph:                        ; preds = %entry
86  %cmp249 = icmp sgt i32 %width, 0               ; guard: any columns at all?
87  %idx.ext = sext i32 %dst_stride to i64         ; dst row stride widened for pointer arithmetic
88  br i1 %cmp249, label %for.cond1.preheader.us.preheader, label %for.cond.cleanup
89
90for.cond1.preheader.us.preheader:                 ; preds = %for.cond1.preheader.lr.ph
91  %.pre.pre = load i8, ptr %src, align 1         ; first byte of the first src row (seeds %.pre)
92  %wide.trip.count = zext i32 %width to i64      ; inner-loop bound
93  br label %for.cond1.preheader.us
94
  ; Outer loop header: one iteration per output row.
95for.cond1.preheader.us:                           ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.cond1.preheader.us.preheader
96  %.pre = phi i8 [ %.pre60, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %.pre.pre, %for.cond1.preheader.us.preheader ]
97  %srcp.056.us.pn = phi ptr [ %srcp.056.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %src, %for.cond1.preheader.us.preheader ]
98  %y.055.us = phi i32 [ %inc30.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ]
99  %dst.addr.054.us = phi ptr [ %add.ptr.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %dst, %for.cond1.preheader.us.preheader ]
100  %srcp.056.us = getelementptr inbounds i8, ptr %srcp.056.us.pn, i64 %idxprom   ; next src row = current row + stride
101  %.pre60 = load i8, ptr %srcp.056.us, align 1   ; first byte of the next src row; reused as %.pre on the following outer iteration
102  br label %for.body4.us
103
  ; Inner loop: one iteration per output column. %1/%0 carry the bytes loaded
  ; as %2/%3 on the previous inner iteration.
104for.body4.us:                                     ; preds = %for.body4.us, %for.cond1.preheader.us
105  %0 = phi i8 [ %.pre60, %for.cond1.preheader.us ], [ %3, %for.body4.us ]
106  %1 = phi i8 [ %.pre, %for.cond1.preheader.us ], [ %2, %for.body4.us ]
107  %indvars.iv = phi i64 [ 0, %for.cond1.preheader.us ], [ %indvars.iv.next, %for.body4.us ]
108  %conv.us = zext i8 %1 to i32
109  %mul.us = mul nsw i32 %conv.us, %A             ; cur[x] * A
110  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
111  %arrayidx8.us = getelementptr inbounds i8, ptr %srcp.056.us.pn, i64 %indvars.iv.next
112  %2 = load i8, ptr %arrayidx8.us, align 1       ; cur[x+1]
113  %conv9.us = zext i8 %2 to i32
114  %mul10.us = mul nsw i32 %conv9.us, %B          ; cur[x+1] * B
115  %conv14.us = zext i8 %0 to i32
116  %mul15.us = mul nsw i32 %conv14.us, %C         ; next[x] * C
117  %arrayidx19.us = getelementptr inbounds i8, ptr %srcp.056.us, i64 %indvars.iv.next
118  %3 = load i8, ptr %arrayidx19.us, align 1      ; next[x+1]
119  %conv20.us = zext i8 %3 to i32
120  %mul21.us = mul nsw i32 %conv20.us, %D         ; next[x+1] * D
121  %add11.us = add i32 %mul.us, 32                ; constant 32 added before the shift by 6
122  %add16.us = add i32 %add11.us, %mul10.us
123  %add22.us = add i32 %add16.us, %mul15.us
124  %add23.us = add i32 %add22.us, %mul21.us
125  %4 = lshr i32 %add23.us, 6
126  %conv24.us = trunc i32 %4 to i8
127  %arrayidx26.us = getelementptr inbounds i8, ptr %dst.addr.054.us, i64 %indvars.iv
128  store i8 %conv24.us, ptr %arrayidx26.us, align 1   ; dst_row[x] = result byte
129  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
130  br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us
131
  ; Outer loop latch: advance the dst row and the row counter.
132for.cond1.for.cond.cleanup3_crit_edge.us:         ; preds = %for.body4.us
133  %add.ptr.us = getelementptr inbounds i8, ptr %dst.addr.054.us, i64 %idx.ext
134  %inc30.us = add nuw nsw i32 %y.055.us, 1
135  %exitcond58 = icmp eq i32 %inc30.us, %height
136  br i1 %exitcond58, label %for.cond.cleanup, label %for.cond1.preheader.us, !llvm.loop !7   ; !7 enables unroll-and-jam without fixing a count
137
138for.cond.cleanup:                                 ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.cond1.preheader.lr.ph, %entry
139  ret void
140}
141
; Loop metadata attached to the outer-loop latch branches.
; !5 (referenced from @function): explicit unroll-and-jam count of 4.
142!5 = distinct !{!5, !6}
143!6 = !{!"llvm.loop.unroll_and_jam.count", i32 4}
; !7 (referenced from @function2): unroll-and-jam enabled, no count given.
144!7 = distinct !{!7, !8}
145!8 = !{!"llvm.loop.unroll_and_jam.enable"}
146