; xref: /llvm-project/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll (revision 670259466b238176ac302c8dedf806d2b2be7e0c)
; RUN: opt < %s -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s

; Module-level target description: little-endian ("e") 64-bit PowerPC Linux.
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"

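; void matvecmul(const double *__restrict y, const double * __restrict x, const double * __restrict b,
;    const int * __restrict nb, const int * __restrict nx, const int * __restrict ny, const int * __restrict nz) {
;
;   for (int k=1;k<nz;++k)
;      for (int j=1;j<ny;++j)
;        for (int i=1;i<nx;++i)
;          for (int l=1;l<nb;++l)
;            for (int m=1;m<nb;++m)
;                 y[k+1][j][i][l] = y[k+1][j][i][l] + b[k][j][i][m][l]*x[k][j][i][m];
; }
;
; NOTE: the above is pseudo-code only. In the IR below the function is named
; @mat_vec_mpy, each induction variable runs from 1 up to and including its
; bound (the exit tests compare against bound + 1), and the innermost body
; stores just the product b*x to y; y itself is never loaded.
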
; CHECK: Loop 'k_loop' has cost = 10200000000000000
; CHECK-NEXT: Loop 'j_loop' has cost = 102000000000000
; CHECK-NEXT: Loop 'i_loop' has cost = 1020000000000
; CHECK-NEXT: Loop 'm_loop' has cost = 10800000000
; CHECK-NEXT: Loop 'l_loop' has cost = 1500000000

; Packed struct wrapping a single double. Nothing in the IR visible in this
; file refers to it; it is kept so the module stays identical to the input
; the test was written against.
%_elem_type_of_double = type <{ double }>

; @mat_vec_mpy - five-deep loop nest (k, j, i, l, m from outermost to
; innermost) used as input for the loop cache cost analysis.
;
; Arguments:
;   %y              destination array (the only pointer that is stored to)
;   %x, %b          source arrays (only loaded from)
;   %nb, %nx, %ny, %nz
;                   pointers to the i32 extents; each is loaded once on entry
;
; Shorthand used in the comments below (all values are byte offsets):
;   NB, NX, NY      sign-extended extent clamped to >= 0 by the select
;   s1 = 8*NB       s2 = s1*NX      s3 = s2*NY        (strides for y and x)
;   t2 = s1*NB      t3 = t2*NX      t4 = t3*NY        (strides for b)
;
; The instruction stream is intentionally left exactly as generated: the
; expected costs at the top of the file depend on it.
;
; Function Attrs: norecurse nounwind
define void @mat_vec_mpy(ptr noalias %y, ptr noalias readonly %x,
    ptr noalias readonly %b, ptr noalias readonly %nb, ptr noalias readonly %nx,
    ptr noalias readonly %ny, ptr noalias readonly %nz) {
mat_times_vec_entry:
  ; Load nb, nx, ny; widen to i64 and clamp negatives to 0 (NB, NX, NY).
  %_ind_val = load i32, ptr %nb, align 4
  %_conv = sext i32 %_ind_val to i64
  %_grt_tmp.i = icmp sgt i64 %_conv, 0
  %a_b.i = select i1 %_grt_tmp.i, i64 %_conv, i64 0
  %_ind_val1 = load i32, ptr %nx, align 4
  %_conv2 = sext i32 %_ind_val1 to i64
  %_grt_tmp.i266 = icmp sgt i64 %_conv2, 0
  %a_b.i267 = select i1 %_grt_tmp.i266, i64 %_conv2, i64 0
  %_ind_val3 = load i32, ptr %ny, align 4
  %_conv4 = sext i32 %_ind_val3 to i64
  %_grt_tmp.i264 = icmp sgt i64 %_conv4, 0
  %a_b.i265 = select i1 %_grt_tmp.i264, i64 %_conv4, i64 0
  %_ind_val5 = load i32, ptr %nz, align 4
  ; s1, s2, s3: byte strides shared by the y and x address computations.
  %_mult_tmp = shl nsw i64 %a_b.i, 3
  %_mult_tmp7 = mul i64 %_mult_tmp, %a_b.i267
  %_mult_tmp8 = mul i64 %_mult_tmp7, %a_b.i265
  ; Base bias for y and x: -8 - s1 - s2 - s3 (compensates for the
  ; induction variables starting at 1 instead of 0).
  %_sub_tmp = sub nuw nsw i64 -8, %_mult_tmp
  %_sub_tmp21 = sub i64 %_sub_tmp, %_mult_tmp7
  %_sub_tmp23 = sub i64 %_sub_tmp21, %_mult_tmp8
  ; t2, t3, t4: byte strides for b, which has one more NB-sized dimension.
  %_mult_tmp73 = mul i64 %_mult_tmp, %a_b.i
  %_mult_tmp74 = mul i64 %_mult_tmp73, %a_b.i267
  %_mult_tmp75 = mul i64 %_mult_tmp74, %a_b.i265
  ; Base bias for b: -8 - s1 - t2 - t3 - t4.
  %_sub_tmp93 = sub i64 %_sub_tmp, %_mult_tmp73
  %_sub_tmp95 = sub i64 %_sub_tmp93, %_mult_tmp74
  %_sub_tmp97 = sub i64 %_sub_tmp95, %_mult_tmp75
  ; Guard: nothing to do when nz < 1.
  %_grt_tmp853288 = icmp slt i32 %_ind_val5, 1
  br i1 %_grt_tmp853288, label %_return_bb, label %k_loop.lr.ph

k_loop.lr.ph:                                     ; preds = %mat_times_vec_entry
  ; Pre-compute the ny < 1 and nb < 1 guards and the biased base pointers.
  %_grt_tmp851279 = icmp slt i32 %_ind_val3, 1
  %_grt_tmp847270 = icmp slt i32 %_ind_val, 1
  %_aa_conv = bitcast ptr %y to ptr
  %_adda_ = getelementptr inbounds i8, ptr %_aa_conv, i64 %_sub_tmp23
  %_aa_conv434 = bitcast ptr %x to ptr
  %_adda_435 = getelementptr inbounds i8, ptr %_aa_conv434, i64 %_sub_tmp23
  %_aa_conv785 = bitcast ptr %b to ptr
  %_adda_786 = getelementptr inbounds i8, ptr %_aa_conv785, i64 %_sub_tmp97
  ; Guard: ny < 1 skips the whole nest.
  br i1 %_grt_tmp851279, label %k_loop.us.preheader, label %k_loop.lr.ph.split

k_loop.us.preheader:                              ; preds = %k_loop.lr.ph
  br label %_return_bb.loopexit

k_loop.lr.ph.split:                               ; preds = %k_loop.lr.ph
  ; Guard: nx < 1 skips the whole nest.
  %_grt_tmp849273 = icmp slt i32 %_ind_val1, 1
  br i1 %_grt_tmp849273, label %k_loop.us291.preheader, label %k_loop.lr.ph.split.split

k_loop.us291.preheader:                           ; preds = %k_loop.lr.ph.split
  br label %_return_bb.loopexit300

k_loop.lr.ph.split.split:                         ; preds = %k_loop.lr.ph.split
  ; Guard: nb < 1 skips the whole nest.
  br i1 %_grt_tmp847270, label %k_loop.us294.preheader, label %k_loop.preheader

k_loop.preheader:                                 ; preds = %k_loop.lr.ph.split.split
  ; Exclusive upper bounds (extent + 1) in i32; zero-extended inside m_loop.
  %0 = add i32 %_ind_val, 1
  %1 = add i32 %_ind_val1, 1
  %2 = add i32 %_ind_val3, 1
  %3 = add i32 %_ind_val5, 1
  br label %k_loop

k_loop.us294.preheader:                           ; preds = %k_loop.lr.ph.split.split
  br label %_return_bb.loopexit301

k_loop:                                           ; preds = %k_loop._label_18_crit_edge.split.split.split, %k_loop.preheader
  ; k starts at 1. Offsets: (k+1)*s3 for y, k*t4 for b, k*s3 for x.
  %indvars.iv316 = phi i64 [ 1, %k_loop.preheader ], [ %indvars.iv.next317, %k_loop._label_18_crit_edge.split.split.split ]
  %indvars.iv.next317 = add nuw nsw i64 %indvars.iv316, 1
  %_ix_x_len = mul i64 %_mult_tmp8, %indvars.iv.next317
  %_ix_x_len410 = mul i64 %_mult_tmp75, %indvars.iv316
  %_ix_x_len822 = mul i64 %_mult_tmp8, %indvars.iv316
  br label %j_loop

j_loop:                                           ; preds = %j_loop._label_15_crit_edge.split.split, %k_loop
  ; j starts at 1. Offsets: j*s2 (y and x), j*t3 (b).
  %indvars.iv312 = phi i64 [ %indvars.iv.next313, %j_loop._label_15_crit_edge.split.split ], [ 1, %k_loop ]
  %_ix_x_len371 = mul i64 %_mult_tmp7, %indvars.iv312
  %_ix_x_len415 = mul i64 %_mult_tmp74, %indvars.iv312
  br label %i_loop

i_loop:                                           ; preds = %i_loop._label_12_crit_edge.split, %j_loop
  ; i starts at 1. Offsets: i*s1 (y and x), i*t2 (b).
  %indvars.iv307 = phi i64 [ %indvars.iv.next308, %i_loop._label_12_crit_edge.split ], [ 1, %j_loop ]
  %_ix_x_len375 = mul i64 %_mult_tmp, %indvars.iv307
  %_ix_x_len420 = mul i64 %_mult_tmp73, %indvars.iv307
  br label %l_loop

l_loop:                                           ; preds = %l_loop._label_9_crit_edge, %i_loop
  ; l starts at 1. Offset: 8*l (y and b).
  %indvars.iv303 = phi i64 [ %indvars.iv.next304, %l_loop._label_9_crit_edge ], [ 1, %i_loop ]
  %_ix_x_len378 = shl nuw nsw i64 %indvars.iv303, 3
  br label %m_loop

m_loop:                                           ; preds = %m_loop, %l_loop
  ; m starts at 1. Offsets: m*s1 (b), 8*m (x).
  %indvars.iv = phi i64 [ %indvars.iv.next, %m_loop ], [ 1, %l_loop ]
  %_ix_x_len424 = mul i64 %_mult_tmp, %indvars.iv
  %_ix_x_len454 = shl nuw nsw i64 %indvars.iv, 3
  ; Address chains, built up one dimension at a time:
  ;   y: %_adda_    + (k+1)*s3 + j*s2 + i*s1 + 8*l         -> %_ixa_gep379
  ;   b: %_adda_786 + k*t4 + j*t3 + i*t2 + 8*l + m*s1     -> %_ixa_gep810
  ;   x: %_adda_435 + k*s3 + j*s2 + i*s1 + 8*m            -> %_ixa_gep837
  %_ixa_gep = getelementptr inbounds i8, ptr %_adda_, i64 %_ix_x_len
  %_ixa_gep791 = getelementptr inbounds i8, ptr %_adda_786, i64 %_ix_x_len410
  %_ixa_gep823 = getelementptr inbounds i8, ptr %_adda_435, i64 %_ix_x_len822
  %_ixa_gep372 = getelementptr inbounds i8, ptr %_ixa_gep, i64 %_ix_x_len371
  %_ixa_gep376 = getelementptr inbounds i8, ptr %_ixa_gep372, i64 %_ix_x_len375
  %_ixa_gep796 = getelementptr inbounds i8, ptr %_ixa_gep791, i64 %_ix_x_len415
  %_ixa_gep828 = getelementptr inbounds i8, ptr %_ixa_gep823, i64 %_ix_x_len371
  %_ixa_gep379 = getelementptr inbounds i8, ptr %_ixa_gep376, i64 %_ix_x_len378
  %_ixa_gep801 = getelementptr inbounds i8, ptr %_ixa_gep796, i64 %_ix_x_len420
  %_ixa_gep833 = getelementptr inbounds i8, ptr %_ixa_gep828, i64 %_ix_x_len375
  %_ixa_gep806 = getelementptr inbounds i8, ptr %_ixa_gep801, i64 %_ix_x_len378
  %_ixa_gep810 = getelementptr inbounds i8, ptr %_ixa_gep806, i64 %_ix_x_len424
  %_gepp = bitcast ptr %_ixa_gep379 to ptr
  %_gepp813 = bitcast ptr %_ixa_gep810 to ptr
  ; Load the b element, then the x element, multiply, and store into y.
  ; y is only written here; its previous value is not read.
  %_ind_val814 = load double, ptr %_gepp813, align 8
  %_ixa_gep837 = getelementptr inbounds i8, ptr %_ixa_gep833, i64 %_ix_x_len454
  %_gepp840 = bitcast ptr %_ixa_gep837 to ptr
  %_ind_val841 = load double, ptr %_gepp840, align 8
  %_mult_tmp842 = fmul double %_ind_val814, %_ind_val841
  store double %_mult_tmp842, ptr %_gepp, align 8
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Widened exclusive bounds for all five loops are materialised here, in the
  ; innermost block: nb+1 for m and l, nx+1 for i, ny+1 for j, nz+1 for k.
  %wide.trip.count = zext i32 %0 to i64
  %wide.trip.count305 = zext i32 %0 to i64
  %wide.trip.count309 = zext i32 %1 to i64
  %wide.trip.count314 = zext i32 %2 to i64
  %wide.trip.count319 = zext i32 %3 to i64
  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %m_loop, label %l_loop._label_9_crit_edge

l_loop._label_9_crit_edge:                        ; preds = %m_loop
  ; l latch: advance l and repeat until l + 1 == nb + 1.
  %indvars.iv.next304 = add nuw nsw i64 %indvars.iv303, 1
  %exitcond306 = icmp ne i64 %indvars.iv.next304, %wide.trip.count305
  br i1 %exitcond306, label %l_loop, label %i_loop._label_12_crit_edge.split

i_loop._label_12_crit_edge.split:                 ; preds = %l_loop._label_9_crit_edge
  ; i latch: advance i and repeat until i + 1 == nx + 1.
  %indvars.iv.next308 = add nuw nsw i64 %indvars.iv307, 1
  %exitcond310 = icmp ne i64 %indvars.iv.next308, %wide.trip.count309
  br i1 %exitcond310, label %i_loop, label %j_loop._label_15_crit_edge.split.split

j_loop._label_15_crit_edge.split.split:           ; preds = %i_loop._label_12_crit_edge.split
  ; j latch: advance j and repeat until j + 1 == ny + 1.
  %indvars.iv.next313 = add nuw nsw i64 %indvars.iv312, 1
  %exitcond315 = icmp ne i64 %indvars.iv.next313, %wide.trip.count314
  br i1 %exitcond315, label %j_loop, label %k_loop._label_18_crit_edge.split.split.split

k_loop._label_18_crit_edge.split.split.split:     ; preds = %j_loop._label_15_crit_edge.split.split
  ; k latch: k + 1 was already computed in the k_loop header.
  %exitcond320 = icmp ne i64 %indvars.iv.next317, %wide.trip.count319
  br i1 %exitcond320, label %k_loop, label %_return_bb.loopexit302

_return_bb.loopexit:                              ; preds = %k_loop.us.preheader
  br label %_return_bb

_return_bb.loopexit300:                           ; preds = %k_loop.us291.preheader
  br label %_return_bb

_return_bb.loopexit301:                           ; preds = %k_loop.us294.preheader
  br label %_return_bb

_return_bb.loopexit302:                           ; preds = %k_loop._label_18_crit_edge.split.split.split
  br label %_return_bb

_return_bb:                                       ; preds = %_return_bb.loopexit302, %_return_bb.loopexit301, %_return_bb.loopexit300, %_return_bb.loopexit, %mat_times_vec_entry
  ret void
}