xref: /llvm-project/polly/docs/experiments/matmul/matmul.polly.interchanged+tiled+vector+openmp.ll (revision 4ab3041acbdc274050d6c53f72619c7455cbc97a)
1; ModuleID = '<stdin>'
2source_filename = "matmul.c"
3target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4target triple = "x86_64-unknown-linux-gnu"
5
6%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
7%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
8
9@A = common dso_local local_unnamed_addr global [1536 x [1536 x float]] zeroinitializer, align 16
10@B = common dso_local local_unnamed_addr global [1536 x [1536 x float]] zeroinitializer, align 16
11@stdout = external dso_local local_unnamed_addr global %struct._IO_FILE*, align 8
12@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
13@C = common dso_local local_unnamed_addr global [1536 x [1536 x float]] zeroinitializer, align 16
14
15; Function Attrs: noinline nounwind uwtable
16define dso_local void @init_array() local_unnamed_addr #0 {
17entry:
18  %polly.par.userContext = alloca {}, align 8
19  %polly.par.userContext1 = bitcast {}* %polly.par.userContext to i8*
20  call void @GOMP_parallel_loop_runtime_start(void (i8*)* nonnull @init_array_polly_subfn, i8* nonnull %polly.par.userContext1, i32 0, i64 0, i64 1536, i64 1) #3
21  call void @init_array_polly_subfn(i8* nonnull %polly.par.userContext1) #3
22  call void @GOMP_parallel_end() #3
23  ret void
24}
25
26; Function Attrs: noinline nounwind uwtable
27define dso_local void @print_array() local_unnamed_addr #1 {
28entry:
29  br label %for.cond1.preheader
30
31for.cond1.preheader:                              ; preds = %for.end, %entry
32  %indvars.iv6 = phi i64 [ 0, %entry ], [ %indvars.iv.next7, %for.end ]
33  %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
34  br label %for.body3
35
36for.body3:                                        ; preds = %for.inc, %for.cond1.preheader
37  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.inc ]
38  %1 = phi %struct._IO_FILE* [ %0, %for.cond1.preheader ], [ %5, %for.inc ]
39  %arrayidx5 = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %indvars.iv6, i64 %indvars.iv
40  %2 = load float, float* %arrayidx5, align 4
41  %conv = fpext float %2 to double
42  %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %conv) #3
43  %3 = trunc i64 %indvars.iv to i32
44  %rem = urem i32 %3, 80
45  %cmp6 = icmp eq i32 %rem, 79
46  br i1 %cmp6, label %if.then, label %for.inc
47
48if.then:                                          ; preds = %for.body3
49  %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
50  %fputc3 = tail call i32 @fputc(i32 10, %struct._IO_FILE* %4)
51  br label %for.inc
52
53for.inc:                                          ; preds = %if.then, %for.body3
54  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
55  %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
56  %exitcond = icmp eq i64 %indvars.iv.next, 1536
57  br i1 %exitcond, label %for.end, label %for.body3
58
59for.end:                                          ; preds = %for.inc
60  %fputc = tail call i32 @fputc(i32 10, %struct._IO_FILE* %5)
61  %indvars.iv.next7 = add nuw nsw i64 %indvars.iv6, 1
62  %exitcond8 = icmp eq i64 %indvars.iv.next7, 1536
63  br i1 %exitcond8, label %for.end12, label %for.cond1.preheader
64
65for.end12:                                        ; preds = %for.end
66  ret void
67}
68
69; Function Attrs: nounwind
70declare dso_local i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture readonly, ...) local_unnamed_addr #2
71
72; Function Attrs: noinline nounwind uwtable
73define dso_local i32 @main() local_unnamed_addr #0 {
74entry:
75  %polly.par.userContext3 = alloca {}, align 8
76  tail call void @init_array()
77  %polly.par.userContext1 = bitcast {}* %polly.par.userContext3 to i8*
78  call void @GOMP_parallel_loop_runtime_start(void (i8*)* nonnull @main_polly_subfn, i8* nonnull %polly.par.userContext1, i32 0, i64 0, i64 1536, i64 1) #3
79  call void @main_polly_subfn(i8* nonnull %polly.par.userContext1) #3
80  call void @GOMP_parallel_end() #3
81  call void @GOMP_parallel_loop_runtime_start(void (i8*)* nonnull @main_polly_subfn_1, i8* nonnull %polly.par.userContext1, i32 0, i64 0, i64 1536, i64 64) #3
82  call void @main_polly_subfn_1(i8* nonnull %polly.par.userContext1) #3
83  call void @GOMP_parallel_end() #3
84  ret i32 0
85}
86
87; Function Attrs: nounwind
88declare i32 @fputc(i32, %struct._IO_FILE* nocapture) local_unnamed_addr #3
89
90define internal void @init_array_polly_subfn(i8* nocapture readnone %polly.par.userContext) #4 {
91polly.par.setup:
92  %polly.par.LBPtr = alloca i64, align 8
93  %polly.par.UBPtr = alloca i64, align 8
94  %0 = call i8 @GOMP_loop_runtime_next(i64* nonnull %polly.par.LBPtr, i64* nonnull %polly.par.UBPtr)
95  %1 = icmp eq i8 %0, 0
96  br i1 %1, label %polly.par.exit, label %polly.par.loadIVBounds
97
98polly.par.exit:                                   ; preds = %polly.par.checkNext.loopexit, %polly.par.setup
99  call void @GOMP_loop_end_nowait()
100  ret void
101
102polly.par.checkNext.loopexit:                     ; preds = %polly.loop_exit4
103  %2 = call i8 @GOMP_loop_runtime_next(i64* nonnull %polly.par.LBPtr, i64* nonnull %polly.par.UBPtr)
104  %3 = icmp eq i8 %2, 0
105  br i1 %3, label %polly.par.exit, label %polly.par.loadIVBounds
106
107polly.par.loadIVBounds:                           ; preds = %polly.par.setup, %polly.par.checkNext.loopexit
108  %polly.par.LB = load i64, i64* %polly.par.LBPtr, align 8
109  %polly.par.UB = load i64, i64* %polly.par.UBPtr, align 8
110  %polly.par.UBAdjusted = add i64 %polly.par.UB, -1
111  br label %polly.loop_header
112
113polly.loop_header:                                ; preds = %polly.par.loadIVBounds, %polly.loop_exit4
114  %polly.indvar = phi i64 [ %polly.par.LB, %polly.par.loadIVBounds ], [ %polly.indvar_next, %polly.loop_exit4 ]
115  %4 = trunc i64 %polly.indvar to i32
116  br label %polly.loop_header2
117
118polly.loop_exit4:                                 ; preds = %polly.loop_header2
119  %polly.indvar_next = add nsw i64 %polly.indvar, 1
120  %polly.loop_cond = icmp slt i64 %polly.indvar, %polly.par.UBAdjusted
121  br i1 %polly.loop_cond, label %polly.loop_header, label %polly.par.checkNext.loopexit
122
123polly.loop_header2:                               ; preds = %polly.loop_header2, %polly.loop_header
124  %polly.indvar5 = phi i64 [ 0, %polly.loop_header ], [ %polly.indvar_next6, %polly.loop_header2 ]
125  %5 = trunc i64 %polly.indvar5 to i32
126  %6 = mul i32 %5, %4
127  %7 = and i32 %6, 1023
128  %8 = add nuw nsw i32 %7, 1
129  %p_conv = sitofp i32 %8 to double
130  %p_div = fmul double %p_conv, 5.000000e-01
131  %p_conv4 = fptrunc double %p_div to float
132  %scevgep8 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %polly.indvar, i64 %polly.indvar5
133  store float %p_conv4, float* %scevgep8, align 4, !alias.scope !2, !noalias !4
134  %scevgep10 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %polly.indvar, i64 %polly.indvar5
135  store float %p_conv4, float* %scevgep10, align 4, !alias.scope !5, !noalias !6
136  %polly.indvar_next6 = add nuw nsw i64 %polly.indvar5, 1
137  %exitcond = icmp eq i64 %polly.indvar_next6, 1536
138  br i1 %exitcond, label %polly.loop_exit4, label %polly.loop_header2
139}
140
141declare i8 @GOMP_loop_runtime_next(i64*, i64*) local_unnamed_addr
142
143declare void @GOMP_loop_end_nowait() local_unnamed_addr
144
145declare void @GOMP_parallel_loop_runtime_start(void (i8*)*, i8*, i32, i64, i64, i64) local_unnamed_addr
146
147declare void @GOMP_parallel_end() local_unnamed_addr
148
149define internal void @main_polly_subfn(i8* nocapture readnone %polly.par.userContext) #4 {
150polly.par.setup:
151  %polly.par.LBPtr = alloca i64, align 8
152  %polly.par.UBPtr = alloca i64, align 8
153  %0 = call i8 @GOMP_loop_runtime_next(i64* nonnull %polly.par.LBPtr, i64* nonnull %polly.par.UBPtr)
154  %1 = icmp eq i8 %0, 0
155  br i1 %1, label %polly.par.exit, label %polly.par.loadIVBounds
156
157polly.par.exit:                                   ; preds = %polly.par.loadIVBounds, %polly.par.setup
158  call void @GOMP_loop_end_nowait()
159  ret void
160
161polly.par.loadIVBounds:                           ; preds = %polly.par.setup, %polly.par.loadIVBounds
162  %polly.par.LB = load i64, i64* %polly.par.LBPtr, align 8
163  %polly.par.UB = load i64, i64* %polly.par.UBPtr, align 8
164  %polly.par.UBAdjusted = add i64 %polly.par.UB, -1
165  %scevgep2 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %polly.par.LB, i64 0
166  %scevgep23 = bitcast float* %scevgep2 to i8*
167  %2 = icmp sgt i64 %polly.par.LB, %polly.par.UBAdjusted
168  %smax = select i1 %2, i64 %polly.par.LB, i64 %polly.par.UBAdjusted
169  %3 = add i64 %smax, 1
170  %4 = sub i64 %3, %polly.par.LB
171  %5 = mul i64 %4, 6144
172  call void @llvm.memset.p0i8.i64(i8* align 16 %scevgep23, i8 0, i64 %5, i1 false)
173  %6 = call i8 @GOMP_loop_runtime_next(i64* nonnull %polly.par.LBPtr, i64* nonnull %polly.par.UBPtr)
174  %7 = icmp eq i8 %6, 0
175  br i1 %7, label %polly.par.exit, label %polly.par.loadIVBounds
176}
177
178define internal void @main_polly_subfn_1(i8* nocapture readnone %polly.par.userContext) #4 {
179polly.par.setup:
180  %polly.par.LBPtr = alloca i64, align 8
181  %polly.par.UBPtr = alloca i64, align 8
182  %0 = call i8 @GOMP_loop_runtime_next(i64* nonnull %polly.par.LBPtr, i64* nonnull %polly.par.UBPtr)
183  %1 = icmp eq i8 %0, 0
184  br i1 %1, label %polly.par.exit, label %polly.par.loadIVBounds
185
186polly.par.exit:                                   ; preds = %polly.par.checkNext.loopexit, %polly.par.setup
187  call void @GOMP_loop_end_nowait()
188  ret void
189
190polly.par.checkNext.loopexit:                     ; preds = %polly.loop_exit4
191  %2 = call i8 @GOMP_loop_runtime_next(i64* nonnull %polly.par.LBPtr, i64* nonnull %polly.par.UBPtr)
192  %3 = icmp eq i8 %2, 0
193  br i1 %3, label %polly.par.exit, label %polly.par.loadIVBounds
194
195polly.par.loadIVBounds:                           ; preds = %polly.par.setup, %polly.par.checkNext.loopexit
196  %polly.par.LB = load i64, i64* %polly.par.LBPtr, align 8
197  %polly.par.UB = load i64, i64* %polly.par.UBPtr, align 8
198  %polly.par.UBAdjusted = add i64 %polly.par.UB, -1
199  br label %polly.loop_header
200
201polly.loop_header:                                ; preds = %polly.loop_exit4, %polly.par.loadIVBounds
202  %polly.indvar = phi i64 [ %polly.par.LB, %polly.par.loadIVBounds ], [ %polly.indvar_next, %polly.loop_exit4 ]
203  %4 = add nsw i64 %polly.indvar, 63
204  br label %polly.loop_header2
205
206polly.loop_exit4:                                 ; preds = %polly.loop_exit10
207  %polly.indvar_next = add nsw i64 %polly.indvar, 64
208  %polly.loop_cond = icmp sgt i64 %polly.indvar_next, %polly.par.UBAdjusted
209  br i1 %polly.loop_cond, label %polly.par.checkNext.loopexit, label %polly.loop_header
210
211polly.loop_header2:                               ; preds = %polly.loop_header, %polly.loop_exit10
212  %indvar = phi i64 [ 0, %polly.loop_header ], [ %indvar.next, %polly.loop_exit10 ]
213  %polly.indvar5 = phi i64 [ 0, %polly.loop_header ], [ %polly.indvar_next6, %polly.loop_exit10 ]
214  %5 = shl i64 %indvar, 6
215  %offset.idx.1 = or i64 %5, 16
216  %offset.idx.2 = or i64 %5, 32
217  %offset.idx.3 = or i64 %5, 48
218  br label %polly.loop_header8
219
220polly.loop_exit10:                                ; preds = %polly.loop_exit16
221  %polly.indvar_next6 = add nuw nsw i64 %polly.indvar5, 64
222  %polly.loop_cond7 = icmp ult i64 %polly.indvar_next6, 1536
223  %indvar.next = add i64 %indvar, 1
224  br i1 %polly.loop_cond7, label %polly.loop_header2, label %polly.loop_exit4
225
226polly.loop_header8:                               ; preds = %polly.loop_header2, %polly.loop_exit16
227  %indvars.iv3 = phi i64 [ 64, %polly.loop_header2 ], [ %indvars.iv.next4, %polly.loop_exit16 ]
228  %polly.indvar11 = phi i64 [ 0, %polly.loop_header2 ], [ %polly.indvar_next12, %polly.loop_exit16 ]
229  br label %polly.loop_header14
230
231polly.loop_exit16:                                ; preds = %polly.loop_exit22
232  %polly.indvar_next12 = add nuw nsw i64 %polly.indvar11, 64
233  %polly.loop_cond13 = icmp ult i64 %polly.indvar_next12, 1536
234  %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 64
235  br i1 %polly.loop_cond13, label %polly.loop_header8, label %polly.loop_exit10
236
237polly.loop_header14:                              ; preds = %polly.loop_header8, %polly.loop_exit22
238  %polly.indvar17 = phi i64 [ %polly.indvar_next18, %polly.loop_exit22 ], [ %polly.indvar, %polly.loop_header8 ]
239  %6 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %polly.indvar17, i64 %5
240  %7 = bitcast float* %6 to <16 x float>*
241  %8 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %polly.indvar17, i64 %offset.idx.1
242  %9 = bitcast float* %8 to <16 x float>*
243  %10 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %polly.indvar17, i64 %offset.idx.2
244  %11 = bitcast float* %10 to <16 x float>*
245  %12 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %polly.indvar17, i64 %offset.idx.3
246  %13 = bitcast float* %12 to <16 x float>*
247  %.promoted = load <16 x float>, <16 x float>* %7, align 4, !alias.scope !7, !noalias !9
248  %.promoted16 = load <16 x float>, <16 x float>* %9, align 4, !alias.scope !7, !noalias !9
249  %.promoted18 = load <16 x float>, <16 x float>* %11, align 4, !alias.scope !7, !noalias !9
250  %.promoted20 = load <16 x float>, <16 x float>* %13, align 4, !alias.scope !7, !noalias !9
251  br label %vector.ph
252
253polly.loop_exit22:                                ; preds = %vector.ph
254  store <16 x float> %interleaved.vec, <16 x float>* %7, align 4, !alias.scope !7, !noalias !9
255  store <16 x float> %interleaved.vec.1, <16 x float>* %9, align 4, !alias.scope !7, !noalias !9
256  store <16 x float> %interleaved.vec.2, <16 x float>* %11, align 4, !alias.scope !7, !noalias !9
257  store <16 x float> %interleaved.vec.3, <16 x float>* %13, align 4, !alias.scope !7, !noalias !9
258  %polly.indvar_next18 = add nsw i64 %polly.indvar17, 1
259  %polly.loop_cond19 = icmp slt i64 %polly.indvar17, %4
260  br i1 %polly.loop_cond19, label %polly.loop_header14, label %polly.loop_exit16
261
262vector.ph:                                        ; preds = %polly.loop_header14, %vector.ph
263  %wide.vec.321 = phi <16 x float> [ %.promoted20, %polly.loop_header14 ], [ %interleaved.vec.3, %vector.ph ]
264  %wide.vec.219 = phi <16 x float> [ %.promoted18, %polly.loop_header14 ], [ %interleaved.vec.2, %vector.ph ]
265  %wide.vec.117 = phi <16 x float> [ %.promoted16, %polly.loop_header14 ], [ %interleaved.vec.1, %vector.ph ]
266  %wide.vec15 = phi <16 x float> [ %.promoted, %polly.loop_header14 ], [ %interleaved.vec, %vector.ph ]
267  %polly.indvar23 = phi i64 [ %polly.indvar11, %polly.loop_header14 ], [ %polly.indvar_next24, %vector.ph ]
268  %scevgep40 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %polly.indvar17, i64 %polly.indvar23
269  %_p_scalar_41 = load float, float* %scevgep40, align 4, !alias.scope !10, !noalias !12
270  %broadcast.splatinsert13 = insertelement <4 x float> undef, float %_p_scalar_41, i32 0
271  %broadcast.splat14 = shufflevector <4 x float> %broadcast.splatinsert13, <4 x float> undef, <4 x i32> zeroinitializer
272  %strided.vec = shufflevector <16 x float> %wide.vec15, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
273  %strided.vec5 = shufflevector <16 x float> %wide.vec15, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
274  %strided.vec6 = shufflevector <16 x float> %wide.vec15, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
275  %strided.vec7 = shufflevector <16 x float> %wide.vec15, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
276  %14 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %polly.indvar23, i64 %5
277  %15 = bitcast float* %14 to <16 x float>*
278  %wide.vec8 = load <16 x float>, <16 x float>* %15, align 16, !alias.scope !11, !noalias !13
279  %strided.vec9 = shufflevector <16 x float> %wide.vec8, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
280  %strided.vec10 = shufflevector <16 x float> %wide.vec8, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
281  %strided.vec11 = shufflevector <16 x float> %wide.vec8, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
282  %strided.vec12 = shufflevector <16 x float> %wide.vec8, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
283  %16 = fmul <4 x float> %broadcast.splat14, %strided.vec9
284  %17 = fadd <4 x float> %strided.vec, %16
285  %18 = fmul <4 x float> %broadcast.splat14, %strided.vec10
286  %19 = fadd <4 x float> %strided.vec5, %18
287  %20 = fmul <4 x float> %broadcast.splat14, %strided.vec11
288  %21 = fadd <4 x float> %strided.vec6, %20
289  %22 = fmul <4 x float> %broadcast.splat14, %strided.vec12
290  %23 = fadd <4 x float> %strided.vec7, %22
291  %24 = shufflevector <4 x float> %17, <4 x float> %19, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
292  %25 = shufflevector <4 x float> %21, <4 x float> %23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
293  %interleaved.vec = shufflevector <8 x float> %24, <8 x float> %25, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
294  %strided.vec.1 = shufflevector <16 x float> %wide.vec.117, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
295  %strided.vec5.1 = shufflevector <16 x float> %wide.vec.117, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
296  %strided.vec6.1 = shufflevector <16 x float> %wide.vec.117, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
297  %strided.vec7.1 = shufflevector <16 x float> %wide.vec.117, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
298  %26 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %polly.indvar23, i64 %offset.idx.1
299  %27 = bitcast float* %26 to <16 x float>*
300  %wide.vec8.1 = load <16 x float>, <16 x float>* %27, align 16, !alias.scope !11, !noalias !13
301  %strided.vec9.1 = shufflevector <16 x float> %wide.vec8.1, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
302  %strided.vec10.1 = shufflevector <16 x float> %wide.vec8.1, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
303  %strided.vec11.1 = shufflevector <16 x float> %wide.vec8.1, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
304  %strided.vec12.1 = shufflevector <16 x float> %wide.vec8.1, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
305  %28 = fmul <4 x float> %broadcast.splat14, %strided.vec9.1
306  %29 = fadd <4 x float> %strided.vec.1, %28
307  %30 = fmul <4 x float> %broadcast.splat14, %strided.vec10.1
308  %31 = fadd <4 x float> %strided.vec5.1, %30
309  %32 = fmul <4 x float> %broadcast.splat14, %strided.vec11.1
310  %33 = fadd <4 x float> %strided.vec6.1, %32
311  %34 = fmul <4 x float> %broadcast.splat14, %strided.vec12.1
312  %35 = fadd <4 x float> %strided.vec7.1, %34
313  %36 = shufflevector <4 x float> %29, <4 x float> %31, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
314  %37 = shufflevector <4 x float> %33, <4 x float> %35, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
315  %interleaved.vec.1 = shufflevector <8 x float> %36, <8 x float> %37, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
316  %strided.vec.2 = shufflevector <16 x float> %wide.vec.219, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
317  %strided.vec5.2 = shufflevector <16 x float> %wide.vec.219, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
318  %strided.vec6.2 = shufflevector <16 x float> %wide.vec.219, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
319  %strided.vec7.2 = shufflevector <16 x float> %wide.vec.219, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
320  %38 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %polly.indvar23, i64 %offset.idx.2
321  %39 = bitcast float* %38 to <16 x float>*
322  %wide.vec8.2 = load <16 x float>, <16 x float>* %39, align 16, !alias.scope !11, !noalias !13
323  %strided.vec9.2 = shufflevector <16 x float> %wide.vec8.2, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
324  %strided.vec10.2 = shufflevector <16 x float> %wide.vec8.2, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
325  %strided.vec11.2 = shufflevector <16 x float> %wide.vec8.2, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
326  %strided.vec12.2 = shufflevector <16 x float> %wide.vec8.2, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
327  %40 = fmul <4 x float> %broadcast.splat14, %strided.vec9.2
328  %41 = fadd <4 x float> %strided.vec.2, %40
329  %42 = fmul <4 x float> %broadcast.splat14, %strided.vec10.2
330  %43 = fadd <4 x float> %strided.vec5.2, %42
331  %44 = fmul <4 x float> %broadcast.splat14, %strided.vec11.2
332  %45 = fadd <4 x float> %strided.vec6.2, %44
333  %46 = fmul <4 x float> %broadcast.splat14, %strided.vec12.2
334  %47 = fadd <4 x float> %strided.vec7.2, %46
335  %48 = shufflevector <4 x float> %41, <4 x float> %43, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
336  %49 = shufflevector <4 x float> %45, <4 x float> %47, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
337  %interleaved.vec.2 = shufflevector <8 x float> %48, <8 x float> %49, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
338  %strided.vec.3 = shufflevector <16 x float> %wide.vec.321, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
339  %strided.vec5.3 = shufflevector <16 x float> %wide.vec.321, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
340  %strided.vec6.3 = shufflevector <16 x float> %wide.vec.321, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
341  %strided.vec7.3 = shufflevector <16 x float> %wide.vec.321, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
342  %50 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %polly.indvar23, i64 %offset.idx.3
343  %51 = bitcast float* %50 to <16 x float>*
344  %wide.vec8.3 = load <16 x float>, <16 x float>* %51, align 16, !alias.scope !11, !noalias !13
345  %strided.vec9.3 = shufflevector <16 x float> %wide.vec8.3, <16 x float> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
346  %strided.vec10.3 = shufflevector <16 x float> %wide.vec8.3, <16 x float> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
347  %strided.vec11.3 = shufflevector <16 x float> %wide.vec8.3, <16 x float> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
348  %strided.vec12.3 = shufflevector <16 x float> %wide.vec8.3, <16 x float> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
349  %52 = fmul <4 x float> %broadcast.splat14, %strided.vec9.3
350  %53 = fadd <4 x float> %strided.vec.3, %52
351  %54 = fmul <4 x float> %broadcast.splat14, %strided.vec10.3
352  %55 = fadd <4 x float> %strided.vec5.3, %54
353  %56 = fmul <4 x float> %broadcast.splat14, %strided.vec11.3
354  %57 = fadd <4 x float> %strided.vec6.3, %56
355  %58 = fmul <4 x float> %broadcast.splat14, %strided.vec12.3
356  %59 = fadd <4 x float> %strided.vec7.3, %58
357  %60 = shufflevector <4 x float> %53, <4 x float> %55, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
358  %61 = shufflevector <4 x float> %57, <4 x float> %59, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
359  %interleaved.vec.3 = shufflevector <8 x float> %60, <8 x float> %61, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
360  %polly.indvar_next24 = add nuw nsw i64 %polly.indvar23, 1
361  %exitcond = icmp eq i64 %polly.indvar_next24, %indvars.iv3
362  br i1 %exitcond, label %polly.loop_exit22, label %vector.ph
363}
364
365; Function Attrs: argmemonly nounwind
366declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #5
367
368attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "polly-optimized" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
369attributes #1 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
370attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
371attributes #3 = { nounwind }
372attributes #4 = { "polly.skip.fn" }
373attributes #5 = { argmemonly nounwind }
374
375!llvm.module.flags = !{!0}
376!llvm.ident = !{!1}
377
378!0 = !{i32 1, !"wchar_size", i32 4}
379!1 = !{!"clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"}
380!2 = distinct !{!2, !3, !"polly.alias.scope.MemRef_A"}
381!3 = distinct !{!3, !"polly.alias.scope.domain"}
382!4 = !{!5}
383!5 = distinct !{!5, !3, !"polly.alias.scope.MemRef_B"}
384!6 = !{!2}
385!7 = distinct !{!7, !8, !"polly.alias.scope.MemRef_C"}
386!8 = distinct !{!8, !"polly.alias.scope.domain"}
387!9 = !{!10, !11}
388!10 = distinct !{!10, !8, !"polly.alias.scope.MemRef_A"}
389!11 = distinct !{!11, !8, !"polly.alias.scope.MemRef_B"}
390!12 = !{!7, !11}
391!13 = !{!7, !10}
392