; ModuleID = '<stdin>'
;
; Polly/OpenMP-outlined build of matmul.c. Three 1536 x 1536 float matrices
; (@A, @B, @C) live in globals:
;   * @init_array fills @A and @B,
;   * @main zeroes @C and then accumulates C[i][j] += A[i][k] * B[k][j],
;   * @print_array dumps @C to stdout (it is not called from @main here).
; The parallel loop bodies are the internal *_polly_subfn functions, which pull
; row ranges from the GOMP_* runtime entry points declared further down
; (presumably libgomp -- confirm against the link line).
source_filename = "matmul.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }

@A = common dso_local local_unnamed_addr global [1536 x [1536 x float]] zeroinitializer, align 16
@B = common dso_local local_unnamed_addr global [1536 x [1536 x float]] zeroinitializer, align 16
@stdout = external dso_local local_unnamed_addr global %struct._IO_FILE*, align 8
@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
@C = common dso_local local_unnamed_addr global [1536 x [1536 x float]] zeroinitializer, align 16

; Starts the parallel loop whose body is @init_array_polly_subfn, runs the same
; body on the calling thread, then closes the parallel region.
; The trailing arguments (0, 1536, 1) are presumably start / end / increment of
; the iteration space per the GOMP ABI -- TODO confirm.
; Function Attrs: noinline nounwind uwtable
define dso_local void @init_array() local_unnamed_addr #0 {
entry:
  ; Empty context struct: the outlined body needs no captured values.
  %ctx.slot = alloca {}, align 8
  %ctx = bitcast {}* %ctx.slot to i8*
  call void @GOMP_parallel_loop_runtime_start(void (i8*)* nonnull @init_array_polly_subfn, i8* nonnull %ctx, i32 0, i64 0, i64 1536, i64 1) #3
  call void @init_array_polly_subfn(i8* nonnull %ctx) #3
  call void @GOMP_parallel_end() #3
  ret void
}

; Prints every element of @C to stdout with the "%lf " format. A newline is
; written after each column whose index is 79 modulo 80 and once more at the
; end of every row.
; Function Attrs: noinline nounwind uwtable
define dso_local void @print_array() local_unnamed_addr #1 {
entry:
  br label %row.header

row.header:                                       ; preds = %row.done, %entry
  %row = phi i64 [ 0, %entry ], [ %row.next, %row.done ]
  %out.first = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
  br label %col.body

col.body:                                         ; preds = %col.latch, %row.header
  %col = phi i64 [ 0, %row.header ], [ %col.next, %col.latch ]
  ; @stdout is re-read after every call that might have changed it.
  %out = phi %struct._IO_FILE* [ %out.first, %row.header ], [ %out.reload, %col.latch ]
  %elt.ptr = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %row, i64 %col
  %elt = load float, float* %elt.ptr, align 4
  %elt.f64 = fpext float %elt to double
  %printed = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %out, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), double %elt.f64) #3
  %col.i32 = trunc i64 %col to i32
  %col.mod80 = urem i32 %col.i32, 80
  %wrap = icmp eq i32 %col.mod80, 79
  br i1 %wrap, label %col.newline, label %col.latch

col.newline:                                      ; preds = %col.body
  %out.wrap = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
  ; 10 == '\n'
  %wrap.nl = tail call i32 @fputc(i32 10, %struct._IO_FILE* %out.wrap)
  br label %col.latch

col.latch:                                        ; preds = %col.newline, %col.body
  %col.next = add nuw nsw i64 %col, 1
  %out.reload = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
  %more.cols = icmp ne i64 %col.next, 1536
  br i1 %more.cols, label %col.body, label %row.done

row.done:                                         ; preds = %col.latch
  %row.nl = tail call i32 @fputc(i32 10, %struct._IO_FILE* %out.reload)
  %row.next = add nuw nsw i64 %row, 1
  %more.rows = icmp ne i64 %row.next, 1536
  br i1 %more.rows, label %row.header, label %exit

exit:                                             ; preds = %row.done
  ret void
}

; Function Attrs: nounwind
declare dso_local i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture readonly, ...) local_unnamed_addr #2

; Initialises the inputs via @init_array, then runs two parallel loops over the
; rows of @C: @main_polly_subfn (zero fill, increment 1) followed by
; @main_polly_subfn_1 (tiled multiply-accumulate, increment 64). Each loop body
; is also invoked directly on the calling thread. Always returns 0.
; Function Attrs: noinline nounwind uwtable
define dso_local i32 @main() local_unnamed_addr #0 {
entry:
  ; Empty context struct shared by both outlined loop bodies.
  %ctx.slot = alloca {}, align 8
  tail call void @init_array()
  %ctx = bitcast {}* %ctx.slot to i8*

  ; Phase 1: zero @C.
  call void @GOMP_parallel_loop_runtime_start(void (i8*)* nonnull @main_polly_subfn, i8* nonnull %ctx, i32 0, i64 0, i64 1536, i64 1) #3
  call void @main_polly_subfn(i8* nonnull %ctx) #3
  call void @GOMP_parallel_end() #3

  ; Phase 2: C += A * B, one 64-row tile per iteration.
  call void @GOMP_parallel_loop_runtime_start(void (i8*)* nonnull @main_polly_subfn_1, i8* nonnull %ctx, i32 0, i64 0, i64 1536, i64 64) #3
  call void @main_polly_subfn_1(i8* nonnull %ctx) #3
  call void @GOMP_parallel_end() #3
  ret i32 0
}

; Function Attrs: nounwind
declare i32 @fputc(i32, %struct._IO_FILE* nocapture) local_unnamed_addr #3

; Outlined body of the @init_array loop. Repeatedly asks
; @GOMP_loop_runtime_next for a row range; for every row i in [LB, UB - 1] and
; every column j in [0, 1536) it stores
;     float(((i * j) & 1023) + 1) * 0.5)
; to both A[i][j] and B[i][j] (the product is formed in 32 bits from the
; truncated indices). The context argument is unused.
define internal void @init_array_polly_subfn(i8* nocapture readnone %polly.par.userContext) #4 {
setup:
  %lb.slot = alloca i64, align 8
  %ub.slot = alloca i64, align 8
  %got.first = call i8 @GOMP_loop_runtime_next(i64* nonnull %lb.slot, i64* nonnull %ub.slot)
  %has.first = icmp ne i8 %got.first, 0
  br i1 %has.first, label %chunk, label %done

chunk:                                            ; preds = %next.chunk, %setup
  %lb = load i64, i64* %lb.slot, align 8
  %ub = load i64, i64* %ub.slot, align 8
  ; Last row of this chunk (the runtime's upper bound is treated as exclusive).
  %last.row = add i64 %ub, -1
  br label %row.header

row.header:                                       ; preds = %row.latch, %chunk
  %row = phi i64 [ %lb, %chunk ], [ %row.next, %row.latch ]
  %row.i32 = trunc i64 %row to i32
  br label %col.body

col.body:                                         ; preds = %col.body, %row.header
  %col = phi i64 [ 0, %row.header ], [ %col.next, %col.body ]
  %col.i32 = trunc i64 %col to i32
  %prod = mul i32 %col.i32, %row.i32
  %prod.lo = and i32 %prod, 1023
  %numer = add nuw nsw i32 %prod.lo, 1
  %numer.f64 = sitofp i32 %numer to double
  %halved = fmul double %numer.f64, 5.000000e-01
  %value = fptrunc double %halved to float
  %a.ptr = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %row, i64 %col
  store float %value, float* %a.ptr, align 4, !alias.scope !2, !noalias !4
  %b.ptr = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %row, i64 %col
  store float %value, float* %b.ptr, align 4, !alias.scope !5, !noalias !6
  %col.next = add nuw nsw i64 %col, 1
  %more.cols = icmp ne i64 %col.next, 1536
  br i1 %more.cols, label %col.body, label %row.latch

row.latch:                                        ; preds = %col.body
  %row.next = add nsw i64 %row, 1
  %more.rows = icmp slt i64 %row, %last.row
  br i1 %more.rows, label %row.header, label %next.chunk

next.chunk:                                       ; preds = %row.latch
  %got.more = call i8 @GOMP_loop_runtime_next(i64* nonnull %lb.slot, i64* nonnull %ub.slot)
  %has.more = icmp ne i8 %got.more, 0
  br i1 %has.more, label %chunk, label %done

done:                                             ; preds = %next.chunk, %setup
  call void @GOMP_loop_end_nowait()
  ret void
}

declare i8 @GOMP_loop_runtime_next(i64*, i64*) local_unnamed_addr

declare void @GOMP_loop_end_nowait() local_unnamed_addr

declare void @GOMP_parallel_loop_runtime_start(void (i8*)*, i8*, i32, i64, i64, i64) local_unnamed_addr

declare void @GOMP_parallel_end() local_unnamed_addr

; Outlined body of the first @main loop: zero-fills whole rows of @C.
; For each range handed out by @GOMP_loop_runtime_next it memsets
; (max(LB, UB - 1) + 1 - LB) rows starting at C[LB][0]; one row is
; 1536 floats * 4 bytes = 6144 bytes. The context argument is unused.
define internal void @main_polly_subfn(i8* nocapture readnone %polly.par.userContext) #4 {
setup:
  %lb.slot = alloca i64, align 8
  %ub.slot = alloca i64, align 8
  %got.first = call i8 @GOMP_loop_runtime_next(i64* nonnull %lb.slot, i64* nonnull %ub.slot)
  %has.first = icmp ne i8 %got.first, 0
  br i1 %has.first, label %chunk, label %done

chunk:                                            ; preds = %chunk, %setup
  %lb = load i64, i64* %lb.slot, align 8
  %ub = load i64, i64* %ub.slot, align 8
  %last.row = add i64 %ub, -1
  %dst.f32 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %lb, i64 0
  %dst = bitcast float* %dst.f32 to i8*
  ; Signed max(LB, UB - 1): the row count below is therefore never less than 1.
  %lb.is.max = icmp sgt i64 %lb, %last.row
  %top.row = select i1 %lb.is.max, i64 %lb, i64 %last.row
  %top.excl = add i64 %top.row, 1
  %row.count = sub i64 %top.excl, %lb
  %byte.count = mul i64 %row.count, 6144
  call void @llvm.memset.p0i8.i64(i8* align 16 %dst, i8 0, i64 %byte.count, i1 false)
  %got.more = call i8 @GOMP_loop_runtime_next(i64* nonnull %lb.slot, i64* nonnull %ub.slot)
  %has.more = icmp ne i8 %got.more, 0
  br i1 %has.more, label %chunk, label %done

done:                                             ; preds = %chunk, %setup
  call void @GOMP_loop_end_nowait()
  ret void
}

; Outlined body of the second @main loop: tiled C += A * B.
;
; Loop nest (outermost first), for each range [LB, UB) from the runtime:
;   ii : row tile start, LB, LB + 64, ... while ii + 64 <= UB - 1
;   jj : column tile start, 0, 64, ..., 1472
;   kk : reduction tile start, 0, 64, ..., 1472
;   i  : row, ii .. ii + 63
;   k  : reduction index, kk .. kk + 63
; For a fixed (i, jj, kk) the 64 floats C[i][jj .. jj+63] are loaded as four
; <16 x float> accumulators, updated with A[i][k] * B[k][jj .. jj+63] for each
; k, and stored back once after the k loop.
;
; The multiply-add is done directly on <16 x float>. Each lane computes
; C + (A * B) with the same operand order as the generated code this replaces,
; which split every vector into four stride-4 <4 x float> pieces and
; re-interleaved them afterwards (a lane-wise identity).
; The context argument is unused.
define internal void @main_polly_subfn_1(i8* nocapture readnone %polly.par.userContext) #4 {
setup:
  %lb.slot = alloca i64, align 8
  %ub.slot = alloca i64, align 8
  %got.first = call i8 @GOMP_loop_runtime_next(i64* nonnull %lb.slot, i64* nonnull %ub.slot)
  %has.first = icmp ne i8 %got.first, 0
  br i1 %has.first, label %chunk, label %done

chunk:                                            ; preds = %next.chunk, %setup
  %lb = load i64, i64* %lb.slot, align 8
  %ub = load i64, i64* %ub.slot, align 8
  %last.row = add i64 %ub, -1
  br label %rowtile.header

rowtile.header:                                   ; preds = %rowtile.latch, %chunk
  %ii = phi i64 [ %lb, %chunk ], [ %ii.next, %rowtile.latch ]
  %ii.last = add nsw i64 %ii, 63
  br label %coltile.header

coltile.header:                                   ; preds = %coltile.latch, %rowtile.header
  %jj = phi i64 [ 0, %rowtile.header ], [ %jj.next, %coltile.latch ]
  ; %jj is a multiple of 64, so OR-ing in 16/32/48 is the same as adding them.
  %jj16 = or i64 %jj, 16
  %jj32 = or i64 %jj, 32
  %jj48 = or i64 %jj, 48
  br label %ktile.header

ktile.header:                                     ; preds = %ktile.latch, %coltile.header
  %kk = phi i64 [ 0, %coltile.header ], [ %kk.end, %ktile.latch ]
  ; Exclusive end of this reduction tile; doubles as the next tile's start.
  %kk.end = add nuw nsw i64 %kk, 64
  br label %row.header

row.header:                                       ; preds = %row.latch, %ktile.header
  %i = phi i64 [ %i.next, %row.latch ], [ %ii, %ktile.header ]
  %c0.f32 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %i, i64 %jj
  %c0.ptr = bitcast float* %c0.f32 to <16 x float>*
  %c1.f32 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %i, i64 %jj16
  %c1.ptr = bitcast float* %c1.f32 to <16 x float>*
  %c2.f32 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %i, i64 %jj32
  %c2.ptr = bitcast float* %c2.f32 to <16 x float>*
  %c3.f32 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %i, i64 %jj48
  %c3.ptr = bitcast float* %c3.f32 to <16 x float>*
  %c0.init = load <16 x float>, <16 x float>* %c0.ptr, align 4, !alias.scope !7, !noalias !9
  %c1.init = load <16 x float>, <16 x float>* %c1.ptr, align 4, !alias.scope !7, !noalias !9
  %c2.init = load <16 x float>, <16 x float>* %c2.ptr, align 4, !alias.scope !7, !noalias !9
  %c3.init = load <16 x float>, <16 x float>* %c3.ptr, align 4, !alias.scope !7, !noalias !9
  br label %k.body

k.body:                                           ; preds = %k.body, %row.header
  ; Running sums for the four 16-column groups, carried as SSA values.
  %acc3 = phi <16 x float> [ %c3.init, %row.header ], [ %acc3.next, %k.body ]
  %acc2 = phi <16 x float> [ %c2.init, %row.header ], [ %acc2.next, %k.body ]
  %acc1 = phi <16 x float> [ %c1.init, %row.header ], [ %acc1.next, %k.body ]
  %acc0 = phi <16 x float> [ %c0.init, %row.header ], [ %acc0.next, %k.body ]
  %k = phi i64 [ %kk, %row.header ], [ %k.next, %k.body ]

  ; Broadcast A[i][k] to all 16 lanes.
  %a.ptr = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %i, i64 %k
  %a = load float, float* %a.ptr, align 4, !alias.scope !10, !noalias !12
  %a.lane0 = insertelement <16 x float> undef, float %a, i32 0
  %a.splat = shufflevector <16 x float> %a.lane0, <16 x float> undef, <16 x i32> zeroinitializer

  ; Columns jj .. jj+15
  %b0.f32 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %k, i64 %jj
  %b0.ptr = bitcast float* %b0.f32 to <16 x float>*
  %b0 = load <16 x float>, <16 x float>* %b0.ptr, align 16, !alias.scope !11, !noalias !13
  %prod0 = fmul <16 x float> %a.splat, %b0
  %acc0.next = fadd <16 x float> %acc0, %prod0

  ; Columns jj+16 .. jj+31
  %b1.f32 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %k, i64 %jj16
  %b1.ptr = bitcast float* %b1.f32 to <16 x float>*
  %b1 = load <16 x float>, <16 x float>* %b1.ptr, align 16, !alias.scope !11, !noalias !13
  %prod1 = fmul <16 x float> %a.splat, %b1
  %acc1.next = fadd <16 x float> %acc1, %prod1

  ; Columns jj+32 .. jj+47
  %b2.f32 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %k, i64 %jj32
  %b2.ptr = bitcast float* %b2.f32 to <16 x float>*
  %b2 = load <16 x float>, <16 x float>* %b2.ptr, align 16, !alias.scope !11, !noalias !13
  %prod2 = fmul <16 x float> %a.splat, %b2
  %acc2.next = fadd <16 x float> %acc2, %prod2

  ; Columns jj+48 .. jj+63
  %b3.f32 = getelementptr [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %k, i64 %jj48
  %b3.ptr = bitcast float* %b3.f32 to <16 x float>*
  %b3 = load <16 x float>, <16 x float>* %b3.ptr, align 16, !alias.scope !11, !noalias !13
  %prod3 = fmul <16 x float> %a.splat, %b3
  %acc3.next = fadd <16 x float> %acc3, %prod3

  %k.next = add nuw nsw i64 %k, 1
  %more.k = icmp ne i64 %k.next, %kk.end
  br i1 %more.k, label %k.body, label %row.latch

row.latch:                                        ; preds = %k.body
  ; Write the finished partial sums for row i back to @C.
  store <16 x float> %acc0.next, <16 x float>* %c0.ptr, align 4, !alias.scope !7, !noalias !9
  store <16 x float> %acc1.next, <16 x float>* %c1.ptr, align 4, !alias.scope !7, !noalias !9
  store <16 x float> %acc2.next, <16 x float>* %c2.ptr, align 4, !alias.scope !7, !noalias !9
  store <16 x float> %acc3.next, <16 x float>* %c3.ptr, align 4, !alias.scope !7, !noalias !9
  %i.next = add nsw i64 %i, 1
  %more.rows = icmp slt i64 %i, %ii.last
  br i1 %more.rows, label %row.header, label %ktile.latch

ktile.latch:                                      ; preds = %row.latch
  %more.ktiles = icmp ult i64 %kk.end, 1536
  br i1 %more.ktiles, label %ktile.header, label %coltile.latch

coltile.latch:                                    ; preds = %ktile.latch
  %jj.next = add nuw nsw i64 %jj, 64
  %more.coltiles = icmp ult i64 %jj.next, 1536
  br i1 %more.coltiles, label %coltile.header, label %rowtile.latch

rowtile.latch:                                    ; preds = %coltile.latch
  %ii.next = add nsw i64 %ii, 64
  %chunk.finished = icmp sgt i64 %ii.next, %last.row
  br i1 %chunk.finished, label %next.chunk, label %rowtile.header

next.chunk:                                       ; preds = %rowtile.latch
  %got.more = call i8 @GOMP_loop_runtime_next(i64* nonnull %lb.slot, i64* nonnull %ub.slot)
  %has.more = icmp ne i8 %got.more, 0
  br i1 %has.more, label %chunk, label %done

done:                                             ; preds = %next.chunk, %setup
  call void @GOMP_loop_end_nowait()
  ret void
}

; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #5

attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "polly-optimized" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind }
attributes #4 = { "polly.skip.fn" }
attributes #5 = { argmemonly nounwind }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"}
; Alias scopes used by @init_array_polly_subfn (domain !3).
!2 = distinct !{!2, !3, !"polly.alias.scope.MemRef_A"}
!3 = distinct !{!3, !"polly.alias.scope.domain"}
!4 = !{!5}
!5 = distinct !{!5, !3, !"polly.alias.scope.MemRef_B"}
!6 = !{!2}
; Alias scopes used by @main_polly_subfn_1 (domain !8).
!7 = distinct !{!7, !8, !"polly.alias.scope.MemRef_C"}
!8 = distinct !{!8, !"polly.alias.scope.domain"}
!9 = !{!10, !11}
!10 = distinct !{!10, !8, !"polly.alias.scope.MemRef_A"}
!11 = distinct !{!11, !8, !"polly.alias.scope.MemRef_B"}
!12 = !{!7, !11}
!13 = !{!7, !10}