; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -hoist-const-loads=false < %s | FileCheck %s

; The loop below implements a fully unrolled, 2-wide vectorized 8x8 i64
; matrix-multiply-like kernel (C += A * B); the large number of live
; <2 x i64> accumulators forces heavy vector-register spilling inside the
; loop, which the generated assertions pin down.

@A = external dso_local local_unnamed_addr global [8 x [8 x i64]], align 8
@B = external dso_local local_unnamed_addr global [8 x [8 x i64]], align 8
@C = external dso_local local_unnamed_addr global [8 x [8 x i64]], align 8

define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-LABEL: run_test:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #208
; CHECK-NEXT:    .cfi_def_cfa_offset 208
; CHECK-NEXT:    stp d15, d14, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #112] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #128] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #144] // 16-byte Folded Spill
; CHECK-NEXT:    str x23, [sp, #160] // 8-byte Folded Spill
; CHECK-NEXT:    stp x22, x21, [sp, #176] // 16-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #192] // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_offset w19, -8
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    .cfi_offset w21, -24
; CHECK-NEXT:    .cfi_offset w22, -32
; CHECK-NEXT:    .cfi_offset w23, -48
; CHECK-NEXT:    .cfi_offset b8, -56
; CHECK-NEXT:    .cfi_offset b9, -64
; CHECK-NEXT:    .cfi_offset b10, -72
; CHECK-NEXT:    .cfi_offset b11, -80
; CHECK-NEXT:    .cfi_offset b12, -88
; CHECK-NEXT:    .cfi_offset b13, -96
; CHECK-NEXT:    .cfi_offset b14, -104
; CHECK-NEXT:    .cfi_offset b15, -112
; CHECK-NEXT:    movi v2.2d, #0000000000000000
; CHECK-NEXT:    // implicit-def: $q1
; CHECK-NEXT:    mov x8, xzr
; CHECK-NEXT:    adrp x9, B+48
; CHECK-NEXT:    add x9, x9, :lo12:B+48
; CHECK-NEXT:    adrp x10, A
; CHECK-NEXT:    add x10, x10, :lo12:A
; CHECK-NEXT:    mov x11, xzr
; CHECK-NEXT:    // kill: killed $q1
; CHECK-NEXT:    // implicit-def: $q1
; CHECK-NEXT:    mov x12, xzr
; CHECK-NEXT:    // implicit-def: $q0
; CHECK-NEXT:    // implicit-def: $q3
; CHECK-NEXT:    // implicit-def: $q4
; CHECK-NEXT:    // implicit-def: $q5
; CHECK-NEXT:    // implicit-def: $q7
; CHECK-NEXT:    // implicit-def: $q10
; CHECK-NEXT:    // implicit-def: $q17
; CHECK-NEXT:    // implicit-def: $q6
; CHECK-NEXT:    // implicit-def: $q18
; CHECK-NEXT:    // implicit-def: $q19
; CHECK-NEXT:    // implicit-def: $q20
; CHECK-NEXT:    // implicit-def: $q21
; CHECK-NEXT:    // implicit-def: $q22
; CHECK-NEXT:    // implicit-def: $q23
; CHECK-NEXT:    // implicit-def: $q24
; CHECK-NEXT:    // implicit-def: $q9
; CHECK-NEXT:    // implicit-def: $q27
; CHECK-NEXT:    // implicit-def: $q12
; CHECK-NEXT:    // implicit-def: $q28
; CHECK-NEXT:    // implicit-def: $q14
; CHECK-NEXT:    // implicit-def: $q15
; CHECK-NEXT:    // implicit-def: $q29
; CHECK-NEXT:    // implicit-def: $q30
; CHECK-NEXT:    // implicit-def: $q11
; CHECK-NEXT:    // implicit-def: $q31
; CHECK-NEXT:    // implicit-def: $q13
; CHECK-NEXT:    // kill: killed $q1
; CHECK-NEXT:    // implicit-def: $q1
; CHECK-NEXT:    // kill: killed $q1
; CHECK-NEXT:  .LBB0_1: // %for.cond1.preheader
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    stp q29, q15, [sp] // 32-byte Folded Spill
; CHECK-NEXT:    ldr q15, [x8]
; CHECK-NEXT:    ldr x15, [x8]
; CHECK-NEXT:    str q14, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    add x20, x10, x11
; CHECK-NEXT:    mov v8.16b, v28.16b
; CHECK-NEXT:    fmov x2, d15
; CHECK-NEXT:    mov x17, v15.d[1]
; CHECK-NEXT:    ldr q14, [x8]
; CHECK-NEXT:    mov v28.16b, v24.16b
; CHECK-NEXT:    mov v24.16b, v20.16b
; CHECK-NEXT:    mov v20.16b, v17.16b
; CHECK-NEXT:    fmov x13, d14
; CHECK-NEXT:    mov x16, v14.d[1]
; CHECK-NEXT:    mov v17.16b, v5.16b
; CHECK-NEXT:    mul x3, x2, x15
; CHECK-NEXT:    ldr q14, [x9], #64
; CHECK-NEXT:    ldr q5, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x6, [x8]
; CHECK-NEXT:    ldr x20, [x20, #128]
; CHECK-NEXT:    mul x1, x17, x15
; CHECK-NEXT:    mov x14, v14.d[1]
; CHECK-NEXT:    fmov x5, d14
; CHECK-NEXT:    mov v29.16b, v21.16b
; CHECK-NEXT:    mov v21.16b, v0.16b
; CHECK-NEXT:    mov v25.16b, v6.16b
; CHECK-NEXT:    mul x18, x13, x15
; CHECK-NEXT:    mov v6.16b, v2.16b
; CHECK-NEXT:    mov v26.16b, v22.16b
; CHECK-NEXT:    fmov d15, x3
; CHECK-NEXT:    mov v22.16b, v18.16b
; CHECK-NEXT:    mov v18.16b, v7.16b
; CHECK-NEXT:    mul x0, x16, x15
; CHECK-NEXT:    mov v7.16b, v3.16b
; CHECK-NEXT:    mov v16.16b, v4.16b
; CHECK-NEXT:    add x11, x11, #8
; CHECK-NEXT:    add x12, x12, #1
; CHECK-NEXT:    mov v15.d[1], x1
; CHECK-NEXT:    mul x4, x14, x15
; CHECK-NEXT:    cmp x11, #64
; CHECK-NEXT:    fmov d14, x18
; CHECK-NEXT:    mul x15, x5, x15
; CHECK-NEXT:    add v5.2d, v5.2d, v15.2d
; CHECK-NEXT:    mul x21, x2, x6
; CHECK-NEXT:    mov v14.d[1], x0
; CHECK-NEXT:    mul x2, x2, x20
; CHECK-NEXT:    fmov d0, x15
; CHECK-NEXT:    str q5, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    ldr q5, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    mul x22, x13, x20
; CHECK-NEXT:    add v5.2d, v5.2d, v14.2d
; CHECK-NEXT:    fmov d3, x21
; CHECK-NEXT:    mul x19, x17, x6
; CHECK-NEXT:    mov v0.d[1], x4
; CHECK-NEXT:    fmov d1, x2
; CHECK-NEXT:    mul x17, x17, x20
; CHECK-NEXT:    str q5, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    add v5.2d, v13.2d, v14.2d
; CHECK-NEXT:    fmov d2, x22
; CHECK-NEXT:    ldr q13, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT:    mul x7, x16, x6
; CHECK-NEXT:    ldp q15, q14, [sp, #16] // 32-byte Folded Reload
; CHECK-NEXT:    mov v3.d[1], x19
; CHECK-NEXT:    add v13.2d, v13.2d, v0.2d
; CHECK-NEXT:    mul x16, x16, x20
; CHECK-NEXT:    mov v1.d[1], x17
; CHECK-NEXT:    mul x23, x5, x20
; CHECK-NEXT:    str q13, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    mov v13.16b, v5.16b
; CHECK-NEXT:    mov v5.16b, v17.16b
; CHECK-NEXT:    mov v17.16b, v20.16b
; CHECK-NEXT:    mov v20.16b, v24.16b
; CHECK-NEXT:    mul x13, x13, x6
; CHECK-NEXT:    mov v24.16b, v28.16b
; CHECK-NEXT:    add v11.2d, v11.2d, v3.2d
; CHECK-NEXT:    mov v2.d[1], x16
; CHECK-NEXT:    add v15.2d, v15.2d, v1.2d
; CHECK-NEXT:    add v27.2d, v27.2d, v3.2d
; CHECK-NEXT:    mul x18, x14, x20
; CHECK-NEXT:    add v23.2d, v23.2d, v3.2d
; CHECK-NEXT:    add v19.2d, v19.2d, v3.2d
; CHECK-NEXT:    fmov d4, x23
; CHECK-NEXT:    add v10.2d, v10.2d, v3.2d
; CHECK-NEXT:    mul x15, x5, x6
; CHECK-NEXT:    fmov d0, x13
; CHECK-NEXT:    add v14.2d, v14.2d, v2.2d
; CHECK-NEXT:    add v2.2d, v6.2d, v3.2d
; CHECK-NEXT:    mul x14, x14, x6
; CHECK-NEXT:    mov v3.16b, v7.16b
; CHECK-NEXT:    mov v7.16b, v18.16b
; CHECK-NEXT:    mov v4.d[1], x18
; CHECK-NEXT:    mov v18.16b, v22.16b
; CHECK-NEXT:    mov v0.d[1], x7
; CHECK-NEXT:    fmov d1, x15
; CHECK-NEXT:    add v28.2d, v8.2d, v4.2d
; CHECK-NEXT:    mov v1.d[1], x14
; CHECK-NEXT:    add v31.2d, v31.2d, v0.2d
; CHECK-NEXT:    add v30.2d, v30.2d, v0.2d
; CHECK-NEXT:    add v12.2d, v12.2d, v0.2d
; CHECK-NEXT:    add v24.2d, v24.2d, v0.2d
; CHECK-NEXT:    add v22.2d, v26.2d, v0.2d
; CHECK-NEXT:    add v20.2d, v20.2d, v0.2d
; CHECK-NEXT:    add v18.2d, v18.2d, v0.2d
; CHECK-NEXT:    add v17.2d, v17.2d, v0.2d
; CHECK-NEXT:    add v7.2d, v7.2d, v0.2d
; CHECK-NEXT:    add v4.2d, v16.2d, v0.2d
; CHECK-NEXT:    add v3.2d, v3.2d, v0.2d
; CHECK-NEXT:    mov v0.16b, v21.16b
; CHECK-NEXT:    mov v21.16b, v29.16b
; CHECK-NEXT:    ldr q29, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    add v9.2d, v9.2d, v1.2d
; CHECK-NEXT:    add v6.2d, v25.2d, v1.2d
; CHECK-NEXT:    add v5.2d, v5.2d, v1.2d
; CHECK-NEXT:    add v29.2d, v29.2d, v1.2d
; CHECK-NEXT:    add v21.2d, v21.2d, v1.2d
; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    b.ne .LBB0_1
; CHECK-NEXT:  // %bb.2: // %for.cond.cleanup
; CHECK-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    adrp x8, C
; CHECK-NEXT:    add x8, x8, :lo12:C
; CHECK-NEXT:    stp q11, q30, [x8, #80]
; CHECK-NEXT:    ldp x20, x19, [sp, #192] // 16-byte Folded Reload
; CHECK-NEXT:    str q1, [x8]
; CHECK-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x23, [sp, #160] // 8-byte Folded Reload
; CHECK-NEXT:    stp q15, q14, [x8, #144]
; CHECK-NEXT:    ldp x22, x21, [sp, #176] // 16-byte Folded Reload
; CHECK-NEXT:    stp q1, q13, [x8, #16]
; CHECK-NEXT:    ldr q1, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT:    stp q28, q12, [x8, #176]
; CHECK-NEXT:    ldp d13, d12, [sp, #112] // 16-byte Folded Reload
; CHECK-NEXT:    stp q1, q31, [x8, #48]
; CHECK-NEXT:    ldp d15, d14, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT:    stp q9, q24, [x8, #240]
; CHECK-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
; CHECK-NEXT:    stp q19, q18, [x8, #336]
; CHECK-NEXT:    stp q10, q7, [x8, #400]
; CHECK-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
; CHECK-NEXT:    str q29, [x8, #112]
; CHECK-NEXT:    str q27, [x8, #208]
; CHECK-NEXT:    stp q23, q22, [x8, #272]
; CHECK-NEXT:    stp q21, q20, [x8, #304]
; CHECK-NEXT:    stp q6, q17, [x8, #368]
; CHECK-NEXT:    stp q5, q4, [x8, #432]
; CHECK-NEXT:    stp q2, q3, [x8, #464]
; CHECK-NEXT:    str q0, [x8, #496]
; CHECK-NEXT:    add sp, sp, #208
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w19
; CHECK-NEXT:    .cfi_restore w20
; CHECK-NEXT:    .cfi_restore w21
; CHECK-NEXT:    .cfi_restore w22
; CHECK-NEXT:    .cfi_restore w23
; CHECK-NEXT:    .cfi_restore b8
; CHECK-NEXT:    .cfi_restore b9
; CHECK-NEXT:    .cfi_restore b10
; CHECK-NEXT:    .cfi_restore b11
; CHECK-NEXT:    .cfi_restore b12
; CHECK-NEXT:    .cfi_restore b13
; CHECK-NEXT:    .cfi_restore b14
; CHECK-NEXT:    .cfi_restore b15
; CHECK-NEXT:    ret
entry:
  br label %for.cond1.preheader

for.cond1.preheader:                              ; preds = %for.cond1.preheader, %entry
  %0 = phi <2 x i64> [ undef, %entry ], [ %118, %for.cond1.preheader ]
  %1 = phi <2 x i64> [ undef, %entry ], [ %116, %for.cond1.preheader ]
  %2 = phi <2 x i64> [ zeroinitializer, %entry ], [ %114, %for.cond1.preheader ]
  %3 = phi <2 x i64> [ undef, %entry ], [ %112, %for.cond1.preheader ]
  %4 = phi <2 x i64> [ undef, %entry ], [ %107, %for.cond1.preheader ]
  %5 = phi <2 x i64> [ undef, %entry ], [ %105, %for.cond1.preheader ]
  %6 = phi <2 x i64> [ undef, %entry ], [ %103, %for.cond1.preheader ]
  %7 = phi <2 x i64> [ undef, %entry ], [ %101, %for.cond1.preheader ]
  %8 = phi <2 x i64> [ undef, %entry ], [ %96, %for.cond1.preheader ]
  %9 = phi <2 x i64> [ undef, %entry ], [ %94, %for.cond1.preheader ]
  %10 = phi <2 x i64> [ undef, %entry ], [ %92, %for.cond1.preheader ]
  %11 = phi <2 x i64> [ undef, %entry ], [ %90, %for.cond1.preheader ]
  %12 = phi <2 x i64> [ undef, %entry ], [ %85, %for.cond1.preheader ]
  %13 = phi <2 x i64> [ undef, %entry ], [ %83, %for.cond1.preheader ]
  %14 = phi <2 x i64> [ undef, %entry ], [ %81, %for.cond1.preheader ]
  %15 = phi <2 x i64> [ undef, %entry ], [ %79, %for.cond1.preheader ]
  %16 = phi <2 x i64> [ undef, %entry ], [ %74, %for.cond1.preheader ]
  %17 = phi <2 x i64> [ undef, %entry ], [ %72, %for.cond1.preheader ]
  %18 = phi <2 x i64> [ undef, %entry ], [ %70, %for.cond1.preheader ]
  %19 = phi <2 x i64> [ undef, %entry ], [ %65, %for.cond1.preheader ]
  %20 = phi <2 x i64> [ undef, %entry ], [ %63, %for.cond1.preheader ]
  %21 = phi <2 x i64> [ undef, %entry ], [ %61, %for.cond1.preheader ]
  %22 = phi <2 x i64> [ undef, %entry ], [ %56, %for.cond1.preheader ]
  %23 = phi <2 x i64> [ undef, %entry ], [ %54, %for.cond1.preheader ]
  %24 = phi <2 x i64> [ undef, %entry ], [ %52, %for.cond1.preheader ]
  %25 = phi <2 x i64> [ undef, %entry ], [ %50, %for.cond1.preheader ]
  %26 = phi <2 x i64> [ undef, %entry ], [ %45, %for.cond1.preheader ]
  %27 = phi <2 x i64> [ undef, %entry ], [ %43, %for.cond1.preheader ]
  %28 = phi <2 x i64> [ undef, %entry ], [ %41, %for.cond1.preheader ]
  %29 = phi <2 x i64> [ undef, %entry ], [ %39, %for.cond1.preheader ]
  %indvars.iv40 = phi i64 [ 0, %entry ], [ %indvars.iv.next41, %for.cond1.preheader ]
  %30 = load <2 x i64>, ptr null, align 8
  %31 = load <2 x i64>, ptr undef, align 8
  %arrayidx14.4.phi.trans.insert = getelementptr inbounds [8 x [8 x i64]], ptr @B, i64 0, i64 %indvars.iv40, i64 4
  %32 = load <2 x i64>, ptr null, align 8
  %arrayidx14.6.phi.trans.insert = getelementptr inbounds [8 x [8 x i64]], ptr @B, i64 0, i64 %indvars.iv40, i64 6
  %33 = bitcast ptr %arrayidx14.6.phi.trans.insert to ptr
  %34 = load <2 x i64>, ptr %33, align 8
  %35 = load i64, ptr null, align 8
  %36 = insertelement <2 x i64> undef, i64 %35, i32 0
  %37 = shufflevector <2 x i64> %36, <2 x i64> undef, <2 x i32> zeroinitializer
  %38 = mul nsw <2 x i64> %30, %37
  %39 = add nsw <2 x i64> %29, %38
  %40 = mul nsw <2 x i64> %31, %37
  %41 = add nsw <2 x i64> %28, %40
  %42 = mul nsw <2 x i64> %32, %37
  %43 = add nsw <2 x i64> %27, %42
  %44 = mul nsw <2 x i64> %34, %37
  %45 = add nsw <2 x i64> %26, %44
  %46 = load i64, ptr undef, align 8
  %47 = insertelement <2 x i64> undef, i64 %46, i32 0
  %48 = shufflevector <2 x i64> %47, <2 x i64> undef, <2 x i32> zeroinitializer
  %49 = mul nsw <2 x i64> %30, %48
  %50 = add nsw <2 x i64> %25, %49
  %51 = mul nsw <2 x i64> %31, %48
  %52 = add nsw <2 x i64> %24, %51
  %53 = mul nsw <2 x i64> %32, %48
  %54 = add nsw <2 x i64> %23, %53
  %55 = mul nsw <2 x i64> %34, %48
  %56 = add nsw <2 x i64> %22, %55
  %arrayidx10.2 = getelementptr inbounds [8 x [8 x i64]], ptr @A, i64 0, i64 2, i64 %indvars.iv40
  %57 = load i64, ptr %arrayidx10.2, align 8
  %58 = insertelement <2 x i64> undef, i64 %57, i32 0
  %59 = shufflevector <2 x i64> %58, <2 x i64> undef, <2 x i32> zeroinitializer
  %60 = mul nsw <2 x i64> %31, %59
  %61 = add nsw <2 x i64> %21, %60
  %62 = mul nsw <2 x i64> %32, %59
  %63 = add nsw <2 x i64> %20, %62
  %64 = mul nsw <2 x i64> %34, %59
  %65 = add nsw <2 x i64> %19, %64
  %66 = load i64, ptr undef, align 8
  %67 = insertelement <2 x i64> undef, i64 %66, i32 0
  %68 = shufflevector <2 x i64> %67, <2 x i64> undef, <2 x i32> zeroinitializer
  %69 = mul nsw <2 x i64> %30, %68
  %70 = add nsw <2 x i64> %18, %69
  %71 = mul nsw <2 x i64> %31, %68
  %72 = add nsw <2 x i64> %17, %71
  %73 = mul nsw <2 x i64> %34, %68
  %74 = add nsw <2 x i64> %16, %73
  %75 = load i64, ptr undef, align 8
  %76 = insertelement <2 x i64> undef, i64 %75, i32 0
  %77 = shufflevector <2 x i64> %76, <2 x i64> undef, <2 x i32> zeroinitializer
  %78 = mul nsw <2 x i64> %30, %77
  %79 = add nsw <2 x i64> %15, %78
  %80 = mul nsw <2 x i64> %31, %77
  %81 = add nsw <2 x i64> %14, %80
  %82 = mul nsw <2 x i64> %32, %77
  %83 = add nsw <2 x i64> %13, %82
  %84 = mul nsw <2 x i64> %34, %77
  %85 = add nsw <2 x i64> %12, %84
  %86 = load i64, ptr undef, align 8
  %87 = insertelement <2 x i64> undef, i64 %86, i32 0
  %88 = shufflevector <2 x i64> %87, <2 x i64> undef, <2 x i32> zeroinitializer
  %89 = mul nsw <2 x i64> %30, %88
  %90 = add nsw <2 x i64> %11, %89
  %91 = mul nsw <2 x i64> %31, %88
  %92 = add nsw <2 x i64> %10, %91
  %93 = mul nsw <2 x i64> %32, %88
  %94 = add nsw <2 x i64> %9, %93
  %95 = mul nsw <2 x i64> %34, %88
  %96 = add nsw <2 x i64> %8, %95
  %97 = load i64, ptr undef, align 8
  %98 = insertelement <2 x i64> undef, i64 %97, i32 0
  %99 = shufflevector <2 x i64> %98, <2 x i64> undef, <2 x i32> zeroinitializer
  %100 = mul nsw <2 x i64> %30, %99
  %101 = add nsw <2 x i64> %7, %100
  %102 = mul nsw <2 x i64> %31, %99
  %103 = add nsw <2 x i64> %6, %102
  %104 = mul nsw <2 x i64> %32, %99
  %105 = add nsw <2 x i64> %5, %104
  %106 = mul nsw <2 x i64> %34, %99
  %107 = add nsw <2 x i64> %4, %106
  %108 = load i64, ptr undef, align 8
  %109 = insertelement <2 x i64> undef, i64 %108, i32 0
  %110 = shufflevector <2 x i64> %109, <2 x i64> undef, <2 x i32> zeroinitializer
  %111 = mul nsw <2 x i64> %30, %110
  %112 = add nsw <2 x i64> %3, %111
  %113 = mul nsw <2 x i64> %31, %110
  %114 = add nsw <2 x i64> %2, %113
  %115 = mul nsw <2 x i64> %32, %110
  %116 = add nsw <2 x i64> %1, %115
  %117 = mul nsw <2 x i64> %34, %110
  %118 = add nsw <2 x i64> %0, %117
  %indvars.iv.next41 = add nuw nsw i64 %indvars.iv40, 1
  %exitcond42 = icmp eq i64 %indvars.iv.next41, 8
  br i1 %exitcond42, label %for.cond.cleanup, label %for.cond1.preheader

for.cond.cleanup:                                 ; preds = %for.cond1.preheader
  store <2 x i64> %39, ptr @C, align 8
  store <2 x i64> %41, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 0, i64 2), align 8
  store <2 x i64> %43, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 0, i64 4), align 8
  store <2 x i64> %45, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 0, i64 6), align 8
  store <2 x i64> %50, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 1, i64 0), align 8
  store <2 x i64> %52, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 1, i64 2), align 8
  store <2 x i64> %54, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 1, i64 4), align 8
  store <2 x i64> %56, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 1, i64 6), align 8
  store <2 x i64> %61, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 2, i64 2), align 8
  store <2 x i64> %63, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 2, i64 4), align 8
  store <2 x i64> %65, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 2, i64 6), align 8
  store <2 x i64> %70, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 3, i64 0), align 8
  store <2 x i64> %72, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 3, i64 2), align 8
  store <2 x i64> %74, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 3, i64 6), align 8
  store <2 x i64> %79, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 4, i64 0), align 8
  store <2 x i64> %81, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 4, i64 2), align 8
  store <2 x i64> %83, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 4, i64 4), align 8
  store <2 x i64> %85, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 4, i64 6), align 8
  store <2 x i64> %90, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 5, i64 0), align 8
  store <2 x i64> %92, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 5, i64 2), align 8
  store <2 x i64> %94, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 5, i64 4), align 8
  store <2 x i64> %96, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 5, i64 6), align 8
  store <2 x i64> %101, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 6, i64 0), align 8
  store <2 x i64> %103, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 6, i64 2), align 8
  store <2 x i64> %105, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 6, i64 4), align 8
  store <2 x i64> %107, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 6, i64 6), align 8
  store <2 x i64> %112, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 7, i64 0), align 8
  store <2 x i64> %114, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 7, i64 2), align 8
  store <2 x i64> %116, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 7, i64 4), align 8
  store <2 x i64> %118, ptr getelementptr inbounds ([8 x [8 x i64]], ptr @C, i64 0, i64 7, i64 6), align 8
  ret void
}