; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mattr=+sve -o - | FileCheck %s

target triple = "aarch64"

; Layout matches std::complex<double>: { real, imag } as a nested pair of doubles.
%"class.std::complex" = type { { double, double } }

; Zero initialized reduction. The IR is generated with predicated tail folding (-prefer-predicate-over-epilogue=predicate-dont-vectorize)
;
;   complex<double> x = 0.0 + 0.0i;
;   for (int i = 0; i < 100; ++i)
;     x += a[i] * b[i];
;
define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z1.d, #0 // =0x0
; CHECK-NEXT:    mov w8, #100 // =0x64
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    whilelo p1.d, xzr, x8
; CHECK-NEXT:    rdvl x10, #2
; CHECK-NEXT:    mov x11, x9
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    zip2 z0.d, z1.d, z1.d
; CHECK-NEXT:    zip1 z1.d, z1.d, z1.d
; CHECK-NEXT:  .LBB0_1: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    zip2 p2.d, p1.d, p1.d
; CHECK-NEXT:    mov z6.d, z1.d
; CHECK-NEXT:    mov z7.d, z0.d
; CHECK-NEXT:    zip1 p1.d, p1.d, p1.d
; CHECK-NEXT:    ld1d { z2.d }, p2/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1d { z4.d }, p2/z, [x1, #1, mul vl]
; CHECK-NEXT:    ld1d { z3.d }, p1/z, [x0]
; CHECK-NEXT:    ld1d { z5.d }, p1/z, [x1]
; CHECK-NEXT:    add x1, x1, x10
; CHECK-NEXT:    add x0, x0, x10
; CHECK-NEXT:    fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT:    fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT:    fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT:    fcmla z6.d, p0/m, z5.d, z3.d, #90
; CHECK-NEXT:    mov z0.d, p2/m, z7.d
; CHECK-NEXT:    mov z1.d, p1/m, z6.d
; CHECK-NEXT:    whilelo p1.d, x11, x8
; CHECK-NEXT:    add x11, x11, x9
; CHECK-NEXT:    b.mi .LBB0_1
; CHECK-NEXT:  // %bb.2: // %exit.block
; CHECK-NEXT:    uzp1 z2.d, z1.d, z0.d
; CHECK-NEXT:    uzp2 z1.d, z1.d, z0.d
; CHECK-NEXT:    faddv d0, p0, z2.d
; CHECK-NEXT:    faddv d1, p0, z1.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
; CHECK-NEXT:    ret
entry:
  %active.lane.mask.entry = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 100)
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 1
  %2 = shl nuw nsw i64 %0, 5
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %lsr.iv35 = phi i64 [ %lsr.iv.next36, %vector.body ], [ %1, %entry ]
  %lsr.iv = phi i64 [ %lsr.iv.next, %vector.body ], [ 0, %entry ]
  %active.lane.mask = phi <vscale x 2 x i1> [ %active.lane.mask.entry, %entry ], [ %active.lane.mask.next, %vector.body ]
  %vec.phi = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %15, %vector.body ]
  %vec.phi27 = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %16, %vector.body ]
  %scevgep = getelementptr i8, ptr %a, i64 %lsr.iv
  %scevgep34 = getelementptr i8, ptr %b, i64 %lsr.iv
  ; Interleave the lane mask so it covers the (real, imag) pairs of the wide load.
  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i1> %active.lane.mask)
  %wide.masked.vec = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep, i32 8, <vscale x 4 x i1> %interleaved.mask, <vscale x 4 x double> poison)
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec)
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %4 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %interleaved.mask28 = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i1> %active.lane.mask)
  %wide.masked.vec29 = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep34, i32 8, <vscale x 4 x i1> %interleaved.mask28, <vscale x 4 x double> poison)
  %strided.vec30 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec29)
  %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec30, 0
  %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec30, 1
  ; Expanded complex multiply-accumulate: real/imag partial products.
  %7 = fmul fast <vscale x 2 x double> %6, %3
  %8 = fmul fast <vscale x 2 x double> %5, %4
  %9 = fmul fast <vscale x 2 x double> %5, %3
  %10 = fadd fast <vscale x 2 x double> %9, %vec.phi27
  %11 = fmul fast <vscale x 2 x double> %6, %4
  %12 = fsub fast <vscale x 2 x double> %10, %11
  %13 = fadd fast <vscale x 2 x double> %8, %vec.phi
  %14 = fadd fast <vscale x 2 x double> %13, %7
  ; Inactive lanes keep the previous reduction value (tail-folded predication).
  %15 = select fast <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x double> %14, <vscale x 2 x double> %vec.phi
  %16 = select fast <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x double> %12, <vscale x 2 x double> %vec.phi27
  %active.lane.mask.next = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 %lsr.iv35, i64 100)
  %17 = extractelement <vscale x 2 x i1> %active.lane.mask.next, i64 0
  %lsr.iv.next = add i64 %lsr.iv, %2
  %lsr.iv.next36 = add i64 %lsr.iv35, %1
  br i1 %17, label %vector.body, label %exit.block

exit.block:                                       ; preds = %vector.body
  %18 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %16)
  %19 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %15)
  %.fca.0.0.insert = insertvalue %"class.std::complex" poison, double %18, 0, 0
  %.fca.0.1.insert = insertvalue %"class.std::complex" %.fca.0.0.insert, double %19, 0, 1
  ret %"class.std::complex" %.fca.0.1.insert
}

; Zero initialized reduction with conditional block.
; The IR is generated with scalar tail folding (-prefer-predicate-over-epilogue=scalar-epilogue)
;
;   complex<double> x = 0.0 + 0.0i;
;   for (int i = 0; i < 100; ++i)
;     if (cond[i])
;       x += a[i] * b[i];
;
define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %cond) {
; CHECK-LABEL: complex_mul_predicated_v2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z1.d, #0 // =0x0
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    mov w11, #100 // =0x64
; CHECK-NEXT:    neg x10, x9
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x8, xzr
; CHECK-NEXT:    and x10, x10, x11
; CHECK-NEXT:    rdvl x11, #2
; CHECK-NEXT:    zip2 z0.d, z1.d, z1.d
; CHECK-NEXT:    zip1 z1.d, z1.d, z1.d
; CHECK-NEXT:  .LBB1_1: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ld1w { z2.d }, p0/z, [x2, x8, lsl #2]
; CHECK-NEXT:    mov z6.d, z1.d
; CHECK-NEXT:    mov z7.d, z0.d
; CHECK-NEXT:    add x8, x8, x9
; CHECK-NEXT:    cmpne p1.d, p0/z, z2.d, #0
; CHECK-NEXT:    cmp x10, x8
; CHECK-NEXT:    zip2 p2.d, p1.d, p1.d
; CHECK-NEXT:    zip1 p1.d, p1.d, p1.d
; CHECK-NEXT:    ld1d { z2.d }, p2/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1d { z4.d }, p2/z, [x1, #1, mul vl]
; CHECK-NEXT:    ld1d { z3.d }, p1/z, [x0]
; CHECK-NEXT:    ld1d { z5.d }, p1/z, [x1]
; CHECK-NEXT:    add x1, x1, x11
; CHECK-NEXT:    add x0, x0, x11
; CHECK-NEXT:    fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT:    fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT:    fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT:    fcmla z6.d, p0/m, z5.d, z3.d, #90
; CHECK-NEXT:    mov z0.d, p2/m, z7.d
; CHECK-NEXT:    mov z1.d, p1/m, z6.d
; CHECK-NEXT:    b.ne .LBB1_1
; CHECK-NEXT:  // %bb.2: // %exit.block
; CHECK-NEXT:    uzp1 z2.d, z1.d, z0.d
; CHECK-NEXT:    uzp2 z1.d, z1.d, z0.d
; CHECK-NEXT:    faddv d0, p0, z2.d
; CHECK-NEXT:    faddv d1, p0, z1.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 1
  ; Round the trip count of 100 down to a multiple of the vector length
  ; (scalar-epilogue style: the vector loop only runs full iterations).
  %n.mod.vf = urem i64 100, %1
  %n.vec = sub i64 100, %n.mod.vf
  %2 = shl nuw nsw i64 %0, 5
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %lsr.iv48 = phi i64 [ %lsr.iv.next, %vector.body ], [ 0, %entry ]
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %vec.phi = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %predphi34, %vector.body ]
  %vec.phi30 = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %predphi, %vector.body ]
  %3 = shl i64 %index, 2
  %scevgep47 = getelementptr i8, ptr %cond, i64 %3
  ; The condition vector is loaded unmasked; only the a/b loads are predicated on it.
  %wide.load = load <vscale x 2 x i32>, ptr %scevgep47, align 4
  %4 = icmp ne <vscale x 2 x i32> %wide.load, zeroinitializer
  %scevgep49 = getelementptr i8, ptr %a, i64 %lsr.iv48
  %scevgep50 = getelementptr i8, ptr %b, i64 %lsr.iv48
  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %4, <vscale x 2 x i1> %4)
  %wide.masked.vec = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep49, i32 8, <vscale x 4 x i1> %interleaved.mask, <vscale x 4 x double> poison)
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec)
  %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %wide.masked.vec32 = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep50, i32 8, <vscale x 4 x i1> %interleaved.mask, <vscale x 4 x double> poison)
  %strided.vec33 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec32)
  %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec33, 0
  %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec33, 1
  ; Expanded complex multiply-accumulate: real/imag partial products.
  %9 = fmul fast <vscale x 2 x double> %8, %5
  %10 = fmul fast <vscale x 2 x double> %7, %6
  %11 = fmul fast <vscale x 2 x double> %7, %5
  %12 = fadd fast <vscale x 2 x double> %11, %vec.phi30
  %13 = fmul fast <vscale x 2 x double> %8, %6
  %14 = fsub fast <vscale x 2 x double> %12, %13
  %15 = fadd fast <vscale x 2 x double> %10, %vec.phi
  %16 = fadd fast <vscale x 2 x double> %15, %9
  ; Lanes where cond[i] == 0 keep the previous reduction value.
  %predphi = select <vscale x 2 x i1> %4, <vscale x 2 x double> %14, <vscale x 2 x double> %vec.phi30
  %predphi34 = select <vscale x 2 x i1> %4, <vscale x 2 x double> %16, <vscale x 2 x double> %vec.phi
  %index.next = add nuw i64 %index, %1
  %lsr.iv.next = add i64 %lsr.iv48, %2
  %17 = icmp eq i64 %n.vec, %index.next
  br i1 %17, label %exit.block, label %vector.body

exit.block:                                       ; preds = %vector.body
  %18 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %predphi)
  %19 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %predphi34)
  %.fca.0.0.insert = insertvalue %"class.std::complex" poison, double %18, 0, 0
  %.fca.0.1.insert = insertvalue %"class.std::complex" %.fca.0.0.insert, double %19, 0, 1
  ret %"class.std::complex" %.fca.0.1.insert
}

; Zero initialized reduction with conditional block.
; The IR is generated with predicated tail folding (-prefer-predicate-over-epilogue=predicate-dont-vectorize)
;
;   complex<double> x = 0.0 + 0.0i;
;   for (int i = 0; i < 100; ++i)
;     if (cond[i])
;       x += a[i] * b[i];
;
define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, ptr %cond) {
; CHECK-LABEL: complex_mul_predicated_x2_v2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z1.d, #0 // =0x0
; CHECK-NEXT:    mov w8, #100 // =0x64
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    whilelo p1.d, xzr, x8
; CHECK-NEXT:    rdvl x10, #2
; CHECK-NEXT:    cnth x11
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x12, x9
; CHECK-NEXT:    zip2 z0.d, z1.d, z1.d
; CHECK-NEXT:    zip1 z1.d, z1.d, z1.d
; CHECK-NEXT:  .LBB2_1: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ld1w { z2.d }, p1/z, [x2]
; CHECK-NEXT:    mov z6.d, z1.d
; CHECK-NEXT:    mov z7.d, z0.d
; CHECK-NEXT:    add x2, x2, x11
; CHECK-NEXT:    and z2.d, z2.d, #0xffffffff
; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT:    zip2 p2.d, p1.d, p1.d
; CHECK-NEXT:    zip1 p1.d, p1.d, p1.d
; CHECK-NEXT:    ld1d { z2.d }, p2/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1d { z4.d }, p2/z, [x1, #1, mul vl]
; CHECK-NEXT:    ld1d { z3.d }, p1/z, [x0]
; CHECK-NEXT:    ld1d { z5.d }, p1/z, [x1]
; CHECK-NEXT:    add x1, x1, x10
; CHECK-NEXT:    add x0, x0, x10
; CHECK-NEXT:    fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT:    fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT:    fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT:    fcmla z6.d, p0/m, z5.d, z3.d, #90
; CHECK-NEXT:    mov z0.d, p2/m, z7.d
; CHECK-NEXT:    mov z1.d, p1/m, z6.d
; CHECK-NEXT:    whilelo p1.d, x12, x8
; CHECK-NEXT:    add x12, x12, x9
; CHECK-NEXT:    b.mi .LBB2_1
; CHECK-NEXT:  // %bb.2: // %exit.block
; CHECK-NEXT:    uzp1 z2.d, z1.d, z0.d
; CHECK-NEXT:    uzp2 z1.d, z1.d, z0.d
; CHECK-NEXT:    faddv d0, p0, z2.d
; CHECK-NEXT:    faddv d1, p0, z1.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
; CHECK-NEXT:    ret
entry:
  %active.lane.mask.entry = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 100)
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 1
  %2 = shl nuw nsw i64 %0, 5
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %lsr.iv = phi i64 [ %lsr.iv.next, %vector.body ], [ 0, %entry ]
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %active.lane.mask = phi <vscale x 2 x i1> [ %active.lane.mask.entry, %entry ], [ %active.lane.mask.next, %vector.body ]
  %vec.phi = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %19, %vector.body ]
  %vec.phi30 = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %21, %vector.body ]
  %3 = shl i64 %index, 2
  %scevgep = getelementptr i8, ptr %cond, i64 %3
  ; The condition load itself is predicated on the active lane mask (tail folding).
  %wide.masked.load = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr %scevgep, i32 4, <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i32> poison)
  %4 = icmp ne <vscale x 2 x i32> %wide.masked.load, zeroinitializer
  %scevgep38 = getelementptr i8, ptr %a, i64 %lsr.iv
  %scevgep39 = getelementptr i8, ptr %b, i64 %lsr.iv
  ; Combined predicate: lane is active AND cond[i] != 0.
  %5 = select <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i1> %4, <vscale x 2 x i1> zeroinitializer
  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %5, <vscale x 2 x i1> %5)
  %wide.masked.vec = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep38, i32 8, <vscale x 4 x i1> %interleaved.mask, <vscale x 4 x double> poison)
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec)
  %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %interleaved.mask31 = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %5, <vscale x 2 x i1> %5)
  %wide.masked.vec32 = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep39, i32 8, <vscale x 4 x i1> %interleaved.mask31, <vscale x 4 x double> poison)
  %strided.vec33 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec32)
  %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec33, 0
  %9 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec33, 1
  ; Expanded complex multiply-accumulate: real/imag partial products.
  %10 = fmul fast <vscale x 2 x double> %9, %6
  %11 = fmul fast <vscale x 2 x double> %8, %7
  %12 = fmul fast <vscale x 2 x double> %8, %6
  %13 = fadd fast <vscale x 2 x double> %12, %vec.phi30
  %14 = fmul fast <vscale x 2 x double> %9, %7
  %15 = fsub fast <vscale x 2 x double> %13, %14
  %16 = fadd fast <vscale x 2 x double> %11, %vec.phi
  %17 = fadd fast <vscale x 2 x double> %16, %10
  ; Inactive or cond-false lanes keep the previous reduction value.
  %18 = select <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i1> %4, <vscale x 2 x i1> zeroinitializer
  %19 = select fast <vscale x 2 x i1> %18, <vscale x 2 x double> %17, <vscale x 2 x double> %vec.phi
  %20 = select <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i1> %4, <vscale x 2 x i1> zeroinitializer
  %21 = select fast <vscale x 2 x i1> %20, <vscale x 2 x double> %15, <vscale x 2 x double> %vec.phi30
  %index.next = add i64 %index, %1
  %22 = add i64 %1, %index
  %active.lane.mask.next = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 %22, i64 100)
  %23 = extractelement <vscale x 2 x i1> %active.lane.mask.next, i64 0
  %lsr.iv.next = add i64 %lsr.iv, %2
  br i1 %23, label %vector.body, label %exit.block

exit.block:                                       ; preds = %vector.body
  %24 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %21)
  %25 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %19)
  %.fca.0.0.insert = insertvalue %"class.std::complex" poison, double %24, 0, 0
  %.fca.0.1.insert = insertvalue %"class.std::complex" %.fca.0.0.insert, double %25, 0, 1
  ret %"class.std::complex" %.fca.0.1.insert
}

declare i64 @llvm.vscale.i64()
declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64, i64)
declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr nocapture, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr nocapture, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x double>)
declare <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)