; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mattr=+sve -o - | FileCheck %s

target triple = "aarch64"

%"class.std::complex" = type { { double, double } }

; Zero initialized reduction. The IR is generated with predicated tail folding (-prefer-predicate-over-epilogue=predicate-dont-vectorize)
;
;   complex<double> x = 0.0 + 0.0i;
;   for (int i = 0; i < 100; ++i)
;       x += a[i] * b[i];
;
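; For reference, the scalar identity being accumulated per element is
;   (a.re + a.im*i) * (b.re + b.im*i)
;     = (a.re*b.re - a.im*b.im) + (a.re*b.im + a.im*b.re)*i
; and each #0/#90 rotation pair of fcmla instructions in the loop below
; accumulates exactly this product into its destination register.
;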
define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z1.d, #0 // =0x0
; CHECK-NEXT:    mov w8, #100 // =0x64
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    whilelo p1.d, xzr, x8
; CHECK-NEXT:    rdvl x10, #2
; CHECK-NEXT:    mov x11, x9
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    zip2 z0.d, z1.d, z1.d
; CHECK-NEXT:    zip1 z1.d, z1.d, z1.d
; CHECK-NEXT:  .LBB0_1: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    zip2 p2.d, p1.d, p1.d
; CHECK-NEXT:    mov z6.d, z1.d
; CHECK-NEXT:    mov z7.d, z0.d
; CHECK-NEXT:    zip1 p1.d, p1.d, p1.d
; CHECK-NEXT:    ld1d { z2.d }, p2/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1d { z4.d }, p2/z, [x1, #1, mul vl]
; CHECK-NEXT:    ld1d { z3.d }, p1/z, [x0]
; CHECK-NEXT:    ld1d { z5.d }, p1/z, [x1]
; CHECK-NEXT:    add x1, x1, x10
; CHECK-NEXT:    add x0, x0, x10
; CHECK-NEXT:    fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT:    fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT:    fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT:    fcmla z6.d, p0/m, z5.d, z3.d, #90
; CHECK-NEXT:    mov z0.d, p2/m, z7.d
; CHECK-NEXT:    mov z1.d, p1/m, z6.d
; CHECK-NEXT:    whilelo p1.d, x11, x8
; CHECK-NEXT:    add x11, x11, x9
; CHECK-NEXT:    b.mi .LBB0_1
; CHECK-NEXT:  // %bb.2: // %exit.block
; CHECK-NEXT:    uzp1 z2.d, z1.d, z0.d
; CHECK-NEXT:    uzp2 z1.d, z1.d, z0.d
; CHECK-NEXT:    faddv d0, p0, z2.d
; CHECK-NEXT:    faddv d1, p0, z1.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
; CHECK-NEXT:    ret
entry:
  %active.lane.mask.entry = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 100)
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 1
  %2 = shl nuw nsw i64 %0, 5
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %lsr.iv35 = phi i64 [ %lsr.iv.next36, %vector.body ], [ %1, %entry ]
  %lsr.iv = phi i64 [ %lsr.iv.next, %vector.body ], [ 0, %entry ]
  %active.lane.mask = phi <vscale x 2 x i1> [ %active.lane.mask.entry, %entry ], [ %active.lane.mask.next, %vector.body ]
  %vec.phi = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %15, %vector.body ]
  %vec.phi27 = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %16, %vector.body ]
  %scevgep = getelementptr i8, ptr %a, i64 %lsr.iv
  %scevgep34 = getelementptr i8, ptr %b, i64 %lsr.iv
  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i1> %active.lane.mask)
  %wide.masked.vec = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep, i32 8, <vscale x 4 x i1> %interleaved.mask, <vscale x 4 x double> poison)
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec)
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %4 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %interleaved.mask28 = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i1> %active.lane.mask)
  %wide.masked.vec29 = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep34, i32 8, <vscale x 4 x i1> %interleaved.mask28, <vscale x 4 x double> poison)
  %strided.vec30 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec29)
  %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec30, 0
  %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec30, 1
  %7 = fmul fast <vscale x 2 x double> %6, %3
  %8 = fmul fast <vscale x 2 x double> %5, %4
  %9 = fmul fast <vscale x 2 x double> %5, %3
  %10 = fadd fast <vscale x 2 x double> %9, %vec.phi27
  %11 = fmul fast <vscale x 2 x double> %6, %4
  %12 = fsub fast <vscale x 2 x double> %10, %11
  %13 = fadd fast <vscale x 2 x double> %8, %vec.phi
  %14 = fadd fast <vscale x 2 x double> %13, %7
  %15 = select fast <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x double> %14, <vscale x 2 x double> %vec.phi
  %16 = select fast <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x double> %12, <vscale x 2 x double> %vec.phi27
  %active.lane.mask.next = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 %lsr.iv35, i64 100)
  %17 = extractelement <vscale x 2 x i1> %active.lane.mask.next, i64 0
  %lsr.iv.next = add i64 %lsr.iv, %2
  %lsr.iv.next36 = add i64 %lsr.iv35, %1
  br i1 %17, label %vector.body, label %exit.block

exit.block:                                     ; preds = %vector.body
  %18 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %16)
  %19 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %15)
  %.fca.0.0.insert = insertvalue %"class.std::complex" poison, double %18, 0, 0
  %.fca.0.1.insert = insertvalue %"class.std::complex" %.fca.0.0.insert, double %19, 0, 1
  ret %"class.std::complex" %.fca.0.1.insert
}
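
; A note on the epilogue shared by all three tests: the two accumulator
; registers hold the complex values in interleaved form, so uzp1/uzp2 first
; separate the real (even) and imaginary (odd) lanes, and faddv then reduces
; each half into the d0/d1 fields of the returned std::complex.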

; Zero initialized reduction with conditional block. The IR is generated with scalar tail folding (-prefer-predicate-over-epilogue=scalar-epilogue)
;
;   complex<double> x = 0.0 + 0.0i;
;   for (int i = 0; i < 100; ++i)
;       if (cond[i])
;           x += a[i] * b[i];
;
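; With a scalar epilogue the vector loop runs for n.vec = 100 - (100 urem VF)
; iterations, where VF = vscale * 2 (cntd). Because the backend treats vscale
; as a power of two on this target, this folds to 100 & -VF, which is the
; neg/and pair in the CHECK lines below.
;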
define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %cond) {
; CHECK-LABEL: complex_mul_predicated_v2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z1.d, #0 // =0x0
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    mov w11, #100 // =0x64
; CHECK-NEXT:    neg x10, x9
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x8, xzr
; CHECK-NEXT:    and x10, x10, x11
; CHECK-NEXT:    rdvl x11, #2
; CHECK-NEXT:    zip2 z0.d, z1.d, z1.d
; CHECK-NEXT:    zip1 z1.d, z1.d, z1.d
; CHECK-NEXT:  .LBB1_1: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ld1w { z2.d }, p0/z, [x2, x8, lsl #2]
; CHECK-NEXT:    mov z6.d, z1.d
; CHECK-NEXT:    mov z7.d, z0.d
; CHECK-NEXT:    add x8, x8, x9
; CHECK-NEXT:    cmpne p1.d, p0/z, z2.d, #0
; CHECK-NEXT:    cmp x10, x8
; CHECK-NEXT:    zip2 p2.d, p1.d, p1.d
; CHECK-NEXT:    zip1 p1.d, p1.d, p1.d
; CHECK-NEXT:    ld1d { z2.d }, p2/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1d { z4.d }, p2/z, [x1, #1, mul vl]
; CHECK-NEXT:    ld1d { z3.d }, p1/z, [x0]
; CHECK-NEXT:    ld1d { z5.d }, p1/z, [x1]
; CHECK-NEXT:    add x1, x1, x11
; CHECK-NEXT:    add x0, x0, x11
; CHECK-NEXT:    fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT:    fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT:    fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT:    fcmla z6.d, p0/m, z5.d, z3.d, #90
; CHECK-NEXT:    mov z0.d, p2/m, z7.d
; CHECK-NEXT:    mov z1.d, p1/m, z6.d
; CHECK-NEXT:    b.ne .LBB1_1
; CHECK-NEXT:  // %bb.2: // %exit.block
; CHECK-NEXT:    uzp1 z2.d, z1.d, z0.d
; CHECK-NEXT:    uzp2 z1.d, z1.d, z0.d
; CHECK-NEXT:    faddv d0, p0, z2.d
; CHECK-NEXT:    faddv d1, p0, z1.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 1
  %n.mod.vf = urem i64 100, %1
  %n.vec = sub i64 100, %n.mod.vf
  %2 = shl nuw nsw i64 %0, 5
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %lsr.iv48 = phi i64 [ %lsr.iv.next, %vector.body ], [ 0, %entry ]
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %vec.phi = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %predphi34, %vector.body ]
  %vec.phi30 = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %predphi, %vector.body ]
  %3 = shl i64 %index, 2
  %scevgep47 = getelementptr i8, ptr %cond, i64 %3
  %wide.load = load <vscale x 2 x i32>, ptr %scevgep47, align 4
  %4 = icmp ne <vscale x 2 x i32> %wide.load, zeroinitializer
  %scevgep49 = getelementptr i8, ptr %a, i64 %lsr.iv48
  %scevgep50 = getelementptr i8, ptr %b, i64 %lsr.iv48
  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %4, <vscale x 2 x i1> %4)
  %wide.masked.vec = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep49, i32 8, <vscale x 4 x i1> %interleaved.mask, <vscale x 4 x double> poison)
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec)
  %5 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %wide.masked.vec32 = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep50, i32 8, <vscale x 4 x i1> %interleaved.mask, <vscale x 4 x double> poison)
  %strided.vec33 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec32)
  %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec33, 0
  %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec33, 1
  %9 = fmul fast <vscale x 2 x double> %8, %5
  %10 = fmul fast <vscale x 2 x double> %7, %6
  %11 = fmul fast <vscale x 2 x double> %7, %5
  %12 = fadd fast <vscale x 2 x double> %11, %vec.phi30
  %13 = fmul fast <vscale x 2 x double> %8, %6
  %14 = fsub fast <vscale x 2 x double> %12, %13
  %15 = fadd fast <vscale x 2 x double> %10, %vec.phi
  %16 = fadd fast <vscale x 2 x double> %15, %9
  %predphi = select <vscale x 2 x i1> %4, <vscale x 2 x double> %14, <vscale x 2 x double> %vec.phi30
  %predphi34 = select <vscale x 2 x i1> %4, <vscale x 2 x double> %16, <vscale x 2 x double> %vec.phi
  %index.next = add nuw i64 %index, %1
  %lsr.iv.next = add i64 %lsr.iv48, %2
  %17 = icmp eq i64 %n.vec, %index.next
  br i1 %17, label %exit.block, label %vector.body

exit.block:                                     ; preds = %vector.body
  %18 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %predphi)
  %19 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %predphi34)
  %.fca.0.0.insert = insertvalue %"class.std::complex" poison, double %18, 0, 0
  %.fca.0.1.insert = insertvalue %"class.std::complex" %.fca.0.0.insert, double %19, 0, 1
  ret %"class.std::complex" %.fca.0.1.insert
}

; Zero initialized reduction with conditional block. The IR is generated with predicated tail folding (-prefer-predicate-over-epilogue=predicate-dont-vectorize)
;
;   complex<double> x = 0.0 + 0.0i;
;   for (int i = 0; i < 100; ++i)
;       if (cond[i])
;           x += a[i] * b[i];
;
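; Unlike the previous test, the load of cond[] here is itself masked by the
; active lane mask, and the IR then ANDs that mask with the compare result via
;   select <vscale x 2 x i1> %active.lane.mask, %4, zeroinitializer
; The backend folds this AND into the single predicated compare
; (cmpne p1.d, p1/z, z2.d, #0) in the loop below; the preceding
; and z2.d, z2.d, #0xffffffff is the i32->i64 zero-extension of the load.
;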
define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, ptr %cond) {
; CHECK-LABEL: complex_mul_predicated_x2_v2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z1.d, #0 // =0x0
; CHECK-NEXT:    mov w8, #100 // =0x64
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    whilelo p1.d, xzr, x8
; CHECK-NEXT:    rdvl x10, #2
; CHECK-NEXT:    cnth x11
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x12, x9
; CHECK-NEXT:    zip2 z0.d, z1.d, z1.d
; CHECK-NEXT:    zip1 z1.d, z1.d, z1.d
; CHECK-NEXT:  .LBB2_1: // %vector.body
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ld1w { z2.d }, p1/z, [x2]
; CHECK-NEXT:    mov z6.d, z1.d
; CHECK-NEXT:    mov z7.d, z0.d
; CHECK-NEXT:    add x2, x2, x11
; CHECK-NEXT:    and z2.d, z2.d, #0xffffffff
; CHECK-NEXT:    cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT:    zip2 p2.d, p1.d, p1.d
; CHECK-NEXT:    zip1 p1.d, p1.d, p1.d
; CHECK-NEXT:    ld1d { z2.d }, p2/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1d { z4.d }, p2/z, [x1, #1, mul vl]
; CHECK-NEXT:    ld1d { z3.d }, p1/z, [x0]
; CHECK-NEXT:    ld1d { z5.d }, p1/z, [x1]
; CHECK-NEXT:    add x1, x1, x10
; CHECK-NEXT:    add x0, x0, x10
; CHECK-NEXT:    fcmla z7.d, p0/m, z4.d, z2.d, #0
; CHECK-NEXT:    fcmla z6.d, p0/m, z5.d, z3.d, #0
; CHECK-NEXT:    fcmla z7.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT:    fcmla z6.d, p0/m, z5.d, z3.d, #90
; CHECK-NEXT:    mov z0.d, p2/m, z7.d
; CHECK-NEXT:    mov z1.d, p1/m, z6.d
; CHECK-NEXT:    whilelo p1.d, x12, x8
; CHECK-NEXT:    add x12, x12, x9
; CHECK-NEXT:    b.mi .LBB2_1
; CHECK-NEXT:  // %bb.2: // %exit.block
; CHECK-NEXT:    uzp1 z2.d, z1.d, z0.d
; CHECK-NEXT:    uzp2 z1.d, z1.d, z0.d
; CHECK-NEXT:    faddv d0, p0, z2.d
; CHECK-NEXT:    faddv d1, p0, z1.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
; CHECK-NEXT:    ret
entry:
  %active.lane.mask.entry = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 100)
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 1
  %2 = shl nuw nsw i64 %0, 5
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %lsr.iv = phi i64 [ %lsr.iv.next, %vector.body ], [ 0, %entry ]
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %active.lane.mask = phi <vscale x 2 x i1> [ %active.lane.mask.entry, %entry ], [ %active.lane.mask.next, %vector.body ]
  %vec.phi = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %19, %vector.body ]
  %vec.phi30 = phi <vscale x 2 x double> [ zeroinitializer, %entry ], [ %21, %vector.body ]
  %3 = shl i64 %index, 2
  %scevgep = getelementptr i8, ptr %cond, i64 %3
  %wide.masked.load = tail call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr %scevgep, i32 4, <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i32> poison)
  %4 = icmp ne <vscale x 2 x i32> %wide.masked.load, zeroinitializer
  %scevgep38 = getelementptr i8, ptr %a, i64 %lsr.iv
  %scevgep39 = getelementptr i8, ptr %b, i64 %lsr.iv
  %5 = select <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i1> %4, <vscale x 2 x i1> zeroinitializer
  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %5, <vscale x 2 x i1> %5)
  %wide.masked.vec = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep38, i32 8, <vscale x 4 x i1> %interleaved.mask, <vscale x 4 x double> poison)
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec)
  %6 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %7 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %interleaved.mask31 = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %5, <vscale x 2 x i1> %5)
  %wide.masked.vec32 = tail call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr %scevgep39, i32 8, <vscale x 4 x i1> %interleaved.mask31, <vscale x 4 x double> poison)
  %strided.vec33 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %wide.masked.vec32)
  %8 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec33, 0
  %9 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec33, 1
  %10 = fmul fast <vscale x 2 x double> %9, %6
  %11 = fmul fast <vscale x 2 x double> %8, %7
  %12 = fmul fast <vscale x 2 x double> %8, %6
  %13 = fadd fast <vscale x 2 x double> %12, %vec.phi30
  %14 = fmul fast <vscale x 2 x double> %9, %7
  %15 = fsub fast <vscale x 2 x double> %13, %14
  %16 = fadd fast <vscale x 2 x double> %11, %vec.phi
  %17 = fadd fast <vscale x 2 x double> %16, %10
  %18 = select <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i1> %4, <vscale x 2 x i1> zeroinitializer
  %19 = select fast <vscale x 2 x i1> %18, <vscale x 2 x double> %17, <vscale x 2 x double> %vec.phi
  %20 = select <vscale x 2 x i1> %active.lane.mask, <vscale x 2 x i1> %4, <vscale x 2 x i1> zeroinitializer
  %21 = select fast <vscale x 2 x i1> %20, <vscale x 2 x double> %15, <vscale x 2 x double> %vec.phi30
  %index.next = add i64 %index, %1
  %22 = add i64 %1, %index
  %active.lane.mask.next = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 %22, i64 100)
  %23 = extractelement <vscale x 2 x i1> %active.lane.mask.next, i64 0
  %lsr.iv.next = add i64 %lsr.iv, %2
  br i1 %23, label %vector.body, label %exit.block

exit.block:                                     ; preds = %vector.body
  %24 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %21)
  %25 = tail call fast double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> %19)
  %.fca.0.0.insert = insertvalue %"class.std::complex" poison, double %24, 0, 0
  %.fca.0.1.insert = insertvalue %"class.std::complex" %.fca.0.0.insert, double %25, 0, 1
  ret %"class.std::complex" %.fca.0.1.insert
}

declare i64 @llvm.vscale.i64()
declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64, i64)
declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr nocapture, i32 immarg, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0(ptr nocapture, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x double>)
declare <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)