xref: /llvm-project/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mcpu=pwr10 < %s | FileCheck %s
3
; Module-level target description. The datalayout string selects little-endian
; ('e'), ELF name mangling ('m:e'), 64-bit aligned i64, 64-bit pointers with
; 64-bit alignment, native integer widths of 32 and 64 bits, and 256-/512-bit
; vectors aligned to their own size. The triple matches the little-endian
; 64-bit PowerPC Linux target that the RUN line compiles for.
4target datalayout = "e-m:e-i64:64-p:64:64-n32:64-v256:256:256-v512:512:512"
5target triple = "powerpc64le-unknown-linux-gnu"
6
; Named element types: each is a packed struct (the '<{ }>' form) wrapping a
; single double. %_elem_type_of_x is the element type used by the
; getelementptr instructions that index %.x inside @foo.
; NOTE(review): %_elem_type_of_a is not referenced in the part of @foo visible
; in this review (accesses based on %.a use i8 byte offsets) - confirm whether
; the remainder of the file uses it before removing it.
7%_elem_type_of_a = type <{ double }>
8%_elem_type_of_x = type <{ double }>
9
10define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %.vy02, ptr %.vy03, ptr %.vy04, ptr %.vy05, ptr %.vy06, ptr %.vy07, ptr %.vy08, ptr %.vy09, ptr %.vy0a, ptr %.vy0b, ptr %.vy0c, ptr %.vy21, ptr %.vy22, ptr %.vy23, ptr %.vy24, ptr %.vy25, ptr %.vy26, ptr %.vy27, ptr %.vy28, ptr %.vy29, ptr %.vy2a, ptr %.vy2b, ptr %.vy2c) {
11; CHECK-LABEL: foo:
12; CHECK:       # %bb.0: # %entry
13; CHECK-NEXT:    lwz 4, 0(4)
14; CHECK-NEXT:    cmpwi 4, 1
15; CHECK-NEXT:    bltlr 0
16; CHECK-NEXT:  # %bb.1: # %_loop_1_do_.lr.ph
17; CHECK-NEXT:    lwz 3, 0(3)
18; CHECK-NEXT:    cmpwi 3, 1
19; CHECK-NEXT:    bltlr 0
20; CHECK-NEXT:  # %bb.2: # %_loop_1_do_.preheader
21; CHECK-NEXT:    stdu 1, -592(1)
22; CHECK-NEXT:    .cfi_def_cfa_offset 592
23; CHECK-NEXT:    .cfi_offset r14, -192
24; CHECK-NEXT:    .cfi_offset r15, -184
25; CHECK-NEXT:    .cfi_offset r16, -176
26; CHECK-NEXT:    .cfi_offset r17, -168
27; CHECK-NEXT:    .cfi_offset r18, -160
28; CHECK-NEXT:    .cfi_offset r19, -152
29; CHECK-NEXT:    .cfi_offset r20, -144
30; CHECK-NEXT:    .cfi_offset r21, -136
31; CHECK-NEXT:    .cfi_offset r22, -128
32; CHECK-NEXT:    .cfi_offset r23, -120
33; CHECK-NEXT:    .cfi_offset r24, -112
34; CHECK-NEXT:    .cfi_offset r25, -104
35; CHECK-NEXT:    .cfi_offset r26, -96
36; CHECK-NEXT:    .cfi_offset r27, -88
37; CHECK-NEXT:    .cfi_offset r28, -80
38; CHECK-NEXT:    .cfi_offset r29, -72
39; CHECK-NEXT:    .cfi_offset r30, -64
40; CHECK-NEXT:    .cfi_offset r31, -56
41; CHECK-NEXT:    .cfi_offset f26, -48
42; CHECK-NEXT:    .cfi_offset f27, -40
43; CHECK-NEXT:    .cfi_offset f28, -32
44; CHECK-NEXT:    .cfi_offset f29, -24
45; CHECK-NEXT:    .cfi_offset f30, -16
46; CHECK-NEXT:    .cfi_offset f31, -8
47; CHECK-NEXT:    .cfi_offset v20, -384
48; CHECK-NEXT:    .cfi_offset v21, -368
49; CHECK-NEXT:    .cfi_offset v22, -352
50; CHECK-NEXT:    .cfi_offset v23, -336
51; CHECK-NEXT:    .cfi_offset v24, -320
52; CHECK-NEXT:    .cfi_offset v25, -304
53; CHECK-NEXT:    .cfi_offset v26, -288
54; CHECK-NEXT:    .cfi_offset v27, -272
55; CHECK-NEXT:    .cfi_offset v28, -256
56; CHECK-NEXT:    .cfi_offset v29, -240
57; CHECK-NEXT:    .cfi_offset v30, -224
58; CHECK-NEXT:    .cfi_offset v31, -208
59; CHECK-NEXT:    std 22, 464(1) # 8-byte Folded Spill
60; CHECK-NEXT:    std 23, 472(1) # 8-byte Folded Spill
61; CHECK-NEXT:    mr 22, 5
62; CHECK-NEXT:    ld 5, 848(1)
63; CHECK-NEXT:    addi 3, 3, 1
64; CHECK-NEXT:    mr 11, 7
65; CHECK-NEXT:    ld 23, 688(1)
66; CHECK-NEXT:    ld 7, 728(1)
67; CHECK-NEXT:    std 18, 432(1) # 8-byte Folded Spill
68; CHECK-NEXT:    std 19, 440(1) # 8-byte Folded Spill
69; CHECK-NEXT:    mr 18, 6
70; CHECK-NEXT:    li 6, 9
71; CHECK-NEXT:    ld 19, 768(1)
72; CHECK-NEXT:    ld 2, 760(1)
73; CHECK-NEXT:    std 26, 496(1) # 8-byte Folded Spill
74; CHECK-NEXT:    std 27, 504(1) # 8-byte Folded Spill
75; CHECK-NEXT:    cmpldi 3, 9
76; CHECK-NEXT:    ld 27, 816(1)
77; CHECK-NEXT:    ld 26, 808(1)
78; CHECK-NEXT:    std 14, 400(1) # 8-byte Folded Spill
79; CHECK-NEXT:    std 15, 408(1) # 8-byte Folded Spill
80; CHECK-NEXT:    ld 15, 736(1)
81; CHECK-NEXT:    lxv 39, 0(8)
82; CHECK-NEXT:    std 30, 528(1) # 8-byte Folded Spill
83; CHECK-NEXT:    std 31, 536(1) # 8-byte Folded Spill
84; CHECK-NEXT:    ld 30, 704(1)
85; CHECK-NEXT:    lxv 38, 0(9)
86; CHECK-NEXT:    std 20, 448(1) # 8-byte Folded Spill
87; CHECK-NEXT:    std 21, 456(1) # 8-byte Folded Spill
88; CHECK-NEXT:    ld 21, 784(1)
89; CHECK-NEXT:    ld 20, 776(1)
90; CHECK-NEXT:    std 24, 480(1) # 8-byte Folded Spill
91; CHECK-NEXT:    std 25, 488(1) # 8-byte Folded Spill
92; CHECK-NEXT:    iselgt 3, 3, 6
93; CHECK-NEXT:    ld 6, 720(1)
94; CHECK-NEXT:    ld 24, 792(1)
95; CHECK-NEXT:    std 10, 72(1) # 8-byte Folded Spill
96; CHECK-NEXT:    std 7, 80(1) # 8-byte Folded Spill
97; CHECK-NEXT:    addi 3, 3, -2
98; CHECK-NEXT:    lxv 6, 0(19)
99; CHECK-NEXT:    lxv 11, 0(7)
100; CHECK-NEXT:    std 5, 200(1) # 8-byte Folded Spill
101; CHECK-NEXT:    std 23, 40(1) # 8-byte Folded Spill
102; CHECK-NEXT:    std 6, 48(1) # 8-byte Folded Spill
103; CHECK-NEXT:    ld 5, 840(1)
104; CHECK-NEXT:    lxv 12, 0(6)
105; CHECK-NEXT:    rldicl 12, 3, 61, 3
106; CHECK-NEXT:    std 19, 120(1) # 8-byte Folded Spill
107; CHECK-NEXT:    std 20, 128(1) # 8-byte Folded Spill
108; CHECK-NEXT:    std 21, 136(1) # 8-byte Folded Spill
109; CHECK-NEXT:    std 24, 144(1) # 8-byte Folded Spill
110; CHECK-NEXT:    lxv 4, 0(21)
111; CHECK-NEXT:    ld 25, 800(1)
112; CHECK-NEXT:    lxv 33, 0(10)
113; CHECK-NEXT:    lxv 32, 0(23)
114; CHECK-NEXT:    lxv 36, 0(30)
115; CHECK-NEXT:    std 16, 416(1) # 8-byte Folded Spill
116; CHECK-NEXT:    std 17, 424(1) # 8-byte Folded Spill
117; CHECK-NEXT:    ld 17, 752(1)
118; CHECK-NEXT:    ld 16, 744(1)
119; CHECK-NEXT:    std 28, 512(1) # 8-byte Folded Spill
120; CHECK-NEXT:    std 29, 520(1) # 8-byte Folded Spill
121; CHECK-NEXT:    ld 29, 712(1)
122; CHECK-NEXT:    ld 28, 696(1)
123; CHECK-NEXT:    std 8, 56(1) # 8-byte Folded Spill
124; CHECK-NEXT:    std 9, 64(1) # 8-byte Folded Spill
125; CHECK-NEXT:    lxv 37, 0(28)
126; CHECK-NEXT:    lxv 13, 0(29)
127; CHECK-NEXT:    mr 8, 29
128; CHECK-NEXT:    mr 9, 30
129; CHECK-NEXT:    mr 10, 28
130; CHECK-NEXT:    std 25, 152(1) # 8-byte Folded Spill
131; CHECK-NEXT:    std 26, 160(1) # 8-byte Folded Spill
132; CHECK-NEXT:    lxv 10, 0(15)
133; CHECK-NEXT:    lxv 9, 0(16)
134; CHECK-NEXT:    li 28, 1
135; CHECK-NEXT:    stfd 26, 544(1) # 8-byte Folded Spill
136; CHECK-NEXT:    stfd 27, 552(1) # 8-byte Folded Spill
137; CHECK-NEXT:    lxv 8, 0(17)
138; CHECK-NEXT:    lxv 7, 0(2)
139; CHECK-NEXT:    stfd 28, 560(1) # 8-byte Folded Spill
140; CHECK-NEXT:    stfd 29, 568(1) # 8-byte Folded Spill
141; CHECK-NEXT:    lxv 5, 0(20)
142; CHECK-NEXT:    lxv 3, 0(24)
143; CHECK-NEXT:    stfd 30, 576(1) # 8-byte Folded Spill
144; CHECK-NEXT:    stfd 31, 584(1) # 8-byte Folded Spill
145; CHECK-NEXT:    lxv 2, 0(25)
146; CHECK-NEXT:    lxv 1, 0(26)
147; CHECK-NEXT:    stxv 52, 208(1) # 16-byte Folded Spill
148; CHECK-NEXT:    stxv 53, 224(1) # 16-byte Folded Spill
149; CHECK-NEXT:    lxv 0, 0(27)
150; CHECK-NEXT:    stxv 54, 240(1) # 16-byte Folded Spill
151; CHECK-NEXT:    stxv 55, 256(1) # 16-byte Folded Spill
152; CHECK-NEXT:    stxv 56, 272(1) # 16-byte Folded Spill
153; CHECK-NEXT:    stxv 57, 288(1) # 16-byte Folded Spill
154; CHECK-NEXT:    stxv 58, 304(1) # 16-byte Folded Spill
155; CHECK-NEXT:    std 5, 192(1) # 8-byte Folded Spill
156; CHECK-NEXT:    ld 5, 832(1)
157; CHECK-NEXT:    stxv 59, 320(1) # 16-byte Folded Spill
158; CHECK-NEXT:    stxv 60, 336(1) # 16-byte Folded Spill
159; CHECK-NEXT:    stxv 61, 352(1) # 16-byte Folded Spill
160; CHECK-NEXT:    stxv 62, 368(1) # 16-byte Folded Spill
161; CHECK-NEXT:    stxv 63, 384(1) # 16-byte Folded Spill
162; CHECK-NEXT:    std 15, 88(1) # 8-byte Folded Spill
163; CHECK-NEXT:    std 16, 96(1) # 8-byte Folded Spill
164; CHECK-NEXT:    std 17, 104(1) # 8-byte Folded Spill
165; CHECK-NEXT:    std 2, 112(1) # 8-byte Folded Spill
166; CHECK-NEXT:    std 5, 184(1) # 8-byte Folded Spill
167; CHECK-NEXT:    ld 5, 824(1)
168; CHECK-NEXT:    std 5, 176(1) # 8-byte Folded Spill
169; CHECK-NEXT:    std 27, 168(1) # 8-byte Folded Spill
170; CHECK-NEXT:    lwa 5, 0(11)
171; CHECK-NEXT:    li 27, 0
172; CHECK-NEXT:    ld 7, 176(1) # 8-byte Folded Reload
173; CHECK-NEXT:    mulli 6, 5, 40
174; CHECK-NEXT:    sldi 0, 5, 4
175; CHECK-NEXT:    extswsli 14, 5, 3
176; CHECK-NEXT:    lxv 40, 0(7)
177; CHECK-NEXT:    ld 7, 184(1) # 8-byte Folded Reload
178; CHECK-NEXT:    add 31, 14, 22
179; CHECK-NEXT:    add 11, 0, 22
180; CHECK-NEXT:    mr 26, 22
181; CHECK-NEXT:    addi 3, 11, 32
182; CHECK-NEXT:    addi 11, 12, 1
183; CHECK-NEXT:    mulli 12, 5, 48
184; CHECK-NEXT:    addi 31, 31, 32
185; CHECK-NEXT:    add 19, 22, 6
186; CHECK-NEXT:    sldi 6, 5, 5
187; CHECK-NEXT:    mulli 5, 5, 24
188; CHECK-NEXT:    lxv 41, 0(7)
189; CHECK-NEXT:    add 20, 22, 6
190; CHECK-NEXT:    add 21, 22, 5
191; CHECK-NEXT:    ld 5, 192(1) # 8-byte Folded Reload
192; CHECK-NEXT:    lxv 43, 0(5)
193; CHECK-NEXT:    ld 5, 200(1) # 8-byte Folded Reload
194; CHECK-NEXT:    lxv 42, 0(5)
195; CHECK-NEXT:    .p2align 4
196; CHECK-NEXT:  .LBB0_3: # %_loop_2_do_.lr.ph
197; CHECK-NEXT:    # =>This Loop Header: Depth=1
198; CHECK-NEXT:    # Child Loop BB0_4 Depth 2
199; CHECK-NEXT:    maddld 5, 12, 27, 0
200; CHECK-NEXT:    mr 6, 18
201; CHECK-NEXT:    mr 29, 21
202; CHECK-NEXT:    mr 30, 20
203; CHECK-NEXT:    mr 2, 19
204; CHECK-NEXT:    mtctr 11
205; CHECK-NEXT:    add 25, 22, 5
206; CHECK-NEXT:    maddld 5, 12, 27, 14
207; CHECK-NEXT:    add 24, 22, 5
208; CHECK-NEXT:    mr 5, 26
209; CHECK-NEXT:    .p2align 5
210; CHECK-NEXT:  .LBB0_4: # %_loop_2_do_
211; CHECK-NEXT:    # Parent Loop BB0_3 Depth=1
212; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
213; CHECK-NEXT:    lxvp 34, 0(6)
214; CHECK-NEXT:    lxvp 44, 0(5)
215; CHECK-NEXT:    xvmaddadp 39, 45, 35
216; CHECK-NEXT:    lxvp 46, 0(24)
217; CHECK-NEXT:    xvmaddadp 38, 47, 35
218; CHECK-NEXT:    lxvp 48, 0(25)
219; CHECK-NEXT:    lxvp 50, 0(29)
220; CHECK-NEXT:    lxvp 62, 0(30)
221; CHECK-NEXT:    lxvp 60, 0(2)
222; CHECK-NEXT:    lxvp 58, 32(6)
223; CHECK-NEXT:    lxvp 56, 32(5)
224; CHECK-NEXT:    lxvp 54, 32(24)
225; CHECK-NEXT:    lxvp 52, 32(25)
226; CHECK-NEXT:    lxvp 30, 32(29)
227; CHECK-NEXT:    lxvp 28, 32(30)
228; CHECK-NEXT:    lxvp 26, 32(2)
229; CHECK-NEXT:    xvmaddadp 33, 49, 35
230; CHECK-NEXT:    xvmaddadp 32, 51, 35
231; CHECK-NEXT:    xvmaddadp 37, 63, 35
232; CHECK-NEXT:    xvmaddadp 36, 61, 35
233; CHECK-NEXT:    xvmaddadp 13, 44, 34
234; CHECK-NEXT:    xvmaddadp 12, 46, 34
235; CHECK-NEXT:    xvmaddadp 11, 48, 34
236; CHECK-NEXT:    xvmaddadp 10, 50, 34
237; CHECK-NEXT:    xvmaddadp 9, 62, 34
238; CHECK-NEXT:    xvmaddadp 8, 60, 34
239; CHECK-NEXT:    xvmaddadp 7, 57, 59
240; CHECK-NEXT:    xvmaddadp 6, 55, 59
241; CHECK-NEXT:    xvmaddadp 5, 53, 59
242; CHECK-NEXT:    xvmaddadp 4, 31, 59
243; CHECK-NEXT:    xvmaddadp 3, 29, 59
244; CHECK-NEXT:    xvmaddadp 2, 27, 59
245; CHECK-NEXT:    xvmaddadp 1, 56, 58
246; CHECK-NEXT:    xvmaddadp 0, 54, 58
247; CHECK-NEXT:    xvmaddadp 40, 52, 58
248; CHECK-NEXT:    xvmaddadp 41, 30, 58
249; CHECK-NEXT:    xvmaddadp 43, 28, 58
250; CHECK-NEXT:    xvmaddadp 42, 26, 58
251; CHECK-NEXT:    addi 6, 6, 64
252; CHECK-NEXT:    addi 5, 5, 64
253; CHECK-NEXT:    addi 24, 24, 64
254; CHECK-NEXT:    addi 25, 25, 64
255; CHECK-NEXT:    addi 29, 29, 64
256; CHECK-NEXT:    addi 30, 30, 64
257; CHECK-NEXT:    addi 2, 2, 64
258; CHECK-NEXT:    bdnz .LBB0_4
259; CHECK-NEXT:  # %bb.5: # %_loop_2_endl_
260; CHECK-NEXT:    #
261; CHECK-NEXT:    addi 28, 28, 6
262; CHECK-NEXT:    add 26, 26, 12
263; CHECK-NEXT:    add 31, 31, 12
264; CHECK-NEXT:    add 19, 19, 12
265; CHECK-NEXT:    add 3, 3, 12
266; CHECK-NEXT:    add 20, 20, 12
267; CHECK-NEXT:    add 21, 21, 12
268; CHECK-NEXT:    addi 27, 27, 1
269; CHECK-NEXT:    cmpld 28, 4
270; CHECK-NEXT:    ble 0, .LBB0_3
271; CHECK-NEXT:  # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit
272; CHECK-NEXT:    ld 3, 56(1) # 8-byte Folded Reload
273; CHECK-NEXT:    lxv 63, 384(1) # 16-byte Folded Reload
274; CHECK-NEXT:    stxv 39, 0(3)
275; CHECK-NEXT:    ld 3, 64(1) # 8-byte Folded Reload
276; CHECK-NEXT:    lxv 62, 368(1) # 16-byte Folded Reload
277; CHECK-NEXT:    lxv 61, 352(1) # 16-byte Folded Reload
278; CHECK-NEXT:    lxv 60, 336(1) # 16-byte Folded Reload
279; CHECK-NEXT:    lxv 59, 320(1) # 16-byte Folded Reload
280; CHECK-NEXT:    lxv 58, 304(1) # 16-byte Folded Reload
281; CHECK-NEXT:    lxv 57, 288(1) # 16-byte Folded Reload
282; CHECK-NEXT:    lxv 56, 272(1) # 16-byte Folded Reload
283; CHECK-NEXT:    lxv 55, 256(1) # 16-byte Folded Reload
284; CHECK-NEXT:    lxv 54, 240(1) # 16-byte Folded Reload
285; CHECK-NEXT:    lxv 53, 224(1) # 16-byte Folded Reload
286; CHECK-NEXT:    lxv 52, 208(1) # 16-byte Folded Reload
287; CHECK-NEXT:    stxv 38, 0(3)
288; CHECK-NEXT:    ld 3, 72(1) # 8-byte Folded Reload
289; CHECK-NEXT:    lfd 31, 584(1) # 8-byte Folded Reload
290; CHECK-NEXT:    lfd 30, 576(1) # 8-byte Folded Reload
291; CHECK-NEXT:    lfd 29, 568(1) # 8-byte Folded Reload
292; CHECK-NEXT:    lfd 28, 560(1) # 8-byte Folded Reload
293; CHECK-NEXT:    lfd 27, 552(1) # 8-byte Folded Reload
294; CHECK-NEXT:    lfd 26, 544(1) # 8-byte Folded Reload
295; CHECK-NEXT:    ld 31, 536(1) # 8-byte Folded Reload
296; CHECK-NEXT:    ld 30, 528(1) # 8-byte Folded Reload
297; CHECK-NEXT:    ld 29, 520(1) # 8-byte Folded Reload
298; CHECK-NEXT:    ld 28, 512(1) # 8-byte Folded Reload
299; CHECK-NEXT:    ld 27, 504(1) # 8-byte Folded Reload
300; CHECK-NEXT:    stxv 33, 0(3)
301; CHECK-NEXT:    ld 3, 40(1) # 8-byte Folded Reload
302; CHECK-NEXT:    ld 26, 496(1) # 8-byte Folded Reload
303; CHECK-NEXT:    ld 25, 488(1) # 8-byte Folded Reload
304; CHECK-NEXT:    ld 24, 480(1) # 8-byte Folded Reload
305; CHECK-NEXT:    ld 23, 472(1) # 8-byte Folded Reload
306; CHECK-NEXT:    ld 22, 464(1) # 8-byte Folded Reload
307; CHECK-NEXT:    ld 21, 456(1) # 8-byte Folded Reload
308; CHECK-NEXT:    ld 20, 448(1) # 8-byte Folded Reload
309; CHECK-NEXT:    ld 19, 440(1) # 8-byte Folded Reload
310; CHECK-NEXT:    ld 18, 432(1) # 8-byte Folded Reload
311; CHECK-NEXT:    ld 17, 424(1) # 8-byte Folded Reload
312; CHECK-NEXT:    ld 16, 416(1) # 8-byte Folded Reload
313; CHECK-NEXT:    stxv 32, 0(3)
314; CHECK-NEXT:    ld 3, 48(1) # 8-byte Folded Reload
315; CHECK-NEXT:    stxv 37, 0(10)
316; CHECK-NEXT:    stxv 36, 0(9)
317; CHECK-NEXT:    stxv 13, 0(8)
318; CHECK-NEXT:    ld 15, 408(1) # 8-byte Folded Reload
319; CHECK-NEXT:    ld 14, 400(1) # 8-byte Folded Reload
320; CHECK-NEXT:    stxv 12, 0(3)
321; CHECK-NEXT:    ld 3, 80(1) # 8-byte Folded Reload
322; CHECK-NEXT:    stxv 11, 0(3)
323; CHECK-NEXT:    ld 3, 88(1) # 8-byte Folded Reload
324; CHECK-NEXT:    stxv 10, 0(3)
325; CHECK-NEXT:    ld 3, 96(1) # 8-byte Folded Reload
326; CHECK-NEXT:    stxv 9, 0(3)
327; CHECK-NEXT:    ld 3, 104(1) # 8-byte Folded Reload
328; CHECK-NEXT:    stxv 8, 0(3)
329; CHECK-NEXT:    ld 3, 112(1) # 8-byte Folded Reload
330; CHECK-NEXT:    stxv 7, 0(3)
331; CHECK-NEXT:    ld 3, 120(1) # 8-byte Folded Reload
332; CHECK-NEXT:    stxv 6, 0(3)
333; CHECK-NEXT:    ld 3, 128(1) # 8-byte Folded Reload
334; CHECK-NEXT:    stxv 5, 0(3)
335; CHECK-NEXT:    ld 3, 136(1) # 8-byte Folded Reload
336; CHECK-NEXT:    stxv 4, 0(3)
337; CHECK-NEXT:    ld 3, 144(1) # 8-byte Folded Reload
338; CHECK-NEXT:    stxv 3, 0(3)
339; CHECK-NEXT:    ld 3, 152(1) # 8-byte Folded Reload
340; CHECK-NEXT:    stxv 2, 0(3)
341; CHECK-NEXT:    ld 3, 160(1) # 8-byte Folded Reload
342; CHECK-NEXT:    stxv 1, 0(3)
343; CHECK-NEXT:    ld 3, 168(1) # 8-byte Folded Reload
344; CHECK-NEXT:    stxv 0, 0(3)
345; CHECK-NEXT:    ld 3, 176(1) # 8-byte Folded Reload
346; CHECK-NEXT:    stxv 40, 0(3)
347; CHECK-NEXT:    ld 3, 184(1) # 8-byte Folded Reload
348; CHECK-NEXT:    stxv 41, 0(3)
349; CHECK-NEXT:    ld 3, 192(1) # 8-byte Folded Reload
350; CHECK-NEXT:    stxv 43, 0(3)
351; CHECK-NEXT:    ld 3, 200(1) # 8-byte Folded Reload
352; CHECK-NEXT:    stxv 42, 0(3)
353; CHECK-NEXT:    addi 1, 1, 592
354; CHECK-NEXT:    blr
355entry:
356  %_val_l_ = load i32, ptr %.l, align 4
357  %_conv = sext i32 %_val_l_ to i64
358  %_mult_tmp = shl nsw i64 %_conv, 3
359  %_sub_tmp4 = sub nuw nsw i64 -8, %_mult_tmp
360  %_val_n_ = load i32, ptr %.n, align 4
361  %_leq_tmp.not116 = icmp slt i32 %_val_n_, 1
362  br i1 %_leq_tmp.not116, label %_return_bb, label %_loop_1_do_.lr.ph
363
364_loop_1_do_.lr.ph:                                ; preds = %entry
365  %_val_m_ = load i32, ptr %.m, align 4
366  %_leq_tmp6.not114 = icmp slt i32 %_val_m_, 1
367  br i1 %_leq_tmp6.not114, label %_return_bb, label %_loop_1_do_.preheader
368
369_loop_1_do_.preheader:                            ; preds = %_loop_1_do_.lr.ph
370  %x_rvo_based_addr_112 = getelementptr inbounds [0 x %_elem_type_of_x], ptr %.x, i64 0, i64 -1
371  %a_rvo_based_addr_ = getelementptr inbounds i8, ptr %.a, i64 %_sub_tmp4
372  %.vy01.promoted = load <2 x double>, ptr %.vy01, align 16
373  %.vy02.promoted = load <2 x double>, ptr %.vy02, align 16
374  %.vy03.promoted = load <2 x double>, ptr %.vy03, align 16
375  %.vy04.promoted = load <2 x double>, ptr %.vy04, align 16
376  %.vy05.promoted = load <2 x double>, ptr %.vy05, align 16
377  %.vy06.promoted = load <2 x double>, ptr %.vy06, align 16
378  %.vy07.promoted = load <2 x double>, ptr %.vy07, align 16
379  %.vy08.promoted = load <2 x double>, ptr %.vy08, align 16
380  %.vy09.promoted = load <2 x double>, ptr %.vy09, align 16
381  %.vy0a.promoted = load <2 x double>, ptr %.vy0a, align 16
382  %.vy0b.promoted = load <2 x double>, ptr %.vy0b, align 16
383  %.vy0c.promoted = load <2 x double>, ptr %.vy0c, align 16
384  %.vy21.promoted = load <2 x double>, ptr %.vy21, align 16
385  %.vy22.promoted = load <2 x double>, ptr %.vy22, align 16
386  %.vy23.promoted = load <2 x double>, ptr %.vy23, align 16
387  %.vy24.promoted = load <2 x double>, ptr %.vy24, align 16
388  %.vy25.promoted = load <2 x double>, ptr %.vy25, align 16
389  %.vy26.promoted = load <2 x double>, ptr %.vy26, align 16
390  %.vy27.promoted = load <2 x double>, ptr %.vy27, align 16
391  %.vy28.promoted = load <2 x double>, ptr %.vy28, align 16
392  %.vy29.promoted = load <2 x double>, ptr %.vy29, align 16
393  %.vy2a.promoted = load <2 x double>, ptr %.vy2a, align 16
394  %.vy2b.promoted = load <2 x double>, ptr %.vy2b, align 16
395  %.vy2c.promoted = load <2 x double>, ptr %.vy2c, align 16
396  %i = zext i32 %_val_m_ to i64
397  %i1 = zext i32 %_val_n_ to i64
398  br label %_loop_2_do_.lr.ph
399
400_loop_2_do_.lr.ph:                                ; preds = %_loop_2_endl_, %_loop_1_do_.preheader
401  %indvars.iv212 = phi i64 [ %indvars.iv.next213, %_loop_2_endl_ ], [ 1, %_loop_1_do_.preheader ]
402  %i2 = phi <2 x double> [ %i142, %_loop_2_endl_ ], [ %.vy2c.promoted, %_loop_1_do_.preheader ]
403  %i3 = phi <2 x double> [ %i140, %_loop_2_endl_ ], [ %.vy2b.promoted, %_loop_1_do_.preheader ]
404  %i4 = phi <2 x double> [ %i138, %_loop_2_endl_ ], [ %.vy2a.promoted, %_loop_1_do_.preheader ]
405  %i5 = phi <2 x double> [ %i136, %_loop_2_endl_ ], [ %.vy29.promoted, %_loop_1_do_.preheader ]
406  %i6 = phi <2 x double> [ %i134, %_loop_2_endl_ ], [ %.vy28.promoted, %_loop_1_do_.preheader ]
407  %i7 = phi <2 x double> [ %i132, %_loop_2_endl_ ], [ %.vy27.promoted, %_loop_1_do_.preheader ]
408  %i8 = phi <2 x double> [ %i129, %_loop_2_endl_ ], [ %.vy26.promoted, %_loop_1_do_.preheader ]
409  %i9 = phi <2 x double> [ %i127, %_loop_2_endl_ ], [ %.vy25.promoted, %_loop_1_do_.preheader ]
410  %i10 = phi <2 x double> [ %i125, %_loop_2_endl_ ], [ %.vy24.promoted, %_loop_1_do_.preheader ]
411  %i11 = phi <2 x double> [ %i123, %_loop_2_endl_ ], [ %.vy23.promoted, %_loop_1_do_.preheader ]
412  %i12 = phi <2 x double> [ %i121, %_loop_2_endl_ ], [ %.vy22.promoted, %_loop_1_do_.preheader ]
413  %i13 = phi <2 x double> [ %i119, %_loop_2_endl_ ], [ %.vy21.promoted, %_loop_1_do_.preheader ]
414  %i14 = phi <2 x double> [ %i116, %_loop_2_endl_ ], [ %.vy0c.promoted, %_loop_1_do_.preheader ]
415  %i15 = phi <2 x double> [ %i114, %_loop_2_endl_ ], [ %.vy0b.promoted, %_loop_1_do_.preheader ]
416  %i16 = phi <2 x double> [ %i112, %_loop_2_endl_ ], [ %.vy0a.promoted, %_loop_1_do_.preheader ]
417  %i17 = phi <2 x double> [ %i110, %_loop_2_endl_ ], [ %.vy09.promoted, %_loop_1_do_.preheader ]
418  %i18 = phi <2 x double> [ %i108, %_loop_2_endl_ ], [ %.vy08.promoted, %_loop_1_do_.preheader ]
419  %i19 = phi <2 x double> [ %i106, %_loop_2_endl_ ], [ %.vy07.promoted, %_loop_1_do_.preheader ]
420  %i20 = phi <2 x double> [ %i81, %_loop_2_endl_ ], [ %.vy06.promoted, %_loop_1_do_.preheader ]
421  %i21 = phi <2 x double> [ %i79, %_loop_2_endl_ ], [ %.vy05.promoted, %_loop_1_do_.preheader ]
422  %i22 = phi <2 x double> [ %i77, %_loop_2_endl_ ], [ %.vy04.promoted, %_loop_1_do_.preheader ]
423  %i23 = phi <2 x double> [ %i75, %_loop_2_endl_ ], [ %.vy03.promoted, %_loop_1_do_.preheader ]
424  %i24 = phi <2 x double> [ %i73, %_loop_2_endl_ ], [ %.vy02.promoted, %_loop_1_do_.preheader ]
425  %i25 = phi <2 x double> [ %i71, %_loop_2_endl_ ], [ %.vy01.promoted, %_loop_1_do_.preheader ]
426  %_ix_x_len10 = mul i64 %_mult_tmp, %indvars.iv212
427  %a_ix_dim_0_ = getelementptr inbounds i8, ptr %a_rvo_based_addr_, i64 %_ix_x_len10
428  %i26 = add nuw nsw i64 %indvars.iv212, 1
429  %_ix_x_len24 = mul i64 %_mult_tmp, %i26
430  %a_ix_dim_0_25 = getelementptr inbounds i8, ptr %a_rvo_based_addr_, i64 %_ix_x_len24
431  %i27 = add nuw nsw i64 %indvars.iv212, 2
432  %_ix_x_len40 = mul i64 %_mult_tmp, %i27
433  %a_ix_dim_0_41 = getelementptr inbounds i8, ptr %a_rvo_based_addr_, i64 %_ix_x_len40
434  %i28 = add nuw nsw i64 %indvars.iv212, 3
435  %_ix_x_len56 = mul i64 %_mult_tmp, %i28
436  %a_ix_dim_0_57 = getelementptr inbounds i8, ptr %a_rvo_based_addr_, i64 %_ix_x_len56
437  %i29 = add nuw nsw i64 %indvars.iv212, 4
438  %_ix_x_len72 = mul i64 %_mult_tmp, %i29
439  %a_ix_dim_0_73 = getelementptr inbounds i8, ptr %a_rvo_based_addr_, i64 %_ix_x_len72
440  %i30 = add nuw nsw i64 %indvars.iv212, 5
441  %_ix_x_len88 = mul i64 %_mult_tmp, %i30
442  %a_ix_dim_0_89 = getelementptr inbounds i8, ptr %a_rvo_based_addr_, i64 %_ix_x_len88
443  br label %_loop_2_do_
444
445_loop_2_do_:                                      ; preds = %_loop_2_do_, %_loop_2_do_.lr.ph
446  %indvars.iv = phi i64 [ 1, %_loop_2_do_.lr.ph ], [ %indvars.iv.next, %_loop_2_do_ ]
447  %i31 = phi <2 x double> [ %i2, %_loop_2_do_.lr.ph ], [ %i142, %_loop_2_do_ ]
448  %i32 = phi <2 x double> [ %i3, %_loop_2_do_.lr.ph ], [ %i140, %_loop_2_do_ ]
449  %i33 = phi <2 x double> [ %i4, %_loop_2_do_.lr.ph ], [ %i138, %_loop_2_do_ ]
450  %i34 = phi <2 x double> [ %i5, %_loop_2_do_.lr.ph ], [ %i136, %_loop_2_do_ ]
451  %i35 = phi <2 x double> [ %i6, %_loop_2_do_.lr.ph ], [ %i134, %_loop_2_do_ ]
452  %i36 = phi <2 x double> [ %i7, %_loop_2_do_.lr.ph ], [ %i132, %_loop_2_do_ ]
453  %i37 = phi <2 x double> [ %i8, %_loop_2_do_.lr.ph ], [ %i129, %_loop_2_do_ ]
454  %i38 = phi <2 x double> [ %i9, %_loop_2_do_.lr.ph ], [ %i127, %_loop_2_do_ ]
455  %i39 = phi <2 x double> [ %i10, %_loop_2_do_.lr.ph ], [ %i125, %_loop_2_do_ ]
456  %i40 = phi <2 x double> [ %i11, %_loop_2_do_.lr.ph ], [ %i123, %_loop_2_do_ ]
457  %i41 = phi <2 x double> [ %i12, %_loop_2_do_.lr.ph ], [ %i121, %_loop_2_do_ ]
458  %i42 = phi <2 x double> [ %i13, %_loop_2_do_.lr.ph ], [ %i119, %_loop_2_do_ ]
459  %i43 = phi <2 x double> [ %i14, %_loop_2_do_.lr.ph ], [ %i116, %_loop_2_do_ ]
460  %i44 = phi <2 x double> [ %i15, %_loop_2_do_.lr.ph ], [ %i114, %_loop_2_do_ ]
461  %i45 = phi <2 x double> [ %i16, %_loop_2_do_.lr.ph ], [ %i112, %_loop_2_do_ ]
462  %i46 = phi <2 x double> [ %i17, %_loop_2_do_.lr.ph ], [ %i110, %_loop_2_do_ ]
463  %i47 = phi <2 x double> [ %i18, %_loop_2_do_.lr.ph ], [ %i108, %_loop_2_do_ ]
464  %i48 = phi <2 x double> [ %i19, %_loop_2_do_.lr.ph ], [ %i106, %_loop_2_do_ ]
465  %i49 = phi <2 x double> [ %i20, %_loop_2_do_.lr.ph ], [ %i81, %_loop_2_do_ ]
466  %i50 = phi <2 x double> [ %i21, %_loop_2_do_.lr.ph ], [ %i79, %_loop_2_do_ ]
467  %i51 = phi <2 x double> [ %i22, %_loop_2_do_.lr.ph ], [ %i77, %_loop_2_do_ ]
468  %i52 = phi <2 x double> [ %i23, %_loop_2_do_.lr.ph ], [ %i75, %_loop_2_do_ ]
469  %i53 = phi <2 x double> [ %i24, %_loop_2_do_.lr.ph ], [ %i73, %_loop_2_do_ ]
470  %i54 = phi <2 x double> [ %i25, %_loop_2_do_.lr.ph ], [ %i71, %_loop_2_do_ ]
471  %_ix_x_len = shl nuw nsw i64 %indvars.iv, 3
472  %x_ix_dim_0_113 = getelementptr inbounds %_elem_type_of_x, ptr %x_rvo_based_addr_112, i64 %indvars.iv
473  %i55 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull %x_ix_dim_0_113)
474  %a_ix_dim_1_ = getelementptr inbounds i8, ptr %a_ix_dim_0_, i64 %_ix_x_len
475  %i56 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull %a_ix_dim_1_)
476  %a_ix_dim_1_29 = getelementptr inbounds i8, ptr %a_ix_dim_0_25, i64 %_ix_x_len
477  %i57 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull %a_ix_dim_1_29)
478  %a_ix_dim_1_45 = getelementptr inbounds i8, ptr %a_ix_dim_0_41, i64 %_ix_x_len
479  %i58 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull %a_ix_dim_1_45)
480  %a_ix_dim_1_61 = getelementptr inbounds i8, ptr %a_ix_dim_0_57, i64 %_ix_x_len
481  %i59 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull %a_ix_dim_1_61)
482  %a_ix_dim_1_77 = getelementptr inbounds i8, ptr %a_ix_dim_0_73, i64 %_ix_x_len
483  %i60 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull %a_ix_dim_1_77)
484  %a_ix_dim_1_93 = getelementptr inbounds i8, ptr %a_ix_dim_0_89, i64 %_ix_x_len
485  %i61 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr nonnull %a_ix_dim_1_93)
486  %i62 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i55)
487  %.fca.0.extract35 = extractvalue { <16 x i8>, <16 x i8> } %i62, 0
488  %.fca.1.extract36 = extractvalue { <16 x i8>, <16 x i8> } %i62, 1
489  %i63 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i56)
490  %.fca.0.extract29 = extractvalue { <16 x i8>, <16 x i8> } %i63, 0
491  %.fca.1.extract30 = extractvalue { <16 x i8>, <16 x i8> } %i63, 1
492  %i64 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i57)
493  %.fca.0.extract23 = extractvalue { <16 x i8>, <16 x i8> } %i64, 0
494  %.fca.1.extract24 = extractvalue { <16 x i8>, <16 x i8> } %i64, 1
495  %i65 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i58)
496  %.fca.0.extract17 = extractvalue { <16 x i8>, <16 x i8> } %i65, 0
497  %.fca.1.extract18 = extractvalue { <16 x i8>, <16 x i8> } %i65, 1
498  %i66 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i59)
499  %.fca.0.extract11 = extractvalue { <16 x i8>, <16 x i8> } %i66, 0
500  %.fca.1.extract12 = extractvalue { <16 x i8>, <16 x i8> } %i66, 1
501  %i67 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i60)
502  %.fca.0.extract5 = extractvalue { <16 x i8>, <16 x i8> } %i67, 0
503  %.fca.1.extract6 = extractvalue { <16 x i8>, <16 x i8> } %i67, 1
504  %i68 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i61)
505  %.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %i68, 0
506  %.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %i68, 1
507  %i69 = bitcast <16 x i8> %.fca.0.extract29 to <2 x double>
508  %i70 = bitcast <16 x i8> %.fca.0.extract35 to <2 x double>
509  %i71 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i69, <2 x double> %i70, <2 x double> %i54)
510  %i72 = bitcast <16 x i8> %.fca.0.extract23 to <2 x double>
511  %i73 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i72, <2 x double> %i70, <2 x double> %i53)
512  %i74 = bitcast <16 x i8> %.fca.0.extract17 to <2 x double>
513  %i75 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i74, <2 x double> %i70, <2 x double> %i52)
514  %i76 = bitcast <16 x i8> %.fca.0.extract11 to <2 x double>
515  %i77 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i76, <2 x double> %i70, <2 x double> %i51)
516  %i78 = bitcast <16 x i8> %.fca.0.extract5 to <2 x double>
517  %i79 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i78, <2 x double> %i70, <2 x double> %i50)
518  %i80 = bitcast <16 x i8> %.fca.0.extract to <2 x double>
519  %i81 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i80, <2 x double> %i70, <2 x double> %i49)
520  %i82 = getelementptr %_elem_type_of_x, ptr %x_ix_dim_0_113, i64 4
521  %i84 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %i82)
522  %i85 = getelementptr i8, ptr %a_ix_dim_1_, i64 32
523  %i86 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %i85)
524  %i87 = getelementptr i8, ptr %a_ix_dim_1_29, i64 32
525  %i88 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %i87)
526  %i89 = getelementptr i8, ptr %a_ix_dim_1_45, i64 32
527  %i90 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %i89)
528  %i91 = getelementptr i8, ptr %a_ix_dim_1_61, i64 32
529  %i92 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %i91)
530  %i93 = getelementptr i8, ptr %a_ix_dim_1_77, i64 32
531  %i94 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %i93)
532  %i95 = getelementptr i8, ptr %a_ix_dim_1_93, i64 32
533  %i96 = tail call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %i95)
534  %i97 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i84)
535  %.fca.0.extract37 = extractvalue { <16 x i8>, <16 x i8> } %i97, 0
536  %.fca.1.extract39 = extractvalue { <16 x i8>, <16 x i8> } %i97, 1
537  %i98 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i86)
538  %.fca.0.extract31 = extractvalue { <16 x i8>, <16 x i8> } %i98, 0
539  %.fca.1.extract33 = extractvalue { <16 x i8>, <16 x i8> } %i98, 1
540  %i99 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i88)
541  %.fca.0.extract25 = extractvalue { <16 x i8>, <16 x i8> } %i99, 0
542  %.fca.1.extract27 = extractvalue { <16 x i8>, <16 x i8> } %i99, 1
543  %i100 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i90)
544  %.fca.0.extract19 = extractvalue { <16 x i8>, <16 x i8> } %i100, 0
545  %.fca.1.extract21 = extractvalue { <16 x i8>, <16 x i8> } %i100, 1
546  %i101 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i92)
547  %.fca.0.extract13 = extractvalue { <16 x i8>, <16 x i8> } %i101, 0
548  %.fca.1.extract15 = extractvalue { <16 x i8>, <16 x i8> } %i101, 1
549  %i102 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i94)
550  %.fca.0.extract7 = extractvalue { <16 x i8>, <16 x i8> } %i102, 0
551  %.fca.1.extract9 = extractvalue { <16 x i8>, <16 x i8> } %i102, 1
552  %i103 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i96)
553  %.fca.0.extract1 = extractvalue { <16 x i8>, <16 x i8> } %i103, 0
554  %.fca.1.extract3 = extractvalue { <16 x i8>, <16 x i8> } %i103, 1
555  %i104 = bitcast <16 x i8> %.fca.1.extract30 to <2 x double>
556  %i105 = bitcast <16 x i8> %.fca.1.extract36 to <2 x double>
557  %i106 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i104, <2 x double> %i105, <2 x double> %i48)
558  %i107 = bitcast <16 x i8> %.fca.1.extract24 to <2 x double>
559  %i108 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i107, <2 x double> %i105, <2 x double> %i47)
560  %i109 = bitcast <16 x i8> %.fca.1.extract18 to <2 x double>
561  %i110 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i109, <2 x double> %i105, <2 x double> %i46)
562  %i111 = bitcast <16 x i8> %.fca.1.extract12 to <2 x double>
563  %i112 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i111, <2 x double> %i105, <2 x double> %i45)
564  %i113 = bitcast <16 x i8> %.fca.1.extract6 to <2 x double>
565  %i114 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i113, <2 x double> %i105, <2 x double> %i44)
566  %i115 = bitcast <16 x i8> %.fca.1.extract to <2 x double>
567  %i116 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i115, <2 x double> %i105, <2 x double> %i43)
568  %i117 = bitcast <16 x i8> %.fca.0.extract31 to <2 x double>
569  %i118 = bitcast <16 x i8> %.fca.0.extract37 to <2 x double>
570  %i119 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i117, <2 x double> %i118, <2 x double> %i42)
571  %i120 = bitcast <16 x i8> %.fca.0.extract25 to <2 x double>
572  %i121 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i120, <2 x double> %i118, <2 x double> %i41)
573  %i122 = bitcast <16 x i8> %.fca.0.extract19 to <2 x double>
574  %i123 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i122, <2 x double> %i118, <2 x double> %i40)
575  %i124 = bitcast <16 x i8> %.fca.0.extract13 to <2 x double>
576  %i125 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i124, <2 x double> %i118, <2 x double> %i39)
577  %i126 = bitcast <16 x i8> %.fca.0.extract7 to <2 x double>
578  %i127 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i126, <2 x double> %i118, <2 x double> %i38)
579  %i128 = bitcast <16 x i8> %.fca.0.extract1 to <2 x double>
580  %i129 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i128, <2 x double> %i118, <2 x double> %i37)
581  %i130 = bitcast <16 x i8> %.fca.1.extract33 to <2 x double>
582  %i131 = bitcast <16 x i8> %.fca.1.extract39 to <2 x double>
583  %i132 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i130, <2 x double> %i131, <2 x double> %i36)
584  %i133 = bitcast <16 x i8> %.fca.1.extract27 to <2 x double>
585  %i134 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i133, <2 x double> %i131, <2 x double> %i35)
586  %i135 = bitcast <16 x i8> %.fca.1.extract21 to <2 x double>
587  %i136 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i135, <2 x double> %i131, <2 x double> %i34)
588  %i137 = bitcast <16 x i8> %.fca.1.extract15 to <2 x double>
589  %i138 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i137, <2 x double> %i131, <2 x double> %i33)
590  %i139 = bitcast <16 x i8> %.fca.1.extract9 to <2 x double>
591  %i140 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i139, <2 x double> %i131, <2 x double> %i32)
592  %i141 = bitcast <16 x i8> %.fca.1.extract3 to <2 x double>
593  %i142 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i141, <2 x double> %i131, <2 x double> %i31)
594  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 8
595  %_leq_tmp6.not = icmp ugt i64 %indvars.iv.next, %i
596  br i1 %_leq_tmp6.not, label %_loop_2_endl_, label %_loop_2_do_
597
598_loop_2_endl_:                                    ; preds = %_loop_2_do_
599  %indvars.iv.next213 = add nuw nsw i64 %indvars.iv212, 6
600  %_leq_tmp.not = icmp ugt i64 %indvars.iv.next213, %i1
601  br i1 %_leq_tmp.not, label %_loop_1_loopHeader_._return_bb_crit_edge.loopexit, label %_loop_2_do_.lr.ph
602
603_loop_1_loopHeader_._return_bb_crit_edge.loopexit: ; preds = %_loop_2_endl_
604  store <2 x double> %i71, ptr %.vy01, align 16
605  store <2 x double> %i73, ptr %.vy02, align 16
606  store <2 x double> %i75, ptr %.vy03, align 16
607  store <2 x double> %i77, ptr %.vy04, align 16
608  store <2 x double> %i79, ptr %.vy05, align 16
609  store <2 x double> %i81, ptr %.vy06, align 16
610  store <2 x double> %i106, ptr %.vy07, align 16
611  store <2 x double> %i108, ptr %.vy08, align 16
612  store <2 x double> %i110, ptr %.vy09, align 16
613  store <2 x double> %i112, ptr %.vy0a, align 16
614  store <2 x double> %i114, ptr %.vy0b, align 16
615  store <2 x double> %i116, ptr %.vy0c, align 16
616  store <2 x double> %i119, ptr %.vy21, align 16
617  store <2 x double> %i121, ptr %.vy22, align 16
618  store <2 x double> %i123, ptr %.vy23, align 16
619  store <2 x double> %i125, ptr %.vy24, align 16
620  store <2 x double> %i127, ptr %.vy25, align 16
621  store <2 x double> %i129, ptr %.vy26, align 16
622  store <2 x double> %i132, ptr %.vy27, align 16
623  store <2 x double> %i134, ptr %.vy28, align 16
624  store <2 x double> %i136, ptr %.vy29, align 16
625  store <2 x double> %i138, ptr %.vy2a, align 16
626  store <2 x double> %i140, ptr %.vy2b, align 16
627  store <2 x double> %i142, ptr %.vy2c, align 16
628  br label %_return_bb
629
630_return_bb:                                       ; preds = %_loop_1_loopHeader_._return_bb_crit_edge.loopexit, %_loop_1_do_.lr.ph, %entry
631  ret void
632}
633
; Intrinsic declarations for this test file.
;
; llvm.ppc.vsx.lxvp: takes a single pointer and returns a <256 x i1> value.
; NOTE(review): presumably this produces the <256 x i1> operands that are later
; passed to disassemble.pair; its call sites are not next to these
; declarations, so confirm against the body of @foo.
634declare <256 x i1> @llvm.ppc.vsx.lxvp(ptr)
; llvm.ppc.vsx.disassemble.pair: takes a <256 x i1> and returns a two-member
; struct of <16 x i8>. @foo reads both members with extractvalue (indices 0
; and 1) and bitcasts each one to <2 x double> before using it.
635declare { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1>)
; llvm.fma.v2f64: three <2 x double> operands, <2 x double> result. @foo
; invokes it as `tail call contract`, and the values it returns are what the
; loop-exit block stores through the %.vy* pointer arguments.
636declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
637
638