xref: /llvm-project/llvm/test/CodeGen/SystemZ/fp-move-02.ll (revision 0a76f7d9d8c1fc693568ed26420c47d92a6ba0e7)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; Test moves between FPRs and GPRs.  The 32-bit cases test the z10
; implementation, which has no high-word support.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s

; External callees and globals used by the spill/reload stress tests below.
declare i64 @foo()
declare double @bar()
@dptr = external global double
@iptr = external global i64
11
; Test 32-bit moves from GPRs to FPRs.  The GPR must be moved into the high
; 32 bits of the FPR.
define float @f1(i32 %a) {
; CHECK-LABEL: f1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $r2l killed $r2l def $r2d
; CHECK-NEXT:    sllg %r0, %r2, 32
; CHECK-NEXT:    ldgr %f0, %r0
; CHECK-NEXT:    # kill: def $f0s killed $f0s killed $f0d
; CHECK-NEXT:    br %r14
  %res = bitcast i32 %a to float
  ret float %res
}
25
; Like f1, but create a situation where the shift can be folded with
; surrounding code.
define float @f2(i64 %big) {
; CHECK-LABEL: f2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    risbg %r0, %r2, 0, 159, 31
; CHECK-NEXT:    ldgr %f0, %r0
; CHECK-NEXT:    # kill: def $f0s killed $f0s killed $f0d
; CHECK-NEXT:    br %r14
  %shift = lshr i64 %big, 1
  %a = trunc i64 %shift to i32
  %res = bitcast i32 %a to float
  ret float %res
}
40
; Another example of the same thing.
define float @f3(i64 %big) {
; CHECK-LABEL: f3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    risbg %r0, %r2, 0, 159, 2
; CHECK-NEXT:    ldgr %f0, %r0
; CHECK-NEXT:    # kill: def $f0s killed $f0s killed $f0d
; CHECK-NEXT:    br %r14
  %shift = ashr i64 %big, 30
  %a = trunc i64 %shift to i32
  %res = bitcast i32 %a to float
  ret float %res
}
54
; Like f1, but the value to transfer is already in the high 32 bits.
define float @f4(i64 %big) {
; CHECK-LABEL: f4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    nilf %r2, 0
; CHECK-NEXT:    ldgr %f0, %r2
; CHECK-NEXT:    # kill: def $f0s killed $f0s killed $f0d
; CHECK-NEXT:    br %r14
  %shift = ashr i64 %big, 32
  %a = trunc i64 %shift to i32
  %res = bitcast i32 %a to float
  ret float %res
}
68
; Test 64-bit moves from GPRs to FPRs.
define double @f5(i64 %a) {
; CHECK-LABEL: f5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ldgr %f0, %r2
; CHECK-NEXT:    br %r14
  %res = bitcast i64 %a to double
  ret double %res
}
78
; Test 128-bit moves from GPRs to FPRs.  i128 isn't a legitimate type,
; so this goes through memory.
define void @f6(ptr %a, ptr %b) {
; CHECK-LABEL: f6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lg %r0, 8(%r3)
; CHECK-NEXT:    lg %r1, 0(%r3)
; CHECK-NEXT:    stg %r0, 8(%r2)
; CHECK-NEXT:    stg %r1, 0(%r2)
; CHECK-NEXT:    br %r14
  %val = load i128, ptr %b
  %res = bitcast i128 %val to fp128
  store fp128 %res, ptr %a
  ret void
}
94
; Test 32-bit moves from FPRs to GPRs.  The high 32 bits of the FPR should
; be moved into the low 32 bits of the GPR.
define i32 @f7(float %a) {
; CHECK-LABEL: f7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $f0s killed $f0s def $f0d
; CHECK-NEXT:    lgdr %r0, %f0
; CHECK-NEXT:    srlg %r2, %r0, 32
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %res = bitcast float %a to i32
  ret i32 %res
}
108
; Test 64-bit moves from FPRs to GPRs.
define i64 @f8(double %a) {
; CHECK-LABEL: f8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lgdr %r2, %f0
; CHECK-NEXT:    br %r14
  %res = bitcast double %a to i64
  ret i64 %res
}
118
; Test 128-bit moves from FPRs to GPRs, with the same restriction as f6.
define void @f9(ptr %a, ptr %b) {
; CHECK-LABEL: f9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ld %f0, 0(%r2)
; CHECK-NEXT:    ld %f2, 8(%r2)
; CHECK-NEXT:    std %f0, 0(%r3)
; CHECK-NEXT:    std %f2, 8(%r3)
; CHECK-NEXT:    br %r14
  %val = load fp128, ptr %a
  %res = bitcast fp128 %val to i128
  store i128 %res, ptr %b
  ret void
}
133
; Test cases where the destination of an LGDR needs to be spilled.
; We shouldn't have any integer stack stores or floating-point loads.
define void @f10(double %extra) {
; CHECK-LABEL: f10:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stmg %r6, %r15, 48(%r15)
; CHECK-NEXT:    .cfi_offset %r6, -112
; CHECK-NEXT:    .cfi_offset %r7, -104
; CHECK-NEXT:    .cfi_offset %r8, -96
; CHECK-NEXT:    .cfi_offset %r9, -88
; CHECK-NEXT:    .cfi_offset %r10, -80
; CHECK-NEXT:    .cfi_offset %r11, -72
; CHECK-NEXT:    .cfi_offset %r12, -64
; CHECK-NEXT:    .cfi_offset %r13, -56
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -184
; CHECK-NEXT:    .cfi_def_cfa_offset 344
; CHECK-NEXT:    lgrl %r1, dptr@GOT
; CHECK-NEXT:    ldr %f1, %f0
; CHECK-NEXT:    adb %f1, 0(%r1)
; CHECK-NEXT:    ldr %f2, %f0
; CHECK-NEXT:    adb %f2, 0(%r1)
; CHECK-NEXT:    ldr %f3, %f0
; CHECK-NEXT:    adb %f3, 0(%r1)
; CHECK-NEXT:    std %f1, 176(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f2, 168(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f3, 160(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    ldr %f1, %f0
; CHECK-NEXT:    adb %f1, 0(%r1)
; CHECK-NEXT:    ldr %f2, %f0
; CHECK-NEXT:    adb %f2, 0(%r1)
; CHECK-NEXT:    ldr %f3, %f0
; CHECK-NEXT:    adb %f3, 0(%r1)
; CHECK-NEXT:    ldr %f4, %f0
; CHECK-NEXT:    adb %f4, 0(%r1)
; CHECK-NEXT:    lgdr %r10, %f1
; CHECK-NEXT:    lgdr %r9, %f2
; CHECK-NEXT:    lgdr %r8, %f3
; CHECK-NEXT:    lgdr %r7, %f4
; CHECK-NEXT:    ldr %f1, %f0
; CHECK-NEXT:    adb %f1, 0(%r1)
; CHECK-NEXT:    ldr %f2, %f0
; CHECK-NEXT:    adb %f2, 0(%r1)
; CHECK-NEXT:    adb %f0, 0(%r1)
; CHECK-NEXT:    lgrl %r6, iptr@GOT
; CHECK-NEXT:    lgdr %r13, %f1
; CHECK-NEXT:    lgdr %r12, %f2
; CHECK-NEXT:    lgdr %r11, %f0
; CHECK-NEXT:  .LBB9_1: # %loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    lgr %r0, %r2
; CHECK-NEXT:    og %r0, 176(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    og %r0, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    og %r0, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ogr %r0, %r10
; CHECK-NEXT:    ogr %r0, %r9
; CHECK-NEXT:    ogr %r0, %r8
; CHECK-NEXT:    ogr %r0, %r7
; CHECK-NEXT:    ogr %r0, %r13
; CHECK-NEXT:    ogr %r0, %r12
; CHECK-NEXT:    ogr %r0, %r11
; CHECK-NEXT:    stg %r0, 0(%r6)
; CHECK-NEXT:    cgijlh %r2, 1, .LBB9_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    lmg %r6, %r15, 232(%r15)
; CHECK-NEXT:    br %r14
entry:
  ; Ten i64 values (bitcast from doubles) must stay live across the call
  ; in the loop, forcing some LGDR destinations to be spilled.
  %double0 = load volatile double, ptr@dptr
  %biased0 = fadd double %double0, %extra
  %int0 = bitcast double %biased0 to i64
  %double1 = load volatile double, ptr@dptr
  %biased1 = fadd double %double1, %extra
  %int1 = bitcast double %biased1 to i64
  %double2 = load volatile double, ptr@dptr
  %biased2 = fadd double %double2, %extra
  %int2 = bitcast double %biased2 to i64
  %double3 = load volatile double, ptr@dptr
  %biased3 = fadd double %double3, %extra
  %int3 = bitcast double %biased3 to i64
  %double4 = load volatile double, ptr@dptr
  %biased4 = fadd double %double4, %extra
  %int4 = bitcast double %biased4 to i64
  %double5 = load volatile double, ptr@dptr
  %biased5 = fadd double %double5, %extra
  %int5 = bitcast double %biased5 to i64
  %double6 = load volatile double, ptr@dptr
  %biased6 = fadd double %double6, %extra
  %int6 = bitcast double %biased6 to i64
  %double7 = load volatile double, ptr@dptr
  %biased7 = fadd double %double7, %extra
  %int7 = bitcast double %biased7 to i64
  %double8 = load volatile double, ptr@dptr
  %biased8 = fadd double %double8, %extra
  %int8 = bitcast double %biased8 to i64
  %double9 = load volatile double, ptr@dptr
  %biased9 = fadd double %double9, %extra
  %int9 = bitcast double %biased9 to i64
  br label %loop

loop:
  %start = call i64 @foo()
  %or0 = or i64 %start, %int0
  %or1 = or i64 %or0, %int1
  %or2 = or i64 %or1, %int2
  %or3 = or i64 %or2, %int3
  %or4 = or i64 %or3, %int4
  %or5 = or i64 %or4, %int5
  %or6 = or i64 %or5, %int6
  %or7 = or i64 %or6, %int7
  %or8 = or i64 %or7, %int8
  %or9 = or i64 %or8, %int9
  store i64 %or9, ptr@iptr
  %cont = icmp ne i64 %start, 1
  br i1 %cont, label %loop, label %exit

exit:
  ret void
}
254
; ...likewise LDGR, with the requirements the other way around.
define void @f11(i64 %mask) {
; CHECK-LABEL: f11:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
; CHECK-NEXT:    .cfi_offset %r12, -64
; CHECK-NEXT:    .cfi_offset %r13, -56
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -240
; CHECK-NEXT:    .cfi_def_cfa_offset 400
; CHECK-NEXT:    std %f8, 232(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f9, 224(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f10, 216(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f11, 208(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f12, 200(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f13, 192(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f14, 184(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f15, 176(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    .cfi_offset %f8, -168
; CHECK-NEXT:    .cfi_offset %f9, -176
; CHECK-NEXT:    .cfi_offset %f10, -184
; CHECK-NEXT:    .cfi_offset %f11, -192
; CHECK-NEXT:    .cfi_offset %f12, -200
; CHECK-NEXT:    .cfi_offset %f13, -208
; CHECK-NEXT:    .cfi_offset %f14, -216
; CHECK-NEXT:    .cfi_offset %f15, -224
; CHECK-NEXT:    lgrl %r1, iptr@GOT
; CHECK-NEXT:    lgr %r0, %r2
; CHECK-NEXT:    ng %r0, 0(%r1)
; CHECK-NEXT:    lgr %r3, %r2
; CHECK-NEXT:    ng %r3, 0(%r1)
; CHECK-NEXT:    lgr %r4, %r2
; CHECK-NEXT:    ng %r4, 0(%r1)
; CHECK-NEXT:    stg %r0, 168(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    stg %r3, 160(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    lgr %r0, %r2
; CHECK-NEXT:    ng %r0, 0(%r1)
; CHECK-NEXT:    ldgr %f10, %r4
; CHECK-NEXT:    lgr %r3, %r2
; CHECK-NEXT:    ng %r3, 0(%r1)
; CHECK-NEXT:    lgr %r4, %r2
; CHECK-NEXT:    ng %r4, 0(%r1)
; CHECK-NEXT:    ldgr %f11, %r0
; CHECK-NEXT:    lgr %r0, %r2
; CHECK-NEXT:    ng %r0, 0(%r1)
; CHECK-NEXT:    ldgr %f12, %r3
; CHECK-NEXT:    ldgr %f13, %r4
; CHECK-NEXT:    lgr %r3, %r2
; CHECK-NEXT:    ng %r3, 0(%r1)
; CHECK-NEXT:    ldgr %f14, %r0
; CHECK-NEXT:    lgr %r0, %r2
; CHECK-NEXT:    ng %r0, 0(%r1)
; CHECK-NEXT:    ng %r2, 0(%r1)
; CHECK-NEXT:    ldgr %f15, %r3
; CHECK-NEXT:    lgrl %r13, dptr@GOT
; CHECK-NEXT:    ldgr %f8, %r0
; CHECK-NEXT:    ldgr %f9, %r2
; CHECK-NEXT:    larl %r12, .LCPI10_0
; CHECK-NEXT:  .LBB10_1: # %loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    brasl %r14, bar@PLT
; CHECK-NEXT:    ldr %f1, %f0
; CHECK-NEXT:    adb %f1, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    adb %f1, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    adbr %f1, %f10
; CHECK-NEXT:    adbr %f1, %f11
; CHECK-NEXT:    adbr %f1, %f12
; CHECK-NEXT:    adbr %f1, %f13
; CHECK-NEXT:    adbr %f1, %f14
; CHECK-NEXT:    adbr %f1, %f15
; CHECK-NEXT:    adbr %f1, %f8
; CHECK-NEXT:    adbr %f1, %f9
; CHECK-NEXT:    cdb %f0, 0(%r12)
; CHECK-NEXT:    std %f1, 0(%r13)
; CHECK-NEXT:    jlh .LBB10_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    ld %f8, 232(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f9, 224(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f10, 216(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f11, 208(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f12, 200(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f13, 192(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f14, 184(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f15, 176(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    lmg %r12, %r15, 336(%r15)
; CHECK-NEXT:    br %r14
entry:
  ; Ten doubles (bitcast from masked i64s) must stay live across the call
  ; in the loop, forcing some LDGR sources to be spilled.
  %int0 = load volatile i64, ptr@iptr
  %masked0 = and i64 %int0, %mask
  %double0 = bitcast i64 %masked0 to double
  %int1 = load volatile i64, ptr@iptr
  %masked1 = and i64 %int1, %mask
  %double1 = bitcast i64 %masked1 to double
  %int2 = load volatile i64, ptr@iptr
  %masked2 = and i64 %int2, %mask
  %double2 = bitcast i64 %masked2 to double
  %int3 = load volatile i64, ptr@iptr
  %masked3 = and i64 %int3, %mask
  %double3 = bitcast i64 %masked3 to double
  %int4 = load volatile i64, ptr@iptr
  %masked4 = and i64 %int4, %mask
  %double4 = bitcast i64 %masked4 to double
  %int5 = load volatile i64, ptr@iptr
  %masked5 = and i64 %int5, %mask
  %double5 = bitcast i64 %masked5 to double
  %int6 = load volatile i64, ptr@iptr
  %masked6 = and i64 %int6, %mask
  %double6 = bitcast i64 %masked6 to double
  %int7 = load volatile i64, ptr@iptr
  %masked7 = and i64 %int7, %mask
  %double7 = bitcast i64 %masked7 to double
  %int8 = load volatile i64, ptr@iptr
  %masked8 = and i64 %int8, %mask
  %double8 = bitcast i64 %masked8 to double
  %int9 = load volatile i64, ptr@iptr
  %masked9 = and i64 %int9, %mask
  %double9 = bitcast i64 %masked9 to double
  br label %loop

loop:
  %start = call double @bar()
  %add0 = fadd double %start, %double0
  %add1 = fadd double %add0, %double1
  %add2 = fadd double %add1, %double2
  %add3 = fadd double %add2, %double3
  %add4 = fadd double %add3, %double4
  %add5 = fadd double %add4, %double5
  %add6 = fadd double %add5, %double6
  %add7 = fadd double %add6, %double7
  %add8 = fadd double %add7, %double8
  %add9 = fadd double %add8, %double9
  store double %add9, ptr@dptr
  %cont = fcmp one double %start, 1.0
  br i1 %cont, label %loop, label %exit

exit:
  ret void
}
394
; Test cases where the source of an LDGR needs to be spilled.
; We shouldn't have any integer stack stores or floating-point loads.
define void @f12() {
; CHECK-LABEL: f12:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stmg %r6, %r15, 48(%r15)
; CHECK-NEXT:    .cfi_offset %r6, -112
; CHECK-NEXT:    .cfi_offset %r7, -104
; CHECK-NEXT:    .cfi_offset %r8, -96
; CHECK-NEXT:    .cfi_offset %r9, -88
; CHECK-NEXT:    .cfi_offset %r10, -80
; CHECK-NEXT:    .cfi_offset %r11, -72
; CHECK-NEXT:    .cfi_offset %r12, -64
; CHECK-NEXT:    .cfi_offset %r13, -56
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -176
; CHECK-NEXT:    .cfi_def_cfa_offset 336
; CHECK-NEXT:    lghi %r12, 0
; CHECK-NEXT:    lghi %r13, 0
; CHECK-NEXT:    lghi %r6, 0
; CHECK-NEXT:    lghi %r7, 0
; CHECK-NEXT:    lghi %r8, 0
; CHECK-NEXT:    lghi %r9, 0
; CHECK-NEXT:    lghi %r10, 0
; CHECK-NEXT:    lghi %r11, 0
; CHECK-NEXT:    mvghi 160(%r15), 0 # 8-byte Folded Spill
; CHECK-NEXT:    mvghi 168(%r15), 0 # 8-byte Folded Spill
; CHECK-NEXT:  .LBB11_1: # %loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    agr %r12, %r2
; CHECK-NEXT:    agr %r13, %r2
; CHECK-NEXT:    agr %r6, %r2
; CHECK-NEXT:    agr %r7, %r2
; CHECK-NEXT:    agr %r8, %r2
; CHECK-NEXT:    agr %r9, %r2
; CHECK-NEXT:    agr %r10, %r2
; CHECK-NEXT:    agr %r11, %r2
; CHECK-NEXT:    lg %r0, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    agr %r0, %r2
; CHECK-NEXT:    stg %r0, 160(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    lg %r0, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    agr %r0, %r2
; CHECK-NEXT:    stg %r0, 168(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    cgijlh %r2, 1, .LBB11_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    lgrl %r1, dptr@GOT
; CHECK-NEXT:    ld %f0, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r12
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r13
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r6
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r7
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r8
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r9
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r10
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r11
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ld %f1, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ld %f1, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    lmg %r6, %r15, 224(%r15)
; CHECK-NEXT:    br %r14
entry:
  br label %loop

loop:
  ; Ten i64 accumulators live across the loop's call, so some must be
  ; spilled; their bitcasts in %exit become LDGRs whose sources reload.
  %int0 = phi i64 [ 0, %entry ], [ %add0, %loop ]
  %int1 = phi i64 [ 0, %entry ], [ %add1, %loop ]
  %int2 = phi i64 [ 0, %entry ], [ %add2, %loop ]
  %int3 = phi i64 [ 0, %entry ], [ %add3, %loop ]
  %int4 = phi i64 [ 0, %entry ], [ %add4, %loop ]
  %int5 = phi i64 [ 0, %entry ], [ %add5, %loop ]
  %int6 = phi i64 [ 0, %entry ], [ %add6, %loop ]
  %int7 = phi i64 [ 0, %entry ], [ %add7, %loop ]
  %int8 = phi i64 [ 0, %entry ], [ %add8, %loop ]
  %int9 = phi i64 [ 0, %entry ], [ %add9, %loop ]

  %bias = call i64 @foo()
  %add0 = add i64 %int0, %bias
  %add1 = add i64 %int1, %bias
  %add2 = add i64 %int2, %bias
  %add3 = add i64 %int3, %bias
  %add4 = add i64 %int4, %bias
  %add5 = add i64 %int5, %bias
  %add6 = add i64 %int6, %bias
  %add7 = add i64 %int7, %bias
  %add8 = add i64 %int8, %bias
  %add9 = add i64 %int9, %bias
  %cont = icmp ne i64 %bias, 1
  br i1 %cont, label %loop, label %exit

exit:
  %unused1 = call i64 @foo()
  %factor = load volatile double, ptr@dptr

  %conv0 = bitcast i64 %add0 to double
  %mul0 = fmul double %conv0, %factor
  store volatile double %mul0, ptr@dptr
  %conv1 = bitcast i64 %add1 to double
  %mul1 = fmul double %conv1, %factor
  store volatile double %mul1, ptr@dptr
  %conv2 = bitcast i64 %add2 to double
  %mul2 = fmul double %conv2, %factor
  store volatile double %mul2, ptr@dptr
  %conv3 = bitcast i64 %add3 to double
  %mul3 = fmul double %conv3, %factor
  store volatile double %mul3, ptr@dptr
  %conv4 = bitcast i64 %add4 to double
  %mul4 = fmul double %conv4, %factor
  store volatile double %mul4, ptr@dptr
  %conv5 = bitcast i64 %add5 to double
  %mul5 = fmul double %conv5, %factor
  store volatile double %mul5, ptr@dptr
  %conv6 = bitcast i64 %add6 to double
  %mul6 = fmul double %conv6, %factor
  store volatile double %mul6, ptr@dptr
  %conv7 = bitcast i64 %add7 to double
  %mul7 = fmul double %conv7, %factor
  store volatile double %mul7, ptr@dptr
  %conv8 = bitcast i64 %add8 to double
  %mul8 = fmul double %conv8, %factor
  store volatile double %mul8, ptr@dptr
  %conv9 = bitcast i64 %add9 to double
  %mul9 = fmul double %conv9, %factor
  store volatile double %mul9, ptr@dptr

  %unused2 = call i64 @foo()

  ret void
}
546
; ...likewise LGDR, with the requirements the other way around.
define void @f13() {
; CHECK-LABEL: f13:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
; CHECK-NEXT:    .cfi_offset %r13, -56
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -240
; CHECK-NEXT:    .cfi_def_cfa_offset 400
; CHECK-NEXT:    std %f8, 232(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f9, 224(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f10, 216(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f11, 208(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f12, 200(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f13, 192(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f14, 184(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f15, 176(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    .cfi_offset %f8, -168
; CHECK-NEXT:    .cfi_offset %f9, -176
; CHECK-NEXT:    .cfi_offset %f10, -184
; CHECK-NEXT:    .cfi_offset %f11, -192
; CHECK-NEXT:    .cfi_offset %f12, -200
; CHECK-NEXT:    .cfi_offset %f13, -208
; CHECK-NEXT:    .cfi_offset %f14, -216
; CHECK-NEXT:    .cfi_offset %f15, -224
; CHECK-NEXT:    larl %r13, .LCPI12_0
; CHECK-NEXT:    ld %f8, 0(%r13)
; CHECK-NEXT:    ldr %f9, %f8
; CHECK-NEXT:    ldr %f15, %f8
; CHECK-NEXT:    ldr %f14, %f8
; CHECK-NEXT:    ldr %f13, %f8
; CHECK-NEXT:    ldr %f12, %f8
; CHECK-NEXT:    ldr %f11, %f8
; CHECK-NEXT:    ldr %f10, %f8
; CHECK-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
; CHECK-NEXT:  .LBB12_1: # %loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    brasl %r14, bar@PLT
; CHECK-NEXT:    mdbr %f8, %f0
; CHECK-NEXT:    mdbr %f9, %f0
; CHECK-NEXT:    mdbr %f15, %f0
; CHECK-NEXT:    mdbr %f14, %f0
; CHECK-NEXT:    mdbr %f13, %f0
; CHECK-NEXT:    mdbr %f12, %f0
; CHECK-NEXT:    cdb %f0, 0(%r13)
; CHECK-NEXT:    mdbr %f11, %f0
; CHECK-NEXT:    mdbr %f10, %f0
; CHECK-NEXT:    ld %f1, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 160(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    ld %f1, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 168(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    jlh .LBB12_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    lgrl %r1, iptr@GOT
; CHECK-NEXT:    lg %r0, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f8
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f9
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f15
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f14
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f13
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f12
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f11
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f10
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lg %r2, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lg %r2, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    ld %f8, 232(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f9, 224(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f10, 216(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f11, 208(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f12, 200(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f13, 192(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f14, 184(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f15, 176(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    lmg %r13, %r15, 344(%r15)
; CHECK-NEXT:    br %r14
entry:
  br label %loop

loop:
  ; Ten double accumulators live across the loop's call, so some must be
  ; spilled; their bitcasts in %exit become LGDRs whose sources reload.
  %double0 = phi double [ 1.0, %entry ], [ %mul0, %loop ]
  %double1 = phi double [ 1.0, %entry ], [ %mul1, %loop ]
  %double2 = phi double [ 1.0, %entry ], [ %mul2, %loop ]
  %double3 = phi double [ 1.0, %entry ], [ %mul3, %loop ]
  %double4 = phi double [ 1.0, %entry ], [ %mul4, %loop ]
  %double5 = phi double [ 1.0, %entry ], [ %mul5, %loop ]
  %double6 = phi double [ 1.0, %entry ], [ %mul6, %loop ]
  %double7 = phi double [ 1.0, %entry ], [ %mul7, %loop ]
  %double8 = phi double [ 1.0, %entry ], [ %mul8, %loop ]
  %double9 = phi double [ 1.0, %entry ], [ %mul9, %loop ]

  %factor = call double @bar()
  %mul0 = fmul double %double0, %factor
  %mul1 = fmul double %double1, %factor
  %mul2 = fmul double %double2, %factor
  %mul3 = fmul double %double3, %factor
  %mul4 = fmul double %double4, %factor
  %mul5 = fmul double %double5, %factor
  %mul6 = fmul double %double6, %factor
  %mul7 = fmul double %double7, %factor
  %mul8 = fmul double %double8, %factor
  %mul9 = fmul double %double9, %factor
  %cont = fcmp one double %factor, 1.0
  br i1 %cont, label %loop, label %exit

exit:
  %unused1 = call i64 @foo()
  %bias = load volatile i64, ptr@iptr

  %conv0 = bitcast double %mul0 to i64
  %add0 = add i64 %conv0, %bias
  store volatile i64 %add0, ptr@iptr
  %conv1 = bitcast double %mul1 to i64
  %add1 = add i64 %conv1, %bias
  store volatile i64 %add1, ptr@iptr
  %conv2 = bitcast double %mul2 to i64
  %add2 = add i64 %conv2, %bias
  store volatile i64 %add2, ptr@iptr
  %conv3 = bitcast double %mul3 to i64
  %add3 = add i64 %conv3, %bias
  store volatile i64 %add3, ptr@iptr
  %conv4 = bitcast double %mul4 to i64
  %add4 = add i64 %conv4, %bias
  store volatile i64 %add4, ptr@iptr
  %conv5 = bitcast double %mul5 to i64
  %add5 = add i64 %conv5, %bias
  store volatile i64 %add5, ptr@iptr
  %conv6 = bitcast double %mul6 to i64
  %add6 = add i64 %conv6, %bias
  store volatile i64 %add6, ptr@iptr
  %conv7 = bitcast double %mul7 to i64
  %add7 = add i64 %conv7, %bias
  store volatile i64 %add7, ptr@iptr
  %conv8 = bitcast double %mul8 to i64
  %add8 = add i64 %conv8, %bias
  store volatile i64 %add8, ptr@iptr
  %conv9 = bitcast double %mul9 to i64
  %add9 = add i64 %conv9, %bias
  store volatile i64 %add9, ptr@iptr

  %unused2 = call i64 @foo()

  ret void
}