; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; Test moves between FPRs and GPRs. The 32-bit cases test the z10
; implementation, which has no high-word support.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s

declare i64 @foo()
declare double @bar()
@dptr = external global double
@iptr = external global i64

; Test 32-bit moves from GPRs to FPRs. The GPR must be moved into the high
; 32 bits of the FPR.
define float @f1(i32 %a) {
; CHECK-LABEL: f1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $r2l killed $r2l def $r2d
; CHECK-NEXT:    sllg %r0, %r2, 32
; CHECK-NEXT:    ldgr %f0, %r0
; CHECK-NEXT:    # kill: def $f0s killed $f0s killed $f0d
; CHECK-NEXT:    br %r14
  %res = bitcast i32 %a to float
  ret float %res
}

; Like f1, but create a situation where the shift can be folded with
; surrounding code.
define float @f2(i64 %big) {
; CHECK-LABEL: f2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    risbg %r0, %r2, 0, 159, 31
; CHECK-NEXT:    ldgr %f0, %r0
; CHECK-NEXT:    # kill: def $f0s killed $f0s killed $f0d
; CHECK-NEXT:    br %r14
  %shift = lshr i64 %big, 1
  %a = trunc i64 %shift to i32
  %res = bitcast i32 %a to float
  ret float %res
}

; Another example of the same thing.
define float @f3(i64 %big) {
; CHECK-LABEL: f3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    risbg %r0, %r2, 0, 159, 2
; CHECK-NEXT:    ldgr %f0, %r0
; CHECK-NEXT:    # kill: def $f0s killed $f0s killed $f0d
; CHECK-NEXT:    br %r14
  %shift = ashr i64 %big, 30
  %a = trunc i64 %shift to i32
  %res = bitcast i32 %a to float
  ret float %res
}

; Like f1, but the value to transfer is already in the high 32 bits.
define float @f4(i64 %big) {
; CHECK-LABEL: f4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    nilf %r2, 0
; CHECK-NEXT:    ldgr %f0, %r2
; CHECK-NEXT:    # kill: def $f0s killed $f0s killed $f0d
; CHECK-NEXT:    br %r14
  %shift = ashr i64 %big, 32
  %a = trunc i64 %shift to i32
  %res = bitcast i32 %a to float
  ret float %res
}

; Test 64-bit moves from GPRs to FPRs.
define double @f5(i64 %a) {
; CHECK-LABEL: f5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ldgr %f0, %r2
; CHECK-NEXT:    br %r14
  %res = bitcast i64 %a to double
  ret double %res
}

; Test 128-bit moves from GPRs to FPRs. i128 isn't a legal type,
; so this goes through memory.
define void @f6(ptr %a, ptr %b) {
; CHECK-LABEL: f6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lg %r0, 8(%r3)
; CHECK-NEXT:    lg %r1, 0(%r3)
; CHECK-NEXT:    stg %r0, 8(%r2)
; CHECK-NEXT:    stg %r1, 0(%r2)
; CHECK-NEXT:    br %r14
  %val = load i128, ptr %b
  %res = bitcast i128 %val to fp128
  store fp128 %res, ptr %a
  ret void
}

; Test 32-bit moves from FPRs to GPRs. The high 32 bits of the FPR should
; be moved into the low 32 bits of the GPR.
define i32 @f7(float %a) {
; CHECK-LABEL: f7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $f0s killed $f0s def $f0d
; CHECK-NEXT:    lgdr %r0, %f0
; CHECK-NEXT:    srlg %r2, %r0, 32
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %res = bitcast float %a to i32
  ret i32 %res
}

; Test 64-bit moves from FPRs to GPRs.
define i64 @f8(double %a) {
; CHECK-LABEL: f8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lgdr %r2, %f0
; CHECK-NEXT:    br %r14
  %res = bitcast double %a to i64
  ret i64 %res
}
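
; Note (illustrative only, not part of the checked output): z/Architecture
; keeps a short (32-bit) float in the leftmost 32 bits of its 64-bit FPR,
; so for f1 with %a = 0x3f800000 (float 1.0) the data flow is:
;   sllg %r0, %r2, 32    # %r0 = 0x3f80000000000000
;   ldgr %f0, %r0        # %f0 now holds 1.0 in its high word
; f2 and f3 fold the shift into RISBG instead: the third and fourth
; operands give the first and last selected destination bit (0 and
; 159 = 128 + 31, where the 128 bit requests zeroing of the unselected
; bits) and the fifth operand is the left-rotate amount, so a single
; RISBG performs both the IR shift and the move into the high word.
; f7 inverts f1: LGDR copies the FPR image to a GPR and SRLG shifts the
; high word down into bits 32-63.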

; Test 128-bit moves from FPRs to GPRs, with the same restriction as f6.
define void @f9(ptr %a, ptr %b) {
; CHECK-LABEL: f9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ld %f0, 0(%r2)
; CHECK-NEXT:    ld %f2, 8(%r2)
; CHECK-NEXT:    std %f0, 0(%r3)
; CHECK-NEXT:    std %f2, 8(%r3)
; CHECK-NEXT:    br %r14
  %val = load fp128, ptr %a
  %res = bitcast fp128 %val to i128
  store i128 %res, ptr %b
  ret void
}
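
; Note (illustrative): because neither i128 nor fp128 can be bitcast in
; registers here, both directions degenerate to a 16-byte copy. f6 copies
; through GPRs with LG/STG, while f9 moves the value through the FPR pair
; %f0/%f2 (an fp128 occupies two FPRs whose numbers differ by two) using
; LD/STD.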

; Test cases where the destination of an LGDR needs to be spilled.
; We shouldn't have any integer stack stores or floating-point loads.
define void @f10(double %extra) {
; CHECK-LABEL: f10:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stmg %r6, %r15, 48(%r15)
; CHECK-NEXT:    .cfi_offset %r6, -112
; CHECK-NEXT:    .cfi_offset %r7, -104
; CHECK-NEXT:    .cfi_offset %r8, -96
; CHECK-NEXT:    .cfi_offset %r9, -88
; CHECK-NEXT:    .cfi_offset %r10, -80
; CHECK-NEXT:    .cfi_offset %r11, -72
; CHECK-NEXT:    .cfi_offset %r12, -64
; CHECK-NEXT:    .cfi_offset %r13, -56
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -184
; CHECK-NEXT:    .cfi_def_cfa_offset 344
; CHECK-NEXT:    lgrl %r1, dptr@GOT
; CHECK-NEXT:    ldr %f1, %f0
; CHECK-NEXT:    adb %f1, 0(%r1)
; CHECK-NEXT:    ldr %f2, %f0
; CHECK-NEXT:    adb %f2, 0(%r1)
; CHECK-NEXT:    ldr %f3, %f0
; CHECK-NEXT:    adb %f3, 0(%r1)
; CHECK-NEXT:    std %f1, 176(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f2, 168(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f3, 160(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    ldr %f1, %f0
; CHECK-NEXT:    adb %f1, 0(%r1)
; CHECK-NEXT:    ldr %f2, %f0
; CHECK-NEXT:    adb %f2, 0(%r1)
; CHECK-NEXT:    ldr %f3, %f0
; CHECK-NEXT:    adb %f3, 0(%r1)
; CHECK-NEXT:    ldr %f4, %f0
; CHECK-NEXT:    adb %f4, 0(%r1)
; CHECK-NEXT:    lgdr %r10, %f1
; CHECK-NEXT:    lgdr %r9, %f2
; CHECK-NEXT:    lgdr %r8, %f3
; CHECK-NEXT:    lgdr %r7, %f4
; CHECK-NEXT:    ldr %f1, %f0
; CHECK-NEXT:    adb %f1, 0(%r1)
; CHECK-NEXT:    ldr %f2, %f0
; CHECK-NEXT:    adb %f2, 0(%r1)
; CHECK-NEXT:    adb %f0, 0(%r1)
; CHECK-NEXT:    lgrl %r6, iptr@GOT
; CHECK-NEXT:    lgdr %r13, %f1
; CHECK-NEXT:    lgdr %r12, %f2
; CHECK-NEXT:    lgdr %r11, %f0
; CHECK-NEXT:  .LBB9_1: # %loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    lgr %r0, %r2
; CHECK-NEXT:    og %r0, 176(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    og %r0, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    og %r0, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ogr %r0, %r10
; CHECK-NEXT:    ogr %r0, %r9
; CHECK-NEXT:    ogr %r0, %r8
; CHECK-NEXT:    ogr %r0, %r7
; CHECK-NEXT:    ogr %r0, %r13
; CHECK-NEXT:    ogr %r0, %r12
; CHECK-NEXT:    ogr %r0, %r11
; CHECK-NEXT:    stg %r0, 0(%r6)
; CHECK-NEXT:    cgijlh %r2, 1, .LBB9_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    lmg %r6, %r15, 232(%r15)
; CHECK-NEXT:    br %r14
entry:
  %double0 = load volatile double, ptr @dptr
  %biased0 = fadd double %double0, %extra
  %int0 = bitcast double %biased0 to i64
  %double1 = load volatile double, ptr @dptr
  %biased1 = fadd double %double1, %extra
  %int1 = bitcast double %biased1 to i64
  %double2 = load volatile double, ptr @dptr
  %biased2 = fadd double %double2, %extra
  %int2 = bitcast double %biased2 to i64
  %double3 = load volatile double, ptr @dptr
  %biased3 = fadd double %double3, %extra
  %int3 = bitcast double %biased3 to i64
  %double4 = load volatile double, ptr @dptr
  %biased4 = fadd double %double4, %extra
  %int4 = bitcast double %biased4 to i64
  %double5 = load volatile double, ptr @dptr
  %biased5 = fadd double %double5, %extra
  %int5 = bitcast double %biased5 to i64
  %double6 = load volatile double, ptr @dptr
  %biased6 = fadd double %double6, %extra
  %int6 = bitcast double %biased6 to i64
  %double7 = load volatile double, ptr @dptr
  %biased7 = fadd double %double7, %extra
  %int7 = bitcast double %biased7 to i64
  %double8 = load volatile double, ptr @dptr
  %biased8 = fadd double %double8, %extra
  %int8 = bitcast double %biased8 to i64
  %double9 = load volatile double, ptr @dptr
  %biased9 = fadd double %double9, %extra
  %int9 = bitcast double %biased9 to i64
  br label %loop

loop:
  %start = call i64 @foo()
  %or0 = or i64 %start, %int0
  %or1 = or i64 %or0, %int1
  %or2 = or i64 %or1, %int2
  %or3 = or i64 %or2, %int3
  %or4 = or i64 %or3, %int4
  %or5 = or i64 %or4, %int5
  %or6 = or i64 %or5, %int6
  %or7 = or i64 %or6, %int7
  %or8 = or i64 %or7, %int8
  %or9 = or i64 %or8, %int9
  store i64 %or9, ptr @iptr
  %cont = icmp ne i64 %start, 1
  br i1 %cont, label %loop, label %exit

exit:
  ret void
}
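
; Note (illustrative): in f10 the ten LGDR results do not all fit in the
; available call-saved GPRs, so the register allocator spills three of
; the sums while they are still in FPRs (the STD "Folded Spill" lines)
; and folds each reload into the integer OG, satisfying the requirement
; above: the values are stored as floats and reloaded as integers, with
; no integer stack store or floating-point load on the round trip.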

; ...likewise LDGR, with the requirements the other way around.
define void @f11(i64 %mask) {
; CHECK-LABEL: f11:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stmg %r12, %r15, 96(%r15)
; CHECK-NEXT:    .cfi_offset %r12, -64
; CHECK-NEXT:    .cfi_offset %r13, -56
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -240
; CHECK-NEXT:    .cfi_def_cfa_offset 400
; CHECK-NEXT:    std %f8, 232(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f9, 224(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f10, 216(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f11, 208(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f12, 200(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f13, 192(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f14, 184(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f15, 176(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    .cfi_offset %f8, -168
; CHECK-NEXT:    .cfi_offset %f9, -176
; CHECK-NEXT:    .cfi_offset %f10, -184
; CHECK-NEXT:    .cfi_offset %f11, -192
; CHECK-NEXT:    .cfi_offset %f12, -200
; CHECK-NEXT:    .cfi_offset %f13, -208
; CHECK-NEXT:    .cfi_offset %f14, -216
; CHECK-NEXT:    .cfi_offset %f15, -224
; CHECK-NEXT:    lgrl %r1, iptr@GOT
; CHECK-NEXT:    lgr %r0, %r2
; CHECK-NEXT:    ng %r0, 0(%r1)
; CHECK-NEXT:    lgr %r3, %r2
; CHECK-NEXT:    ng %r3, 0(%r1)
; CHECK-NEXT:    lgr %r4, %r2
; CHECK-NEXT:    ng %r4, 0(%r1)
; CHECK-NEXT:    stg %r0, 168(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    stg %r3, 160(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    lgr %r0, %r2
; CHECK-NEXT:    ng %r0, 0(%r1)
; CHECK-NEXT:    ldgr %f10, %r4
; CHECK-NEXT:    lgr %r3, %r2
; CHECK-NEXT:    ng %r3, 0(%r1)
; CHECK-NEXT:    lgr %r4, %r2
; CHECK-NEXT:    ng %r4, 0(%r1)
; CHECK-NEXT:    ldgr %f11, %r0
; CHECK-NEXT:    lgr %r0, %r2
; CHECK-NEXT:    ng %r0, 0(%r1)
; CHECK-NEXT:    ldgr %f12, %r3
; CHECK-NEXT:    ldgr %f13, %r4
; CHECK-NEXT:    lgr %r3, %r2
; CHECK-NEXT:    ng %r3, 0(%r1)
; CHECK-NEXT:    ldgr %f14, %r0
; CHECK-NEXT:    lgr %r0, %r2
; CHECK-NEXT:    ng %r0, 0(%r1)
; CHECK-NEXT:    ng %r2, 0(%r1)
; CHECK-NEXT:    ldgr %f15, %r3
; CHECK-NEXT:    lgrl %r13, dptr@GOT
; CHECK-NEXT:    ldgr %f8, %r0
; CHECK-NEXT:    ldgr %f9, %r2
; CHECK-NEXT:    larl %r12, .LCPI10_0
; CHECK-NEXT:  .LBB10_1: # %loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    brasl %r14, bar@PLT
; CHECK-NEXT:    ldr %f1, %f0
; CHECK-NEXT:    adb %f1, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    adb %f1, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    adbr %f1, %f10
; CHECK-NEXT:    adbr %f1, %f11
; CHECK-NEXT:    adbr %f1, %f12
; CHECK-NEXT:    adbr %f1, %f13
; CHECK-NEXT:    adbr %f1, %f14
; CHECK-NEXT:    adbr %f1, %f15
; CHECK-NEXT:    adbr %f1, %f8
; CHECK-NEXT:    adbr %f1, %f9
; CHECK-NEXT:    cdb %f0, 0(%r12)
; CHECK-NEXT:    std %f1, 0(%r13)
; CHECK-NEXT:    jlh .LBB10_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    ld %f8, 232(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f9, 224(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f10, 216(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f11, 208(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f12, 200(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f13, 192(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f14, 184(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f15, 176(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    lmg %r12, %r15, 336(%r15)
; CHECK-NEXT:    br %r14
entry:
  %int0 = load volatile i64, ptr @iptr
  %masked0 = and i64 %int0, %mask
  %double0 = bitcast i64 %masked0 to double
  %int1 = load volatile i64, ptr @iptr
  %masked1 = and i64 %int1, %mask
  %double1 = bitcast i64 %masked1 to double
  %int2 = load volatile i64, ptr @iptr
  %masked2 = and i64 %int2, %mask
  %double2 = bitcast i64 %masked2 to double
  %int3 = load volatile i64, ptr @iptr
  %masked3 = and i64 %int3, %mask
  %double3 = bitcast i64 %masked3 to double
  %int4 = load volatile i64, ptr @iptr
  %masked4 = and i64 %int4, %mask
  %double4 = bitcast i64 %masked4 to double
  %int5 = load volatile i64, ptr @iptr
  %masked5 = and i64 %int5, %mask
  %double5 = bitcast i64 %masked5 to double
  %int6 = load volatile i64, ptr @iptr
  %masked6 = and i64 %int6, %mask
  %double6 = bitcast i64 %masked6 to double
  %int7 = load volatile i64, ptr @iptr
  %masked7 = and i64 %int7, %mask
  %double7 = bitcast i64 %masked7 to double
  %int8 = load volatile i64, ptr @iptr
  %masked8 = and i64 %int8, %mask
  %double8 = bitcast i64 %masked8 to double
  %int9 = load volatile i64, ptr @iptr
  %masked9 = and i64 %int9, %mask
  %double9 = bitcast i64 %masked9 to double
  br label %loop

loop:
  %start = call double @bar()
  %add0 = fadd double %start, %double0
  %add1 = fadd double %add0, %double1
  %add2 = fadd double %add1, %double2
  %add3 = fadd double %add2, %double3
  %add4 = fadd double %add3, %double4
  %add5 = fadd double %add4, %double5
  %add6 = fadd double %add5, %double6
  %add7 = fadd double %add6, %double7
  %add8 = fadd double %add7, %double8
  %add9 = fadd double %add8, %double9
  store double %add9, ptr @dptr
  %cont = fcmp one double %start, 1.0
  br i1 %cont, label %loop, label %exit

exit:
  ret void
}
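
; Note (illustrative): f11 is the mirror image of f10. Two of the ten
; masked values are spilled with STG while still in GPRs and reloaded by
; folding the stack slot straight into ADB (the "Folded Reload" lines),
; so the round trip uses an integer store and a floating-point memory
; operand, never a floating-point stack store or an integer load.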

; Test cases where the source of an LDGR needs to be spilled.
; We shouldn't have any integer stack stores or floating-point loads.
define void @f12() {
; CHECK-LABEL: f12:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stmg %r6, %r15, 48(%r15)
; CHECK-NEXT:    .cfi_offset %r6, -112
; CHECK-NEXT:    .cfi_offset %r7, -104
; CHECK-NEXT:    .cfi_offset %r8, -96
; CHECK-NEXT:    .cfi_offset %r9, -88
; CHECK-NEXT:    .cfi_offset %r10, -80
; CHECK-NEXT:    .cfi_offset %r11, -72
; CHECK-NEXT:    .cfi_offset %r12, -64
; CHECK-NEXT:    .cfi_offset %r13, -56
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -176
; CHECK-NEXT:    .cfi_def_cfa_offset 336
; CHECK-NEXT:    lghi %r12, 0
; CHECK-NEXT:    lghi %r13, 0
; CHECK-NEXT:    lghi %r6, 0
; CHECK-NEXT:    lghi %r7, 0
; CHECK-NEXT:    lghi %r8, 0
; CHECK-NEXT:    lghi %r9, 0
; CHECK-NEXT:    lghi %r10, 0
; CHECK-NEXT:    lghi %r11, 0
; CHECK-NEXT:    mvghi 160(%r15), 0 # 8-byte Folded Spill
; CHECK-NEXT:    mvghi 168(%r15), 0 # 8-byte Folded Spill
; CHECK-NEXT:  .LBB11_1: # %loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    agr %r12, %r2
; CHECK-NEXT:    agr %r13, %r2
; CHECK-NEXT:    agr %r6, %r2
; CHECK-NEXT:    agr %r7, %r2
; CHECK-NEXT:    agr %r8, %r2
; CHECK-NEXT:    agr %r9, %r2
; CHECK-NEXT:    agr %r10, %r2
; CHECK-NEXT:    agr %r11, %r2
; CHECK-NEXT:    lg %r0, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    agr %r0, %r2
; CHECK-NEXT:    stg %r0, 160(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    lg %r0, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    agr %r0, %r2
; CHECK-NEXT:    stg %r0, 168(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    cgijlh %r2, 1, .LBB11_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    lgrl %r1, dptr@GOT
; CHECK-NEXT:    ld %f0, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r12
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r13
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r6
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r7
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r8
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r9
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r10
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ldgr %f1, %r11
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ld %f1, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    ld %f1, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 0(%r1)
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    lmg %r6, %r15, 224(%r15)
; CHECK-NEXT:    br %r14
entry:
  br label %loop

loop:
  %int0 = phi i64 [ 0, %entry ], [ %add0, %loop ]
  %int1 = phi i64 [ 0, %entry ], [ %add1, %loop ]
  %int2 = phi i64 [ 0, %entry ], [ %add2, %loop ]
  %int3 = phi i64 [ 0, %entry ], [ %add3, %loop ]
  %int4 = phi i64 [ 0, %entry ], [ %add4, %loop ]
  %int5 = phi i64 [ 0, %entry ], [ %add5, %loop ]
  %int6 = phi i64 [ 0, %entry ], [ %add6, %loop ]
  %int7 = phi i64 [ 0, %entry ], [ %add7, %loop ]
  %int8 = phi i64 [ 0, %entry ], [ %add8, %loop ]
  %int9 = phi i64 [ 0, %entry ], [ %add9, %loop ]

  %bias = call i64 @foo()
  %add0 = add i64 %int0, %bias
  %add1 = add i64 %int1, %bias
  %add2 = add i64 %int2, %bias
  %add3 = add i64 %int3, %bias
  %add4 = add i64 %int4, %bias
  %add5 = add i64 %int5, %bias
  %add6 = add i64 %int6, %bias
  %add7 = add i64 %int7, %bias
  %add8 = add i64 %int8, %bias
  %add9 = add i64 %int9, %bias
  %cont = icmp ne i64 %bias, 1
  br i1 %cont, label %loop, label %exit

exit:
  %unused1 = call i64 @foo()
  %factor = load volatile double, ptr @dptr

  %conv0 = bitcast i64 %add0 to double
  %mul0 = fmul double %conv0, %factor
  store volatile double %mul0, ptr @dptr
  %conv1 = bitcast i64 %add1 to double
  %mul1 = fmul double %conv1, %factor
  store volatile double %mul1, ptr @dptr
  %conv2 = bitcast i64 %add2 to double
  %mul2 = fmul double %conv2, %factor
  store volatile double %mul2, ptr @dptr
  %conv3 = bitcast i64 %add3 to double
  %mul3 = fmul double %conv3, %factor
  store volatile double %mul3, ptr @dptr
  %conv4 = bitcast i64 %add4 to double
  %mul4 = fmul double %conv4, %factor
  store volatile double %mul4, ptr @dptr
  %conv5 = bitcast i64 %add5 to double
  %mul5 = fmul double %conv5, %factor
  store volatile double %mul5, ptr @dptr
  %conv6 = bitcast i64 %add6 to double
  %mul6 = fmul double %conv6, %factor
  store volatile double %mul6, ptr @dptr
  %conv7 = bitcast i64 %add7 to double
  %mul7 = fmul double %conv7, %factor
  store volatile double %mul7, ptr @dptr
  %conv8 = bitcast i64 %add8 to double
  %mul8 = fmul double %conv8, %factor
  store volatile double %mul8, ptr @dptr
  %conv9 = bitcast i64 %add9 to double
  %mul9 = fmul double %conv9, %factor
  store volatile double %mul9, ptr @dptr

  %unused2 = call i64 @foo()

  ret void
}
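
; Note (illustrative): the spilled accumulators in f12 show that the
; i64-to-double bitcast is free once a value is in memory: the slots at
; 160 and 168 are written with MVGHI/STG as integers, yet in the exit
; block they are reloaded directly into FPRs with LD (the "Folded
; Reload" lines), so no separate LDGR is needed for the spilled values.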

; ...likewise LGDR, with the requirements the other way around.
define void @f13() {
; CHECK-LABEL: f13:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
; CHECK-NEXT:    .cfi_offset %r13, -56
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -240
; CHECK-NEXT:    .cfi_def_cfa_offset 400
; CHECK-NEXT:    std %f8, 232(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f9, 224(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f10, 216(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f11, 208(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f12, 200(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f13, 192(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f14, 184(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f15, 176(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    .cfi_offset %f8, -168
; CHECK-NEXT:    .cfi_offset %f9, -176
; CHECK-NEXT:    .cfi_offset %f10, -184
; CHECK-NEXT:    .cfi_offset %f11, -192
; CHECK-NEXT:    .cfi_offset %f12, -200
; CHECK-NEXT:    .cfi_offset %f13, -208
; CHECK-NEXT:    .cfi_offset %f14, -216
; CHECK-NEXT:    .cfi_offset %f15, -224
; CHECK-NEXT:    larl %r13, .LCPI12_0
; CHECK-NEXT:    ld %f8, 0(%r13)
; CHECK-NEXT:    ldr %f9, %f8
; CHECK-NEXT:    ldr %f15, %f8
; CHECK-NEXT:    ldr %f14, %f8
; CHECK-NEXT:    ldr %f13, %f8
; CHECK-NEXT:    ldr %f12, %f8
; CHECK-NEXT:    ldr %f11, %f8
; CHECK-NEXT:    ldr %f10, %f8
; CHECK-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
; CHECK-NEXT:  .LBB12_1: # %loop
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    brasl %r14, bar@PLT
; CHECK-NEXT:    mdbr %f8, %f0
; CHECK-NEXT:    mdbr %f9, %f0
; CHECK-NEXT:    mdbr %f15, %f0
; CHECK-NEXT:    mdbr %f14, %f0
; CHECK-NEXT:    mdbr %f13, %f0
; CHECK-NEXT:    mdbr %f12, %f0
; CHECK-NEXT:    cdb %f0, 0(%r13)
; CHECK-NEXT:    mdbr %f11, %f0
; CHECK-NEXT:    mdbr %f10, %f0
; CHECK-NEXT:    ld %f1, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 160(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    ld %f1, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    mdbr %f1, %f0
; CHECK-NEXT:    std %f1, 168(%r15) # 8-byte Folded Spill
; CHECK-NEXT:    jlh .LBB12_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    lgrl %r1, iptr@GOT
; CHECK-NEXT:    lg %r0, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f8
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f9
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f15
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f14
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f13
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f12
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f11
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lgdr %r2, %f10
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lg %r2, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    lg %r2, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    agr %r2, %r0
; CHECK-NEXT:    stg %r2, 0(%r1)
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    ld %f8, 232(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f9, 224(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f10, 216(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f11, 208(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f12, 200(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f13, 192(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f14, 184(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ld %f15, 176(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    lmg %r13, %r15, 344(%r15)
; CHECK-NEXT:    br %r14
entry:
  br label %loop

loop:
  %double0 = phi double [ 1.0, %entry ], [ %mul0, %loop ]
  %double1 = phi double [ 1.0, %entry ], [ %mul1, %loop ]
  %double2 = phi double [ 1.0, %entry ], [ %mul2, %loop ]
  %double3 = phi double [ 1.0, %entry ], [ %mul3, %loop ]
  %double4 = phi double [ 1.0, %entry ], [ %mul4, %loop ]
  %double5 = phi double [ 1.0, %entry ], [ %mul5, %loop ]
  %double6 = phi double [ 1.0, %entry ], [ %mul6, %loop ]
  %double7 = phi double [ 1.0, %entry ], [ %mul7, %loop ]
  %double8 = phi double [ 1.0, %entry ], [ %mul8, %loop ]
  %double9 = phi double [ 1.0, %entry ], [ %mul9, %loop ]

  %factor = call double @bar()
  %mul0 = fmul double %double0, %factor
  %mul1 = fmul double %double1, %factor
  %mul2 = fmul double %double2, %factor
  %mul3 = fmul double %double3, %factor
  %mul4 = fmul double %double4, %factor
  %mul5 = fmul double %double5, %factor
  %mul6 = fmul double %double6, %factor
  %mul7 = fmul double %double7, %factor
  %mul8 = fmul double %double8, %factor
  %mul9 = fmul double %double9, %factor
  %cont = fcmp one double %factor, 1.0
  br i1 %cont, label %loop, label %exit

exit:
  %unused1 = call i64 @foo()
  %bias = load volatile i64, ptr @iptr

  %conv0 = bitcast double %mul0 to i64
  %add0 = add i64 %conv0, %bias
  store volatile i64 %add0, ptr @iptr
  %conv1 = bitcast double %mul1 to i64
  %add1 = add i64 %conv1, %bias
  store volatile i64 %add1, ptr @iptr
  %conv2 = bitcast double %mul2 to i64
  %add2 = add i64 %conv2, %bias
  store volatile i64 %add2, ptr @iptr
  %conv3 = bitcast double %mul3 to i64
  %add3 = add i64 %conv3, %bias
  store volatile i64 %add3, ptr @iptr
  %conv4 = bitcast double %mul4 to i64
  %add4 = add i64 %conv4, %bias
  store volatile i64 %add4, ptr @iptr
  %conv5 = bitcast double %mul5 to i64
  %add5 = add i64 %conv5, %bias
  store volatile i64 %add5, ptr @iptr
  %conv6 = bitcast double %mul6 to i64
  %add6 = add i64 %conv6, %bias
  store volatile i64 %add6, ptr @iptr
  %conv7 = bitcast double %mul7 to i64
  %add7 = add i64 %conv7, %bias
  store volatile i64 %add7, ptr @iptr
  %conv8 = bitcast double %mul8 to i64
  %add8 = add i64 %conv8, %bias
  store volatile i64 %add8, ptr @iptr
  %conv9 = bitcast double %mul9 to i64
  %add9 = add i64 %conv9, %bias
  store volatile i64 %add9, ptr @iptr

  %unused2 = call i64 @foo()

  ret void
}
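
; Note (illustrative): f13 is the converse of f12. The spilled doubles
; at 160 and 168 are stored with STD in the loop and, in the exit block,
; reloaded straight into GPRs with LG before the AGR/STG sequence, so
; the double-to-i64 bitcast of the spilled values likewise costs nothing
; beyond the reload itself.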