; Test the generated function prologs/epilogs under XPLINK64 on z/OS
;
; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z13 | FileCheck --check-prefixes=CHECK64,CHECK %s

; Test prolog/epilog for non-XPLEAF.

; Small stack frame.
; CHECK-LABEL: func0
; CHECK64: stmg 6,7,1872(4)
; The stmg instruction's displacement must be 2064 - DSA size, as required
; by the ABI (here the DSA size is 192, so the displacement is 1872).
; CHECK64: aghi 4,-192

; CHECK64: lg 7,2072(4)
; CHECK64: aghi 4,192
; CHECK64: b 2(7)

; CHECK64: L#PPA1_func0_0:
; CHECK64: .short 0 * Length/4 of Parms
define void @func0() {
  call i64 (i64) @fun(i64 10)
  ret void
}

; Spill all GPR CSRs.
; CHECK-LABEL: func1
; CHECK64: stmg 6,15,1904(4)
; CHECK64: aghi 4,-160

; CHECK64: lmg 7,15,2072(4)
; CHECK64: aghi 4,160
; CHECK64: b 2(7)

; CHECK64: L#PPA1_func1_0:
; CHECK64: .short 2 * Length/4 of Parms
define void @func1(ptr %ptr) {
  %l01 = load volatile i64, ptr %ptr
  %l02 = load volatile i64, ptr %ptr
  %l03 = load volatile i64, ptr %ptr
  %l04 = load volatile i64, ptr %ptr
  %l05 = load volatile i64, ptr %ptr
  %l06 = load volatile i64, ptr %ptr
  %l07 = load volatile i64, ptr %ptr
  %l08 = load volatile i64, ptr %ptr
  %l09 = load volatile i64, ptr %ptr
  %l10 = load volatile i64, ptr %ptr
  %l11 = load volatile i64, ptr %ptr
  %l12 = load volatile i64, ptr %ptr
  %l13 = load volatile i64, ptr %ptr
  %l14 = load volatile i64, ptr %ptr
  %l15 = load volatile i64, ptr %ptr
  %add01 = add i64 %l01, %l01
  %add02 = add i64 %l02, %add01
  %add03 = add i64 %l03, %add02
  %add04 = add i64 %l04, %add03
  %add05 = add i64 %l05, %add04
  %add06 = add i64 %l06, %add05
  %add07 = add i64 %l07, %add06
  %add08 = add i64 %l08, %add07
  %add09 = add i64 %l09, %add08
  %add10 = add i64 %l10, %add09
  %add11 = add i64 %l11, %add10
  %add12 = add i64 %l12, %add11
  %add13 = add i64 %l13, %add12
  %add14 = add i64 %l14, %add13
  %add15 = add i64 %l15, %add14
  store volatile i64 %add01, ptr %ptr
  store volatile i64 %add02, ptr %ptr
  store volatile i64 %add03, ptr %ptr
  store volatile i64 %add04, ptr %ptr
  store volatile i64 %add05, ptr %ptr
  store volatile i64 %add06, ptr %ptr
  store volatile i64 %add07, ptr %ptr
  store volatile i64 %add08, ptr %ptr
  store volatile i64 %add09, ptr %ptr
  store volatile i64 %add10, ptr %ptr
  store volatile i64 %add11, ptr %ptr
  store volatile i64 %add12, ptr %ptr
  store volatile i64 %add13, ptr %ptr
  store volatile i64 %add14, ptr %ptr
  store volatile i64 %add15, ptr %ptr
  ret void
}

; Spill all FPRs and VRs.
; CHECK-LABEL: func2
; CHECK64: stmg 6,7,1744(4)
; CHECK64: aghi 4,-320
; CHECK64: std 15,{{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 14,{{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 13,{{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 12,{{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 11,{{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 10,{{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 9,{{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 8,{{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: vst 23,{{[0-9]+}}(4),4 * 16-byte Folded Spill
; CHECK64: vst 22,{{[0-9]+}}(4),4 * 16-byte Folded Spill
; CHECK64: vst 21,{{[0-9]+}}(4),4 * 16-byte Folded Spill
; CHECK64: vst 20,{{[0-9]+}}(4),4 * 16-byte Folded Spill
; CHECK64: vst 19,{{[0-9]+}}(4),4 * 16-byte Folded Spill
; CHECK64: vst 18,{{[0-9]+}}(4),4 * 16-byte Folded Spill
; CHECK64: vst 17,{{[0-9]+}}(4),4 * 16-byte Folded Spill
; CHECK64: vst 16,{{[0-9]+}}(4),4 * 16-byte Folded Spill

; CHECK64: ld 15,{{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 14,{{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 13,{{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 12,{{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 11,{{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 10,{{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 9,{{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 8,{{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: vl 23,{{[0-9]+}}(4),4 * 16-byte Folded Reload
; CHECK64: vl 22,{{[0-9]+}}(4),4 * 16-byte Folded Reload
; CHECK64: vl 21,{{[0-9]+}}(4),4 * 16-byte Folded Reload
; CHECK64: vl 20,{{[0-9]+}}(4),4 * 16-byte Folded Reload
; CHECK64: vl 19,{{[0-9]+}}(4),4 * 16-byte Folded Reload
; CHECK64: vl 18,{{[0-9]+}}(4),4 * 16-byte Folded Reload
; CHECK64: vl 17,{{[0-9]+}}(4),4 * 16-byte Folded Reload
; CHECK64: vl 16,{{[0-9]+}}(4),4 * 16-byte Folded Reload
; CHECK64: lg 7,2072(4)
; CHECK64: aghi 4,320
; CHECK64: b 2(7)

define void @func2(ptr %ptr, ptr %vec_ptr) {
  %l00 = load volatile double, ptr %ptr
  %l01 = load volatile double, ptr %ptr
  %l02 = load volatile double, ptr %ptr
  %l03 = load volatile double, ptr %ptr
  %l04 = load volatile double, ptr %ptr
  %l05 = load volatile double, ptr %ptr
  %l06 = load volatile double, ptr %ptr
  %l07 = load volatile double, ptr %ptr
  %l08 = load volatile double, ptr %ptr
  %l09 = load volatile double, ptr %ptr
  %l10 = load volatile double, ptr %ptr
  %l11 = load volatile double, ptr %ptr
  %l12 = load volatile double, ptr %ptr
  %l13 = load volatile double, ptr %ptr
  %l14 = load volatile double, ptr %ptr
  %l15 = load volatile double, ptr %ptr
  %add00 = fadd double %l01, %l00
  %add01 = fadd double %l01, %add00
  %add02 = fadd double %l02, %add01
  %add03 = fadd double %l03, %add02
  %add04 = fadd double %l04, %add03
  %add05 = fadd double %l05, %add04
  %add06 = fadd double %l06, %add05
  %add07 = fadd double %l07, %add06
  %add08 = fadd double %l08, %add07
  %add09 = fadd double %l09, %add08
  %add10 = fadd double %l10, %add09
  %add11 = fadd double %l11, %add10
  %add12 = fadd double %l12, %add11
  %add13 = fadd double %l13, %add12
  %add14 = fadd double %l14, %add13
  %add15 = fadd double %l15, %add14
  store volatile double %add00, ptr %ptr
  store volatile double %add01, ptr %ptr
  store volatile double %add02, ptr %ptr
  store volatile double %add03, ptr %ptr
  store volatile double %add04, ptr %ptr
  store volatile double %add05, ptr %ptr
  store volatile double %add06, ptr %ptr
  store volatile double %add07, ptr %ptr
  store volatile double %add08, ptr %ptr
  store volatile double %add09, ptr %ptr
  store volatile double %add10, ptr %ptr
  store volatile double %add11, ptr %ptr
  store volatile double %add12, ptr %ptr
  store volatile double %add13, ptr %ptr
  store volatile double %add14, ptr %ptr
  store volatile double %add15, ptr %ptr

  %v00 = load volatile <2 x i64>, ptr %vec_ptr
  %v01 = load volatile <2 x i64>, ptr %vec_ptr
  %v02 = load volatile <2 x i64>, ptr %vec_ptr
  %v03 = load volatile <2 x i64>, ptr %vec_ptr
  %v04 = load volatile <2 x i64>, ptr %vec_ptr
  %v05 = load volatile <2 x i64>, ptr %vec_ptr
  %v06 = load volatile <2 x i64>, ptr %vec_ptr
  %v07 = load volatile <2 x i64>, ptr %vec_ptr
  %v08 = load volatile <2 x i64>, ptr %vec_ptr
  %v09 = load volatile <2 x i64>, ptr %vec_ptr
  %v10 = load volatile <2 x i64>, ptr %vec_ptr
  %v11 = load volatile <2 x i64>, ptr %vec_ptr
  %v12 = load volatile <2 x i64>, ptr %vec_ptr
  %v13 = load volatile <2 x i64>, ptr %vec_ptr
  %v14 = load volatile <2 x i64>, ptr %vec_ptr
  %v15 = load volatile <2 x i64>, ptr %vec_ptr
  %v16 = load volatile <2 x i64>, ptr %vec_ptr
  %v17 = load volatile <2 x i64>, ptr %vec_ptr
  %v18 = load volatile <2 x i64>, ptr %vec_ptr
  %v19 = load volatile <2 x i64>, ptr %vec_ptr
  %v20 = load volatile <2 x i64>, ptr %vec_ptr
  %v21 = load volatile <2 x i64>, ptr %vec_ptr
  %v22 = load volatile <2 x i64>, ptr %vec_ptr
  %v23 = load volatile <2 x i64>, ptr %vec_ptr
  %v24 = load volatile <2 x i64>, ptr %vec_ptr
  %v25 = load volatile <2 x i64>, ptr %vec_ptr
  %v26 = load volatile <2 x i64>, ptr %vec_ptr
  %v27 = load volatile <2 x i64>, ptr %vec_ptr
  %v28 = load volatile <2 x i64>, ptr %vec_ptr
  %v29 = load volatile <2 x i64>, ptr %vec_ptr
  %v30 = load volatile <2 x i64>, ptr %vec_ptr
  %v31 = load volatile <2 x i64>, ptr %vec_ptr
  %vadd00 = add <2 x i64> %v00, %v00
  %vadd01 = add <2 x i64> %v01, %vadd00
  %vadd02 = add <2 x i64> %v02, %vadd01
  %vadd03 = add <2 x i64> %v03, %vadd02
  %vadd04 = add <2 x i64> %v04, %vadd03
  %vadd05 = add <2 x i64> %v05, %vadd04
  %vadd06 = add <2 x i64> %v06, %vadd05
  %vadd07 = add <2 x i64> %v07, %vadd06
  %vadd08 = add <2 x i64> %v08, %vadd07
  %vadd09 = add <2 x i64> %v09, %vadd08
  %vadd10 = add <2 x i64> %v10, %vadd09
  %vadd11 = add <2 x i64> %v11, %vadd10
  %vadd12 = add <2 x i64> %v12, %vadd11
  %vadd13 = add <2 x i64> %v13, %vadd12
  %vadd14 = add <2 x i64> %v14, %vadd13
  %vadd15 = add <2 x i64> %v15, %vadd14
  %vadd16 = add <2 x i64> %v16, %vadd15
  %vadd17 = add <2 x i64> %v17, %vadd16
  %vadd18 = add <2 x i64> %v18, %vadd17
  %vadd19 = add <2 x i64> %v19, %vadd18
  %vadd20 = add <2 x i64> %v20, %vadd19
  %vadd21 = add <2 x i64> %v21, %vadd20
  %vadd22 = add <2 x i64> %v22, %vadd21
  %vadd23 = add <2 x i64> %v23, %vadd22
  %vadd24 = add <2 x i64> %v24, %vadd23
  %vadd25 = add <2 x i64> %v25, %vadd24
  %vadd26 = add <2 x i64> %v26, %vadd25
  %vadd27 = add <2 x i64> %v27, %vadd26
  %vadd28 = add <2 x i64> %v28, %vadd27
  %vadd29 = add <2 x i64> %v29, %vadd28
  %vadd30 = add <2 x i64> %v30, %vadd29
  %vadd31 = add <2 x i64> %v31, %vadd30
  store volatile <2 x i64> %vadd00, ptr %vec_ptr
  store volatile <2 x i64> %vadd01, ptr %vec_ptr
  store volatile <2 x i64> %vadd02, ptr %vec_ptr
  store volatile <2 x i64> %vadd03, ptr %vec_ptr
  store volatile <2 x i64> %vadd04, ptr %vec_ptr
  store volatile <2 x i64> %vadd05, ptr %vec_ptr
  store volatile <2 x i64> %vadd06, ptr %vec_ptr
  store volatile <2 x i64> %vadd07, ptr %vec_ptr
  store volatile <2 x i64> %vadd08, ptr %vec_ptr
  store volatile <2 x i64> %vadd09, ptr %vec_ptr
  store volatile <2 x i64> %vadd10, ptr %vec_ptr
  store volatile <2 x i64> %vadd11, ptr %vec_ptr
  store volatile <2 x i64> %vadd12, ptr %vec_ptr
  store volatile <2 x i64> %vadd13, ptr %vec_ptr
  store volatile <2 x i64> %vadd14, ptr %vec_ptr
  store volatile <2 x i64> %vadd15, ptr %vec_ptr
  store volatile <2 x i64> %vadd16, ptr %vec_ptr
  store volatile <2 x i64> %vadd17, ptr %vec_ptr
  store volatile <2 x i64> %vadd18, ptr %vec_ptr
  store volatile <2 x i64> %vadd19, ptr %vec_ptr
  store volatile <2 x i64> %vadd20, ptr %vec_ptr
  store volatile <2 x i64> %vadd21, ptr %vec_ptr
  store volatile <2 x i64> %vadd22, ptr %vec_ptr
  store volatile <2 x i64> %vadd23, ptr %vec_ptr
  store volatile <2 x i64> %vadd24, ptr %vec_ptr
  store volatile <2 x i64> %vadd25, ptr %vec_ptr
  store volatile <2 x i64> %vadd26, ptr %vec_ptr
  store volatile <2 x i64> %vadd27, ptr %vec_ptr
  store volatile <2 x i64> %vadd28, ptr %vec_ptr
  store volatile <2 x i64> %vadd29, ptr %vec_ptr
  store volatile <2 x i64> %vadd30, ptr %vec_ptr
  store volatile <2 x i64> %vadd31, ptr %vec_ptr
  ret void
}

; Big stack frame: force the use of agfi before stmg,
; despite not requiring the stack extension routine.
; CHECK64: agfi 4,-1040768
; CHECK64: stmg 6,7,2064(4)
; CHECK64: agfi 4,1040768
define void @func3() {
  %arr = alloca [130070 x i64], align 8
  call i64 (ptr) @fun1(ptr %arr)
  ret void
}

; Requires saving r4 due to a variable-sized object in the stack frame
; (e.g. a VLA). Sets up the frame pointer in r8.
; CHECK64: stmg 4,10,1856(4)
; CHECK64: aghi 4,-192
; CHECK64: lg 6,40(5)
; CHECK64: lg 5,32(5)
; CHECK64: lgr 8,4
; CHECK64: basr 7,6
; CHECK64-NEXT: bcr 0,0
; CHECK64: lmg 4,10,2048(4)
define i64 @func4(i64 %n) {
  %vla = alloca i64, i64 %n, align 8
  %call = call i64 @fun2(i64 %n, ptr nonnull %vla, ptr nonnull %vla)
  ret i64 %call
}

; Requires saving r4 and, in addition, a displacement large enough
; to force the use of agfi before stmg.
; CHECK64: lgr 0,4
; CHECK64: agfi 4,-1040224
; CHECK64: stmg 4,10,2048(4)
; CHECK64: lgr 8,4
; CHECK64: basr 7,6
; CHECK64-NEXT: bcr 0,0
; CHECK64: lmg 4,10,2048(4)
define i64 @func5(i64 %n) {
  %vla = alloca i64, i64 %n, align 8
  %arr = alloca [130000 x i64], align 8
  %call = call i64 @fun2(i64 %n, ptr nonnull %vla, ptr %arr)
  ret i64 %call
}

; CHECK-LABEL: large_stack0
; CHECK64: agfi 4,-1048800
; CHECK64-NEXT: llgt 3,1208
; CHECK64-NEXT: cg 4,64(3)
; CHECK64-NEXT: jhe
; CHECK64: * %bb.1:
; CHECK64: lg 3,72(3)
; CHECK64: basr 3,3
; CHECK64: stmg 6,7,2064(4)
define void @large_stack0() {
  %arr = alloca [131072 x i64], align 8
  call i64 (ptr) @fun1(ptr %arr)
  ret void
}

; CHECK-LABEL: large_stack1
; CHECK64: agfi 4,-1048800
; CHECK64: lgr 0,3
; CHECK64: llgt 3,1208
; CHECK64: cg 4,64(3)
; CHECK64: jhe L#BB7_2
; CHECK64: %bb.1:
; CHECK64: lg 3,72(3)
; CHECK64: basr 3,3
; CHECK64: bcr 0,7
; CHECK64: L#BB7_2:
; CHECK64: stmg 6,7,2064(4)
; CHECK64: lgr 3,0

; CHECK64: L#PPA1_large_stack1_0:
; CHECK64: .short 6 * Length/4 of Parms
define void @large_stack1(i64 %n1, i64 %n2, i64 %n3) {
  %arr = alloca [131072 x i64], align 8
  call i64 (ptr, i64, i64, i64) @fun3(ptr %arr,
                                      i64 %n1, i64 %n2, i64 %n3)
  ret void
}

; CHECK-LABEL: large_stack2
; CHECK64: lgr 0,4
; CHECK64: stg 3,2192(4)
; CHECK64: agfi 4,-1048800
; CHECK64: llgt 3,1208
; CHECK64: cg 4,64(3)
; CHECK64: jhe L#BB8_2
; CHECK64: %bb.1:
; CHECK64: lg 3,72(3)
; CHECK64: basr 3,3
; CHECK64: bcr 0,7
; CHECK64: L#BB8_2:
; CHECK64: lgr 3,0
; CHECK64: lg 3,2192(3)
; CHECK64: stmg 4,12,2048(4)
; CHECK64: lgr 8,4
define void @large_stack2(i64 %n1, i64 %n2, i64 %n3) {
  %arr0 = alloca [131072 x i64], align 8
  %arr1 = alloca i64, i64 %n1, align 8
  call i64 (ptr, ptr, i64, i64, i64) @fun4(ptr %arr0,
                                           ptr %arr1, i64 %n1, i64 %n2, i64 %n3)
  ret void
}

; CHECK-LABEL: leaf_func0
; CHECK: .long 8 * DSA Size 0x0
; CHECK-NEXT: * Entry Flags
; CHECK-NEXT: * Bit 1: 1 = Leaf function
; CHECK-NEXT: * Bit 2: 0 = Does not use alloca
; CHECK-NOT: aghi 4,
; CHECK-NOT: stmg
; CHECK: agr 1,2
; CHECK: msgr 1,3
; CHECK: aghik 3,1,-4
; CHECK-NOT: aghi 4,
; CHECK-NOT: lmg
define i64 @leaf_func0(i64 %a, i64 %b, i64 %c) {
  %n = add i64 %a, %b
  %m = mul i64 %n, %c
  %o = sub i64 %m, 4
  ret i64 %o
}

; =============================
; Tests for PPA1 Fields
; =============================
; CHECK-LABEL: named_func
; CHECK: .byte 129 * PPA1 Flags 4
; CHECK-NEXT: * Bit 7: 1 = Name Length and Name
define i64 @named_func(i64 %arg) {
  %sum = add i64 1, %arg
  ret i64 %sum
}

; CHECK-LABEL: __unnamed_1
; CHECK: .byte 128 * PPA1 Flags 4
; CHECK-NOT: * Bit 7: 1 = Name Length and Name
define void @""(ptr %p) {
  call i64 (ptr) @fun1(ptr %p)
  ret void
}

declare i64 @fun(i64 %arg0)
declare i64 @fun1(ptr %ptr)
declare i64 @fun2(i64 %n, ptr %arr0, ptr %arr1)
declare i64 @fun3(ptr %ptr, i64 %n1, i64 %n2, i64 %n3)
declare i64 @fun4(ptr %ptr0, ptr %ptr1, i64 %n1, i64 %n2, i64 %n3)
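
; Notes on the sequences checked above (an informal summary inferred from the
; checks themselves, not an authoritative statement of the XPLINK64 ABI):
; * r4 is the biased stack pointer. The callee-saved register save area
;   starts at offset 2048 of the allocated frame (r4 slot at 2048, r6/r7 at
;   2064/2072), which is why the prolog's stmg uses a displacement of
;   2064 - DSA size before the aghi/agfi adjustment.
; * r7 holds the return address, so epilogs return with "b 2(7)"; calls load
;   the target address and its environment from the block addressed by r5
;   ("lg 6,40(5)" / "lg 5,32(5)") and branch via "basr 7,6".
; * Very large frames compare the adjusted r4 against a limit field in the
;   control block located through "llgt 3,1208" and, if necessary, call the
;   stack extension routine loaded from offset 72 of that block.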