; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple riscv32-unknown-linux-gnu -o - %s | FileCheck %s
; RUN: llc -mtriple riscv32-unknown-linux-gnu -mattr=experimental-zicfilp \
; RUN: -code-model=large -o - %s \
; RUN: | FileCheck %s -check-prefix=CHECK-LARGE-ZICFILP
; RUN: llc -mtriple riscv32-unknown-elf -o - %s | FileCheck %s

; Three llc invocations share this file. The two plain riscv32 invocations
; (linux-gnu and elf triples) are verified against the default prefix. The
; invocation that adds experimental-zicfilp together with the large code model
; is verified against its own, separate prefix.

; Perform tail call optimization for global address.
; The default output is a single `tail callee_tail`. The large-code-model
; output instead loads the callee address from .LCPI0_0 into t2 and jumps to
; it with `jr t2`.
declare i32 @callee_tail(i32 %i)
define i32 @caller_tail(i32 %i) nounwind {
; CHECK-LABEL: caller_tail:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: tail callee_tail
;
; CHECK-LARGE-ZICFILP-LABEL: caller_tail:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi0:
; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI0_0)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi0)(a1)
; CHECK-LARGE-ZICFILP-NEXT: jr t2
entry:
  %r = tail call i32 @callee_tail(i32 %i)
  ret i32 %r
}

; Perform tail call optimization for external symbol.
; The memcpy intrinsic below is expected to become a tail call to memcpy.
@dest = global [2 x i8] zeroinitializer
declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)
; The caller is marked optsize. The expected default output ends in
; `tail memcpy`; the large-code-model output loads both the address of dest
; and the address of the callee from .LCPI1_* entries and ends in `jr t2`.
; NOTE(review): the call copies 7 bytes while @dest is declared as [2 x i8];
; only the emitted call sequence is verified here -- confirm this is intended.
define void @caller_extern(ptr %src) optsize {
; CHECK-LABEL: caller_extern:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a1, %hi(dest)
; CHECK-NEXT: addi a1, a1, %lo(dest)
; CHECK-NEXT: li a2, 7
; CHECK-NEXT: mv a3, a0
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: tail memcpy
;
; CHECK-LARGE-ZICFILP-LABEL: caller_extern:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi1:
; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI1_0)
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi2:
; CHECK-LARGE-ZICFILP-NEXT: auipc a2, %pcrel_hi(.LCPI1_1)
; CHECK-LARGE-ZICFILP-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi1)(a1)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi2)(a2)
; CHECK-LARGE-ZICFILP-NEXT: li a2, 7
; CHECK-LARGE-ZICFILP-NEXT: mv a3, a0
; CHECK-LARGE-ZICFILP-NEXT: mv a0, a1
; CHECK-LARGE-ZICFILP-NEXT: mv a1, a3
; CHECK-LARGE-ZICFILP-NEXT: jr t2
entry:
  tail call void @llvm.memcpy.p0.p0.i32(ptr @dest, ptr %src, i32 7, i1 false)
  ret void
}

; Perform tail call optimization for external symbol.
; Same scenario as above, but the caller carries !prof metadata rather than
; the optsize attribute.
@dest_pgso = global [2 x i8] zeroinitializer
; !14 is a function_entry_count of 0 (defined with the module metadata at the
; end of this file). The expected instruction sequence matches caller_extern,
; with dest_pgso as the destination.
; NOTE(review): as in caller_extern, 7 bytes are copied into a [2 x i8]
; global; only the emitted call sequence is verified -- confirm intent.
define void @caller_extern_pgso(ptr %src) !prof !14 {
; CHECK-LABEL: caller_extern_pgso:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a1, %hi(dest_pgso)
; CHECK-NEXT: addi a1, a1, %lo(dest_pgso)
; CHECK-NEXT: li a2, 7
; CHECK-NEXT: mv a3, a0
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: tail memcpy
;
; CHECK-LARGE-ZICFILP-LABEL: caller_extern_pgso:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi3:
; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI2_0)
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi4:
; CHECK-LARGE-ZICFILP-NEXT: auipc a2, %pcrel_hi(.LCPI2_1)
; CHECK-LARGE-ZICFILP-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi3)(a1)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi4)(a2)
; CHECK-LARGE-ZICFILP-NEXT: li a2, 7
; CHECK-LARGE-ZICFILP-NEXT: mv a3, a0
; CHECK-LARGE-ZICFILP-NEXT: mv a0, a1
; CHECK-LARGE-ZICFILP-NEXT: mv a1, a3
; CHECK-LARGE-ZICFILP-NEXT: jr t2
entry:
  tail call void @llvm.memcpy.p0.p0.i32(ptr @dest_pgso, ptr %src, i32 7, i1 false)
  ret void
}

; Perform indirect tail call optimization (for function pointer call).
; The callee is picked by a select between two declared functions.
declare void @callee_indirect1()
declare void @callee_indirect2()
; The select on %a is expected to turn into a branch, with each arm
; materializing one callee address in t1 and ending in `jr t1`.
define void @caller_indirect_tail(i32 %a) nounwind {
; CHECK-LABEL: caller_indirect_tail:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: beqz a0, .LBB3_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: lui t1, %hi(callee_indirect2)
; CHECK-NEXT: addi t1, t1, %lo(callee_indirect2)
; CHECK-NEXT: jr t1
; CHECK-NEXT: .LBB3_2:
; CHECK-NEXT: lui t1, %hi(callee_indirect1)
; CHECK-NEXT: addi t1, t1, %lo(callee_indirect1)
; CHECK-NEXT: jr t1
;
; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_tail:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: beqz a0, .LBB3_2
; CHECK-LARGE-ZICFILP-NEXT: # %bb.1: # %entry
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi6:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI3_0)
; CHECK-LARGE-ZICFILP-NEXT: lw t1, %pcrel_lo(.Lpcrel_hi6)(a0)
; CHECK-LARGE-ZICFILP-NEXT: jr t1
; CHECK-LARGE-ZICFILP-NEXT: .LBB3_2:
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi5:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI3_1)
; CHECK-LARGE-ZICFILP-NEXT: lw t1, %pcrel_lo(.Lpcrel_hi5)(a0)
; CHECK-LARGE-ZICFILP-NEXT: jr t1
entry:
  %tobool = icmp eq i32 %a, 0
  %callee = select i1 %tobool, ptr @callee_indirect1, ptr @callee_indirect2
  tail call void %callee()
  ret void
}

; Make sure we don't use t0 as the source for jr as that is a hint to pop the
; return address stack on some microarchitectures.
; The function pointer arrives in a0 and the seven i32 arguments in a1-a7;
; the expected output moves the pointer to t1, shifts every argument down by
; one register, and ends in `jr t1`.
define i32 @caller_indirect_no_t0(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
; CHECK-LABEL: caller_indirect_no_t0:
; CHECK: # %bb.0:
; CHECK-NEXT: mv t1, a0
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: mv a3, a4
; CHECK-NEXT: mv a4, a5
; CHECK-NEXT: mv a5, a6
; CHECK-NEXT: mv a6, a7
; CHECK-NEXT: jr t1
;
; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_no_t0:
; CHECK-LARGE-ZICFILP: # %bb.0:
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: mv t1, a0
; CHECK-LARGE-ZICFILP-NEXT: mv a0, a1
; CHECK-LARGE-ZICFILP-NEXT: mv a1, a2
; CHECK-LARGE-ZICFILP-NEXT: mv a2, a3
; CHECK-LARGE-ZICFILP-NEXT: mv a3, a4
; CHECK-LARGE-ZICFILP-NEXT: mv a4, a5
; CHECK-LARGE-ZICFILP-NEXT: mv a5, a6
; CHECK-LARGE-ZICFILP-NEXT: mv a6, a7
; CHECK-LARGE-ZICFILP-NEXT: jr t1
  %9 = tail call i32 %0(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7)
  ret i32 %9
}

; Do not tail call optimize functions with varargs passed by stack.
declare i32 @callee_varargs(i32, ...)
; Nine i32 arguments are passed to the variadic callee, so one of them is
; stored to the stack (`sw a0, 0(sp)`) and the expected output is an ordinary
; call followed by `ret` rather than a tail call.
define void @caller_varargs(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: caller_varargs:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw a0, 0(sp)
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: mv a3, a0
; CHECK-NEXT: mv a4, a0
; CHECK-NEXT: mv a5, a1
; CHECK-NEXT: mv a6, a1
; CHECK-NEXT: mv a7, a0
; CHECK-NEXT: call callee_varargs
; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
;
; CHECK-LARGE-ZICFILP-LABEL: caller_varargs:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16
; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi7:
; CHECK-LARGE-ZICFILP-NEXT: auipc a2, %pcrel_hi(.LCPI5_0)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi7)(a2)
; CHECK-LARGE-ZICFILP-NEXT: sw a0, 0(sp)
; CHECK-LARGE-ZICFILP-NEXT: mv a2, a1
; CHECK-LARGE-ZICFILP-NEXT: mv a3, a0
; CHECK-LARGE-ZICFILP-NEXT: mv a4, a0
; CHECK-LARGE-ZICFILP-NEXT: mv a5, a1
; CHECK-LARGE-ZICFILP-NEXT: mv a6, a1
; CHECK-LARGE-ZICFILP-NEXT: mv a7, a0
; CHECK-LARGE-ZICFILP-NEXT: jalr t2
; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16
; CHECK-LARGE-ZICFILP-NEXT: ret
entry:
  %call = tail call i32 (i32, ...) @callee_varargs(i32 %a, i32 %b, i32 %b, i32 %a, i32 %a, i32 %b, i32 %b, i32 %a, i32 %a)
  ret void
}

; Do not tail call optimize if stack is used to pass parameters.
declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n)
; Fourteen i32 arguments are forwarded. The expected output reloads six of
; them from the incoming stack area (32(sp)..52(sp)), stores them to the
; outgoing area (0(sp)..20(sp)), and uses an ordinary call.
define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) nounwind {
; CHECK-LABEL: caller_args:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 32(sp)
; CHECK-NEXT: lw t1, 36(sp)
; CHECK-NEXT: lw t2, 40(sp)
; CHECK-NEXT: lw t3, 44(sp)
; CHECK-NEXT: lw t4, 48(sp)
; CHECK-NEXT: lw t5, 52(sp)
; CHECK-NEXT: sw t4, 16(sp)
; CHECK-NEXT: sw t5, 20(sp)
; CHECK-NEXT: sw t0, 0(sp)
; CHECK-NEXT: sw t1, 4(sp)
; CHECK-NEXT: sw t2, 8(sp)
; CHECK-NEXT: sw t3, 12(sp)
; CHECK-NEXT: call callee_args
; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
;
; CHECK-LARGE-ZICFILP-LABEL: caller_args:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -32
; CHECK-LARGE-ZICFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: lw t0, 32(sp)
; CHECK-LARGE-ZICFILP-NEXT: lw t1, 36(sp)
; CHECK-LARGE-ZICFILP-NEXT: lw t3, 40(sp)
; CHECK-LARGE-ZICFILP-NEXT: lw t4, 44(sp)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, 48(sp)
; CHECK-LARGE-ZICFILP-NEXT: lw t5, 52(sp)
; CHECK-LARGE-ZICFILP-NEXT: sw t2, 16(sp)
; CHECK-LARGE-ZICFILP-NEXT: sw t5, 20(sp)
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi8:
; CHECK-LARGE-ZICFILP-NEXT: auipc t2, %pcrel_hi(.LCPI6_0)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi8)(t2)
; CHECK-LARGE-ZICFILP-NEXT: sw t0, 0(sp)
; CHECK-LARGE-ZICFILP-NEXT: sw t1, 4(sp)
; CHECK-LARGE-ZICFILP-NEXT: sw t3, 8(sp)
; CHECK-LARGE-ZICFILP-NEXT: sw t4, 12(sp)
; CHECK-LARGE-ZICFILP-NEXT: jalr t2
; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32
; CHECK-LARGE-ZICFILP-NEXT: ret
entry:
  %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n)
  ret i32 %r
}

; Do not tail call optimize if parameters need to be passed indirectly.
; The fp128 constant is expected to be written to the caller's stack frame
; with its address handed over in a0 (`mv a0, sp`), followed by an ordinary
; call.
declare i32 @callee_indirect_args(fp128 %a)
define void @caller_indirect_args() nounwind {
; CHECK-LABEL: caller_indirect_args:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; CHECK-NEXT: lui a1, 262128
; CHECK-NEXT: mv a0, sp
; CHECK-NEXT: sw zero, 0(sp)
; CHECK-NEXT: sw zero, 4(sp)
; CHECK-NEXT: sw zero, 8(sp)
; CHECK-NEXT: sw a1, 12(sp)
; CHECK-NEXT: call callee_indirect_args
; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
;
; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_args:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -32
; CHECK-LARGE-ZICFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: lui a1, 262128
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi9:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI7_0)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi9)(a0)
; CHECK-LARGE-ZICFILP-NEXT: mv a0, sp
; CHECK-LARGE-ZICFILP-NEXT: sw zero, 0(sp)
; CHECK-LARGE-ZICFILP-NEXT: sw zero, 4(sp)
; CHECK-LARGE-ZICFILP-NEXT: sw zero, 8(sp)
; CHECK-LARGE-ZICFILP-NEXT: sw a1, 12(sp)
; CHECK-LARGE-ZICFILP-NEXT: jalr t2
; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32
; CHECK-LARGE-ZICFILP-NEXT: ret
entry:
  %call = tail call i32 @callee_indirect_args(fp128 0xL00000000000000003FFF000000000000)
  ret void
}

; Perform tail call optimization for external weak symbol.
declare extern_weak void @callee_weak()
define void @caller_weak() nounwind {
; CHECK-LABEL: caller_weak:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: tail callee_weak
;
; CHECK-LARGE-ZICFILP-LABEL: caller_weak:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi10:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI8_0)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi10)(a0)
; CHECK-LARGE-ZICFILP-NEXT: jr t2
entry:
  tail call void @callee_weak()
  ret void
}

; Exception-handling functions need a special set of instructions to indicate a
; return to the hardware. Tail-calling another function would probably break
; this.
; The expected output saves and restores ra, t0-t6 and a0-a7 around an
; ordinary call and returns with `mret`. Unlike the other functions in this
; file, the large-code-model expectations for this one do not begin with
; `lpad 0`.
declare void @callee_irq()
define void @caller_irq() nounwind "interrupt"="machine" {
; CHECK-LABEL: caller_irq:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -64
; CHECK-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw t0, 56(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw t1, 52(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw t2, 48(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw a2, 36(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw a3, 32(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw a4, 28(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw a5, 24(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw a6, 20(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw a7, 16(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw t3, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw t4, 8(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw t5, 4(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw t6, 0(sp) # 4-byte Folded Spill
; CHECK-NEXT: call callee_irq
; CHECK-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw t0, 56(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw t1, 52(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw t2, 48(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw a2, 36(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw a3, 32(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw a4, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw a5, 24(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw a6, 20(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw a7, 16(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw t3, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw t4, 8(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw t5, 4(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw t6, 0(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 64
; CHECK-NEXT: mret
;
; CHECK-LARGE-ZICFILP-LABEL: caller_irq:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -64
; CHECK-LARGE-ZICFILP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: sw t0, 56(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: sw t1, 52(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: sw t2, 48(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: sw a1, 40(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: sw a2, 36(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: sw a3, 32(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: sw a4, 28(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: sw a5, 24(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: sw a6, 20(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: sw a7, 16(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: sw t3, 12(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: sw t4, 8(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: sw t5, 4(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: sw t6, 0(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi11:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI9_0)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi11)(a0)
; CHECK-LARGE-ZICFILP-NEXT: jalr t2
; CHECK-LARGE-ZICFILP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: lw t0, 56(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: lw t1, 52(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: lw t2, 48(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: lw a0, 44(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: lw a1, 40(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: lw a2, 36(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: lw a3, 32(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: lw a4, 28(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: lw a5, 24(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: lw a6, 20(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: lw a7, 16(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: lw t3, 12(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: lw t4, 8(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: lw t5, 4(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: lw t6, 0(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 64
; CHECK-LARGE-ZICFILP-NEXT: mret
entry:
  tail call void @callee_irq()
  ret void
}

; Byval parameters hand the function a pointer directly into the stack area
; we want to reuse during a tail call. Do not tail call optimize functions with
; byval parameters.
declare i32 @callee_byval(ptr byval(ptr) %a)
; The alloca'd pointer is passed byval: the expected output copies it within
; the caller's frame (8(sp) -> 4(sp)), passes the address of the copy in a0,
; and uses an ordinary call.
define i32 @caller_byval() nounwind {
; CHECK-LABEL: caller_byval:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw a0, 8(sp)
; CHECK-NEXT: sw a0, 4(sp)
; CHECK-NEXT: addi a0, sp, 4
; CHECK-NEXT: call callee_byval
; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
;
; CHECK-LARGE-ZICFILP-LABEL: caller_byval:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16
; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: lw a0, 8(sp)
; CHECK-LARGE-ZICFILP-NEXT: sw a0, 4(sp)
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi12:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI10_0)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi12)(a0)
; CHECK-LARGE-ZICFILP-NEXT: addi a0, sp, 4
; CHECK-LARGE-ZICFILP-NEXT: jalr t2
; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16
; CHECK-LARGE-ZICFILP-NEXT: ret
entry:
  %a = alloca ptr
  %r = tail call i32 @callee_byval(ptr byval(ptr) %a)
  ret i32 %r
}

; Do not tail call optimize if callee uses structret semantics.
%struct.A = type { i32 }
@a = global %struct.A zeroinitializer

declare void @callee_struct(ptr sret(%struct.A) %a)
; The caller itself has no sret parameter but passes @a as the callee's sret
; argument; the expected output is an ordinary call followed by `ret`.
define void @caller_nostruct() nounwind {
; CHECK-LABEL: caller_nostruct:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: lui a0, %hi(a)
; CHECK-NEXT: addi a0, a0, %lo(a)
; CHECK-NEXT: call callee_struct
; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
;
; CHECK-LARGE-ZICFILP-LABEL: caller_nostruct:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16
; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi13:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI11_0)
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi14:
; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI11_1)
; CHECK-LARGE-ZICFILP-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi13)(a0)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi14)(a1)
; CHECK-LARGE-ZICFILP-NEXT: jalr t2
; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16
; CHECK-LARGE-ZICFILP-NEXT: ret
entry:
  tail call void @callee_struct(ptr sret(%struct.A) @a)
  ret void
}

; Do not tail call optimize if caller uses structret semantics.
declare void @callee_nostruct()
; Here the caller has the sret parameter and the callee takes no arguments;
; the expected output is still an ordinary call followed by `ret`.
define void @caller_struct(ptr sret(%struct.A) %a) nounwind {
; CHECK-LABEL: caller_struct:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: call callee_nostruct
; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
;
; CHECK-LARGE-ZICFILP-LABEL: caller_struct:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16
; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi15:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI12_0)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi15)(a0)
; CHECK-LARGE-ZICFILP-NEXT: jalr t2
; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16
; CHECK-LARGE-ZICFILP-NEXT: ret
entry:
  tail call void @callee_nostruct()
  ret void
}

; Do not tail call optimize if disabled.
; Same call as in caller_tail, but the caller carries the
; "disable-tail-calls"="true" attribute, so an ordinary call to callee_tail
; followed by `ret` is expected.
define i32 @disable_tail_calls(i32 %i) nounwind "disable-tail-calls"="true" {
; CHECK-LABEL: disable_tail_calls:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: call callee_tail
; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
;
; CHECK-LARGE-ZICFILP-LABEL: disable_tail_calls:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16
; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi16:
; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI13_0)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi16)(a1)
; CHECK-LARGE-ZICFILP-NEXT: jalr t2
; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16
; CHECK-LARGE-ZICFILP-NEXT: ret
entry:
  %rv = tail call i32 @callee_tail(i32 %i)
  ret i32 %rv
}

; Duplicate returns to enable tail call optimizations.
declare i32 @test()
declare i32 @test1()
declare i32 @test2()
declare i32 @test3()
; Four calls in separate blocks all feed one phi in a shared return block.
; The expected output contains no `ret`: each of the four blocks ends in its
; own tail call (`tail testN`, or `jr t2` for the large code model).
define i32 @duplicate_returns(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: duplicate_returns:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: beqz a0, .LBB14_4
; CHECK-NEXT: # %bb.1: # %if.else
; CHECK-NEXT: beqz a1, .LBB14_5
; CHECK-NEXT: # %bb.2: # %if.else4
; CHECK-NEXT: bge a1, a0, .LBB14_6
; CHECK-NEXT: # %bb.3: # %if.then6
; CHECK-NEXT: tail test2
; CHECK-NEXT: .LBB14_4: # %if.then
; CHECK-NEXT: tail test
; CHECK-NEXT: .LBB14_5: # %if.then2
; CHECK-NEXT: tail test1
; CHECK-NEXT: .LBB14_6: # %if.else8
; CHECK-NEXT: tail test3
;
; CHECK-LARGE-ZICFILP-LABEL: duplicate_returns:
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: beqz a0, .LBB14_4
; CHECK-LARGE-ZICFILP-NEXT: # %bb.1: # %if.else
; CHECK-LARGE-ZICFILP-NEXT: beqz a1, .LBB14_5
; CHECK-LARGE-ZICFILP-NEXT: # %bb.2: # %if.else4
; CHECK-LARGE-ZICFILP-NEXT: bge a1, a0, .LBB14_6
; CHECK-LARGE-ZICFILP-NEXT: # %bb.3: # %if.then6
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi19:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI14_1)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi19)(a0)
; CHECK-LARGE-ZICFILP-NEXT: jr t2
; CHECK-LARGE-ZICFILP-NEXT: .LBB14_4: # %if.then
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi17:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI14_3)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi17)(a0)
; CHECK-LARGE-ZICFILP-NEXT: jr t2
; CHECK-LARGE-ZICFILP-NEXT: .LBB14_5: # %if.then2
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi18:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI14_2)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi18)(a0)
; CHECK-LARGE-ZICFILP-NEXT: jr t2
; CHECK-LARGE-ZICFILP-NEXT: .LBB14_6: # %if.else8
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi20:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI14_0)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi20)(a0)
; CHECK-LARGE-ZICFILP-NEXT: jr t2
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then: ; preds = %entry
  %call = tail call i32 @test()
  br label %return

if.else: ; preds = %entry
  %cmp1 = icmp eq i32 %b, 0
  br i1 %cmp1, label %if.then2, label %if.else4

if.then2: ; preds = %if.else
  %call3 = tail call i32 @test1()
  br label %return

if.else4: ; preds = %if.else
  %cmp5 = icmp sgt i32 %a, %b
  br i1 %cmp5, label %if.then6, label %if.else8

if.then6: ; preds = %if.else4
  %call7 = tail call i32 @test2()
  br label %return

if.else8: ; preds = %if.else4
  %call9 = tail call i32 @test3()
  br label %return

return: ; preds = %if.else8, %if.then6, %if.then2, %if.then
  %retval = phi i32 [ %call, %if.then ], [ %call3, %if.then2 ], [ %call7, %if.then6 ], [ %call9, %if.else8 ]
  ret i32 %retval
}

; Module-level profile summary. !14 (a function_entry_count of 0) is the !prof
; attachment used by caller_extern_pgso.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}