; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
; RUN:   | FileCheck %s -check-prefix=RV64I
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -O2 < %s \
; RUN:   | FileCheck %s -check-prefix=RV32I

; Tests copied from AArch64.

; Dynamically-sized allocation, which needs a loop that can handle any size at
; runtime. The final iteration of the loop will temporarily put SP below the
; target address, but this doesn't break any of the ABI constraints on the
; stack, and also doesn't probe below the target SP value.
define void @dynamic(i64 %size, ptr %out) #0 {
; RV64I-LABEL: dynamic:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    .cfi_def_cfa_offset 16
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    addi s0, sp, 16
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    addi a0, a0, 15
; RV64I-NEXT:    andi a0, a0, -16
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB0_1
; RV64I-NEXT:  # %bb.2:
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    sd a0, 0(a1)
; RV64I-NEXT:    addi sp, s0, -16
; RV64I-NEXT:    .cfi_def_cfa sp, 16
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: dynamic:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    addi s0, sp, 16
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    addi a0, a0, 15
; RV32I-NEXT:    andi a0, a0, -16
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB0_1
; RV32I-NEXT:  # %bb.2:
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    sw a0, 0(a2)
; RV32I-NEXT:    addi sp, s0, -16
; RV32I-NEXT:    .cfi_def_cfa sp, 16
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
  %v = alloca i8, i64 %size, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; This function has a fixed-size stack slot and a dynamic one. The fixed-size
; slot isn't large enough that we would normally probe it, but we need to do
; so here; otherwise the gap between the CSR save and the first probe of the
; dynamic allocation could be too large when the size of the dynamic
; allocation is close to the guard size.
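;
; As a rough sketch (a reading aid, not part of the test input), the probe
; loop emitted for these dynamically-sized allocations behaves like the
; following C-like pseudocode, where 4096 is the probe step materialized by
; "lui ..., 1" in the checks:
;
;   char *target = sp - ((size + 15) & ~(size_t)15); // 16-byte-aligned size
;   do {
;     sp -= 4096;            // step down by at most one guard page
;     *(uintptr_t *)sp = 0;  // probe the newly exposed page
;   } while (target < sp);
;   sp = target;             // SP may briefly sit below target, as noted above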
define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 {
; RV64I-LABEL: dynamic_fixed:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -80
; RV64I-NEXT:    .cfi_def_cfa_offset 80
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    addi s0, sp, 80
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    addi a3, s0, -80
; RV64I-NEXT:    addi a0, a0, 15
; RV64I-NEXT:    sd a3, 0(a1)
; RV64I-NEXT:    andi a0, a0, -16
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    lui a1, 1
; RV64I-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a1
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB1_1
; RV64I-NEXT:  # %bb.2:
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    sd a0, 0(a2)
; RV64I-NEXT:    addi sp, s0, -80
; RV64I-NEXT:    .cfi_def_cfa sp, 80
; RV64I-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    addi sp, sp, 80
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: dynamic_fixed:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -80
; RV32I-NEXT:    .cfi_def_cfa_offset 80
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 72(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    addi s0, sp, 80
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    addi a1, s0, -72
; RV32I-NEXT:    addi a0, a0, 15
; RV32I-NEXT:    sw a1, 0(a2)
; RV32I-NEXT:    andi a0, a0, -16
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB1_1
; RV32I-NEXT:  # %bb.2:
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    sw a0, 0(a3)
; RV32I-NEXT:    addi sp, s0, -80
; RV32I-NEXT:    .cfi_def_cfa sp, 80
; RV32I-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    addi sp, sp, 80
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
  %v1 = alloca i8, i64 64, align 1
  store ptr %v1, ptr %out1, align 8
  %v2 = alloca i8, i64 %size, align 1
  store ptr %v2, ptr %out2, align 8
  ret void
}

; Dynamic allocation, with an alignment requirement greater than the alignment
; of SP. Done by ANDing the target SP with a constant to align it down, then
; doing the loop as normal. Note that we also re-align the stack in the
; prolog, which isn't actually needed because the only aligned allocations are
; dynamic; this is done even without stack probing.
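; A worked example of the align-down computation checked below (hypothetical
; values, for illustration only): with sp = 0x1000 and %size = 1, rounding the
; size up to 16 bytes gives a target of 0x1000 - 16 = 0xff0, and the
; "andi a0, a0, -64" aligns that down to 0xfc0, so the alignment padding is
; probed by the same loop as the allocation itself.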
define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; RV64I-LABEL: dynamic_align_64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -64
; RV64I-NEXT:    .cfi_def_cfa_offset 64
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    addi s0, sp, 64
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    andi sp, sp, -64
; RV64I-NEXT:    mv s1, sp
; RV64I-NEXT:    addi a0, a0, 15
; RV64I-NEXT:    andi a0, a0, -16
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    andi a0, a0, -64
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB2_1
; RV64I-NEXT:  # %bb.2:
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    sd a0, 0(a1)
; RV64I-NEXT:    addi sp, s0, -64
; RV64I-NEXT:    .cfi_def_cfa sp, 64
; RV64I-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    addi sp, sp, 64
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: dynamic_align_64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -64
; RV32I-NEXT:    .cfi_def_cfa_offset 64
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 52(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    addi s0, sp, 64
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    andi sp, sp, -64
; RV32I-NEXT:    mv s1, sp
; RV32I-NEXT:    addi a0, a0, 15
; RV32I-NEXT:    andi a0, a0, -16
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    andi a0, a0, -64
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB2_1
; RV32I-NEXT:  # %bb.2:
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    sw a0, 0(a2)
; RV32I-NEXT:    addi sp, s0, -64
; RV32I-NEXT:    .cfi_def_cfa sp, 64
; RV32I-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    addi sp, sp, 64
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
  %v = alloca i8, i64 %size, align 64
  store ptr %v, ptr %out, align 8
  ret void
}

; Dynamic allocation, with an alignment greater than the stack guard size. The
; only difference from the plain dynamic allocation is the constant used for
; aligning the target SP; the loop will probe the whole allocation without
; needing to know about the alignment padding.
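; For reference (derived from the checks below, not asserted by them): the
; "srli"/"slli" pair with shift amount 13 aligns SP down to an 8192-byte
; boundary in the prologue, and "lui ..., 1048574" materializes 0xffffe000,
; i.e. a -8192 mask, which applies the same align-down to the loop's target SP.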
define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; RV64I-LABEL: dynamic_align_8192:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -2032
; RV64I-NEXT:    .cfi_def_cfa_offset 2032
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 2016(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 2008(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    addi s0, sp, 2032
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    addi sp, sp, -2048
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    srli a2, sp, 13
; RV64I-NEXT:    slli sp, a2, 13
; RV64I-NEXT:    mv s1, sp
; RV64I-NEXT:    addi a0, a0, 15
; RV64I-NEXT:    lui a2, 1048574
; RV64I-NEXT:    andi a0, a0, -16
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB3_1
; RV64I-NEXT:  # %bb.2:
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    sd a0, 0(a1)
; RV64I-NEXT:    addi sp, s0, -2032
; RV64I-NEXT:    .cfi_def_cfa sp, 2032
; RV64I-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 2008(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    addi sp, sp, 2032
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: dynamic_align_8192:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -2032
; RV32I-NEXT:    .cfi_def_cfa_offset 2032
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 2028(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 2024(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 2020(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    addi s0, sp, 2032
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    addi sp, sp, -2048
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    srli a1, sp, 13
; RV32I-NEXT:    slli sp, a1, 13
; RV32I-NEXT:    mv s1, sp
; RV32I-NEXT:    addi a0, a0, 15
; RV32I-NEXT:    lui a1, 1048574
; RV32I-NEXT:    andi a0, a0, -16
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    and a0, a0, a1
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:  .LBB3_1: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB3_1
; RV32I-NEXT:  # %bb.2:
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    sw a0, 0(a2)
; RV32I-NEXT:    addi sp, s0, -2032
; RV32I-NEXT:    .cfi_def_cfa sp, 2032
; RV32I-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 2020(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    addi sp, sp, 2032
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
  %v = alloca i8, i64 %size, align 8192
  store ptr %v, ptr %out, align 8
  ret void
}

; If a function has variable-sized stack objects, then any function calls
; which need to pass arguments on the stack must allocate the stack space for
; them dynamically, to ensure they are at the bottom of the frame.
define void @no_reserved_call_frame(i64 %n, i32 %dummy) #0 {
; RV64I-LABEL: no_reserved_call_frame:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    .cfi_def_cfa_offset 16
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    addi s0, sp, 16
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    slli a0, a0, 2
; RV64I-NEXT:    addi a0, a0, 15
; RV64I-NEXT:    andi a0, a0, -16
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:  .LBB4_1: # %entry
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB4_1
; RV64I-NEXT:  # %bb.2: # %entry
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    call callee_stack_args
; RV64I-NEXT:    addi sp, s0, -16
; RV64I-NEXT:    .cfi_def_cfa sp, 16
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: no_reserved_call_frame:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    addi s0, sp, 16
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    mv a1, a2
; RV32I-NEXT:    slli a0, a0, 2
; RV32I-NEXT:    addi a0, a0, 15
; RV32I-NEXT:    andi a0, a0, -16
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    lui a2, 1
; RV32I-NEXT:  .LBB4_1: # %entry
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a2
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB4_1
; RV32I-NEXT:  # %bb.2: # %entry
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    call callee_stack_args
; RV32I-NEXT:    addi sp, s0, -16
; RV32I-NEXT:    .cfi_def_cfa sp, 16
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
entry:
  %v = alloca i32, i64 %n
  call void @callee_stack_args(ptr %v, i32 %dummy)
  ret void
}

; Same as above but without a variable-sized allocation, so the reserved call
; frame can be folded into the fixed-size allocation in the prologue.
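; As a reading aid for the arithmetic below (RV64 column): the i32 x 100
; alloca needs 400 bytes, plus 8 bytes for the ra spill, rounded up to 416 to
; keep the 16-byte stack alignment; presumably no probe is emitted because
; 416 is well below the 4096-byte guard size.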
define void @reserved_call_frame(i64 %n, i32 %dummy) #0 {
; RV64I-LABEL: reserved_call_frame:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    addi sp, sp, -416
; RV64I-NEXT:    .cfi_def_cfa_offset 416
; RV64I-NEXT:    sd ra, 408(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    addi a0, sp, 8
; RV64I-NEXT:    call callee_stack_args
; RV64I-NEXT:    ld ra, 408(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    addi sp, sp, 416
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: reserved_call_frame:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    addi sp, sp, -416
; RV32I-NEXT:    .cfi_def_cfa_offset 416
; RV32I-NEXT:    sw ra, 412(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    mv a1, a2
; RV32I-NEXT:    addi a0, sp, 12
; RV32I-NEXT:    call callee_stack_args
; RV32I-NEXT:    lw ra, 412(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    addi sp, sp, 416
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
entry:
  %v = alloca i32, i64 100
  call void @callee_stack_args(ptr %v, i32 %dummy)
  ret void
}

declare void @callee_stack_args(ptr, i32)

; Dynamic allocation of scalable vectors.
define void @dynamic_vector(i64 %size, ptr %out) #0 {
; RV64I-LABEL: dynamic_vector:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    .cfi_def_cfa_offset 16
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    addi s0, sp, 16
; RV64I-NEXT:    .cfi_def_cfa s0, 0
; RV64I-NEXT:    csrr a2, vlenb
; RV64I-NEXT:    mul a0, a2, a0
; RV64I-NEXT:    slli a0, a0, 1
; RV64I-NEXT:    sub a0, sp, a0
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub sp, sp, a2
; RV64I-NEXT:    sd zero, 0(sp)
; RV64I-NEXT:    blt a0, sp, .LBB6_1
; RV64I-NEXT:  # %bb.2:
; RV64I-NEXT:    mv sp, a0
; RV64I-NEXT:    sd a0, 0(a1)
; RV64I-NEXT:    addi sp, s0, -16
; RV64I-NEXT:    .cfi_def_cfa sp, 16
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV32I-LABEL: dynamic_vector:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    addi s0, sp, 16
; RV32I-NEXT:    .cfi_def_cfa s0, 0
; RV32I-NEXT:    csrr a1, vlenb
; RV32I-NEXT:    mul a0, a1, a0
; RV32I-NEXT:    slli a0, a0, 1
; RV32I-NEXT:    sub a0, sp, a0
; RV32I-NEXT:    lui a1, 1
; RV32I-NEXT:  .LBB6_1: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub sp, sp, a1
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    blt a0, sp, .LBB6_1
; RV32I-NEXT:  # %bb.2:
; RV32I-NEXT:    mv sp, a0
; RV32I-NEXT:    sw a0, 0(a2)
; RV32I-NEXT:    addi sp, s0, -16
; RV32I-NEXT:    .cfi_def_cfa sp, 16
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
  %v = alloca <vscale x 4 x float>, i64 %size, align 16
  store ptr %v, ptr %out, align 8
  ret void
}

attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" }
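
; Notes on the attributes above: "probe-stack"="inline-asm" is what requests
; the inline probing sequences checked in this file (rather than calls to a
; separate probing function), and uwtable(async) forces the precise .cfi_*
; directives that the checks assert around every SP adjustment.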