; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
; RUN: | FileCheck %s -check-prefix=RV64IV
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -O2 < %s \
; RUN: | FileCheck %s -check-prefix=RV32IV

; Tests adapted from AArch64.

; Test prolog sequences for stack probing when vector is involved.

; The space for vector objects needs probing in the general case, because
; the stack adjustment may happen to be too big (i.e. greater than the
; probe size).

define void @f_vector(ptr %out) #0 {
; RV64IV-LABEL: f_vector:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 1
; RV64IV-NEXT:    .cfi_def_cfa t1, -16
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB0_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB0_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 2 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 1
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f_vector:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 1
; RV32IV-NEXT:    .cfi_def_cfa t1, -16
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB0_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB0_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 2 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 1
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 0
; RV32IV-NEXT:    ret
entry:
  %vec = alloca <vscale x 4 x float>, align 16
  ret void
}

; As above, but with 4 vectors of stack space.
define void @f4_vector(ptr %out) #0 {
; RV64IV-LABEL: f4_vector:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 3
; RV64IV-NEXT:    .cfi_def_cfa t1, -64
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB1_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB1_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 8 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 3
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f4_vector:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 3
; RV32IV-NEXT:    .cfi_def_cfa t1, -64
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB1_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB1_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 8 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 3
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 0
; RV32IV-NEXT:    ret
entry:
  %vec1 = alloca <vscale x 4 x float>, align 16
  %vec2 = alloca <vscale x 4 x float>, align 16
  %vec3 = alloca <vscale x 4 x float>, align 16
  %vec4 = alloca <vscale x 4 x float>, align 16
  ret void
}

; As above, but with 16 vectors of stack space.
; The stack adjustment is less than or equal to 16 x 256 = 4096, so
; we can allocate the locals at once.
define void @f16_vector(ptr %out) #0 {
; RV64IV-LABEL: f16_vector:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 5
; RV64IV-NEXT:    .cfi_def_cfa t1, -256
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB2_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB2_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 5
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f16_vector:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 5
; RV32IV-NEXT:    .cfi_def_cfa t1, -256
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB2_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB2_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 5
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 0
; RV32IV-NEXT:    ret
entry:
  %vec1 = alloca <vscale x 4 x float>, align 16
  %vec2 = alloca <vscale x 4 x float>, align 16
  %vec3 = alloca <vscale x 4 x float>, align 16
  %vec4 = alloca <vscale x 4 x float>, align 16
  %vec5 = alloca <vscale x 4 x float>, align 16
  %vec6 = alloca <vscale x 4 x float>, align 16
  %vec7 = alloca <vscale x 4 x float>, align 16
  %vec8 = alloca <vscale x 4 x float>, align 16
  %vec9 = alloca <vscale x 4 x float>, align 16
  %vec10 = alloca <vscale x 4 x float>, align 16
  %vec11 = alloca <vscale x 4 x float>, align 16
  %vec12 = alloca <vscale x 4 x float>, align 16
  %vec13 = alloca <vscale x 4 x float>, align 16
  %vec14 = alloca <vscale x 4 x float>, align 16
  %vec15 = alloca <vscale x 4 x float>, align 16
  %vec16 = alloca <vscale x 4 x float>, align 16
  ret void
}

; As above, but with 17 vectors of stack space.
define void @f17_vector(ptr %out) #0 {
; RV64IV-LABEL: f17_vector:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    li a0, 34
; RV64IV-NEXT:    mul t1, t1, a0
; RV64IV-NEXT:    .cfi_def_cfa t1, -272
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB3_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB3_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 34 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    li a1, 34
; RV64IV-NEXT:    mul a0, a0, a1
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f17_vector:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    li a0, 34
; RV32IV-NEXT:    mul t1, t1, a0
; RV32IV-NEXT:    .cfi_def_cfa t1, -272
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB3_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB3_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 34 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    li a1, 34
; RV32IV-NEXT:    mul a0, a0, a1
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 0
; RV32IV-NEXT:    ret
entry:
  %vec1 = alloca <vscale x 4 x float>, align 16
  %vec2 = alloca <vscale x 4 x float>, align 16
  %vec3 = alloca <vscale x 4 x float>, align 16
  %vec4 = alloca <vscale x 4 x float>, align 16
  %vec5 = alloca <vscale x 4 x float>, align 16
  %vec6 = alloca <vscale x 4 x float>, align 16
  %vec7 = alloca <vscale x 4 x float>, align 16
  %vec8 = alloca <vscale x 4 x float>, align 16
  %vec9 = alloca <vscale x 4 x float>, align 16
  %vec10 = alloca <vscale x 4 x float>, align 16
  %vec11 = alloca <vscale x 4 x float>, align 16
  %vec12 = alloca <vscale x 4 x float>, align 16
  %vec13 = alloca <vscale x 4 x float>, align 16
  %vec14 = alloca <vscale x 4 x float>, align 16
  %vec15 = alloca <vscale x 4 x float>, align 16
  %vec16 = alloca <vscale x 4 x float>, align 16
  %vec17 = alloca <vscale x 4 x float>, align 16
  ret void
}

; A vector and a 16-byte fixed size object.
define void @f1_vector_16_arr(ptr %out) #0 {
; RV64IV-LABEL: f1_vector_16_arr:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    addi sp, sp, -16
; RV64IV-NEXT:    .cfi_def_cfa_offset 16
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 1
; RV64IV-NEXT:    .cfi_def_cfa t1, -16
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB4_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB4_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 1
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 16
; RV64IV-NEXT:    addi sp, sp, 16
; RV64IV-NEXT:    .cfi_def_cfa_offset 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f1_vector_16_arr:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    addi sp, sp, -16
; RV32IV-NEXT:    .cfi_def_cfa_offset 16
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 1
; RV32IV-NEXT:    .cfi_def_cfa t1, -16
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB4_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB4_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 1
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 16
; RV32IV-NEXT:    addi sp, sp, 16
; RV32IV-NEXT:    .cfi_def_cfa_offset 0
; RV32IV-NEXT:    ret
entry:
  %vec = alloca <vscale x 4 x float>, align 16
  %arr = alloca i8, i64 16, align 1
  ret void
}

; A large vector object and a large slot, both of which need probing.
define void @f1_vector_4096_arr(ptr %out) #0 {
; RV64IV-LABEL: f1_vector_4096_arr:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    lui a0, 1
; RV64IV-NEXT:    sub sp, sp, a0
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    .cfi_def_cfa_offset 4096
; RV64IV-NEXT:    lui a0, 1
; RV64IV-NEXT:    sub sp, sp, a0
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    .cfi_def_cfa_offset 8192
; RV64IV-NEXT:    lui a0, 1
; RV64IV-NEXT:    sub sp, sp, a0
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    .cfi_def_cfa_offset 12288
; RV64IV-NEXT:    addi sp, sp, -16
; RV64IV-NEXT:    .cfi_def_cfa_offset 12304
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 7
; RV64IV-NEXT:    .cfi_def_cfa t1, -1024
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB5_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB5_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x10, 0x72, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 12304 + 128 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 7
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 12304
; RV64IV-NEXT:    lui a0, 3
; RV64IV-NEXT:    addiw a0, a0, 16
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa_offset 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f1_vector_4096_arr:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    lui a0, 1
; RV32IV-NEXT:    sub sp, sp, a0
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    .cfi_def_cfa_offset 4096
; RV32IV-NEXT:    lui a0, 1
; RV32IV-NEXT:    sub sp, sp, a0
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    .cfi_def_cfa_offset 8192
; RV32IV-NEXT:    lui a0, 1
; RV32IV-NEXT:    sub sp, sp, a0
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    .cfi_def_cfa_offset 12288
; RV32IV-NEXT:    addi sp, sp, -16
; RV32IV-NEXT:    .cfi_def_cfa_offset 12304
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 7
; RV32IV-NEXT:    .cfi_def_cfa t1, -1024
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB5_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB5_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x10, 0x72, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 12304 + 128 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 7
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 12304
; RV32IV-NEXT:    lui a0, 3
; RV32IV-NEXT:    addi a0, a0, 16
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa_offset 0
; RV32IV-NEXT:    ret
entry:
  %vec = alloca <vscale x 256 x float>, align 16
  %arr = alloca i8, i64 12288, align 1
  ret void
}

attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" }