; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
; RUN:   | FileCheck %s -check-prefix=RV64IV
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -O2 < %s \
; RUN:   | FileCheck %s -check-prefix=RV32IV

; Tests adapted from AArch64.

; Test prolog sequences for stack probing when vector is involved.

; The space for vector objects needs probing in the general case, because
; the stack adjustment may happen to be too big (i.e. greater than the
; probe size).
;
; Every function below uses attribute group #0 (inline-asm stack probing, no
; frame pointer, async unwind tables).  In the expected output the scalable
; part of the frame is allocated by a loop: t1 holds the number of bytes still
; to allocate, t2 holds 4096 (lui t2, 1), and each iteration moves sp down by
; t2 and stores zero at 0(sp) while t1 >= t2.  The remainder left in t1 is
; then subtracted from sp after the loop.

; A single scalable vector: the expected frame size is 2 * vlenb.
define void @f_vector(ptr %out) #0 {
; RV64IV-LABEL: f_vector:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 1
; RV64IV-NEXT:    .cfi_def_cfa t1, -16
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB0_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB0_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 2 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 1
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f_vector:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 1
; RV32IV-NEXT:    .cfi_def_cfa t1, -16
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB0_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB0_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 2 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 1
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 0
; RV32IV-NEXT:    ret
entry:
  %vec = alloca <vscale x 4 x float>, align 16
  ret void
}

; As above, but with 4 vectors of stack space.
; The expected frame size is 8 * vlenb.
define void @f4_vector(ptr %out) #0 {
; RV64IV-LABEL: f4_vector:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 3
; RV64IV-NEXT:    .cfi_def_cfa t1, -64
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB1_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB1_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 8 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 3
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f4_vector:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 3
; RV32IV-NEXT:    .cfi_def_cfa t1, -64
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB1_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB1_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 8 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 3
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 0
; RV32IV-NEXT:    ret
entry:
  %vec1 = alloca <vscale x 4 x float>, align 16
  %vec2 = alloca <vscale x 4 x float>, align 16
  %vec3 = alloca <vscale x 4 x float>, align 16
  %vec4 = alloca <vscale x 4 x float>, align 16
  ret void
}

; As above, but with 16 vectors of stack space.
; The expected frame size is 32 * vlenb.
; NOTE(review): the remark inherited from the AArch64 test ("The stack
; adjustment is less than or equal to 16 x 256 = 4096, so we can allocate the
; locals at once.") does not match the expected RISC-V output below, which
; still allocates the locals with the probing loop.
define void @f16_vector(ptr %out) #0 {
; RV64IV-LABEL: f16_vector:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 5
; RV64IV-NEXT:    .cfi_def_cfa t1, -256
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB2_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB2_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 5
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f16_vector:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 5
; RV32IV-NEXT:    .cfi_def_cfa t1, -256
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB2_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB2_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 5
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 0
; RV32IV-NEXT:    ret
entry:
  %vec1 = alloca <vscale x 4 x float>, align 16
  %vec2 = alloca <vscale x 4 x float>, align 16
  %vec3 = alloca <vscale x 4 x float>, align 16
  %vec4 = alloca <vscale x 4 x float>, align 16
  %vec5 = alloca <vscale x 4 x float>, align 16
  %vec6 = alloca <vscale x 4 x float>, align 16
  %vec7 = alloca <vscale x 4 x float>, align 16
  %vec8 = alloca <vscale x 4 x float>, align 16
  %vec9 = alloca <vscale x 4 x float>, align 16
  %vec10 = alloca <vscale x 4 x float>, align 16
  %vec11 = alloca <vscale x 4 x float>, align 16
  %vec12 = alloca <vscale x 4 x float>, align 16
  %vec13 = alloca <vscale x 4 x float>, align 16
  %vec14 = alloca <vscale x 4 x float>, align 16
  %vec15 = alloca <vscale x 4 x float>, align 16
  %vec16 = alloca <vscale x 4 x float>, align 16
  ret void
}

; As above, but with 17 vectors of stack space.
; The expected frame size is 34 * vlenb; since 34 is not a power of two, the
; size is computed with li + mul instead of a shift.
define void @f17_vector(ptr %out) #0 {
; RV64IV-LABEL: f17_vector:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    li a0, 34
; RV64IV-NEXT:    mul t1, t1, a0
; RV64IV-NEXT:    .cfi_def_cfa t1, -272
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB3_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB3_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 34 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    li a1, 34
; RV64IV-NEXT:    mul a0, a0, a1
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f17_vector:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    li a0, 34
; RV32IV-NEXT:    mul t1, t1, a0
; RV32IV-NEXT:    .cfi_def_cfa t1, -272
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB3_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB3_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 34 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    li a1, 34
; RV32IV-NEXT:    mul a0, a0, a1
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 0
; RV32IV-NEXT:    ret
entry:
  %vec1 = alloca <vscale x 4 x float>, align 16
  %vec2 = alloca <vscale x 4 x float>, align 16
  %vec3 = alloca <vscale x 4 x float>, align 16
  %vec4 = alloca <vscale x 4 x float>, align 16
  %vec5 = alloca <vscale x 4 x float>, align 16
  %vec6 = alloca <vscale x 4 x float>, align 16
  %vec7 = alloca <vscale x 4 x float>, align 16
  %vec8 = alloca <vscale x 4 x float>, align 16
  %vec9 = alloca <vscale x 4 x float>, align 16
  %vec10 = alloca <vscale x 4 x float>, align 16
  %vec11 = alloca <vscale x 4 x float>, align 16
  %vec12 = alloca <vscale x 4 x float>, align 16
  %vec13 = alloca <vscale x 4 x float>, align 16
  %vec14 = alloca <vscale x 4 x float>, align 16
  %vec15 = alloca <vscale x 4 x float>, align 16
  %vec16 = alloca <vscale x 4 x float>, align 16
  %vec17 = alloca <vscale x 4 x float>, align 16
  ret void
}

; A vector and a 16-byte fixed size object.
; The fixed 16 bytes are allocated first with a single addi (no probe store in
; the expected output); the scalable 2 * vlenb part then goes through the
; probing loop.
define void @f1_vector_16_arr(ptr %out) #0 {
; RV64IV-LABEL: f1_vector_16_arr:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    addi sp, sp, -16
; RV64IV-NEXT:    .cfi_def_cfa_offset 16
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 1
; RV64IV-NEXT:    .cfi_def_cfa t1, -16
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB4_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB4_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 1
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 16
; RV64IV-NEXT:    addi sp, sp, 16
; RV64IV-NEXT:    .cfi_def_cfa_offset 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f1_vector_16_arr:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    addi sp, sp, -16
; RV32IV-NEXT:    .cfi_def_cfa_offset 16
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 1
; RV32IV-NEXT:    .cfi_def_cfa t1, -16
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB4_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB4_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 1
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 16
; RV32IV-NEXT:    addi sp, sp, 16
; RV32IV-NEXT:    .cfi_def_cfa_offset 0
; RV32IV-NEXT:    ret
entry:
  %vec = alloca <vscale x 4 x float>, align 16
  %arr = alloca i8, i64 16, align 1
  ret void
}

; A large vector object and a large slot, both of which need probing.
; Despite the function name, the fixed-size array is 12288 bytes.  In the
; expected output the fixed part (12304 bytes in total) is allocated as three
; straight-line 4096-byte steps, each followed by a probe store, plus a final
; 16-byte adjustment; the 128 * vlenb scalable part then uses the probing loop.
define void @f1_vector_4096_arr(ptr %out) #0 {
; RV64IV-LABEL: f1_vector_4096_arr:
; RV64IV:       # %bb.0: # %entry
; RV64IV-NEXT:    lui a0, 1
; RV64IV-NEXT:    sub sp, sp, a0
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    .cfi_def_cfa_offset 4096
; RV64IV-NEXT:    lui a0, 1
; RV64IV-NEXT:    sub sp, sp, a0
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    .cfi_def_cfa_offset 8192
; RV64IV-NEXT:    lui a0, 1
; RV64IV-NEXT:    sub sp, sp, a0
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    .cfi_def_cfa_offset 12288
; RV64IV-NEXT:    addi sp, sp, -16
; RV64IV-NEXT:    .cfi_def_cfa_offset 12304
; RV64IV-NEXT:    csrr t1, vlenb
; RV64IV-NEXT:    slli t1, t1, 7
; RV64IV-NEXT:    .cfi_def_cfa t1, -1024
; RV64IV-NEXT:    lui t2, 1
; RV64IV-NEXT:  .LBB5_1: # %entry
; RV64IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT:    sub sp, sp, t2
; RV64IV-NEXT:    sd zero, 0(sp)
; RV64IV-NEXT:    sub t1, t1, t2
; RV64IV-NEXT:    bge t1, t2, .LBB5_1
; RV64IV-NEXT:  # %bb.2: # %entry
; RV64IV-NEXT:    .cfi_def_cfa_register sp
; RV64IV-NEXT:    sub sp, sp, t1
; RV64IV-NEXT:    .cfi_escape 0x0f, 0x10, 0x72, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 12304 + 128 * vlenb
; RV64IV-NEXT:    csrr a0, vlenb
; RV64IV-NEXT:    slli a0, a0, 7
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa sp, 12304
; RV64IV-NEXT:    lui a0, 3
; RV64IV-NEXT:    addiw a0, a0, 16
; RV64IV-NEXT:    add sp, sp, a0
; RV64IV-NEXT:    .cfi_def_cfa_offset 0
; RV64IV-NEXT:    ret
;
; RV32IV-LABEL: f1_vector_4096_arr:
; RV32IV:       # %bb.0: # %entry
; RV32IV-NEXT:    lui a0, 1
; RV32IV-NEXT:    sub sp, sp, a0
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    .cfi_def_cfa_offset 4096
; RV32IV-NEXT:    lui a0, 1
; RV32IV-NEXT:    sub sp, sp, a0
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    .cfi_def_cfa_offset 8192
; RV32IV-NEXT:    lui a0, 1
; RV32IV-NEXT:    sub sp, sp, a0
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    .cfi_def_cfa_offset 12288
; RV32IV-NEXT:    addi sp, sp, -16
; RV32IV-NEXT:    .cfi_def_cfa_offset 12304
; RV32IV-NEXT:    csrr t1, vlenb
; RV32IV-NEXT:    slli t1, t1, 7
; RV32IV-NEXT:    .cfi_def_cfa t1, -1024
; RV32IV-NEXT:    lui t2, 1
; RV32IV-NEXT:  .LBB5_1: # %entry
; RV32IV-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT:    sub sp, sp, t2
; RV32IV-NEXT:    sw zero, 0(sp)
; RV32IV-NEXT:    sub t1, t1, t2
; RV32IV-NEXT:    bge t1, t2, .LBB5_1
; RV32IV-NEXT:  # %bb.2: # %entry
; RV32IV-NEXT:    .cfi_def_cfa_register sp
; RV32IV-NEXT:    sub sp, sp, t1
; RV32IV-NEXT:    .cfi_escape 0x0f, 0x10, 0x72, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 12304 + 128 * vlenb
; RV32IV-NEXT:    csrr a0, vlenb
; RV32IV-NEXT:    slli a0, a0, 7
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa sp, 12304
; RV32IV-NEXT:    lui a0, 3
; RV32IV-NEXT:    addi a0, a0, 16
; RV32IV-NEXT:    add sp, sp, a0
; RV32IV-NEXT:    .cfi_def_cfa_offset 0
; RV32IV-NEXT:    ret
entry:
  %vec = alloca <vscale x 256 x float>, align 16
  %arr = alloca i8, i64 12288, align 1
  ret void
}

; Shared by every function above: async unwind tables, inline-asm stack
; probing, and no frame pointer.
attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" }