; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,m -O2 | FileCheck -check-prefixes=CHECK,RV32 %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,m -O2 | FileCheck -check-prefixes=CHECK,RV64 %s

; ------------------------------------------------------------------------------
; Loads
; ------------------------------------------------------------------------------

; FIXME: This should be widened to a vlseg2 of <4 x i32> with VL set to 3
define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) {
; RV32-LABEL: load_factor2_v3:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT: vle32.v v10, (a0)
; RV32-NEXT: li a0, 32
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vnsrl.wi v8, v10, 0
; RV32-NEXT: vnsrl.wx v9, v10, a0
; RV32-NEXT: ret
;
; RV64-LABEL: load_factor2_v3:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT: vle32.v v10, (a0)
; RV64-NEXT: li a0, 32
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vnsrl.wx v9, v10, a0
; RV64-NEXT: vnsrl.wi v8, v10, 0
; RV64-NEXT: ret
  %interleaved.vec = load <6 x i32>, ptr %ptr
  %v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> <i32 0, i32 2, i32 4>
  %v1 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> <i32 1, i32 3, i32 5>
  %res0 = insertvalue {<3 x i32>, <3 x i32>} undef, <3 x i32> %v0, 0
  %res1 = insertvalue {<3 x i32>, <3 x i32>} %res0, <3 x i32> %v1, 1
  ret {<3 x i32>, <3 x i32>} %res1
}
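
; A sketch of the widened form the FIXME above asks for (an assumption about
; improved output, not what llc currently emits): keep <4 x i32> segments but
; run the segment load with VL=3 so only three pairs are loaded:
;   vsetivli zero, 3, e32, m1, ta, ma
;   vlseg2e32.v v8, (a0)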

define {<4 x i32>, <4 x i32>} @load_factor2(ptr %ptr) {
; CHECK-LABEL: load_factor2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = load <8 x i32>, ptr %ptr
  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
  %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
  ret {<4 x i32>, <4 x i32>} %res1
}
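
; The single vlseg2e32 above does the whole deinterleave. For memory holding
; x0 y0 x1 y1 x2 y2 x3 y3, RVV segment-load semantics give:
;   v8 = {x0, x1, x2, x3}   field 0, i.e. %v0
;   v9 = {y0, y1, y2, y3}   field 1, i.e. %v1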


define {<4 x i32>, <4 x i32>, <4 x i32>} @load_factor3(ptr %ptr) {
; CHECK-LABEL: load_factor3:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = load <12 x i32>, ptr %ptr
  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
  %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
  %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
  %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
  ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
}

define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @load_factor4(ptr %ptr) {
; CHECK-LABEL: load_factor4:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = load <16 x i32>, ptr %ptr
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
  %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
  %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
  %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3
  ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3
}

define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @load_factor5(ptr %ptr) {
; CHECK-LABEL: load_factor5:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlseg5e32.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = load <20 x i32>, ptr %ptr
  %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>
  %v1 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 1, i32 6, i32 11, i32 16>
  %v2 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 2, i32 7, i32 12, i32 17>
  %v3 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 3, i32 8, i32 13, i32 18>
  %v4 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 4, i32 9, i32 14, i32 19>
  %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
  %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
  %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
  %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3
  %res4 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3, <4 x i32> %v4, 4
  ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res4
}

define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @load_factor6(ptr %ptr) {
; CHECK-LABEL: load_factor6:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = load <12 x i16>, ptr %ptr
  %v0 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 0, i32 6>
  %v1 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 1, i32 7>
  %v2 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 2, i32 8>
  %v3 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 3, i32 9>
  %v4 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 4, i32 10>
  %v5 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 5, i32 11>
  %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} undef, <2 x i16> %v0, 0
  %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
  %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
  %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
  %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
  %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
  ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5
}
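
; Factors 6 through 8 switch to small <2 x i16> fields: at e16 mf4 even
; NFIELDS=8 gives EMUL * NFIELDS = 2, well under the architectural limit of 8
; on segment accesses, so a single vlseg still suffices.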

define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @load_factor7(ptr %ptr) {
; CHECK-LABEL: load_factor7:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = load <14 x i16>, ptr %ptr
  %v0 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 0, i32 7>
  %v1 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 1, i32 8>
  %v2 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 2, i32 9>
  %v3 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 3, i32 10>
  %v4 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 4, i32 11>
  %v5 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 5, i32 12>
  %v6 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 6, i32 13>
  %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} undef, <2 x i16> %v0, 0
  %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
  %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
  %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
  %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
  %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
  %res6 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5, <2 x i16> %v6, 6
  ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res6
}

define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @load_factor8(ptr %ptr) {
; CHECK-LABEL: load_factor8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = load <16 x i16>, ptr %ptr
  %v0 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 0, i32 8>
  %v1 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 1, i32 9>
  %v2 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 2, i32 10>
  %v3 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 3, i32 11>
  %v4 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 4, i32 12>
  %v5 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 5, i32 13>
  %v6 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 6, i32 14>
  %v7 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 7, i32 15>
  %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} undef, <2 x i16> %v0, 0
  %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
  %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
  %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
  %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
  %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
  %res6 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5, <2 x i16> %v6, 6
  %res7 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res6, <2 x i16> %v7, 7
  ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res7
}

; LMUL * NF is > 8 here, so this can't be lowered to a vlseg
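; Concretely (assuming the default 128-bit minimum VLEN): each <8 x i64>
; field is 512 bits, i.e. EMUL = m4, and NFIELDS = 6, so EMUL * NFIELDS = 24.
; The V spec requires EMUL * NFIELDS <= 8 for vlseg/vsseg, hence the long
; shuffle-based expansion below.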
define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_factor6_too_big(ptr %ptr) {
; RV32-LABEL: load_factor6_too_big:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 92
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: sub sp, sp, a2
; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xdc, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 92 * vlenb
; RV32-NEXT: addi a3, a1, 256
; RV32-NEXT: addi a4, a1, 128
; RV32-NEXT: li a2, 32
; RV32-NEXT: lui a5, 12291
; RV32-NEXT: lui a6, %hi(.LCPI8_0)
; RV32-NEXT: addi a6, a6, %lo(.LCPI8_0)
; RV32-NEXT: li a7, 768
; RV32-NEXT: lui t0, 49164
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vle32.v v16, (a1)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li t1, 76
; RV32-NEXT: mul a1, a1, t1
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vle32.v v8, (a4)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a4, 68
; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: addi a5, a5, 3
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vle16.v v6, (a6)
; RV32-NEXT: vmv.s.x v0, a5
; RV32-NEXT: lui a1, %hi(.LCPI8_1)
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_1)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vmerge.vvm v16, v8, v16, v0
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v24, v16, v6
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: li a5, 52
; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vle32.v v16, (a3)
; RV32-NEXT: addi t0, t0, 12
; RV32-NEXT: vmv.s.x v0, a7
; RV32-NEXT: vmv.s.x v7, t0
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vle16.v v4, (a1)
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT: vslidedown.vi v24, v16, 16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 60
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 84
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vmerge.vvm v20, v24, v16, v0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 40
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vmv1r.v v0, v7
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 76
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vmerge.vvm v24, v8, v16, v0
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v8, v24, v4
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 44
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: li a1, 3
; RV32-NEXT: lui a3, 196656
; RV32-NEXT: lui a4, %hi(.LCPI8_2)
; RV32-NEXT: addi a4, a4, %lo(.LCPI8_2)
; RV32-NEXT: slli a1, a1, 10
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: vle16.v v14, (a4)
; RV32-NEXT: vmv.s.x v12, a3
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 84
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vmv4r.v v8, v24
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 60
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT: vmerge.vvm v8, v24, v8, v0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 24
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 68
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vmerge.vvm v24, v24, v16, v0
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v16, v24, v14
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 5
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, 3
; RV32-NEXT: lui a3, 786624
; RV32-NEXT: lui a4, 12
; RV32-NEXT: lui a5, 768
; RV32-NEXT: li a6, 48
; RV32-NEXT: lui a7, 3073
; RV32-NEXT: li t0, 192
; RV32-NEXT: addi a1, a1, 3
; RV32-NEXT: addi a3, a3, 192
; RV32-NEXT: addi a4, a4, 12
; RV32-NEXT: addi a5, a5, 768
; RV32-NEXT: addi a7, a7, -1024
; RV32-NEXT: vmv.s.x v1, a6
; RV32-NEXT: vmv.s.x v12, t0
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: vmv.s.x v3, a3
; RV32-NEXT: vmv.s.x v2, a4
; RV32-NEXT: vmv.s.x v13, a5
; RV32-NEXT: vmv.s.x v14, a7
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 60
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vmv4r.v v8, v16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 84
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT: vmerge.vvm v20, v8, v16, v0
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vmv1r.v v0, v3
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 68
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 76
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vmerge.vvm v24, v16, v24, v0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vmv1r.v v0, v2
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 84
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vmerge.vvm v24, v8, v24, v0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 12
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vmv1r.v v0, v13
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 76
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vmerge.vvm v24, v16, v24, v0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 84
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vmerge.vvm v4, v8, v24, v0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 28
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vmv1r.v v0, v14
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 76
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 68
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vmerge.vvm v16, v24, v16, v0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 76
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 84
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vmerge.vvm v8, v8, v16, v0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 68
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, 32
; RV32-NEXT: addi a1, a1, 4
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v16, a1
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 40
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vrgatherei16.vv v20, v8, v16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 52
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v20, v8
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 84
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, 48
; RV32-NEXT: lui a2, %hi(.LCPI8_3)
; RV32-NEXT: addi a2, a2, %lo(.LCPI8_3)
; RV32-NEXT: addi a1, a1, 5
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v28, (a2)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v20, a1
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 24
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vrgatherei16.vv v8, v12, v20
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 44
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v8, v16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 52
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vrgatherei16.vv v24, v12, v28
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 5
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v24, v16
; RV32-NEXT: lui a1, %hi(.LCPI8_4)
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_4)
; RV32-NEXT: lui a2, %hi(.LCPI8_5)
; RV32-NEXT: addi a2, a2, %lo(.LCPI8_5)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vle16.v v12, (a1)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v28, (a2)
; RV32-NEXT: lui a1, %hi(.LCPI8_6)
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_6)
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vle16.v v30, (a1)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vrgatherei16.vv v16, v0, v12
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 12
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vrgatherei16.vv v12, v20, v28
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v12, v16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v16, v0, v30
; RV32-NEXT: lui a1, %hi(.LCPI8_7)
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_7)
; RV32-NEXT: lui a2, %hi(.LCPI8_8)
; RV32-NEXT: addi a2, a2, %lo(.LCPI8_8)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v20, (a1)
; RV32-NEXT: lui a1, %hi(.LCPI8_9)
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_9)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vle16.v v8, (a2)
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vle16.v v10, (a1)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 28
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vrgatherei16.vv v28, v0, v20
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v28, v16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 76
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v16, v0, v8
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 60
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 68
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vrgatherei16.vv v16, v4, v10
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 60
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v16, v0
; RV32-NEXT: addi a1, a0, 320
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vse32.v v16, (a1)
; RV32-NEXT: addi a1, a0, 256
; RV32-NEXT: vse32.v v28, (a1)
; RV32-NEXT: addi a1, a0, 192
; RV32-NEXT: vse32.v v12, (a1)
; RV32-NEXT: addi a1, a0, 128
; RV32-NEXT: vse32.v v24, (a1)
; RV32-NEXT: addi a1, a0, 64
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 52
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 16
; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vse32.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 84
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 92
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 16
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: load_factor6_too_big:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: li a3, 88
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: sub sp, sp, a2
; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 88 * vlenb
; RV64-NEXT: addi a3, a1, 128
; RV64-NEXT: addi a6, a1, 256
; RV64-NEXT: li a4, 128
; RV64-NEXT: lui a2, 1
; RV64-NEXT: lui a5, %hi(.LCPI8_0)
; RV64-NEXT: addi a5, a5, %lo(.LCPI8_0)
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.i v16, 6
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a6)
; RV64-NEXT: lui a6, 16
; RV64-NEXT: addi a6, a6, 7
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.x v17, a6
; RV64-NEXT: addi a6, a2, 65
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vrgather.vi v24, v8, 4
; RV64-NEXT: vrgather.vi v20, v8, 5
; RV64-NEXT: csrr a7, vlenb
; RV64-NEXT: li t0, 68
; RV64-NEXT: mul a7, a7, t0
; RV64-NEXT: add a7, sp, a7
; RV64-NEXT: addi a7, a7, 16
; RV64-NEXT: vs4r.v v20, (a7) # Unknown-size Folded Spill
; RV64-NEXT: vrgatherei16.vv v20, v8, v16
; RV64-NEXT: csrr a7, vlenb
; RV64-NEXT: li t0, 84
; RV64-NEXT: mul a7, a7, t0
; RV64-NEXT: add a7, sp, a7
; RV64-NEXT: addi a7, a7, 16
; RV64-NEXT: vs4r.v v20, (a7) # Unknown-size Folded Spill
; RV64-NEXT: vrgatherei16.vv v20, v8, v17
; RV64-NEXT: csrr a7, vlenb
; RV64-NEXT: li t0, 72
; RV64-NEXT: mul a7, a7, t0
; RV64-NEXT: add a7, sp, a7
; RV64-NEXT: addi a7, a7, 16
; RV64-NEXT: vs4r.v v20, (a7) # Unknown-size Folded Spill
; RV64-NEXT: vrgather.vi v16, v8, 2
; RV64-NEXT: csrr a7, vlenb
; RV64-NEXT: slli a7, a7, 6
; RV64-NEXT: add a7, sp, a7
; RV64-NEXT: addi a7, a7, 16
; RV64-NEXT: vs4r.v v16, (a7) # Unknown-size Folded Spill
; RV64-NEXT: vrgather.vi v16, v8, 3
; RV64-NEXT: csrr a7, vlenb
; RV64-NEXT: li t0, 56
; RV64-NEXT: mul a7, a7, t0
; RV64-NEXT: add a7, sp, a7
; RV64-NEXT: addi a7, a7, 16
; RV64-NEXT: vs4r.v v16, (a7) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma
; RV64-NEXT: vslidedown.vi v16, v8, 8
; RV64-NEXT: csrr a7, vlenb
; RV64-NEXT: li t0, 48
; RV64-NEXT: mul a7, a7, t0
; RV64-NEXT: add a7, sp, a7
; RV64-NEXT: addi a7, a7, 16
; RV64-NEXT: vs8r.v v16, (a7) # Unknown-size Folded Spill
; RV64-NEXT: vmv.s.x v21, a4
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a1)
; RV64-NEXT: vle64.v v0, (a3)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 40
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vle16.v v2, (a5)
; RV64-NEXT: vmv.s.x v20, a6
; RV64-NEXT: vmv1r.v v0, v21
; RV64-NEXT: vmv1r.v v7, v21
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vrgather.vi v24, v16, 2, v0.t
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 60
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vmv1r.v v0, v20
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 40
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vmerge.vvm v24, v16, v8, v0
; RV64-NEXT: vmv8r.v v16, v8
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 76
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vrgatherei16.vv v8, v24, v2
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 5
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: lui a1, 2
; RV64-NEXT: lui a3, %hi(.LCPI8_1)
; RV64-NEXT: addi a3, a3, %lo(.LCPI8_1)
; RV64-NEXT: addi a1, a1, 130
; RV64-NEXT: vle16.v v8, (a3)
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: add a3, sp, a3
; RV64-NEXT: addi a3, a3, 16
; RV64-NEXT: vs2r.v v8, (a3) # Unknown-size Folded Spill
; RV64-NEXT: vmv.s.x v2, a1
; RV64-NEXT: vmv1r.v v0, v7
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 68
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 48
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vrgather.vi v24, v8, 3, v0.t
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 68
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vmv1r.v v0, v2
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 40
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vmerge.vvm v24, v8, v16, v0
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl2r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vrgatherei16.vv v0, v24, v16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 24
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill
; RV64-NEXT: lui a1, 4
; RV64-NEXT: lui a3, 8
; RV64-NEXT: addi a1, a1, 260
; RV64-NEXT: addi a3, a3, 520
; RV64-NEXT: vmv.s.x v0, a1
; RV64-NEXT: vmv.s.x v2, a3
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 76
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vmerge.vvm v24, v8, v16, v0
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl1r.v v7, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vmv1r.v v0, v7
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 84
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 48
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vrgather.vi v24, v16, 4, v0.t
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 84
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vmv1r.v v0, v2
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 76
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vmerge.vvm v24, v8, v16, v0
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vmv8r.v v16, v8
; RV64-NEXT: vmv1r.v v0, v7
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 72
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 48
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vmv4r.v v8, v24
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vrgather.vi v12, v24, 5, v0.t
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 72
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV64-NEXT: lui a1, 96
; RV64-NEXT: li a3, 192
; RV64-NEXT: vmv.s.x v3, a3
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.x v12, a1
; RV64-NEXT: vmv1r.v v0, v3
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 6
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vrgatherei16.vv v24, v8, v12, v0.t
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 6
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT: lui a1, %hi(.LCPI8_2)
; RV64-NEXT: addi a1, a1, %lo(.LCPI8_2)
; RV64-NEXT: li a3, 1040
; RV64-NEXT: lui a4, 112
; RV64-NEXT: addi a4, a4, 1
; RV64-NEXT: vmv.s.x v0, a3
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.x v12, a4
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle16.v v6, (a1)
; RV64-NEXT: vmv8r.v v24, v16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 76
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vmerge.vvm v16, v24, v16, v0
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vmv1r.v v0, v3
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 56
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vrgatherei16.vv v16, v8, v12, v0.t
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a3, 56
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: addi a1, a2, -2016
; RV64-NEXT: vmv.s.x v0, a1
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vrgatherei16.vv v16, v8, v6
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 76
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vmerge.vvm v8, v24, v8, v0
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 76
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: lui a1, %hi(.LCPI8_3)
; RV64-NEXT: addi a1, a1, %lo(.LCPI8_3)
; RV64-NEXT: vle16.v v24, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 5
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 60
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
; RV64-NEXT: vmv.v.v v8, v0
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 60
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 68
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 24
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vmv.v.v v0, v8
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 84
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
; RV64-NEXT: vmv.v.v v8, v16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 84
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vrgatherei16.vv v16, v8, v24
; RV64-NEXT: lui a1, %hi(.LCPI8_4)
; RV64-NEXT: addi a1, a1, %lo(.LCPI8_4)
; RV64-NEXT: vle16.v v8, (a1)
; RV64-NEXT: lui a1, %hi(.LCPI8_5)
; RV64-NEXT: addi a1, a1, %lo(.LCPI8_5)
; RV64-NEXT: vle16.v v6, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 72
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
; RV64-NEXT: vmv.v.v v12, v16
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vrgatherei16.vv v24, v16, v8
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 6
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
; RV64-NEXT: vmv.v.v v8, v24
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 76
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vrgatherei16.vv v24, v16, v6
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 56
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
; RV64-NEXT: vmv.v.v v16, v24
; RV64-NEXT: addi a1, a0, 256
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: addi a1, a0, 320
; RV64-NEXT: vse64.v v16, (a1)
; RV64-NEXT: addi a1, a0, 192
; RV64-NEXT: vse64.v v12, (a1)
; RV64-NEXT: addi a1, a0, 128
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: li a3, 84
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: addi a1, a0, 64
; RV64-NEXT: vse64.v v0, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 60
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: li a1, 88
; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %interleaved.vec = load <48 x i64>, ptr %ptr
  %v0 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 0, i32 6, i32 12, i32 18, i32 24, i32 30, i32 36, i32 42>
  %v1 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 1, i32 7, i32 13, i32 19, i32 25, i32 31, i32 37, i32 43>
  %v2 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 2, i32 8, i32 14, i32 20, i32 26, i32 32, i32 38, i32 44>
  %v3 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 3, i32 9, i32 15, i32 21, i32 27, i32 33, i32 39, i32 45>
  %v4 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 4, i32 10, i32 16, i32 22, i32 28, i32 34, i32 40, i32 46>
  %v5 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 5, i32 11, i32 17, i32 23, i32 29, i32 35, i32 41, i32 47>
  %res0 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} undef, <8 x i64> %v0, 0
  %res1 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res0, <8 x i64> %v1, 1
  %res2 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res1, <8 x i64> %v2, 2
  %res3 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res2, <8 x i64> %v3, 3
  %res4 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res3, <8 x i64> %v4, 4
  %res5 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res4, <8 x i64> %v5, 5
  ret {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res5
}


; ------------------------------------------------------------------------------
; Stores
; ------------------------------------------------------------------------------

define void @store_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: store_factor2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg2e32.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  store <8 x i32> %interleaved.vec, ptr %ptr
  ret void
}
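
; vsseg2e32 is the mirror image of the factor-2 load: v8 (%v0) and v9 (%v1)
; are written back interleaved, so memory ends up as
; v0[0] v1[0] v0[1] v1[1] v0[2] v1[2] v0[3] v1[3].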

define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: store_factor3:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg3e32.v v8, (a0)
; CHECK-NEXT: ret
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  store <12 x i32> %interleaved.vec, ptr %ptr
  ret void
}

define void @store_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: store_factor4:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg4e32.v v8, (a0)
; CHECK-NEXT: ret
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x i32> %interleaved.vec, ptr %ptr
  ret void
}

define void @store_factor5(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4) {
; CHECK-LABEL: store_factor5:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg5e32.v v8, (a0)
; CHECK-NEXT: ret
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s2 = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %s3 = shufflevector <4 x i32> %v4, <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <16 x i32> %s2, <16 x i32> %s3, <20 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 1, i32 5, i32 9, i32 13, i32 17, i32 2, i32 6, i32 10, i32 14, i32 18, i32 3, i32 7, i32 11, i32 15, i32 19>
  store <20 x i32> %interleaved.vec, ptr %ptr
  ret void
}

define void @store_factor6(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5) {
; CHECK-LABEL: store_factor6:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vsseg6e16.v v8, (a0)
; CHECK-NEXT: ret
  %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %s2 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s3 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <8 x i16> %s2, <8 x i16> %s3, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11>
  store <12 x i16> %interleaved.vec, ptr %ptr
  ret void
}


define <4 x i32> @load_factor2_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor2_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = load <8 x i32>, ptr %ptr
  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ret <4 x i32> %v0
}


define <4 x i32> @load_factor3_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor3_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 12
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %interleaved.vec = load <12 x i32>, ptr %ptr
  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
  ret <4 x i32> %v0
}
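
; With only field 0 live, the segment load degenerates into a strided load:
; stride = factor * element size = 3 * 4 bytes = 12, which is the "li a1, 12"
; feeding vlse32 above. The factor 4-8 cases below follow the same pattern.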

define <4 x i32> @load_factor4_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor4_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %interleaved.vec = load <16 x i32>, ptr %ptr
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  ret <4 x i32> %v0
}

define <4 x i32> @load_factor5_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor5_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 20
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %interleaved.vec = load <20 x i32>, ptr %ptr
  %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>
  ret <4 x i32> %v0
}

define <2 x i16> @load_factor6_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor6_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 10
; CHECK-NEXT: li a1, 12
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1
; CHECK-NEXT: ret
  %interleaved.vec = load <12 x i16>, ptr %ptr
  %v0 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 5, i32 11>
  ret <2 x i16> %v0
}

define <4 x i8> @load_factor7_one_active(ptr %ptr) vscale_range(8,1024) {
; CHECK-LABEL: load_factor7_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 1
; CHECK-NEXT: li a1, 7
; CHECK-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
  %interleaved.vec = load <32 x i8>, ptr %ptr
  %v0 = shufflevector <32 x i8> %interleaved.vec, <32 x i8> poison, <4 x i32> <i32 1, i32 8, i32 15, i32 22>
  ret <4 x i8> %v0
}

define <4 x i8> @load_factor8_one_active(ptr %ptr) vscale_range(8,1024) {
; CHECK-LABEL: load_factor8_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 8
; CHECK-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
  %interleaved.vec = load <32 x i8>, ptr %ptr
  %v0 = shufflevector <32 x i8> %interleaved.vec, <32 x i8> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
  ret <4 x i8> %v0
}

define void @load_factor4_one_active_storeback(ptr %ptr) {
; CHECK-LABEL: load_factor4_one_active_storeback:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = load <16 x i32>, ptr %ptr
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  store <4 x i32> %v0, ptr %ptr
  ret void
}

; TODO: This should be a strided load
define void @load_factor4_one_active_storeback_full(ptr %ptr) {
; CHECK-LABEL: load_factor4_one_active_storeback_full:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v12, v8, 4
; CHECK-NEXT: vmv1r.v v13, v8
; CHECK-NEXT: vmv1r.v v14, v12
; CHECK-NEXT: vsetivli zero, 4, e32, m4, ta, ma
; CHECK-NEXT: vslidedown.vi v16, v8, 8
; CHECK-NEXT: vmv1r.v v15, v16
; CHECK-NEXT: vslidedown.vi v16, v8, 12
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg4e32.v v13, (a0)
; CHECK-NEXT: ret
  %interleaved.vec = load <16 x i32>, ptr %ptr
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  store <16 x i32> %v0, ptr %ptr
  ret void
}
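
; A sketch of the strided-load form the TODO above asks for (an assumption
; about better output; the lanes beyond the four live ones are undef, so only
; the gather itself needs to change):
;   li a1, 16
;   vsetivli zero, 4, e32, m1, ta, ma
;   vlse32.v v8, (a0), a1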

define void @store_factor4_one_active(ptr %ptr, <4 x i32> %v) {
; CHECK-LABEL: store_factor4_one_active:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
  store <16 x i32> %v0, ptr %ptr
  ret void
}

define void @store_factor4_one_active_idx1(ptr %ptr, <4 x i32> %v) {
; CHECK-LABEL: store_factor4_one_active_idx1:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 4
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32> <i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef>
  store <16 x i32> %v0, ptr %ptr
  ret void
}

define void @store_factor4_one_active_fullwidth(ptr %ptr, <16 x i32> %v) {
; CHECK-LABEL: store_factor4_one_active_fullwidth:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m4, ta, ma
; CHECK-NEXT: vsse32.v v8, (a0), a1
; CHECK-NEXT: ret
  %v0 = shufflevector <16 x i32> %v, <16 x i32> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
  store <16 x i32> %v0, ptr %ptr
  ret void
}

; TODO: This could be a vslidedown followed by a strided store
define void @store_factor4_one_active_slidedown(ptr %ptr, <4 x i32> %v) {
; CHECK-LABEL: store_factor4_one_active_slidedown:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: vslideup.vi v10, v8, 1
; CHECK-NEXT: vmv.v.v v11, v10
; CHECK-NEXT: vmv.v.v v12, v10
; CHECK-NEXT: vsseg4e32.v v9, (a0)
; CHECK-NEXT: ret
  %v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef>
  store <16 x i32> %v0, ptr %ptr
  ret void
}
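
; A sketch of the lowering the TODO above suggests (an assumption, not
; current output): slide the live lanes down once, then store them with a
; 16-byte stride:
;   vsetivli zero, 4, e32, m1, ta, ma
;   vslidedown.vi v8, v8, 1
;   li a1, 16
;   vsse32.v v8, (a0), a1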