; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin | FileCheck --check-prefixes=CHECK,RV32 %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin | FileCheck --check-prefixes=CHECK,RV64 %s
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck --check-prefixes=CHECK,RV32 %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck --check-prefixes=CHECK,RV64 %s

; Integers

define void @vector_interleave_store_nxv32i1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv32i1_nxv16i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    li a1, -1
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    vmerge.vim v12, v10, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v9
; CHECK-NEXT:    vmerge.vim v8, v10, 1, v0
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    vwaddu.vv v16, v8, v12
; CHECK-NEXT:    vwmaccu.vx v16, a1, v12
; CHECK-NEXT:    vmsne.vi v8, v18, 0
; CHECK-NEXT:    vmsne.vi v9, v16, 0
; CHECK-NEXT:    add a1, a2, a2
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT:    vslideup.vx v9, v8, a2
; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT:    vsm.v v9, (a0)
; CHECK-NEXT:    ret
  %res = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  store <vscale x 32 x i1> %res, ptr %p
  ret void
}

; Shouldn't be lowered to vsseg because it's unaligned
define void @vector_interleave_store_nxv16i16_nxv8i16_align1(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv16i16_nxv8i16_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v8, v10
; CHECK-NEXT:    li a1, -1
; CHECK-NEXT:    vwmaccu.vx v12, a1, v10
; CHECK-NEXT:    vs4r.v v12, (a0)
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i16> @llvm.vector.interleave2.nxv16i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  store <vscale x 16 x i16> %res, ptr %p, align 1
  ret void
}

define void @vector_interleave_store_nxv16i16_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv16i16_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsseg2e16.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i16> @llvm.vector.interleave2.nxv16i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  store <vscale x 16 x i16> %res, ptr %p
  ret void
}

define void @vector_interleave_store_nxv8i32_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv8i32_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsseg2e32.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  store <vscale x 8 x i32> %res, ptr %p
  ret void
}

define void @vector_interleave_store_nxv4i64_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv4i64_nxv2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; CHECK-NEXT:    vsseg2e64.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  store <vscale x 4 x i64> %res, ptr %p
  ret void
}

define void @vector_interleave_store_nxv8i64_nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv8i64_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; CHECK-NEXT:    vsseg2e64.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i64> @llvm.vector.interleave2.nxv8i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b)
  store <vscale x 8 x i64> %res, ptr %p
  ret void
}

; This shouldn't be lowered to a vsseg because EMUL * NFIELDS >= 8
define void @vector_interleave_store_nxv16i64_nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv16i64_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    vsetvli a2, zero, e16, m2, ta, mu
; CHECK-NEXT:    vid.v v6
; CHECK-NEXT:    vmv8r.v v24, v8
; CHECK-NEXT:    srli a2, a1, 1
; CHECK-NEXT:    vmv4r.v v28, v16
; CHECK-NEXT:    vmv4r.v v16, v12
; CHECK-NEXT:    vsrl.vi v8, v6, 1
; CHECK-NEXT:    vand.vi v10, v6, 1
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    vmsne.vi v0, v10, 0
; CHECK-NEXT:    add a1, a0, a1
; CHECK-NEXT:    vadd.vx v8, v8, a2, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vrgatherei16.vv v0, v24, v8
; CHECK-NEXT:    vrgatherei16.vv v24, v16, v8
; CHECK-NEXT:    vs8r.v v24, (a1)
; CHECK-NEXT:    vs8r.v v0, (a0)
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i64> @llvm.vector.interleave2.nxv16i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b)
  store <vscale x 16 x i64> %res, ptr %p
  ret void
}

; Floats

define void @vector_interleave_store_nxv4bf16_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv4bf16_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsseg2e16.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x bfloat> @llvm.vector.interleave2.nxv4bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
  store <vscale x 4 x bfloat> %res, ptr %p
  ret void
}

define void @vector_interleave_store_nxv8bf16_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv8bf16_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vsseg2e16.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x bfloat> @llvm.vector.interleave2.nxv8bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
  store <vscale x 8 x bfloat> %res, ptr %p
  ret void
}

define void @vector_interleave_store_nxv4f16_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv4f16_nxv2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vsseg2e16.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.vector.interleave2.nxv4f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
  store <vscale x 4 x half> %res, ptr %p
  ret void
}

define void @vector_interleave_store_nxv8f16_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv8f16_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vsseg2e16.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.vector.interleave2.nxv8f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  store <vscale x 8 x half> %res, ptr %p
  ret void
}

define void @vector_interleave_store_nxv4f32_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv4f32_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vsseg2e32.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.vector.interleave2.nxv4f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
  store <vscale x 4 x float> %res, ptr %p
  ret void
}

define void @vector_interleave_store_nxv16bf16_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv16bf16_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsseg2e16.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x bfloat> @llvm.vector.interleave2.nxv16bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  store <vscale x 16 x bfloat> %res, ptr %p
  ret void
}

define void @vector_interleave_store_nxv16f16_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv16f16_nxv8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsseg2e16.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x half> @llvm.vector.interleave2.nxv16f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  store <vscale x 16 x half> %res, ptr %p
  ret void
}

define void @vector_interleave_store_nxv8f32_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv8f32_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsseg2e32.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x float> @llvm.vector.interleave2.nxv8f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  store <vscale x 8 x float> %res, ptr %p
  ret void
}

define void @vector_interleave_store_nxv4f64_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_nxv4f64_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; CHECK-NEXT:    vsseg2e64.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  store <vscale x 4 x double> %res, ptr %p
  ret void
}

define void @vector_interleave_store_nxv4p0_nxv2p0(<vscale x 2 x ptr> %a, <vscale x 2 x ptr> %b, ptr %p) {
; RV32-LABEL: vector_interleave_store_nxv4p0_nxv2p0:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; RV32-NEXT:    vsseg2e32.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: vector_interleave_store_nxv4p0_nxv2p0:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; RV64-NEXT:    vsseg2e64.v v8, (a0)
; RV64-NEXT:    ret
  %res = call <vscale x 4 x ptr> @llvm.vector.interleave2.nxv4p0(<vscale x 2 x ptr> %a, <vscale x 2 x ptr> %b)
  store <vscale x 4 x ptr> %res, ptr %p
  ret void
}

define void @vector_interleave_store_factor4(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, ptr %p) {
; CHECK-LABEL: vector_interleave_store_factor4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; CHECK-NEXT:    vsseg4e32.v v8, (a0)
; CHECK-NEXT:    ret
  %v0 = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %c)
  %v1 = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %b, <vscale x 4 x i32> %d)
  %v2 = call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> %v0, <vscale x 8 x i32> %v1)
  store <vscale x 16 x i32> %v2, ptr %p
  ret void
}

define void @vector_interleave_store_factor8(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, <vscale x 2 x i32> %d, <vscale x 2 x i32> %e, <vscale x 2 x i32> %f, <vscale x 2 x i32> %g, <vscale x 2 x i32> %h, ptr %p) {
; CHECK-LABEL: vector_interleave_store_factor8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vsseg8e32.v v8, (a0)
; CHECK-NEXT:    ret
  %v0 = call <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %e)
  %v1 = call <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %c, <vscale x 2 x i32> %g)
  %v2 = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1)

  %v3 = call <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %b, <vscale x 2 x i32> %f)
  %v4 = call <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %d, <vscale x 2 x i32> %h)
  %v5 = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %v3, <vscale x 4 x i32> %v4)

  %v6 = call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> %v2, <vscale x 8 x i32> %v5)
  store <vscale x 16 x i32> %v6, ptr %p
  ret void
}