; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin | FileCheck %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin | FileCheck %s
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck %s

; Integers

; Mask vectors have no segment store, so the interleave is expanded: the
; operands are widened to i8, interleaved with vwaddu.vv/vwmaccu.vx, and the
; result is converted back to a mask with vmsne and stored with vsm.
define void @vector_interleave_store_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_v32i1_v16i1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v0, v8, 2
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
; CHECK-NEXT:    vsetivli zero, 16, e8, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 16
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v8, v10
; CHECK-NEXT:    li a2, -1
; CHECK-NEXT:    vwmaccu.vx v12, a2, v10
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vmsne.vi v8, v12, 0
; CHECK-NEXT:    vsm.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <32 x i1> @llvm.vector.interleave2.v32i1(<16 x i1> %a, <16 x i1> %b)
  store <32 x i1> %res, ptr %p
  ret void
}

; Shouldn't be lowered to vsseg because the store is only align 1.
define void @vector_interleave_store_v16i16_v8i16_align1(<8 x i16> %a, <8 x i16> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_v16i16_v8i16_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v10, v8, v9
; CHECK-NEXT:    li a1, -1
; CHECK-NEXT:    vwmaccu.vx v10, a1, v9
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vse8.v v10, (a0)
; CHECK-NEXT:    ret
  %res = call <16 x i16> @llvm.vector.interleave2.v16i16(<8 x i16> %a, <8 x i16> %b)
  store <16 x i16> %res, ptr %p, align 1
  ret void
}

define void @vector_interleave_store_v16i16_v8i16(<8 x i16> %a, <8 x i16> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_v16i16_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vsseg2e16.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <16 x i16> @llvm.vector.interleave2.v16i16(<8 x i16> %a, <8 x i16> %b)
  store <16 x i16> %res, ptr %p
  ret void
}

define void @vector_interleave_store_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_v8i32_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vsseg2e32.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %b)
  store <8 x i32> %res, ptr %p
  ret void
}

define void @vector_interleave_store_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_v4i64_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vsseg2e64.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64> %a, <2 x i64> %b)
  store <4 x i64> %res, ptr %p
  ret void
}

; Floats

define void @vector_interleave_store_v4bf16_v2bf16(<2 x bfloat> %a, <2 x bfloat> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_v4bf16_v2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vsseg2e16.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <4 x bfloat> @llvm.vector.interleave2.v4bf16(<2 x bfloat> %a, <2 x bfloat> %b)
  store <4 x bfloat> %res, ptr %p
  ret void
}

define void @vector_interleave_store_v8bf16_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_v8bf16_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vsseg2e16.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <8 x bfloat> @llvm.vector.interleave2.v8bf16(<4 x bfloat> %a, <4 x bfloat> %b)
  store <8 x bfloat> %res, ptr %p
  ret void
}

define void @vector_interleave_store_v4f16_v2f16(<2 x half> %a, <2 x half> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_v4f16_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vsseg2e16.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.vector.interleave2.v4f16(<2 x half> %a, <2 x half> %b)
  store <4 x half> %res, ptr %p
  ret void
}

define void @vector_interleave_store_v8f16_v4f16(<4 x half> %a, <4 x half> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_v8f16_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vsseg2e16.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.vector.interleave2.v8f16(<4 x half> %a, <4 x half> %b)
  store <8 x half> %res, ptr %p
  ret void
}

define void @vector_interleave_store_v4f32_v2f32(<2 x float> %a, <2 x float> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_v4f32_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vsseg2e32.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.vector.interleave2.v4f32(<2 x float> %a, <2 x float> %b)
  store <4 x float> %res, ptr %p
  ret void
}

define void @vector_interleave_store_v16bf16_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_v16bf16_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vsseg2e16.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <16 x bfloat> @llvm.vector.interleave2.v16bf16(<8 x bfloat> %a, <8 x bfloat> %b)
  store <16 x bfloat> %res, ptr %p
  ret void
}

define void @vector_interleave_store_v16f16_v8f16(<8 x half> %a, <8 x half> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_v16f16_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vsseg2e16.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <16 x half> @llvm.vector.interleave2.v16f16(<8 x half> %a, <8 x half> %b)
  store <16 x half> %res, ptr %p
  ret void
}

define void @vector_interleave_store_v8f32_v4f32(<4 x float> %a, <4 x float> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_v8f32_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vsseg2e32.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <8 x float> @llvm.vector.interleave2.v8f32(<4 x float> %a, <4 x float> %b)
  store <8 x float> %res, ptr %p
  ret void
}

define void @vector_interleave_store_v4f64_v2f64(<2 x double> %a, <2 x double> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_v4f64_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vsseg2e64.v v8, (a0)
; CHECK-NEXT:    ret
  %res = call <4 x double> @llvm.vector.interleave2.v4f64(<2 x double> %a, <2 x double> %b)
  store <4 x double> %res, ptr %p
  ret void
}

; The nested interleave2 trees below are recognized as factor-4 and factor-8
; interleaves and lowered to single vsseg4/vsseg8 segment stores.
define void @vector_interleave_store_factor4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, ptr %p) {
; CHECK-LABEL: vector_interleave_store_factor4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vsseg4e32.v v8, (a0)
; CHECK-NEXT:    ret
  %v0 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %c)
  %v1 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %b, <4 x i32> %d)
  %v2 = call <16 x i32> @llvm.vector.interleave2.v16i32(<8 x i32> %v0, <8 x i32> %v1)
  store <16 x i32> %v2, ptr %p
  ret void
}

define void @vector_interleave_store_factor8(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h, ptr %p) {
; CHECK-LABEL: vector_interleave_store_factor8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vsseg8e32.v v8, (a0)
; CHECK-NEXT:    ret
  %v0 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %e)
  %v1 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %c, <4 x i32> %g)
  %v2 = call <16 x i32> @llvm.vector.interleave2.v16i32(<8 x i32> %v0, <8 x i32> %v1)

  %v3 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %b, <4 x i32> %f)
  %v4 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %d, <4 x i32> %h)
  %v5 = call <16 x i32> @llvm.vector.interleave2.v16i32(<8 x i32> %v3, <8 x i32> %v4)

  %v6 = call <32 x i32> @llvm.vector.interleave2.v32i32(<16 x i32> %v2, <16 x i32> %v5)
  store <32 x i32> %v6, ptr %p
  ret void
}
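
; A possible additional case, sketched here as a comment only because its
; CHECK lines have not been generated by utils/update_llc_test_checks.py:
; the same factor-4 tree as @vector_interleave_store_factor4 but with i64
; elements, which should select vsseg4e64.v under the same lowering. The
; function name and the expected instruction are assumptions, not verified
; llc output.
;
; define void @vector_interleave_store_factor4_i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d, ptr %p) {
;   %v0 = call <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64> %a, <2 x i64> %c)
;   %v1 = call <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64> %b, <2 x i64> %d)
;   %v2 = call <8 x i64> @llvm.vector.interleave2.v8i64(<4 x i64> %v0, <4 x i64> %v1)
;   store <8 x i64> %v2, ptr %p
;   ret void
; }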