; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh | FileCheck -check-prefixes=CHECK,RV32 %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh | FileCheck -check-prefixes=CHECK,RV64 %s
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvbb,+zvfh | FileCheck %s --check-prefix=ZVBB
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvbb,+zvfh | FileCheck %s --check-prefix=ZVBB

; Integers

define <32 x i1> @vector_interleave_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b) {
; CHECK-LABEL: vector_interleave_v32i1_v16i1:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v0, v8, 2
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 16
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vwaddu.vv v12, v8, v10
; CHECK-NEXT: li a1, -1
; CHECK-NEXT: vwmaccu.vx v12, a1, v10
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vmsne.vi v0, v12, 0
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_v32i1_v16i1:
; ZVBB: # %bb.0:
; ZVBB-NEXT: li a0, 32
; ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVBB-NEXT: vslideup.vi v0, v8, 2
; ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; ZVBB-NEXT: vmv.v.i v8, 0
; ZVBB-NEXT: vmerge.vim v8, v8, 1, v0
; ZVBB-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; ZVBB-NEXT: vslidedown.vi v10, v8, 16
; ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; ZVBB-NEXT: vwsll.vi v12, v10, 8
; ZVBB-NEXT: vwaddu.wv v12, v12, v8
; ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; ZVBB-NEXT: vmsne.vi v0, v12, 0
; ZVBB-NEXT: ret
  %res = call <32 x i1> @llvm.vector.interleave2.v32i1(<16 x i1> %a, <16 x i1> %b)
  ret <32 x i1> %res
}

define <16 x i16> @vector_interleave_v16i16_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: vector_interleave_v16i16_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vwaddu.vv v10, v8, v9
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vwmaccu.vx v10, a0, v9
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_v16i16_v8i16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT: vwsll.vi v10, v9, 16
; ZVBB-NEXT: vwaddu.wv v10, v10, v8
; ZVBB-NEXT: vmv2r.v v8, v10
; ZVBB-NEXT: ret
  %res = call <16 x i16> @llvm.vector.interleave2.v16i16(<8 x i16> %a, <8 x i16> %b)
  ret <16 x i16> %res
}

define <8 x i32> @vector_interleave_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vector_interleave_v8i32_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vwaddu.vv v10, v8, v9
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vwmaccu.vx v10, a0, v9
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_v8i32_v4i32:
; ZVBB: # %bb.0:
; ZVBB-NEXT: li a0, 32
; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT: vwsll.vx v10, v9, a0
; ZVBB-NEXT: vwaddu.wv v10, v10, v8
; ZVBB-NEXT: vmv2r.v v8, v10
; ZVBB-NEXT: ret
  %res = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i32> %res
}

define <4 x i64> @vector_interleave_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: vector_interleave_v4i64_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vmv1r.v v10, v9
; CHECK-NEXT: lui a0, 12304
; CHECK-NEXT: addi a0, a0, 512
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vsext.vf2 v12, v10
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_v4i64_v2i64:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT: vmv1r.v v10, v9
; ZVBB-NEXT: lui a0, 12304
; ZVBB-NEXT: addi a0, a0, 512
; ZVBB-NEXT: vslideup.vi v8, v10, 2
; ZVBB-NEXT: vmv.s.x v10, a0
; ZVBB-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vsext.vf2 v12, v10
; ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; ZVBB-NEXT: vrgatherei16.vv v10, v8, v12
; ZVBB-NEXT: vmv.v.v v8, v10
; ZVBB-NEXT: ret
  %res = call <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64> %a, <2 x i64> %b)
  ret <4 x i64> %res
}

declare <32 x i1> @llvm.vector.interleave2.v32i1(<16 x i1>, <16 x i1>)
declare <16 x i16> @llvm.vector.interleave2.v16i16(<8 x i16>, <8 x i16>)
declare <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32>, <4 x i32>)
declare <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64>, <2 x i64>)

; Floats

define <4 x half> @vector_interleave_v4f16_v2f16(<2 x half> %a, <2 x half> %b) {
; CHECK-LABEL: vector_interleave_v4f16_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vwaddu.vv v10, v8, v9
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vwmaccu.vx v10, a0, v9
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_v4f16_v2f16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVBB-NEXT: vwsll.vi v10, v9, 16
; ZVBB-NEXT: vwaddu.wv v10, v10, v8
; ZVBB-NEXT: vmv1r.v v8, v10
; ZVBB-NEXT: ret
  %res = call <4 x half> @llvm.vector.interleave2.v4f16(<2 x half> %a, <2 x half> %b)
  ret <4 x half> %res
}

define <8 x half> @vector_interleave_v8f16_v4f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: vector_interleave_v8f16_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vwaddu.vv v10, v8, v9
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vwmaccu.vx v10, a0, v9
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_v8f16_v4f16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVBB-NEXT: vwsll.vi v10, v9, 16
; ZVBB-NEXT: vwaddu.wv v10, v10, v8
; ZVBB-NEXT: vmv1r.v v8, v10
; ZVBB-NEXT: ret
  %res = call <8 x half> @llvm.vector.interleave2.v8f16(<4 x half> %a, <4 x half> %b)
  ret <8 x half> %res
}

define <4 x float> @vector_interleave_v4f32_v2f32(<2 x float> %a, <2 x float> %b) {
; CHECK-LABEL: vector_interleave_v4f32_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vwaddu.vv v10, v8, v9
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vwmaccu.vx v10, a0, v9
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_v4f32_v2f32:
; ZVBB: # %bb.0:
; ZVBB-NEXT: li a0, 32
; ZVBB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; ZVBB-NEXT: vwsll.vx v10, v9, a0
; ZVBB-NEXT: vwaddu.wv v10, v10, v8
; ZVBB-NEXT: vmv1r.v v8, v10
; ZVBB-NEXT: ret
  %res = call <4 x float> @llvm.vector.interleave2.v4f32(<2 x float> %a, <2 x float> %b)
  ret <4 x float> %res
}

define <16 x half> @vector_interleave_v16f16_v8f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: vector_interleave_v16f16_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vwaddu.vv v10, v8, v9
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vwmaccu.vx v10, a0, v9
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_v16f16_v8f16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT: vwsll.vi v10, v9, 16
; ZVBB-NEXT: vwaddu.wv v10, v10, v8
; ZVBB-NEXT: vmv2r.v v8, v10
; ZVBB-NEXT: ret
  %res = call <16 x half> @llvm.vector.interleave2.v16f16(<8 x half> %a, <8 x half> %b)
  ret <16 x half> %res
}

define <8 x float> @vector_interleave_v8f32_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: vector_interleave_v8f32_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vwaddu.vv v10, v8, v9
; CHECK-NEXT: li a0, -1
; CHECK-NEXT: vwmaccu.vx v10, a0, v9
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_v8f32_v4f32:
; ZVBB: # %bb.0:
; ZVBB-NEXT: li a0, 32
; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT: vwsll.vx v10, v9, a0
; ZVBB-NEXT: vwaddu.wv v10, v10, v8
; ZVBB-NEXT: vmv2r.v v8, v10
; ZVBB-NEXT: ret
  %res = call <8 x float> @llvm.vector.interleave2.v8f32(<4 x float> %a, <4 x float> %b)
  ret <8 x float> %res
}

define <4 x double> @vector_interleave_v4f64_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: vector_interleave_v4f64_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vmv1r.v v10, v9
; CHECK-NEXT: lui a0, 12304
; CHECK-NEXT: addi a0, a0, 512
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vsext.vf2 v12, v10
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
;
; ZVBB-LABEL: vector_interleave_v4f64_v2f64:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT: vmv1r.v v10, v9
; ZVBB-NEXT: lui a0, 12304
; ZVBB-NEXT: addi a0, a0, 512
; ZVBB-NEXT: vslideup.vi v8, v10, 2
; ZVBB-NEXT: vmv.s.x v10, a0
; ZVBB-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVBB-NEXT: vsext.vf2 v12, v10
; ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; ZVBB-NEXT: vrgatherei16.vv v10, v8, v12
; ZVBB-NEXT: vmv.v.v v8, v10
; ZVBB-NEXT: ret
  %res = call <4 x double> @llvm.vector.interleave2.v4f64(<2 x double> %a, <2 x double> %b)
  ret <4 x double> %res
}


declare <4 x half> @llvm.vector.interleave2.v4f16(<2 x half>, <2 x half>)
declare <8 x half> @llvm.vector.interleave2.v8f16(<4 x half>, <4 x half>)
declare <4 x float> @llvm.vector.interleave2.v4f32(<2 x float>, <2 x float>)
declare <16 x half> @llvm.vector.interleave2.v16f16(<8 x half>, <8 x half>)
declare <8 x float> @llvm.vector.interleave2.v8f32(<4 x float>, <4 x float>)
declare <4 x double> @llvm.vector.interleave2.v4f64(<2 x double>, <2 x double>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
; RV64: {{.*}}