; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define void @v4xi8_concat_vector_insert_idx0(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 0
  store <4 x i8> %ins, ptr %a
  ret void
}

define void @v4xi8_concat_vector_insert_idx1(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vslideup.vi v8, v10, 1
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 1
  store <4 x i8> %ins, ptr %a
  ret void
}

define void @v4xi8_concat_vector_insert_idx2(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v9, v8, 2
; CHECK-NEXT:    vse8.v v9, (a0)
; CHECK-NEXT:    ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 2
  store <4 x i8> %ins, ptr %a
  ret void
}

define void @v4xi8_concat_vector_insert_idx3(ptr %a, ptr %b, i8 %x) {
; CHECK-LABEL: v4xi8_concat_vector_insert_idx3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vslideup.vi v8, v10, 1
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v9, v8, 2
; CHECK-NEXT:    vse8.v v9, (a0)
; CHECK-NEXT:    ret
  %v1 = load <2 x i8>, ptr %a
  %v2 = load <2 x i8>, ptr %b
  %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i8> %concat, i8 %x, i32 3
  store <4 x i8> %ins, ptr %a
  ret void
}

define void @v4xi64_concat_vector_insert_idx0(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx0:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vle64.v v10, (a1)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v8, v10, 2
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx0:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vle64.v v10, (a1)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; RV64-NEXT:    vmv.s.x v8, a2
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vslideup.vi v8, v10, 2
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 0
  store <4 x i64> %ins, ptr %a
  ret void
}

define void @v4xi64_concat_vector_insert_idx1(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx1:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vle64.v v10, (a1)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v9, v8, a2
; RV32-NEXT:    vslide1down.vx v9, v9, a3
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vslideup.vi v8, v9, 1
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v8, v10, 2
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx1:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vle64.v v10, (a1)
; RV64-NEXT:    vmv.s.x v9, a2
; RV64-NEXT:    vslideup.vi v8, v9, 1
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vslideup.vi v8, v10, 2
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 1
  store <4 x i64> %ins, ptr %a
  ret void
}

define void @v4xi64_concat_vector_insert_idx2(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx2:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a1)
; RV32-NEXT:    vle64.v v10, (a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v10, v8, 2
; RV32-NEXT:    vse64.v v10, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx2:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a1)
; RV64-NEXT:    vle64.v v10, (a0)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; RV64-NEXT:    vmv.s.x v8, a2
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vslideup.vi v10, v8, 2
; RV64-NEXT:    vse64.v v10, (a0)
; RV64-NEXT:    ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 2
  store <4 x i64> %ins, ptr %a
  ret void
}

define void @v4xi64_concat_vector_insert_idx3(ptr %a, ptr %b, i64 %x) {
; RV32-LABEL: v4xi64_concat_vector_insert_idx3:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a1)
; RV32-NEXT:    vle64.v v10, (a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v9, v8, a2
; RV32-NEXT:    vslide1down.vx v9, v9, a3
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vslideup.vi v8, v9, 1
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v10, v8, 2
; RV32-NEXT:    vse64.v v10, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: v4xi64_concat_vector_insert_idx3:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a1)
; RV64-NEXT:    vle64.v v10, (a0)
; RV64-NEXT:    vmv.s.x v9, a2
; RV64-NEXT:    vslideup.vi v8, v9, 1
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vslideup.vi v10, v8, 2
; RV64-NEXT:    vse64.v v10, (a0)
; RV64-NEXT:    ret
  %v1 = load <2 x i64>, ptr %a
  %v2 = load <2 x i64>, ptr %b
  %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ins = insertelement <4 x i64> %concat, i64 %x, i32 3
  store <4 x i64> %ins, ptr %a
  ret void
}