; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck -check-prefix=VLA %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck -check-prefix=VLA %s

; RUN: llc < %s -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefix=VLS %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefix=VLS %s

; Each function below builds a wider fixed-length i32 vector by concatenating
; narrower operands with identity-mask shufflevector instructions, either in a
; single step or as a binary tree of pairwise concatenations.
;
; Two sets of expectations are kept per function: the first pair of llc
; invocations (riscv32/riscv64 with +v only) is checked under the VLA prefix,
; and the second pair, which additionally passes
; -riscv-v-vector-bits-max=128, is checked under the VLS prefix.

; Two <4 x i32> operands -> one <8 x i32>, single shuffle.
define <8 x i32> @concat_2xv4i32(<4 x i32> %a, <4 x i32> %b) {
; VLA-LABEL: concat_2xv4i32:
; VLA:       # %bb.0:
; VLA-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; VLA-NEXT:    vmv1r.v v10, v9
; VLA-NEXT:    vslideup.vi v8, v10, 4
; VLA-NEXT:    ret
;
; VLS-LABEL: concat_2xv4i32:
; VLS:       # %bb.0:
; VLS-NEXT:    ret
  %ab = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %ab
}

; Four <2 x i32> operands -> one <8 x i32>, two levels of pairwise shuffles.
define <8 x i32> @concat_4xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
; VLA-LABEL: concat_4xv2i32:
; VLA:       # %bb.0:
; VLA-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; VLA-NEXT:    vslideup.vi v10, v11, 2
; VLA-NEXT:    vslideup.vi v8, v9, 2
; VLA-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; VLA-NEXT:    vslideup.vi v8, v10, 4
; VLA-NEXT:    ret
;
; VLS-LABEL: concat_4xv2i32:
; VLS:       # %bb.0:
; VLS-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; VLS-NEXT:    vmv1r.v v13, v10
; VLS-NEXT:    vmv1r.v v12, v8
; VLS-NEXT:    vslideup.vi v13, v11, 2
; VLS-NEXT:    vslideup.vi v12, v9, 2
; VLS-NEXT:    vmv2r.v v8, v12
; VLS-NEXT:    ret
  %ab = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cd = shufflevector <2 x i32> %c, <2 x i32> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %abcd = shufflevector <4 x i32> %ab, <4 x i32> %cd, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %abcd
}

; Eight <1 x i32> operands -> one <8 x i32>, three levels of pairwise shuffles.
define <8 x i32> @concat_8xv1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %c, <1 x i32> %d, <1 x i32> %e, <1 x i32> %f, <1 x i32> %g, <1 x i32> %h) {
; VLA-LABEL: concat_8xv1i32:
; VLA:       # %bb.0:
; VLA-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; VLA-NEXT:    vslideup.vi v14, v15, 1
; VLA-NEXT:    vslideup.vi v12, v13, 1
; VLA-NEXT:    vslideup.vi v10, v11, 1
; VLA-NEXT:    vslideup.vi v8, v9, 1
; VLA-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; VLA-NEXT:    vslideup.vi v12, v14, 2
; VLA-NEXT:    vslideup.vi v8, v10, 2
; VLA-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; VLA-NEXT:    vslideup.vi v8, v12, 4
; VLA-NEXT:    ret
;
; VLS-LABEL: concat_8xv1i32:
; VLS:       # %bb.0:
; VLS-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; VLS-NEXT:    vmv1r.v v17, v12
; VLS-NEXT:    vmv1r.v v16, v8
; VLS-NEXT:    vslideup.vi v14, v15, 1
; VLS-NEXT:    vslideup.vi v17, v13, 1
; VLS-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; VLS-NEXT:    vslideup.vi v17, v14, 2
; VLS-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; VLS-NEXT:    vslideup.vi v10, v11, 1
; VLS-NEXT:    vslideup.vi v16, v9, 1
; VLS-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; VLS-NEXT:    vslideup.vi v16, v10, 2
; VLS-NEXT:    vmv2r.v v8, v16
; VLS-NEXT:    ret
  %ab = shufflevector <1 x i32> %a, <1 x i32> %b, <2 x i32> <i32 0, i32 1>
  %cd = shufflevector <1 x i32> %c, <1 x i32> %d, <2 x i32> <i32 0, i32 1>
  %abcd = shufflevector <2 x i32> %ab, <2 x i32> %cd, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %ef = shufflevector <1 x i32> %e, <1 x i32> %f, <2 x i32> <i32 0, i32 1>
  %gh = shufflevector <1 x i32> %g, <1 x i32> %h, <2 x i32> <i32 0, i32 1>
  %efgh = shufflevector <2 x i32> %ef, <2 x i32> %gh, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %abcdefgh = shufflevector <4 x i32> %abcd, <4 x i32> %efgh, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %abcdefgh
}

; Two <8 x i32> operands -> one <16 x i32>, single shuffle.
define <16 x i32> @concat_2xv8i32(<8 x i32> %a, <8 x i32> %b) {
; VLA-LABEL: concat_2xv8i32:
; VLA:       # %bb.0:
; VLA-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; VLA-NEXT:    vmv2r.v v12, v10
; VLA-NEXT:    vslideup.vi v8, v12, 8
; VLA-NEXT:    ret
;
; VLS-LABEL: concat_2xv8i32:
; VLS:       # %bb.0:
; VLS-NEXT:    ret
  %v = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i32> %v
}

; Four <4 x i32> operands -> one <16 x i32>, two levels of pairwise shuffles.
define <16 x i32> @concat_4xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; VLA-LABEL: concat_4xv4i32:
; VLA:       # %bb.0:
; VLA-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; VLA-NEXT:    vmv1r.v v14, v11
; VLA-NEXT:    vmv1r.v v12, v10
; VLA-NEXT:    vmv1r.v v10, v9
; VLA-NEXT:    vslideup.vi v12, v14, 4
; VLA-NEXT:    vslideup.vi v8, v10, 4
; VLA-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; VLA-NEXT:    vslideup.vi v8, v12, 8
; VLA-NEXT:    ret
;
; VLS-LABEL: concat_4xv4i32:
; VLS:       # %bb.0:
; VLS-NEXT:    ret
  %ab = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %cd = shufflevector <4 x i32> %c, <4 x i32> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %abcd = shufflevector <8 x i32> %ab, <8 x i32> %cd, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i32> %abcd
}

; Eight <2 x i32> operands -> one <16 x i32>, three levels of pairwise shuffles.
define <16 x i32> @concat_8xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d, <2 x i32> %e, <2 x i32> %f, <2 x i32> %g, <2 x i32> %h) {
; VLA-LABEL: concat_8xv2i32:
; VLA:       # %bb.0:
; VLA-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; VLA-NEXT:    vslideup.vi v14, v15, 2
; VLA-NEXT:    vslideup.vi v12, v13, 2
; VLA-NEXT:    vslideup.vi v10, v11, 2
; VLA-NEXT:    vslideup.vi v8, v9, 2
; VLA-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; VLA-NEXT:    vslideup.vi v12, v14, 4
; VLA-NEXT:    vslideup.vi v8, v10, 4
; VLA-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; VLA-NEXT:    vslideup.vi v8, v12, 8
; VLA-NEXT:    ret
;
; VLS-LABEL: concat_8xv2i32:
; VLS:       # %bb.0:
; VLS-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; VLS-NEXT:    vmv1r.v v19, v14
; VLS-NEXT:    vmv1r.v v18, v12
; VLS-NEXT:    vmv1r.v v17, v10
; VLS-NEXT:    vmv1r.v v16, v8
; VLS-NEXT:    vslideup.vi v19, v15, 2
; VLS-NEXT:    vslideup.vi v18, v13, 2
; VLS-NEXT:    vslideup.vi v17, v11, 2
; VLS-NEXT:    vslideup.vi v16, v9, 2
; VLS-NEXT:    vmv4r.v v8, v16
; VLS-NEXT:    ret
  %ab = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cd = shufflevector <2 x i32> %c, <2 x i32> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %abcd = shufflevector <4 x i32> %ab, <4 x i32> %cd, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %ef = shufflevector <2 x i32> %e, <2 x i32> %f, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %gh = shufflevector <2 x i32> %g, <2 x i32> %h, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %efgh = shufflevector <4 x i32> %ef, <4 x i32> %gh, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %abcdefgh = shufflevector <8 x i32> %abcd, <8 x i32> %efgh, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i32> %abcdefgh
}

; Two <16 x i32> operands -> one <32 x i32>, single shuffle.
define <32 x i32> @concat_2xv16i32(<16 x i32> %a, <16 x i32> %b) {
; VLA-LABEL: concat_2xv16i32:
; VLA:       # %bb.0:
; VLA-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; VLA-NEXT:    vmv4r.v v16, v12
; VLA-NEXT:    li a0, 32
; VLA-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; VLA-NEXT:    vslideup.vi v8, v16, 16
; VLA-NEXT:    ret
;
; VLS-LABEL: concat_2xv16i32:
; VLS:       # %bb.0:
; VLS-NEXT:    ret
  %ab = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i32> %ab
}

; Four <8 x i32> operands -> one <32 x i32>, two levels of pairwise shuffles.
define <32 x i32> @concat_4xv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
; VLA-LABEL: concat_4xv8i32:
; VLA:       # %bb.0:
; VLA-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; VLA-NEXT:    vmv2r.v v20, v14
; VLA-NEXT:    vmv2r.v v16, v12
; VLA-NEXT:    vmv2r.v v12, v10
; VLA-NEXT:    li a0, 32
; VLA-NEXT:    vslideup.vi v16, v20, 8
; VLA-NEXT:    vslideup.vi v8, v12, 8
; VLA-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; VLA-NEXT:    vslideup.vi v8, v16, 16
; VLA-NEXT:    ret
;
; VLS-LABEL: concat_4xv8i32:
; VLS:       # %bb.0:
; VLS-NEXT:    ret
  %ab = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %cd = shufflevector <8 x i32> %c, <8 x i32> %d, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %abcd = shufflevector <16 x i32> %ab, <16 x i32> %cd, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i32> %abcd
}

; Eight <4 x i32> operands -> one <32 x i32>, three levels of pairwise shuffles.
define <32 x i32> @concat_8xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h) {
; VLA-LABEL: concat_8xv4i32:
; VLA:       # %bb.0:
; VLA-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; VLA-NEXT:    vmv1r.v v18, v15
; VLA-NEXT:    vmv1r.v v20, v14
; VLA-NEXT:    vmv1r.v v14, v13
; VLA-NEXT:    vmv1r.v v16, v12
; VLA-NEXT:    vmv1r.v v22, v11
; VLA-NEXT:    vmv1r.v v12, v10
; VLA-NEXT:    vmv1r.v v10, v9
; VLA-NEXT:    li a0, 32
; VLA-NEXT:    vslideup.vi v20, v18, 4
; VLA-NEXT:    vslideup.vi v16, v14, 4
; VLA-NEXT:    vslideup.vi v12, v22, 4
; VLA-NEXT:    vslideup.vi v8, v10, 4
; VLA-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; VLA-NEXT:    vslideup.vi v16, v20, 8
; VLA-NEXT:    vslideup.vi v8, v12, 8
; VLA-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; VLA-NEXT:    vslideup.vi v8, v16, 16
; VLA-NEXT:    ret
;
; VLS-LABEL: concat_8xv4i32:
; VLS:       # %bb.0:
; VLS-NEXT:    ret
  %ab = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %cd = shufflevector <4 x i32> %c, <4 x i32> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %abcd = shufflevector <8 x i32> %ab, <8 x i32> %cd, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %ef = shufflevector <4 x i32> %e, <4 x i32> %f, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %gh = shufflevector <4 x i32> %g, <4 x i32> %h, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %efgh = shufflevector <8 x i32> %ef, <8 x i32> %gh, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %abcdefgh = shufflevector <16 x i32> %abcd, <16 x i32> %efgh, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i32> %abcdefgh
}