; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+bf16 < %s | FileCheck %s --check-prefixes=CHECK

; Tests AArch64 SVE lowering of @llvm.vector.insert: inserting fixed-length
; and scalable subvectors into scalable vectors, at zero and non-zero indices.

; Index-0 insert of a full 128-bit fixed vector lowers to a predicated move.
define <vscale x 2 x i64> @insert_v2i64_nxv2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
; CHECK-LABEL: insert_v2i64_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
  %retval = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 0)
  ret <vscale x 2 x i64> %retval
}

; Non-zero index goes via a stack slot; the store offset is clamped
; (cmp/csel) so the subvector store stays inside the slot.
define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
; CHECK-LABEL: insert_v2i64_nxv2i64_idx2:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntd x8
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sub x8, x8, #2
; CHECK-NEXT: cmp x8, #2
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 2)
  ret <vscale x 2 x i64> %retval
}

define <vscale x 4 x i32> @insert_v4i32_nxv4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
; CHECK-LABEL: insert_v4i32_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
  %retval = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 0)
  ret <vscale x 4 x i32> %retval
}

define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx4(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
; CHECK-LABEL: insert_v4i32_nxv4i32_idx4:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntw x8
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sub x8, x8, #4
; CHECK-NEXT: cmp x8, #4
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: lsl x8, x8, #2
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 4)
  ret <vscale x 4 x i32> %retval
}

define <vscale x 8 x i16> @insert_v8i16_nxv8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
; CHECK-LABEL: insert_v8i16_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
  %retval = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 0)
  ret <vscale x 8 x i16> %retval
}

define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx8(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
; CHECK-LABEL: insert_v8i16_nxv8i16_idx8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cnth x8
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: sub x8, x8, #8
; CHECK-NEXT: cmp x8, #8
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: lsl x8, x8, #1
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 8)
  ret <vscale x 8 x i16> %retval
}

define <vscale x 16 x i8> @insert_v16i8_nxv16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
; CHECK-LABEL: insert_v16i8_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov z0.b, p0/m, z1.b
; CHECK-NEXT: ret
  %retval = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 0)
  ret <vscale x 16 x i8> %retval
}

define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
; CHECK-LABEL: insert_v16i8_nxv16i8_idx16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: sub x8, x8, #16
; CHECK-NEXT: cmp x8, #16
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 16)
  ret <vscale x 16 x i8> %retval
}


; Insert subvectors into illegal vectors

; nxv16i64 is wider than the widest legal SVE type, so the result is split
; across z0-z7 and the insert becomes a sequence of stores to the out pointer.
define void @insert_nxv8i64_nxv16i64(<vscale x 8 x i64> %sv0, <vscale x 8 x i64> %sv1, ptr %out) {
; CHECK-LABEL: insert_nxv8i64_nxv16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: st1d { z7.d }, p0, [x0, #7, mul vl]
; CHECK-NEXT: st1d { z6.d }, p0, [x0, #6, mul vl]
; CHECK-NEXT: st1d { z5.d }, p0, [x0, #5, mul vl]
; CHECK-NEXT: st1d { z4.d }, p0, [x0, #4, mul vl]
; CHECK-NEXT: st1d { z3.d }, p0, [x0, #3, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [x0, #2, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [x0, #1, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  %v0 = call <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %sv0, i64 0)
  %v = call <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> %v0, <vscale x 8 x i64> %sv1, i64 8)
  store <vscale x 16 x i64> %v, ptr %out
  ret void
}

define void @insert_nxv8i64_nxv16i64_lo(<vscale x 8 x i64> %sv0, ptr %out) {
; CHECK-LABEL: insert_nxv8i64_nxv16i64_lo:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: st1d { z3.d }, p0, [x0, #3, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [x0, #2, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [x0, #1, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  %v = call <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %sv0, i64 0)
  store <vscale x 16 x i64> %v, ptr %out
  ret void
}

define void @insert_nxv8i64_nxv16i64_hi(<vscale x 8 x i64> %sv0, ptr %out) {
; CHECK-LABEL: insert_nxv8i64_nxv16i64_hi:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: st1d { z3.d }, p0, [x0, #7, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [x0, #6, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [x0, #5, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [x0, #4, mul vl]
; CHECK-NEXT: ret
  %v = call <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %sv0, i64 8)
  store <vscale x 16 x i64> %v, ptr %out
  ret void
}

; uwtable: also checks the CFA/CFI bookkeeping around the scalable stack frame.
define void @insert_v2i64_nxv16i64(<2 x i64> %sv0, <2 x i64> %sv1, ptr %out) uwtable {
; CHECK-LABEL: insert_v2i64_nxv16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: str q1, [sp, #32]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #3, mul vl]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [sp, #2, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: ld1d { z3.d }, p0/z, [sp]
; CHECK-NEXT: st1d { z0.d }, p0, [x0, #3, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [x0, #2, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [x0, #1, mul vl]
; CHECK-NEXT: st1d { z3.d }, p0, [x0]
; CHECK-NEXT: addvl sp, sp, #4
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
  %v0 = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv0, i64 0)
  %v = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> %v0, <2 x i64> %sv1, i64 4)
  store <vscale x 16 x i64> %v, ptr %out
  ret void
}

define void @insert_v2i64_nxv16i64_lo0(ptr %psv, ptr %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64_lo0:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %sv = load <2 x i64>, ptr %psv
  %v = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 0)
  store <vscale x 16 x i64> %v, ptr %out
  ret void
}

define void @insert_v2i64_nxv16i64_lo2(ptr %psv, ptr %out) uwtable {
; CHECK-LABEL: insert_v2i64_nxv16i64_lo2:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: str q0, [sp, #16]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [sp]
; CHECK-NEXT: st1d { z0.d }, p0, [x1, #1, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [x1]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
  %sv = load <2 x i64>, ptr %psv
  %v = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 2)
  store <vscale x 16 x i64> %v, ptr %out
  ret void
}


; Insert subvectors that need widening

define <vscale x 4 x i32> @insert_nxv1i32_nxv4i32_undef() nounwind {
; CHECK-LABEL: insert_nxv1i32_nxv4i32_undef:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z0.s, #1 // =0x1
; CHECK-NEXT: ret
entry:
  %retval = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.nxv1i32(<vscale x 4 x i32> undef, <vscale x 1 x i32> splat(i32 1), i64 0)
  ret <vscale x 4 x i32> %retval
}

define <vscale x 6 x i16> @insert_nxv1i16_nxv6i16_undef() nounwind {
; CHECK-LABEL: insert_nxv1i16_nxv6i16_undef:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z0.h, #1 // =0x1
; CHECK-NEXT: ret
entry:
  %retval = call <vscale x 6 x i16> @llvm.vector.insert.nxv6i16.nxv1i16(<vscale x 6 x i16> undef, <vscale x 1 x i16> splat(i16 1), i64 0)
  ret <vscale x 6 x i16> %retval
}

define <vscale x 4 x float> @insert_nxv1f32_nxv4f32_undef(<vscale x 1 x float> %subvec) nounwind {
; CHECK-LABEL: insert_nxv1f32_nxv4f32_undef:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: ret
entry:
  %retval = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv1f32(<vscale x 4 x float> undef, <vscale x 1 x float> %subvec, i64 0)
  ret <vscale x 4 x float> %retval
}

; This tests promotion of the input operand to INSERT_SUBVECTOR.
define <vscale x 8 x i16> @insert_nxv8i16_nxv2i16(<vscale x 8 x i16> %vec, <vscale x 2 x i16> %in) nounwind {
; CHECK-LABEL: insert_nxv8i16_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z2.s, z0.h
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpklo z2.d, z2.s
; CHECK-NEXT: uzp1 z1.s, z2.s, z1.s
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: ret
  %r = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.nxv2i16(<vscale x 8 x i16> %vec, <vscale x 2 x i16> %in, i64 2)
  ret <vscale x 8 x i16> %r
}

define <vscale x 4 x half> @insert_nxv4f16_nxv2f16_0(<vscale x 4 x half> %sv0, <vscale x 2 x half> %sv1) nounwind {
; CHECK-LABEL: insert_nxv4f16_nxv2f16_0:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
; CHECK-NEXT: ret
  %v0 = call <vscale x 4 x half> @llvm.vector.insert.nxv4f16.nxv2f16(<vscale x 4 x half> %sv0, <vscale x 2 x half> %sv1, i64 0)
  ret <vscale x 4 x half> %v0
}

define <vscale x 4 x half> @insert_nxv4f16_nxv2f16_2(<vscale x 4 x half> %sv0, <vscale x 2 x half> %sv1) nounwind {
; CHECK-LABEL: insert_nxv4f16_nxv2f16_2:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %v0 = call <vscale x 4 x half> @llvm.vector.insert.nxv4f16.nxv2f16(<vscale x 4 x half> %sv0, <vscale x 2 x half> %sv1, i64 2)
  ret <vscale x 4 x half> %v0
}

; Test that the index is scaled by vscale if the subvector is scalable.
define <vscale x 8 x half> @insert_nxv8f16_nxv2f16(<vscale x 8 x half> %vec, <vscale x 2 x half> %in) nounwind {
; CHECK-LABEL: insert_nxv8f16_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.d }, p1, [sp, #1, mul vl]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %r = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.nxv2f16(<vscale x 8 x half> %vec, <vscale x 2 x half> %in, i64 2)
  ret <vscale x 8 x half> %r
}

define <vscale x 8 x half> @insert_nxv8f16_nxv4f16_0(<vscale x 8 x half> %sv0, <vscale x 4 x half> %sv1) nounwind {
; CHECK-LABEL: insert_nxv8f16_nxv4f16_0:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: ret
  %v0 = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.nxv4f16(<vscale x 8 x half> %sv0, <vscale x 4 x half> %sv1, i64 0)
  ret <vscale x 8 x half> %v0
}

define <vscale x 8 x half> @insert_nxv8f16_nxv4f16_4(<vscale x 8 x half> %sv0, <vscale x 4 x half> %sv1) nounwind {
; CHECK-LABEL: insert_nxv8f16_nxv4f16_4:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %v0 = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.nxv4f16(<vscale x 8 x half> %sv0, <vscale x 4 x half> %sv1, i64 4)
  ret <vscale x 8 x half> %v0
}

; Fixed length clamping
; (attribute group #0 is defined elsewhere in this file — presumably it pins
; the vector length, e.g. vscale_range; verify against the full file.)

define <vscale x 2 x i64> @insert_fixed_v2i64_nxv2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind #0 {
; CHECK-LABEL: insert_fixed_v2i64_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: str q1, [sp, #16]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %retval = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 2)
  ret <vscale x 2 x i64> %retval
}

define <vscale x 2 x i64> @insert_fixed_v4i64_nxv2i64(<vscale x 2 x i64> %vec, ptr %ptr) nounwind #0 {
; CHECK-LABEL: insert_fixed_v4i64_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [sp]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %subvec = load <4 x i64>, ptr %ptr
  %retval = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> %vec, <4 x i64> %subvec, i64 4)
  ret <vscale x 2 x i64> %retval
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unpacked types that need result widening
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

define <vscale x 3 x i32> @insert_nxv3i32_nxv2i32(<vscale x 2 x i32> %sv0) {
; CHECK-LABEL: insert_nxv3i32_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: ret
  %v0 = call <vscale x 3 x i32> @llvm.vector.insert.nxv3i32.nxv2i32(<vscale x 3 x i32> undef, <vscale x 2 x i32> %sv0, i64 0)
  ret <vscale x 3 x i32> %v0
}

;; Check that the subvector is not widened, so it does not crash.
define <vscale x 3 x i32> @insert_nxv3i32_nxv2i32_2(<vscale x 3 x i32> %sv0, <vscale x 2 x i32> %sv1) {
; CHECK-LABEL: insert_nxv3i32_nxv2i32_2:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
; CHECK-NEXT: ret
  %v0 = call <vscale x 3 x i32> @llvm.vector.insert.nxv3i32.nxv2i32(<vscale x 3 x i32> %sv0, <vscale x 2 x i32> %sv1, i64 0)
  ret <vscale x 3 x i32> %v0
}

define <vscale x 3 x float> @insert_nxv3f32_nxv2f32(<vscale x 2 x float> %sv0) nounwind {
; CHECK-LABEL: insert_nxv3f32_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: ret
  %v0 = call <vscale x 3 x float> @llvm.vector.insert.nxv3f32.nxv2f32(<vscale x 3 x float> undef, <vscale x 2 x float> %sv0, i64 0)
  ret <vscale x 3 x float> %v0
}

define <vscale x 4 x float> @insert_nxv4f32_nxv2f32_0(<vscale x 4 x float> %sv0, <vscale x 2 x float> %sv1) nounwind {
; CHECK-LABEL: insert_nxv4f32_nxv2f32_0:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
; CHECK-NEXT: ret
  %v0 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> %sv0, <vscale x 2 x float> %sv1, i64 0)
  ret <vscale x 4 x float> %v0
}

define <vscale x 4 x float> @insert_nxv4f32_nxv2f32_2(<vscale x 4 x float> %sv0, <vscale x 2 x float> %sv1) nounwind {
; CHECK-LABEL: insert_nxv4f32_nxv2f32_2:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %v0 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> %sv0, <vscale x 2 x float> %sv1, i64 2)
  ret <vscale x 4 x float> %v0
}

define <vscale x 6 x i32> @insert_nxv6i32_nxv2i32(<vscale x 2 x i32> %sv0, <vscale x 2 x i32> %sv1) nounwind {
; CHECK-LABEL: insert_nxv6i32_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: ld1w { z1.s }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %v0 = call <vscale x 6 x i32> @llvm.vector.insert.nxv6i32.nxv2i32(<vscale x 6 x i32> undef, <vscale x 2 x i32> %sv0, i64 0)
  %v1 = call <vscale x 6 x i32> @llvm.vector.insert.nxv6i32.nxv2i32(<vscale x 6 x i32> %v0, <vscale x 2 x i32> %sv1, i64 2)
  ret <vscale x 6 x i32> %v1
}

;; This only works because the input vector is undef and the index is zero.
define <vscale x 6 x i32> @insert_nxv6i32_nxv3i32(<vscale x 3 x i32> %sv0) {
; CHECK-LABEL: insert_nxv6i32_nxv3i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %v0 = call <vscale x 6 x i32> @llvm.vector.insert.nxv6i32.nxv3i32(<vscale x 6 x i32> undef, <vscale x 3 x i32> %sv0, i64 0)
  ret <vscale x 6 x i32> %v0
}

; Inserts exactly cover the result, so no instructions are needed: the inputs
; already arrive in the registers the result is returned in.
define <vscale x 12 x i32> @insert_nxv12i32_nxv4i32(<vscale x 4 x i32> %sv0, <vscale x 4 x i32> %sv1, <vscale x 4 x i32> %sv2) {
; CHECK-LABEL: insert_nxv12i32_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %v0 = call <vscale x 12 x i32> @llvm.vector.insert.nxv4i32.nxv12i32(<vscale x 12 x i32> undef, <vscale x 4 x i32> %sv0, i64 0)
  %v1 = call <vscale x 12 x i32> @llvm.vector.insert.nxv4i32.nxv12i32(<vscale x 12 x i32> %v0, <vscale x 4 x i32> %sv1, i64 4)
  %v2 = call <vscale x 12 x i32> @llvm.vector.insert.nxv4i32.nxv12i32(<vscale x 12 x i32> %v1, <vscale x 4 x i32> %sv2, i64 8)
  ret <vscale x 12 x i32> %v2
}

define <vscale x 2 x bfloat> @insert_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %sv0, <vscale x 2 x bfloat> %sv1) nounwind {
; CHECK-LABEL: insert_nxv2bf16_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
  %v0 = call <vscale x 2 x bfloat> @llvm.vector.insert.nxv2bf16.nxv2bf16(<vscale x 2 x bfloat> %sv0, <vscale x 2 x bfloat> %sv1, i64 0)
  ret <vscale x 2 x bfloat> %v0
}

define <vscale x 4 x bfloat> @insert_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %sv0, <vscale x 4 x bfloat> %sv1) nounwind {
; CHECK-LABEL: insert_nxv4bf16_nxv4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
  %v0 = call <vscale x 4 x bfloat> @llvm.vector.insert.nxv4bf16.nxv4bf16(<vscale x 4 x bfloat> %sv0, <vscale x 4 x bfloat> %sv1, i64 0)
  ret <vscale x 4 x bfloat> %v0
}

define <vscale x 4 x bfloat> @insert_nxv4bf16_v4bf16(<vscale x 4 x bfloat> %sv0, <4 x bfloat> %v1) nounwind {
; CHECK-LABEL: insert_nxv4bf16_v4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: addpl x8, sp, #4
; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: str d1, [x8]
; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %v0 = call <vscale x 4 x bfloat> @llvm.vector.insert.nxv4bf16.v4bf16(<vscale x 4 x bfloat> %sv0, <4 x bfloat> %v1, i64 0)
  ret <vscale x 4 x bfloat> %v0
}

define <vscale x 8 x bfloat> @insert_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %sv0, <vscale x 8 x bfloat> %sv1) nounwind {
; CHECK-LABEL: insert_nxv8bf16_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
  %v0 = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.nxv8bf16(<vscale x 8 x bfloat> %sv0, <vscale x 8 x bfloat> %sv1, i64 0)
  ret <vscale x 8 x bfloat> %v0
}

define <vscale x 8 x bfloat> @insert_nxv8bf16_v8bf16(<vscale x 8 x bfloat> %sv0, <8 x bfloat> %v1) nounwind {
; CHECK-LABEL: insert_nxv8bf16_v8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
  %v0 = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> %sv0, <8 x bfloat> %v1, i64 0)
  ret <vscale x 8 x bfloat> %v0
}

define <vscale x 8 x bfloat> @insert_nxv8bf16_nxv4bf16_0(<vscale x 8 x bfloat> %sv0, <vscale x 4 x bfloat> %sv1) nounwind {
; CHECK-LABEL: insert_nxv8bf16_nxv4bf16_0:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: ret
  %v0 = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.nxv4bf16(<vscale x 8 x bfloat> %sv0, <vscale x 4 x bfloat> %sv1, i64 0)
  ret <vscale x 8 x bfloat> %v0
}

define <vscale x 8 x bfloat> @insert_nxv8bf16_nxv4bf16_4(<vscale x 8 x bfloat> %sv0, <vscale x 4 x bfloat> %sv1) nounwind {
; CHECK-LABEL: insert_nxv8bf16_nxv4bf16_4:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %v0 = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.nxv4bf16(<vscale x 8 x bfloat> %sv0, <vscale x 4 x bfloat> %sv1, i64 4)
  ret <vscale x 8 x bfloat> %v0
}

define <vscale x 4 x bfloat> @insert_nxv4bf16_nxv2bf16_0(<vscale x 4 x bfloat> %sv0, <vscale x 2 x bfloat> %sv1) nounwind {
; CHECK-LABEL: insert_nxv4bf16_nxv2bf16_0:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
; CHECK-NEXT: ret
  %v0 = call <vscale x 4 x bfloat> @llvm.vector.insert.nxv4bf16.nxv2bf16(<vscale x 4 x bfloat> %sv0, <vscale x 2 x bfloat> %sv1, i64 0)
  ret <vscale x 4 x bfloat> %v0
}

define <vscale x 4 x bfloat> @insert_nxv4bf16_nxv2bf16_2(<vscale x 4 x bfloat> %sv0, <vscale x 2 x bfloat> %sv1) nounwind {
; CHECK-LABEL: insert_nxv4bf16_nxv2bf16_2:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %v0 = call <vscale x 4 x bfloat> @llvm.vector.insert.nxv4bf16.nxv2bf16(<vscale x 4 x bfloat> %sv0, <vscale x 2 x bfloat> %sv1, i64 2)
  ret <vscale x 4 x bfloat> %v0
}

; Test predicate inserts of half size.
define <vscale x 16 x i1> @insert_nxv16i1_nxv8i1_0(<vscale x 16 x i1> %vec, <vscale x 8 x i1> %sv) {
; CHECK-LABEL: insert_nxv16i1_nxv8i1_0:
; CHECK: // %bb.0:
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: uzp1 p0.b, p1.b, p0.b
; CHECK-NEXT: ret
  %v0 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv8i1(<vscale x 16 x i1> %vec, <vscale x 8 x i1> %sv, i64 0)
  ret <vscale x 16 x i1> %v0
}

define <vscale x 16 x i1> @insert_nxv16i1_nxv8i1_8(<vscale x 16 x i1> %vec, <vscale x 8 x i1> %sv) {
; CHECK-LABEL: insert_nxv16i1_nxv8i1_8:
; CHECK: // %bb.0:
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %v0 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv8i1(<vscale x 16 x i1> %vec, <vscale x 8 x i1> %sv, i64 8)
  ret <vscale x 16 x i1> %v0
}

; Test predicate inserts of less than half the size.
define <vscale x 16 x i1> @insert_nxv16i1_nxv4i1_0(<vscale x 16 x i1> %vec, <vscale x 4 x i1> %sv) {
; CHECK-LABEL: insert_nxv16i1_nxv4i1_0:
; CHECK: // %bb.0:
; CHECK-NEXT: punpklo p2.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: punpkhi p2.h, p2.b
; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h
; CHECK-NEXT: uzp1 p0.b, p1.b, p0.b
; CHECK-NEXT: ret
  %v0 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> %vec, <vscale x 4 x i1> %sv, i64 0)
  ret <vscale x 16 x i1> %v0
}

define <vscale x 16 x i1> @insert_nxv16i1_nxv4i1_12(<vscale x 16 x i1> %vec, <vscale x 4 x i1> %sv) {
; CHECK-LABEL: insert_nxv16i1_nxv4i1_12:
; CHECK: // %bb.0:
; CHECK-NEXT: punpkhi p2.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: punpklo p2.h, p2.b
; CHECK-NEXT: uzp1 p1.h, p2.h, p1.h
; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %v0 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> %vec, <vscale x 4 x i1> %sv, i64 12)
  ret <vscale x 16 x i1> %v0
}

; Test predicate insert into undef/zero
define <vscale x 16 x i1> @insert_nxv16i1_nxv4i1_into_zero(<vscale x 4 x i1> %sv) {
; CHECK-LABEL: insert_nxv16i1_nxv4i1_into_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: pfalse p1.b
; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %v0 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> zeroinitializer, <vscale x 4 x i1> %sv, i64 0)
  ret <vscale x 16 x i1> %v0
}

define <vscale x 16 x i1> @insert_nxv16i1_nxv4i1_into_poison(<vscale x 4 x i1> %sv) {
; CHECK-LABEL: insert_nxv16i1_nxv4i1_into_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 p0.h, p0.h, p0.h
; CHECK-NEXT: uzp1 p0.b, p0.b, p0.b
; CHECK-NEXT: ret
  %v0 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1> poison, <vscale x 4 x i1> %sv, i64 0)
  ret <vscale x 16 x i1> %v0
}

; Test constant predicate insert into undef
; (vscale_range(4,8) means the all-true fixed vector covers the whole
; scalable result, so a plain ptrue suffices.)
define <vscale x 2 x i1> @insert_nxv2i1_v8i1_const_true_into_undef() vscale_range(4,8) {
; CHECK-LABEL: insert_nxv2i1_v8i1_const_true_into_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ret
  %v0 = call <vscale x 2 x i1> @llvm.vector.insert.nxv2i1.v8i1 (<vscale x 2 x i1> undef, <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, i64 0)
  ret <vscale x 2 x i1> %v0
}

define <vscale x 4 x i1> @insert_nxv4i1_v16i1_const_true_into_undef() vscale_range(4,8) {
; CHECK-LABEL: insert_nxv4i1_v16i1_const_true_into_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ret
  %v0 = call <vscale x 4 x i1> @llvm.vector.insert.nxv4i1.v16i1 (<vscale x 4 x i1> undef, <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, i64 0)
  ret <vscale x 4 x i1> %v0
}

define <vscale x 8 x i1> @insert_nxv8i1_v32i1_const_true_into_undef() vscale_range(4,8) {
; CHECK-LABEL: insert_nxv8i1_v32i1_const_true_into_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ret
  %v0 = call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.v32i1 (<vscale x 8 x i1> undef, <32 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, i64 0)
  ret <vscale x 8 x i1> %v0
}

define <vscale x 16 x i1> @insert_nxv16i1_v64i1_const_true_into_undef() vscale_range(4,8) {
; CHECK-LABEL: insert_nxv16i1_v64i1_const_true_into_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ret
  %v0 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v64i1 (<vscale x 16 x i1> undef, <64 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, i64 0)
  ret <vscale x 16 x i1> %v0
}

;
; Insert nxv1i1 type into: nxv2i1
;

define <vscale x 2 x i1> @insert_nxv1i1_nxv2i1_0(<vscale x 2 x i1> %vec, <vscale x 1 x i1> %sv) {
; CHECK-LABEL: insert_nxv1i1_nxv2i1_0:
; CHECK: // %bb.0:
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: uzp1 p0.d, p1.d, p0.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i1> @llvm.vector.insert.nxv2i1.nxv1i1(<vscale x 2 x i1> %vec, <vscale x 1 x i1> %sv, i64 0)
  ret <vscale x 2 x i1> %res
}

define <vscale x 2 x i1> @insert_nxv1i1_nxv2i1_1(<vscale x 2 x i1> %vec, <vscale x 1 x i1> %sv) {
; CHECK-LABEL: insert_nxv1i1_nxv2i1_1:
; CHECK: // %bb.0:
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: uzp1 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i1> @llvm.vector.insert.nxv2i1.nxv1i1(<vscale x 2 x i1> %vec, <vscale x 1 x i1> %sv, i64 1)
  ret <vscale x 2 x i1> %res
}

;
; Insert nxv1i1 type into: nxv4i1
;

define <vscale x 4 x i1> @insert_nxv1i1_nxv4i1_0(<vscale x 4 x i1> %vec, <vscale x 1 x i1> %sv) {
; CHECK-LABEL: insert_nxv1i1_nxv4i1_0:
; CHECK: // %bb.0:
; CHECK-NEXT: punpklo p2.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: punpkhi p2.h, p2.b
; CHECK-NEXT: uzp1 p1.d, p1.d, p2.d
; CHECK-NEXT: uzp1 p0.s, p1.s, p0.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i1> @llvm.vector.insert.nxv4i1.nxv1i1(<vscale x 4 x i1> %vec, <vscale x 1 x i1> %sv, i64 0)
  ret <vscale x 4 x i1> %res
}

define <vscale x 4 x i1> @insert_nxv1i1_nxv4i1_1(<vscale x 4 x i1> %vec, <vscale x 1 x i1> %sv) {
; CHECK-LABEL: insert_nxv1i1_nxv4i1_1:
; CHECK: // %bb.0:
; CHECK-NEXT: punpklo p2.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: punpklo p2.h, p2.b
; CHECK-NEXT: uzp1 p1.d, p2.d, p1.d
; CHECK-NEXT: uzp1 p0.s, p1.s, p0.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i1> @llvm.vector.insert.nxv4i1.nxv1i1(<vscale x 4 x i1> %vec, <vscale x 1 x i1> %sv, i64 1)
  ret <vscale x 4 x i1> %res
}

define <vscale x 4 x i1> @insert_nxv1i1_nxv4i1_2(<vscale x 4 x i1> %vec, <vscale x 1 x i1> %sv) {
; CHECK-LABEL: insert_nxv1i1_nxv4i1_2:
; CHECK: // %bb.0:
; CHECK-NEXT: punpkhi p2.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: punpkhi p2.h, p2.b
; CHECK-NEXT: uzp1 p1.d, p1.d, p2.d
; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i1> @llvm.vector.insert.nxv4i1.nxv1i1(<vscale x 4 x i1> %vec, <vscale x 1 x i1> %sv, i64 2)
  ret <vscale x 4 x i1> %res
}

define <vscale x 4 x i1> @insert_nxv1i1_nxv4i1_3(<vscale x 4 x i1> %vec, <vscale x 1 x i1> %sv) {
; CHECK-LABEL: insert_nxv1i1_nxv4i1_3:
; CHECK: // %bb.0:
; CHECK-NEXT: punpkhi p2.h, p0.b
; CHECK-NEXT: punpklo p0.h, p0.b
; CHECK-NEXT: punpklo p2.h, p2.b
; CHECK-NEXT: uzp1 p1.d, p2.d, p1.d
; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i1> @llvm.vector.insert.nxv4i1.nxv1i1(<vscale x 4 x i1> %vec, <vscale x 1 x i1> %sv, i64 3)
  ret <vscale x 4 x i1> %res
}

;
; Insert nxv1i1 type into: nxv8i1
;

define <vscale x 8 x i1> @insert_nxv1i1_nxv8i1_0(<vscale x 8 x i1> %vec, <vscale x 1 x i1> %sv) {
; CHECK-LABEL: insert_nxv1i1_nxv8i1_0:
; CHECK: // %bb.0:
; CHECK-NEXT: punpklo p2.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: punpklo p3.h, p2.b
; CHECK-NEXT: punpkhi p2.h, p2.b
; CHECK-NEXT: punpkhi p3.h, p3.b
; CHECK-NEXT: uzp1 p1.d, p1.d, p3.d
; CHECK-NEXT: uzp1 p1.s, p1.s, p2.s
; CHECK-NEXT: uzp1 p0.h, p1.h, p0.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.nxv1i1(<vscale x 8 x i1> %vec, <vscale x 1 x i1> %sv, i64 0)
  ret <vscale x 8 x i1> %res
}

define <vscale x 8 x i1> @insert_nxv1i1_nxv8i1_1(<vscale x 8 x i1> %vec, <vscale x 1 x i1> %sv) {
; CHECK-LABEL: insert_nxv1i1_nxv8i1_1:
; CHECK: // %bb.0:
; CHECK-NEXT: punpklo p2.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: punpklo p3.h, p2.b
; CHECK-NEXT: punpkhi p2.h, p2.b
; CHECK-NEXT: punpklo p3.h, p3.b
; CHECK-NEXT: uzp1 p1.d, p3.d, p1.d
; CHECK-NEXT: uzp1 p1.s, p1.s, p2.s
; CHECK-NEXT: uzp1 p0.h, p1.h, p0.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.nxv1i1(<vscale x 8 x i1> %vec, <vscale x 1 x i1> %sv, i64 1)
  ret <vscale x 8 x i1> %res
}

define <vscale x 8 x i1> @insert_nxv1i1_nxv8i1_2(<vscale x 8 x i1> %vec, <vscale x 1 x i1> %sv) {
; CHECK-LABEL: insert_nxv1i1_nxv8i1_2:
; CHECK: // %bb.0:
; CHECK-NEXT: punpklo p2.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: punpkhi p3.h, p2.b
; CHECK-NEXT: punpklo p2.h, p2.b
; CHECK-NEXT: punpkhi p3.h, p3.b
; CHECK-NEXT: uzp1 p1.d, p1.d, p3.d
; CHECK-NEXT: uzp1 p1.s, p2.s, p1.s
; CHECK-NEXT: uzp1 p0.h, p1.h, p0.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.nxv1i1(<vscale x 8 x i1> %vec, <vscale x 1 x i1> %sv, i64 2)
  ret <vscale x 8 x i1> %res
}

define <vscale x 8 x i1> @insert_nxv1i1_nxv8i1_3(<vscale x 8 x i1> %vec, <vscale x 1 x i1> %sv) {
; CHECK-LABEL: insert_nxv1i1_nxv8i1_3:
; CHECK: // %bb.0:
; CHECK-NEXT: punpklo p2.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: punpkhi p3.h, p2.b
; CHECK-NEXT: punpklo p2.h, p2.b
; CHECK-NEXT: punpklo p3.h, p3.b
; CHECK-NEXT: uzp1 p1.d, p3.d, p1.d
; CHECK-NEXT: uzp1 p1.s, p2.s, p1.s
; CHECK-NEXT: uzp1 p0.h, p1.h, p0.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.nxv1i1(<vscale x 8 x i1> %vec, <vscale x 1 x i1> %sv, i64 3)
  ret <vscale x 8 x i1>
%res 839} 840 841define <vscale x 8 x i1> @insert_nxv1i1_nxv8i1_4(<vscale x 8 x i1> %vec, <vscale x 1 x i1> %sv) { 842; CHECK-LABEL: insert_nxv1i1_nxv8i1_4: 843; CHECK: // %bb.0: 844; CHECK-NEXT: punpkhi p2.h, p0.b 845; CHECK-NEXT: punpklo p0.h, p0.b 846; CHECK-NEXT: punpklo p3.h, p2.b 847; CHECK-NEXT: punpkhi p2.h, p2.b 848; CHECK-NEXT: punpkhi p3.h, p3.b 849; CHECK-NEXT: uzp1 p1.d, p1.d, p3.d 850; CHECK-NEXT: uzp1 p1.s, p1.s, p2.s 851; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h 852; CHECK-NEXT: ret 853 %res = call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.nxv1i1(<vscale x 8 x i1> %vec, <vscale x 1 x i1> %sv, i64 4) 854 ret <vscale x 8 x i1> %res 855} 856 857define <vscale x 8 x i1> @insert_nxv1i1_nxv8i1_5(<vscale x 8 x i1> %vec, <vscale x 1 x i1> %sv) { 858; CHECK-LABEL: insert_nxv1i1_nxv8i1_5: 859; CHECK: // %bb.0: 860; CHECK-NEXT: punpkhi p2.h, p0.b 861; CHECK-NEXT: punpklo p0.h, p0.b 862; CHECK-NEXT: punpklo p3.h, p2.b 863; CHECK-NEXT: punpkhi p2.h, p2.b 864; CHECK-NEXT: punpklo p3.h, p3.b 865; CHECK-NEXT: uzp1 p1.d, p3.d, p1.d 866; CHECK-NEXT: uzp1 p1.s, p1.s, p2.s 867; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h 868; CHECK-NEXT: ret 869 %res = call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.nxv1i1(<vscale x 8 x i1> %vec, <vscale x 1 x i1> %sv, i64 5) 870 ret <vscale x 8 x i1> %res 871} 872 873define <vscale x 8 x i1> @insert_nxv1i1_nxv8i1_6(<vscale x 8 x i1> %vec, <vscale x 1 x i1> %sv) { 874; CHECK-LABEL: insert_nxv1i1_nxv8i1_6: 875; CHECK: // %bb.0: 876; CHECK-NEXT: punpkhi p2.h, p0.b 877; CHECK-NEXT: punpklo p0.h, p0.b 878; CHECK-NEXT: punpkhi p3.h, p2.b 879; CHECK-NEXT: punpklo p2.h, p2.b 880; CHECK-NEXT: punpkhi p3.h, p3.b 881; CHECK-NEXT: uzp1 p1.d, p1.d, p3.d 882; CHECK-NEXT: uzp1 p1.s, p2.s, p1.s 883; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h 884; CHECK-NEXT: ret 885 %res = call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.nxv1i1(<vscale x 8 x i1> %vec, <vscale x 1 x i1> %sv, i64 6) 886 ret <vscale x 8 x i1> %res 887} 888 889define <vscale x 8 x i1> 
@insert_nxv1i1_nxv8i1_7(<vscale x 8 x i1> %vec, <vscale x 1 x i1> %sv) { 890; CHECK-LABEL: insert_nxv1i1_nxv8i1_7: 891; CHECK: // %bb.0: 892; CHECK-NEXT: punpkhi p2.h, p0.b 893; CHECK-NEXT: punpklo p0.h, p0.b 894; CHECK-NEXT: punpkhi p3.h, p2.b 895; CHECK-NEXT: punpklo p2.h, p2.b 896; CHECK-NEXT: punpklo p3.h, p3.b 897; CHECK-NEXT: uzp1 p1.d, p3.d, p1.d 898; CHECK-NEXT: uzp1 p1.s, p2.s, p1.s 899; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h 900; CHECK-NEXT: ret 901 %res = call <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.nxv1i1(<vscale x 8 x i1> %vec, <vscale x 1 x i1> %sv, i64 7) 902 ret <vscale x 8 x i1> %res 903} 904 905; 906; Insert nxv1i1 type into: nxv16i1 907; 908 909define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_0(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv) { 910; CHECK-LABEL: insert_nxv1i1_nxv16i1_0: 911; CHECK: // %bb.0: 912; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 913; CHECK-NEXT: addvl sp, sp, #-1 914; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill 915; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG 916; CHECK-NEXT: .cfi_offset w29, -16 917; CHECK-NEXT: punpklo p2.h, p0.b 918; CHECK-NEXT: punpkhi p0.h, p0.b 919; CHECK-NEXT: punpklo p3.h, p2.b 920; CHECK-NEXT: punpkhi p2.h, p2.b 921; CHECK-NEXT: punpklo p4.h, p3.b 922; CHECK-NEXT: punpkhi p3.h, p3.b 923; CHECK-NEXT: punpkhi p4.h, p4.b 924; CHECK-NEXT: uzp1 p1.d, p1.d, p4.d 925; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload 926; CHECK-NEXT: uzp1 p1.s, p1.s, p3.s 927; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h 928; CHECK-NEXT: uzp1 p0.b, p1.b, p0.b 929; CHECK-NEXT: addvl sp, sp, #1 930; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 931; CHECK-NEXT: ret 932 %res = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv, i64 0) 933 ret <vscale x 16 x i1> %res 934} 935 936define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_1(<vscale x 16 x i1> 
%vec, <vscale x 1 x i1> %sv) { 937; CHECK-LABEL: insert_nxv1i1_nxv16i1_1: 938; CHECK: // %bb.0: 939; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill 940; CHECK-NEXT: addvl sp, sp, #-1 941; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill 942; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG 943; CHECK-NEXT: .cfi_offset w29, -16 944; CHECK-NEXT: punpklo p2.h, p0.b 945; CHECK-NEXT: punpkhi p0.h, p0.b 946; CHECK-NEXT: punpklo p3.h, p2.b 947; CHECK-NEXT: punpkhi p2.h, p2.b 948; CHECK-NEXT: punpklo p4.h, p3.b 949; CHECK-NEXT: punpkhi p3.h, p3.b 950; CHECK-NEXT: punpklo p4.h, p4.b 951; CHECK-NEXT: uzp1 p1.d, p4.d, p1.d 952; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload 953; CHECK-NEXT: uzp1 p1.s, p1.s, p3.s 954; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h 955; CHECK-NEXT: uzp1 p0.b, p1.b, p0.b 956; CHECK-NEXT: addvl sp, sp, #1 957; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 958; CHECK-NEXT: ret 959 %res = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv, i64 1) 960 ret <vscale x 16 x i1> %res 961} 962 963define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_2(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv) { 964; CHECK-LABEL: insert_nxv1i1_nxv16i1_2: 965; CHECK: // %bb.0: 966; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 967; CHECK-NEXT: addvl sp, sp, #-1 968; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill 969; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG 970; CHECK-NEXT: .cfi_offset w29, -16 971; CHECK-NEXT: punpklo p2.h, p0.b 972; CHECK-NEXT: punpkhi p0.h, p0.b 973; CHECK-NEXT: punpklo p3.h, p2.b 974; CHECK-NEXT: punpkhi p2.h, p2.b 975; CHECK-NEXT: punpkhi p4.h, p3.b 976; CHECK-NEXT: punpklo p3.h, p3.b 977; CHECK-NEXT: punpkhi p4.h, p4.b 978; CHECK-NEXT: uzp1 p1.d, p1.d, p4.d 979; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload 980; CHECK-NEXT: uzp1 p1.s, p3.s, p1.s 981; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h 982; CHECK-NEXT: uzp1 p0.b, p1.b, p0.b 983; CHECK-NEXT: addvl sp, sp, #1 984; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 985; CHECK-NEXT: ret 986 %res = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv, i64 2) 987 ret <vscale x 16 x i1> %res 988} 989 990define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_3(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv) { 991; CHECK-LABEL: insert_nxv1i1_nxv16i1_3: 992; CHECK: // %bb.0: 993; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 994; CHECK-NEXT: addvl sp, sp, #-1 995; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill 996; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG 997; CHECK-NEXT: .cfi_offset w29, -16 998; CHECK-NEXT: punpklo p2.h, p0.b 999; CHECK-NEXT: punpkhi p0.h, p0.b 1000; CHECK-NEXT: punpklo p3.h, p2.b 1001; CHECK-NEXT: punpkhi p2.h, p2.b 1002; CHECK-NEXT: punpkhi p4.h, p3.b 1003; CHECK-NEXT: punpklo p3.h, p3.b 1004; CHECK-NEXT: punpklo p4.h, p4.b 1005; CHECK-NEXT: uzp1 p1.d, p4.d, p1.d 1006; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload 1007; CHECK-NEXT: uzp1 p1.s, p3.s, p1.s 1008; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h 1009; CHECK-NEXT: uzp1 p0.b, p1.b, p0.b 1010; CHECK-NEXT: addvl sp, sp, #1 1011; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1012; CHECK-NEXT: ret 1013 %res = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv, i64 3) 1014 ret <vscale x 16 x i1> %res 1015} 1016 1017define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_4(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv) { 1018; CHECK-LABEL: insert_nxv1i1_nxv16i1_4: 1019; CHECK: // %bb.0: 1020; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1021; CHECK-NEXT: addvl sp, sp, #-1 1022; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill 1023; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG 1024; CHECK-NEXT: .cfi_offset w29, -16 1025; CHECK-NEXT: punpklo p2.h, p0.b 1026; CHECK-NEXT: punpkhi p0.h, p0.b 1027; CHECK-NEXT: punpkhi p3.h, p2.b 1028; CHECK-NEXT: punpklo p2.h, p2.b 1029; CHECK-NEXT: punpklo p4.h, p3.b 1030; CHECK-NEXT: punpkhi p3.h, p3.b 1031; CHECK-NEXT: punpkhi p4.h, p4.b 1032; CHECK-NEXT: uzp1 p1.d, p1.d, p4.d 1033; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload 1034; CHECK-NEXT: uzp1 p1.s, p1.s, p3.s 1035; CHECK-NEXT: uzp1 p1.h, p2.h, p1.h 1036; CHECK-NEXT: uzp1 p0.b, p1.b, p0.b 1037; CHECK-NEXT: addvl sp, sp, #1 1038; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1039; CHECK-NEXT: ret 1040 %res = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv, i64 4) 1041 ret <vscale x 16 x i1> %res 1042} 1043 1044define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_5(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv) { 1045; CHECK-LABEL: insert_nxv1i1_nxv16i1_5: 1046; CHECK: // %bb.0: 1047; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1048; CHECK-NEXT: addvl sp, sp, #-1 1049; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill 1050; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG 1051; CHECK-NEXT: .cfi_offset w29, -16 1052; CHECK-NEXT: punpklo p2.h, p0.b 1053; CHECK-NEXT: punpkhi p0.h, p0.b 1054; CHECK-NEXT: punpkhi p3.h, p2.b 1055; CHECK-NEXT: punpklo p2.h, p2.b 1056; CHECK-NEXT: punpklo p4.h, p3.b 1057; CHECK-NEXT: punpkhi p3.h, p3.b 1058; CHECK-NEXT: punpklo p4.h, p4.b 1059; CHECK-NEXT: uzp1 p1.d, p4.d, p1.d 1060; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload 1061; CHECK-NEXT: uzp1 p1.s, p1.s, p3.s 1062; CHECK-NEXT: uzp1 p1.h, p2.h, p1.h 1063; CHECK-NEXT: uzp1 p0.b, p1.b, p0.b 1064; CHECK-NEXT: addvl sp, sp, #1 1065; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1066; CHECK-NEXT: ret 1067 %res = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv, i64 5) 1068 ret <vscale x 16 x i1> %res 1069} 1070 1071define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_6(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv) { 1072; CHECK-LABEL: insert_nxv1i1_nxv16i1_6: 1073; CHECK: // %bb.0: 1074; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1075; CHECK-NEXT: addvl sp, sp, #-1 1076; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill 1077; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG 1078; CHECK-NEXT: .cfi_offset w29, -16 1079; CHECK-NEXT: punpklo p2.h, p0.b 1080; CHECK-NEXT: punpkhi p0.h, p0.b 1081; CHECK-NEXT: punpkhi p3.h, p2.b 1082; CHECK-NEXT: punpklo p2.h, p2.b 1083; CHECK-NEXT: punpkhi p4.h, p3.b 1084; CHECK-NEXT: punpklo p3.h, p3.b 1085; CHECK-NEXT: punpkhi p4.h, p4.b 1086; CHECK-NEXT: uzp1 p1.d, p1.d, p4.d 1087; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload 1088; CHECK-NEXT: uzp1 p1.s, p3.s, p1.s 1089; CHECK-NEXT: uzp1 p1.h, p2.h, p1.h 1090; CHECK-NEXT: uzp1 p0.b, p1.b, p0.b 1091; CHECK-NEXT: addvl sp, sp, #1 1092; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1093; CHECK-NEXT: ret 1094 %res = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv, i64 6) 1095 ret <vscale x 16 x i1> %res 1096} 1097 1098define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_7(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv) { 1099; CHECK-LABEL: insert_nxv1i1_nxv16i1_7: 1100; CHECK: // %bb.0: 1101; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1102; CHECK-NEXT: addvl sp, sp, #-1 1103; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill 1104; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG 1105; CHECK-NEXT: .cfi_offset w29, -16 1106; CHECK-NEXT: punpklo p2.h, p0.b 1107; CHECK-NEXT: punpkhi p0.h, p0.b 1108; CHECK-NEXT: punpkhi p3.h, p2.b 1109; CHECK-NEXT: punpklo p2.h, p2.b 1110; CHECK-NEXT: punpkhi p4.h, p3.b 1111; CHECK-NEXT: punpklo p3.h, p3.b 1112; CHECK-NEXT: punpklo p4.h, p4.b 1113; CHECK-NEXT: uzp1 p1.d, p4.d, p1.d 1114; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload 1115; CHECK-NEXT: uzp1 p1.s, p3.s, p1.s 1116; CHECK-NEXT: uzp1 p1.h, p2.h, p1.h 1117; CHECK-NEXT: uzp1 p0.b, p1.b, p0.b 1118; CHECK-NEXT: addvl sp, sp, #1 1119; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1120; CHECK-NEXT: ret 1121 %res = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv, i64 7) 1122 ret <vscale x 16 x i1> %res 1123} 1124 1125define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_8(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv) { 1126; CHECK-LABEL: insert_nxv1i1_nxv16i1_8: 1127; CHECK: // %bb.0: 1128; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1129; CHECK-NEXT: addvl sp, sp, #-1 1130; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill 1131; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG 1132; CHECK-NEXT: .cfi_offset w29, -16 1133; CHECK-NEXT: punpkhi p2.h, p0.b 1134; CHECK-NEXT: punpklo p0.h, p0.b 1135; CHECK-NEXT: punpklo p3.h, p2.b 1136; CHECK-NEXT: punpkhi p2.h, p2.b 1137; CHECK-NEXT: punpklo p4.h, p3.b 1138; CHECK-NEXT: punpkhi p3.h, p3.b 1139; CHECK-NEXT: punpkhi p4.h, p4.b 1140; CHECK-NEXT: uzp1 p1.d, p1.d, p4.d 1141; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload 1142; CHECK-NEXT: uzp1 p1.s, p1.s, p3.s 1143; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h 1144; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b 1145; CHECK-NEXT: addvl sp, sp, #1 1146; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1147; CHECK-NEXT: ret 1148 %res = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv, i64 8) 1149 ret <vscale x 16 x i1> %res 1150} 1151 1152define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_9(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv) { 1153; CHECK-LABEL: insert_nxv1i1_nxv16i1_9: 1154; CHECK: // %bb.0: 1155; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1156; CHECK-NEXT: addvl sp, sp, #-1 1157; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill 1158; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG 1159; CHECK-NEXT: .cfi_offset w29, -16 1160; CHECK-NEXT: punpkhi p2.h, p0.b 1161; CHECK-NEXT: punpklo p0.h, p0.b 1162; CHECK-NEXT: punpklo p3.h, p2.b 1163; CHECK-NEXT: punpkhi p2.h, p2.b 1164; CHECK-NEXT: punpklo p4.h, p3.b 1165; CHECK-NEXT: punpkhi p3.h, p3.b 1166; CHECK-NEXT: punpklo p4.h, p4.b 1167; CHECK-NEXT: uzp1 p1.d, p4.d, p1.d 1168; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload 1169; CHECK-NEXT: uzp1 p1.s, p1.s, p3.s 1170; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h 1171; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b 1172; CHECK-NEXT: addvl sp, sp, #1 1173; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1174; CHECK-NEXT: ret 1175 %res = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv, i64 9) 1176 ret <vscale x 16 x i1> %res 1177} 1178 1179define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_10(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv) { 1180; CHECK-LABEL: insert_nxv1i1_nxv16i1_10: 1181; CHECK: // %bb.0: 1182; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1183; CHECK-NEXT: addvl sp, sp, #-1 1184; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill 1185; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG 1186; CHECK-NEXT: .cfi_offset w29, -16 1187; CHECK-NEXT: punpkhi p2.h, p0.b 1188; CHECK-NEXT: punpklo p0.h, p0.b 1189; CHECK-NEXT: punpklo p3.h, p2.b 1190; CHECK-NEXT: punpkhi p2.h, p2.b 1191; CHECK-NEXT: punpkhi p4.h, p3.b 1192; CHECK-NEXT: punpklo p3.h, p3.b 1193; CHECK-NEXT: punpkhi p4.h, p4.b 1194; CHECK-NEXT: uzp1 p1.d, p1.d, p4.d 1195; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload 1196; CHECK-NEXT: uzp1 p1.s, p3.s, p1.s 1197; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h 1198; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b 1199; CHECK-NEXT: addvl sp, sp, #1 1200; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1201; CHECK-NEXT: ret 1202 %res = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv, i64 10) 1203 ret <vscale x 16 x i1> %res 1204} 1205 1206define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_11(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv) { 1207; CHECK-LABEL: insert_nxv1i1_nxv16i1_11: 1208; CHECK: // %bb.0: 1209; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1210; CHECK-NEXT: addvl sp, sp, #-1 1211; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill 1212; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG 1213; CHECK-NEXT: .cfi_offset w29, -16 1214; CHECK-NEXT: punpkhi p2.h, p0.b 1215; CHECK-NEXT: punpklo p0.h, p0.b 1216; CHECK-NEXT: punpklo p3.h, p2.b 1217; CHECK-NEXT: punpkhi p2.h, p2.b 1218; CHECK-NEXT: punpkhi p4.h, p3.b 1219; CHECK-NEXT: punpklo p3.h, p3.b 1220; CHECK-NEXT: punpklo p4.h, p4.b 1221; CHECK-NEXT: uzp1 p1.d, p4.d, p1.d 1222; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload 1223; CHECK-NEXT: uzp1 p1.s, p3.s, p1.s 1224; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h 1225; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b 1226; CHECK-NEXT: addvl sp, sp, #1 1227; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1228; CHECK-NEXT: ret 1229 %res = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv, i64 11) 1230 ret <vscale x 16 x i1> %res 1231} 1232 1233define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_12(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv) { 1234; CHECK-LABEL: insert_nxv1i1_nxv16i1_12: 1235; CHECK: // %bb.0: 1236; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1237; CHECK-NEXT: addvl sp, sp, #-1 1238; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill 1239; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG 1240; CHECK-NEXT: .cfi_offset w29, -16 1241; CHECK-NEXT: punpkhi p2.h, p0.b 1242; CHECK-NEXT: punpklo p0.h, p0.b 1243; CHECK-NEXT: punpkhi p3.h, p2.b 1244; CHECK-NEXT: punpklo p2.h, p2.b 1245; CHECK-NEXT: punpklo p4.h, p3.b 1246; CHECK-NEXT: punpkhi p3.h, p3.b 1247; CHECK-NEXT: punpkhi p4.h, p4.b 1248; CHECK-NEXT: uzp1 p1.d, p1.d, p4.d 1249; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload 1250; CHECK-NEXT: uzp1 p1.s, p1.s, p3.s 1251; CHECK-NEXT: uzp1 p1.h, p2.h, p1.h 1252; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b 1253; CHECK-NEXT: addvl sp, sp, #1 1254; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1255; CHECK-NEXT: ret 1256 %res = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv, i64 12) 1257 ret <vscale x 16 x i1> %res 1258} 1259 1260define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_13(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv) { 1261; CHECK-LABEL: insert_nxv1i1_nxv16i1_13: 1262; CHECK: // %bb.0: 1263; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1264; CHECK-NEXT: addvl sp, sp, #-1 1265; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill 1266; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG 1267; CHECK-NEXT: .cfi_offset w29, -16 1268; CHECK-NEXT: punpkhi p2.h, p0.b 1269; CHECK-NEXT: punpklo p0.h, p0.b 1270; CHECK-NEXT: punpkhi p3.h, p2.b 1271; CHECK-NEXT: punpklo p2.h, p2.b 1272; CHECK-NEXT: punpklo p4.h, p3.b 1273; CHECK-NEXT: punpkhi p3.h, p3.b 1274; CHECK-NEXT: punpklo p4.h, p4.b 1275; CHECK-NEXT: uzp1 p1.d, p4.d, p1.d 1276; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload 1277; CHECK-NEXT: uzp1 p1.s, p1.s, p3.s 1278; CHECK-NEXT: uzp1 p1.h, p2.h, p1.h 1279; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b 1280; CHECK-NEXT: addvl sp, sp, #1 1281; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1282; CHECK-NEXT: ret 1283 %res = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv, i64 13) 1284 ret <vscale x 16 x i1> %res 1285} 1286 1287define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_14(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv) { 1288; CHECK-LABEL: insert_nxv1i1_nxv16i1_14: 1289; CHECK: // %bb.0: 1290; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1291; CHECK-NEXT: addvl sp, sp, #-1 1292; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill 1293; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG 1294; CHECK-NEXT: .cfi_offset w29, -16 1295; CHECK-NEXT: punpkhi p2.h, p0.b 1296; CHECK-NEXT: punpklo p0.h, p0.b 1297; CHECK-NEXT: punpkhi p3.h, p2.b 1298; CHECK-NEXT: punpklo p2.h, p2.b 1299; CHECK-NEXT: punpkhi p4.h, p3.b 1300; CHECK-NEXT: punpklo p3.h, p3.b 1301; CHECK-NEXT: punpkhi p4.h, p4.b 1302; CHECK-NEXT: uzp1 p1.d, p1.d, p4.d 1303; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload 1304; CHECK-NEXT: uzp1 p1.s, p3.s, p1.s 1305; CHECK-NEXT: uzp1 p1.h, p2.h, p1.h 1306; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b 1307; CHECK-NEXT: addvl sp, sp, #1 1308; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1309; CHECK-NEXT: ret 1310 %res = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv, i64 14) 1311 ret <vscale x 16 x i1> %res 1312} 1313 1314define <vscale x 16 x i1> @insert_nxv1i1_nxv16i1_15(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv) { 1315; CHECK-LABEL: insert_nxv1i1_nxv16i1_15: 1316; CHECK: // %bb.0: 1317; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1318; CHECK-NEXT: addvl sp, sp, #-1 1319; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill 1320; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG 1321; CHECK-NEXT: .cfi_offset w29, -16 1322; CHECK-NEXT: punpkhi p2.h, p0.b 1323; CHECK-NEXT: punpklo p0.h, p0.b 1324; CHECK-NEXT: punpkhi p3.h, p2.b 1325; CHECK-NEXT: punpklo p2.h, p2.b 1326; CHECK-NEXT: punpkhi p4.h, p3.b 1327; CHECK-NEXT: punpklo p3.h, p3.b 1328; CHECK-NEXT: punpklo p4.h, p4.b 1329; CHECK-NEXT: uzp1 p1.d, p4.d, p1.d 1330; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload 1331; CHECK-NEXT: uzp1 p1.s, p3.s, p1.s 1332; CHECK-NEXT: uzp1 p1.h, p2.h, p1.h 1333; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b 1334; CHECK-NEXT: addvl sp, sp, #1 1335; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1336; CHECK-NEXT: ret 1337 %res = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1> %vec, <vscale x 1 x i1> %sv, i64 15) 1338 ret <vscale x 16 x i1> %res 1339} 1340 1341attributes #0 = { vscale_range(2,2) } 1342 1343declare <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64) 1344 1345declare <vscale x 6 x i16> @llvm.vector.insert.nxv6i16.nxv1i16(<vscale x 6 x i16>, <vscale x 1 x i16>, i64) 1346declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.nxv2i16(<vscale x 8 x i16>, <vscale x 2 x i16>, i64) 1347declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64) 1348 1349declare <vscale x 3 x i32> @llvm.vector.insert.nxv3i32.nxv2i32(<vscale x 3 x i32>, <vscale x 2 x i32>, i64) 1350declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.nxv1i32(<vscale x 4 x i32>, <vscale x 1 x i32>, i64) 1351declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64) 1352declare <vscale x 12 x i32> @llvm.vector.insert.nxv4i32.nxv12i32(<vscale x 12 x i32>, <vscale x 4 x i32>, i64) 
1353declare <vscale x 6 x i32> @llvm.vector.insert.nxv6i32.nxv2i32(<vscale x 6 x i32>, <vscale x 2 x i32>, i64) 1354declare <vscale x 6 x i32> @llvm.vector.insert.nxv6i32.nxv3i32(<vscale x 6 x i32>, <vscale x 3 x i32>, i64) 1355 1356declare <vscale x 2 x bfloat> @llvm.vector.insert.nxv2bf16.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i64) 1357declare <vscale x 4 x bfloat> @llvm.vector.insert.nxv4bf16.nxv2bf16(<vscale x 4 x bfloat>, <vscale x 2 x bfloat>, i64) 1358declare <vscale x 4 x bfloat> @llvm.vector.insert.nxv4bf16.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i64) 1359declare <vscale x 4 x bfloat> @llvm.vector.insert.nxv4bf16.v4bf16(<vscale x 4 x bfloat>, <4 x bfloat>, i64) 1360declare <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64) 1361declare <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.nxv4bf16(<vscale x 8 x bfloat>, <vscale x 4 x bfloat>, i64) 1362declare <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat>, <8 x bfloat>, i64) 1363 1364declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64) 1365declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64>, <4 x i64>, i64) 1366declare <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64>, <vscale x 8 x i64>, i64) 1367declare <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64>, <2 x i64>, i64) 1368 1369declare <vscale x 4 x half> @llvm.vector.insert.nxv4f16.nxv2f16(<vscale x 4 x half>, <vscale x 2 x half>, i64) 1370declare <vscale x 8 x half> @llvm.vector.insert.nxv8f16.nxv2f16(<vscale x 8 x half>, <vscale x 2 x half>, i64) 1371declare <vscale x 8 x half> @llvm.vector.insert.nxv8f16.nxv4f16(<vscale x 8 x half>, <vscale x 4 x half>, i64) 1372 1373declare <vscale x 3 x float> @llvm.vector.insert.nxv3f32.nxv2f32(<vscale x 3 x float>, <vscale x 2 x float>, i64) 1374declare <vscale x 4 x float> 
@llvm.vector.insert.nxv4f32.nxv1f32(<vscale x 4 x float>, <vscale x 1 x float>, i64) 1375declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float>, <vscale x 2 x float>, i64) 1376 1377declare <vscale x 2 x i1> @llvm.vector.insert.nxv2i1.v8i1(<vscale x 2 x i1>, <8 x i1>, i64) 1378declare <vscale x 4 x i1> @llvm.vector.insert.nxv4i1.v16i1(<vscale x 4 x i1>, <16 x i1>, i64) 1379declare <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.v32i1(<vscale x 8 x i1>, <32 x i1>, i64) 1380declare <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv1i1(<vscale x 16 x i1>, <vscale x 1 x i1>, i64) 1381declare <vscale x 8 x i1> @llvm.vector.insert.nxv8i1.nxv1i1(<vscale x 8 x i1>, <vscale x 1 x i1>, i64) 1382declare <vscale x 4 x i1> @llvm.vector.insert.nxv4i1.nxv1i1(<vscale x 4 x i1>, <vscale x 1 x i1>, i64) 1383declare <vscale x 2 x i1> @llvm.vector.insert.nxv2i1.nxv1i1(<vscale x 2 x i1>, <vscale x 1 x i1>, i64) 1384declare <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv4i1(<vscale x 16 x i1>, <vscale x 4 x i1>, i64) 1385declare <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv8i1(<vscale x 16 x i1>, <vscale x 8 x i1>, i64) 1386declare <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v64i1(<vscale x 16 x i1>, <64 x i1>, i64) 1387