; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

;
; VECTOR_SPLICE (index)
;

define <vscale x 16 x i8> @splice_nxv16i8_zero_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: splice_nxv16i8_zero_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 0)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @splice_nxv16i8_first_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: splice_nxv16i8_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #1
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 1)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @splice_nxv16i8_last_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv16i8_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #255
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 255)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @splice_nxv8i16_first_idx(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: splice_nxv8i16_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #2
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 1)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @splice_nxv4i32_first_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: splice_nxv4i32_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #4
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 1)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @splice_nxv4i32_last_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv4i32_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 63)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @splice_nxv2i64_first_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: splice_nxv2i64_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #8
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 1)
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @splice_nxv2i64_last_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv2i64_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 31)
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x half> @splice_nxv2f16_neg_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
; CHECK-LABEL: splice_nxv2f16_neg_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 -1)
  ret <vscale x 2 x half> %res
}

define <vscale x 2 x half> @splice_nxv2f16_neg2_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
; CHECK-LABEL: splice_nxv2f16_neg2_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 -2)
  ret <vscale x 2 x half> %res
}

define <vscale x 2 x half> @splice_nxv2f16_first_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
; CHECK-LABEL: splice_nxv2f16_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #8
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 1)
  ret <vscale x 2 x half> %res
}

define <vscale x 2 x half> @splice_nxv2f16_last_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv2f16_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 31)
  ret <vscale x 2 x half> %res
}

define <vscale x 4 x half> @splice_nxv4f16_neg_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
; CHECK-LABEL: splice_nxv4f16_neg_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl1
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 -1)
  ret <vscale x 4 x half> %res
}

define <vscale x 4 x half> @splice_nxv4f16_neg3_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
; CHECK-LABEL: splice_nxv4f16_neg3_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl3
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 -3)
  ret <vscale x 4 x half> %res
}

define <vscale x 4 x half> @splice_nxv4f16_first_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
; CHECK-LABEL: splice_nxv4f16_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #4
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 1)
  ret <vscale x 4 x half> %res
}

define <vscale x 4 x half> @splice_nxv4f16_last_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv4f16_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 63)
  ret <vscale x 4 x half> %res
}

define <vscale x 8 x half> @splice_nxv8f16_first_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: splice_nxv8f16_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #2
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 1)
  ret <vscale x 8 x half> %res
}

define <vscale x 8 x half> @splice_nxv8f16_last_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv8f16_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #254
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 127)
  ret <vscale x 8 x half> %res
}

define <vscale x 2 x float> @splice_nxv2f32_neg_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
; CHECK-LABEL: splice_nxv2f32_neg_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 -1)
  ret <vscale x 2 x float> %res
}

define <vscale x 2 x float> @splice_nxv2f32_neg2_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
; CHECK-LABEL: splice_nxv2f32_neg2_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 -2)
  ret <vscale x 2 x float> %res
}

define <vscale x 2 x float> @splice_nxv2f32_first_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
; CHECK-LABEL: splice_nxv2f32_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #8
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 1)
  ret <vscale x 2 x float> %res
}

define <vscale x 2 x float> @splice_nxv2f32_last_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv2f32_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i32 31)
  ret <vscale x 2 x float> %res
}

define <vscale x 4 x float> @splice_nxv4f32_first_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: splice_nxv4f32_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #4
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 1)
  ret <vscale x 4 x float> %res
}

define <vscale x 4 x float> @splice_nxv4f32_last_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv4f32_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 63)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @splice_nxv2f64_first_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: splice_nxv2f64_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #8
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 1)
  ret <vscale x 2 x double> %res
}

define <vscale x 2 x double> @splice_nxv2f64_last_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv2f64_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 31)
  ret <vscale x 2 x double> %res
}

; Ensure predicate based splice is promoted to use ZPRs.
define <vscale x 2 x i1> @splice_nxv2i1_idx(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv2i1_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.d, p0/z, #1 // =0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i32 1)
  ret <vscale x 2 x i1> %res
}

; Ensure predicate based splice is promoted to use ZPRs.
define <vscale x 4 x i1> @splice_nxv4i1_idx(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv4i1_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.s, p0/z, #1 // =0x1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT:    and z1.s, z1.s, #0x1
; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i32 2)
  ret <vscale x 4 x i1> %res
}

; Ensure predicate based splice is promoted to use ZPRs.
define <vscale x 8 x i1> @splice_nxv8i1_idx(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv8i1_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.h, p0/z, #1 // =0x1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT:    and z1.h, z1.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p0/z, z1.h, #0
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i32 4)
  ret <vscale x 8 x i1> %res
}

; Ensure predicate based splice is promoted to use ZPRs.
define <vscale x 16 x i1> @splice_nxv16i1_idx(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv16i1_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.b, p0/z, #1 // =0x1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT:    and z1.b, z1.b, #0x1
; CHECK-NEXT:    cmpne p0.b, p0/z, z1.b, #0
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i32 8)
  ret <vscale x 16 x i1> %res
}

; Verify promote type legalisation works as expected.
define <vscale x 2 x i8> @splice_nxv2i8_idx(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b) #0 {
; CHECK-LABEL: splice_nxv2i8_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #8
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i8> @llvm.vector.splice.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, i32 1)
  ret <vscale x 2 x i8> %res
}

; Verify splitvec type legalisation works as expected.
define <vscale x 8 x i32> @splice_nxv8i32_idx(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) #0 {
; CHECK-LABEL: splice_nxv8i32_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-4
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    orr x8, x8, #0x8
; CHECK-NEXT:    st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT:    st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    addvl sp, sp, #4
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, i32 2)
  ret <vscale x 8 x i32> %res
}

; Verify splitvec type legalisation works as expected.
define <vscale x 16 x float> @splice_nxv16f32_16(<vscale x 16 x float> %a, <vscale x 16 x float> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv16f32_16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-8
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    mov w9, #16 // =0x10
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sub x8, x8, #1
; CHECK-NEXT:    cmp x8, #16
; CHECK-NEXT:    st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT:    csel x8, x8, x9, lo
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    add x10, x9, x8, lsl #2
; CHECK-NEXT:    st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    st1w { z7.s }, p0, [sp, #7, mul vl]
; CHECK-NEXT:    st1w { z4.s }, p0, [sp, #4, mul vl]
; CHECK-NEXT:    st1w { z5.s }, p0, [sp, #5, mul vl]
; CHECK-NEXT:    st1w { z6.s }, p0, [sp, #6, mul vl]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x10, #1, mul vl]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x10, #2, mul vl]
; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x10, #3, mul vl]
; CHECK-NEXT:    addvl sp, sp, #8
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x float> @llvm.vector.splice.nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b, i32 16)
  ret <vscale x 16 x float> %res
}

;
; VECTOR_SPLICE (trailing elements)
;

define <vscale x 16 x i8> @splice_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: splice_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    rev p0.b, p0.b
; CHECK-NEXT:    splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -16)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @splice_nxv16i8_neg32(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv16i8_neg32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    rev p0.b, p0.b
; CHECK-NEXT:    splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -32)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @splice_nxv16i8_neg64(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(4,16) #0 {
; CHECK-LABEL: splice_nxv16i8_neg64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    rev p0.b, p0.b
; CHECK-NEXT:    splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -64)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @splice_nxv16i8_neg128(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(8,16) #0 {
; CHECK-LABEL: splice_nxv16i8_neg128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl128
; CHECK-NEXT:    rev p0.b, p0.b
; CHECK-NEXT:    splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -128)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @splice_nxv16i8_neg256(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv16i8_neg256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl256
; CHECK-NEXT:    rev p0.b, p0.b
; CHECK-NEXT:    splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -256)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @splice_nxv16i8_1(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: splice_nxv16i8_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl1
; CHECK-NEXT:    rev p0.b, p0.b
; CHECK-NEXT:    splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -1)
  ret <vscale x 16 x i8> %res
}

define <vscale x 16 x i8> @splice_nxv16i8_neg17(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv16i8_neg17:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov w9, #17 // =0x11
; CHECK-NEXT:    cmp x8, #17
; CHECK-NEXT:    mov x10, sp
; CHECK-NEXT:    csel x9, x8, x9, lo
; CHECK-NEXT:    add x8, x10, x8
; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
; CHECK-NEXT:    st1b { z1.b }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    sub x8, x8, x9
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8]
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 -17)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @splice_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: splice_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    rev p0.h, p0.h
; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 -8)
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @splice_nxv8i16_1(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: splice_nxv8i16_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl1
; CHECK-NEXT:    rev p0.h, p0.h
; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 -1)
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @splice_nxv8i16_neg9(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv8i16_neg9:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov w9, #18 // =0x12
; CHECK-NEXT:    cmp x8, #18
; CHECK-NEXT:    mov x10, sp
; CHECK-NEXT:    csel x9, x8, x9, lo
; CHECK-NEXT:    add x8, x10, x8
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    sub x8, x8, x9
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8]
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 -9)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @splice_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: splice_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -4)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @splice_nxv4i32_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: splice_nxv4i32_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl1
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -1)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @splice_nxv4i32_neg5(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv4i32_neg5:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl5
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 -5)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @splice_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: splice_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 -2)
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @splice_nxv2i64_1(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: splice_nxv2i64_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 -1)
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @splice_nxv2i64_neg3(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv2i64_neg3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl3
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 -3)
  ret <vscale x 2 x i64> %res
}

define <vscale x 8 x half> @splice_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: splice_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    rev p0.h, p0.h
; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -8)
  ret <vscale x 8 x half> %res
}

define <vscale x 8 x half> @splice_nxv8f16_1(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: splice_nxv8f16_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl1
; CHECK-NEXT:    rev p0.h, p0.h
; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -1)
  ret <vscale x 8 x half> %res
}

define <vscale x 8 x half> @splice_nxv8f16_neg9(<vscale x 8 x half> %a, <vscale x 8 x half> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv8f16_neg9:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov w9, #18 // =0x12
; CHECK-NEXT:    cmp x8, #18
; CHECK-NEXT:    mov x10, sp
; CHECK-NEXT:    csel x9, x8, x9, lo
; CHECK-NEXT:    add x8, x10, x8
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    sub x8, x8, x9
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8]
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -9)
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @splice_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: splice_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 -4)
  ret <vscale x 4 x float> %res
}

define <vscale x 4 x float> @splice_nxv4f32_1(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: splice_nxv4f32_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl1
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 -1)
  ret <vscale x 4 x float> %res
}

define <vscale x 4 x float> @splice_nxv4f32_neg5(<vscale x 4 x float> %a, <vscale x 4 x float> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv4f32_neg5:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl5
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 -5)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @splice_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: splice_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -2)
  ret <vscale x 2 x double> %res
}

define <vscale x 2 x double> @splice_nxv2f64_1(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: splice_nxv2f64_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -1)
  ret <vscale x 2 x double> %res
}

define <vscale x 2 x double> @splice_nxv2f64_neg3(<vscale x 2 x double> %a, <vscale x 2 x double> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv2f64_neg3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl3
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 -3)
  ret <vscale x 2 x double> %res
}

define <vscale x 2 x bfloat> @splice_nxv2bf16_neg_idx(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv2bf16_neg_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 -1)
  ret <vscale x 2 x bfloat> %res
}

define <vscale x 2 x bfloat> @splice_nxv2bf16_neg2_idx(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv2bf16_neg2_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 -2)
  ret <vscale x 2 x bfloat> %res
}

define <vscale x 2 x bfloat> @splice_nxv2bf16_first_idx(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv2bf16_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #8
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 1)
  ret <vscale x 2 x bfloat> %res
}

define <vscale x 2 x bfloat> @splice_nxv2bf16_last_idx(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv2bf16_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 31)
  ret <vscale x 2 x bfloat> %res
}

define <vscale x 4 x bfloat> @splice_nxv4bf16_neg_idx(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv4bf16_neg_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl1
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 -1)
  ret <vscale x 4 x bfloat> %res
}

define <vscale x 4 x bfloat> @splice_nxv4bf16_neg3_idx(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv4bf16_neg3_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl3
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 -3)
  ret <vscale x 4 x bfloat> %res
}

define <vscale x 4 x bfloat> @splice_nxv4bf16_first_idx(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv4bf16_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #4
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 1)
  ret <vscale x 4 x bfloat> %res
}

define <vscale x 4 x bfloat> @splice_nxv4bf16_last_idx(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv4bf16_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 63)
  ret <vscale x 4 x bfloat> %res
}

define <vscale x 8 x bfloat> @splice_nxv8bf16_first_idx(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv8bf16_first_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #2
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 1)
  ret <vscale x 8 x bfloat> %res
}

define <vscale x 8 x bfloat> @splice_nxv8bf16_last_idx(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) vscale_range(16,16) #0 {
; CHECK-LABEL: splice_nxv8bf16_last_idx:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #254
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 127)
  ret <vscale x 8 x bfloat> %res
}

; Ensure predicate based splice is promoted to use ZPRs.
define <vscale x 2 x i1> @splice_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p2.d, vl1
; CHECK-NEXT:    mov z0.d, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.d, p0/z, #1 // =0x1
; CHECK-NEXT:    rev p0.d, p2.d
; CHECK-NEXT:    splice z1.d, p0, z1.d, z0.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    cmpne p0.d, p0/z, z1.d, #0
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i32 -1)
  ret <vscale x 2 x i1> %res
}

; Ensure predicate based splice is promoted to use ZPRs.
; Same ZPR-promotion pattern as splice_nxv2i1, at .s granularity: expand the
; predicates to 0/1 vectors, splice in Z registers, CMPNE back to a predicate.
define <vscale x 4 x i1> @splice_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p2.s, vl1
; CHECK-NEXT:    mov z0.s, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.s, p0/z, #1 // =0x1
; CHECK-NEXT:    rev p0.s, p2.s
; CHECK-NEXT:    splice z1.s, p0, z1.s, z0.s
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    and z1.s, z1.s, #0x1
; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i32 -1)
  ret <vscale x 4 x i1> %res
}

; Ensure predicate based splice is promoted to use ZPRs.
define <vscale x 8 x i1> @splice_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p2.h, vl1
; CHECK-NEXT:    mov z0.h, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.h, p0/z, #1 // =0x1
; CHECK-NEXT:    rev p0.h, p2.h
; CHECK-NEXT:    splice z1.h, p0, z1.h, z0.h
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    and z1.h, z1.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p0/z, z1.h, #0
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i32 -1)
  ret <vscale x 8 x i1> %res
}

; Ensure predicate based splice is promoted to use ZPRs.
define <vscale x 16 x i1> @splice_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {
; CHECK-LABEL: splice_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p2.b, vl1
; CHECK-NEXT:    mov z0.b, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.b, p0/z, #1 // =0x1
; CHECK-NEXT:    rev p0.b, p2.b
; CHECK-NEXT:    splice z1.b, p0, z1.b, z0.b
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    and z1.b, z1.b, #0x1
; CHECK-NEXT:    cmpne p0.b, p0/z, z1.b, #0
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i32 -1)
  ret <vscale x 16 x i1> %res
}

; Verify promote type legalisation works as expected.
; nxv2i8 is promoted into the 64-bit-element container (note the .d predicate).
define <vscale x 2 x i8> @splice_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b) #0 {
; CHECK-LABEL: splice_nxv2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i8> @llvm.vector.splice.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, i32 -2)
  ret <vscale x 2 x i8> %res
}

; Verify splitvec type legalisation works as expected.
; nxv8i32 spans two Z registers, so the splice goes through a stack temporary:
; store both operands contiguously, then reload at (end of %a) - 32 bytes
; (8 elements * 4 bytes).
define <vscale x 8 x i32> @splice_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) #0 {
; CHECK-LABEL: splice_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-4
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    rdvl x8, #2
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    add x8, x9, x8
; CHECK-NEXT:    mov x9, #-8 // =0xfffffffffffffff8
; CHECK-NEXT:    st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    sub x10, x8, #32
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT:    st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x10, #1, mul vl]
; CHECK-NEXT:    addvl sp, sp, #4
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, i32 -8)
  ret <vscale x 8 x i32> %res
}

; Verify splitvec type legalisation works as expected.
; Same stack-based lowering across four Z registers; the 68-byte offset
; (17 * 4) is clamped against the runtime byte size of %a (rdvl/cmp/csel)
; before being subtracted from the end of the stacked copy of %a.
define <vscale x 16 x float> @splice_nxv16f32_neg17(<vscale x 16 x float> %a, <vscale x 16 x float> %b) vscale_range(2,16) #0 {
; CHECK-LABEL: splice_nxv16f32_neg17:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-8
; CHECK-NEXT:    rdvl x8, #4
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov w9, #68 // =0x44
; CHECK-NEXT:    cmp x8, #68
; CHECK-NEXT:    mov x10, sp
; CHECK-NEXT:    csel x9, x8, x9, lo
; CHECK-NEXT:    add x8, x10, x8
; CHECK-NEXT:    st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT:    st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    sub x8, x8, x9
; CHECK-NEXT:    st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    st1w { z7.s }, p0, [sp, #7, mul vl]
; CHECK-NEXT:    st1w { z4.s }, p0, [sp, #4, mul vl]
; CHECK-NEXT:    st1w { z5.s }, p0, [sp, #5, mul vl]
; CHECK-NEXT:    st1w { z6.s }, p0, [sp, #6, mul vl]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x8, #2, mul vl]
; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x8, #3, mul vl]
; CHECK-NEXT:    addvl sp, sp, #8
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x float> @llvm.vector.splice.nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b, i32 -17)
  ret <vscale x 16 x float> %res
}

declare <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, i32)
declare <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, i32)
declare <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, i32)
declare <vscale x 16 x i1> @llvm.vector.splice.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, i32)

declare <vscale x 2 x i8> @llvm.vector.splice.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, i32)
declare <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.vector.splice.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 8 x i32> @llvm.vector.splice.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, i32)
declare <vscale x 2 x i64> @llvm.vector.splice.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, i32)
declare <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, i32)
declare <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
declare <vscale x 2 x float> @llvm.vector.splice.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 4 x float> @llvm.vector.splice.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 16 x float> @llvm.vector.splice.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, i32)
declare <vscale x 2 x double> @llvm.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)

declare <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i32)
declare <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i32)
declare <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)

attributes #0 = { nounwind "target-features"="+sve" }