1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK 3; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK 4; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE 5; Exercises concatenation of fixed-length vectors (shufflevector with a sequential 0..N-1 mask) for the two CHECK runs and the NONEON-NOSVE run above. 6target triple = "aarch64-unknown-linux-gnu" 7 8; 9; i8 10; 11 12define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2) { 13; CHECK-LABEL: concat_v8i8: 14; CHECK: // %bb.0: 15; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 16; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 17; CHECK-NEXT: mov z2.h, z1.h[3] 18; CHECK-NEXT: mov z3.h, z1.h[2] 19; CHECK-NEXT: mov z4.h, z1.h[1] 20; CHECK-NEXT: mov z5.h, z0.h[3] 21; CHECK-NEXT: mov z6.h, z0.h[2] 22; CHECK-NEXT: mov z7.h, z0.h[1] 23; CHECK-NEXT: zip1 z2.b, z3.b, z2.b 24; CHECK-NEXT: zip1 z1.b, z1.b, z4.b 25; CHECK-NEXT: zip1 z3.b, z6.b, z5.b 26; CHECK-NEXT: zip1 z0.b, z0.b, z7.b 27; CHECK-NEXT: zip1 z1.h, z1.h, z2.h 28; CHECK-NEXT: zip1 z0.h, z0.h, z3.h 29; CHECK-NEXT: zip1 z0.s, z0.s, z1.s 30; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 31; CHECK-NEXT: ret 32; 33; NONEON-NOSVE-LABEL: concat_v8i8: 34; NONEON-NOSVE: // %bb.0: 35; NONEON-NOSVE-NEXT: sub sp, sp, #32 36; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 37; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] 38; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22] 39; NONEON-NOSVE-NEXT: strb w8, [sp, #31] 40; NONEON-NOSVE-NEXT: ldrh w8, [sp, #20] 41; NONEON-NOSVE-NEXT: strb w8, [sp, #30] 42; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] 43; NONEON-NOSVE-NEXT: strb w8, [sp, #29] 44; NONEON-NOSVE-NEXT: ldrh w8, [sp, #16] 45; NONEON-NOSVE-NEXT: strb w8, [sp, #28] 46; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14] 47; NONEON-NOSVE-NEXT: strb w8, [sp, #27] 48; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] 49; NONEON-NOSVE-NEXT: strb w8, [sp, #26] 50; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] 51; NONEON-NOSVE-NEXT: strb w8, [sp, #25] 52; NONEON-NOSVE-NEXT: ldrh w8, [sp, 
#8] 53; NONEON-NOSVE-NEXT: strb w8, [sp, #24] 54; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] 55; NONEON-NOSVE-NEXT: add sp, sp, #32 56; NONEON-NOSVE-NEXT: ret 57 %res = shufflevector <4 x i8> %op1, <4 x i8> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 58 ret <8 x i8> %res 59} 60 61define <16 x i8> @concat_v16i8(<8 x i8> %op1, <8 x i8> %op2) { 62; CHECK-LABEL: concat_v16i8: 63; CHECK: // %bb.0: 64; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1 65; CHECK-NEXT: ptrue p0.b, vl8 66; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1 67; CHECK-NEXT: splice z0.b, p0, { z0.b, z1.b } 68; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 69; CHECK-NEXT: ret 70; 71; NONEON-NOSVE-LABEL: concat_v16i8: 72; NONEON-NOSVE: // %bb.0: 73; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-16]! 74; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 75; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 76; NONEON-NOSVE-NEXT: ret 77 %res = shufflevector <8 x i8> %op1, <8 x i8> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 78 i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 79 ret <16 x i8> %res 80} 81 82define void @concat_v32i8(ptr %a, ptr %b, ptr %c) { 83; CHECK-LABEL: concat_v32i8: 84; CHECK: // %bb.0: 85; CHECK-NEXT: ldr q0, [x1] 86; CHECK-NEXT: ldr q1, [x0] 87; CHECK-NEXT: stp q1, q0, [x2] 88; CHECK-NEXT: ret 89; 90; NONEON-NOSVE-LABEL: concat_v32i8: 91; NONEON-NOSVE: // %bb.0: 92; NONEON-NOSVE-NEXT: ldr q0, [x1] 93; NONEON-NOSVE-NEXT: ldr q1, [x0] 94; NONEON-NOSVE-NEXT: stp q1, q0, [x2] 95; NONEON-NOSVE-NEXT: ret 96 %op1 = load <16 x i8>, ptr %a 97 %op2 = load <16 x i8>, ptr %b 98 %res = shufflevector <16 x i8> %op1, <16 x i8> %op2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 99 i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, 100 i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, 101 i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 102 store <32 x i8> 
%res, ptr %c 103 ret void 104} 105 106define void @concat_v64i8(ptr %a, ptr %b, ptr %c) { 107; CHECK-LABEL: concat_v64i8: 108; CHECK: // %bb.0: 109; CHECK-NEXT: ldp q0, q1, [x1] 110; CHECK-NEXT: ldp q3, q2, [x0] 111; CHECK-NEXT: stp q0, q1, [x2, #32] 112; CHECK-NEXT: stp q3, q2, [x2] 113; CHECK-NEXT: ret 114; 115; NONEON-NOSVE-LABEL: concat_v64i8: 116; NONEON-NOSVE: // %bb.0: 117; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] 118; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] 119; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32] 120; NONEON-NOSVE-NEXT: stp q3, q2, [x2] 121; NONEON-NOSVE-NEXT: ret 122 %op1 = load <32 x i8>, ptr %a 123 %op2 = load <32 x i8>, ptr %b 124 %res = shufflevector <32 x i8> %op1, <32 x i8> %op2, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 125 i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, 126 i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, 127 i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, 128 i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, 129 i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, 130 i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, 131 i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 132 store <64 x i8> %res, ptr %c 133 ret void 134} 135 136; 137; i16 138; 139 140define <4 x i16> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2) { 141; CHECK-LABEL: concat_v4i16: 142; CHECK: // %bb.0: 143; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 144; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 145; CHECK-NEXT: mov z2.s, z1.s[1] 146; CHECK-NEXT: mov z3.s, z0.s[1] 147; CHECK-NEXT: zip1 z1.h, z1.h, z2.h 148; CHECK-NEXT: zip1 z0.h, z0.h, z3.h 149; CHECK-NEXT: zip1 z0.s, z0.s, z1.s 150; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 151; CHECK-NEXT: ret 152; 153; NONEON-NOSVE-LABEL: concat_v4i16: 154; NONEON-NOSVE: // %bb.0: 155; NONEON-NOSVE-NEXT: sub sp, sp, #32 156; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 157; NONEON-NOSVE-NEXT: 
stp d0, d1, [sp, #8] 158; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] 159; NONEON-NOSVE-NEXT: strh w9, [sp, #30] 160; NONEON-NOSVE-NEXT: strh w8, [sp, #28] 161; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] 162; NONEON-NOSVE-NEXT: strh w9, [sp, #26] 163; NONEON-NOSVE-NEXT: strh w8, [sp, #24] 164; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] 165; NONEON-NOSVE-NEXT: add sp, sp, #32 166; NONEON-NOSVE-NEXT: ret 167 %res = shufflevector <2 x i16> %op1, <2 x i16> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 168 ret <4 x i16> %res 169} 170 171; The CHECK runs use SVE (splice) for this 128-bit concat; only NONEON-NOSVE avoids SVE. 172define <8 x i16> @concat_v8i16(<4 x i16> %op1, <4 x i16> %op2) { 173; CHECK-LABEL: concat_v8i16: 174; CHECK: // %bb.0: 175; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1 176; CHECK-NEXT: ptrue p0.h, vl4 177; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1 178; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } 179; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 180; CHECK-NEXT: ret 181; 182; NONEON-NOSVE-LABEL: concat_v8i16: 183; NONEON-NOSVE: // %bb.0: 184; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-16]! 
185; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 186; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 187; NONEON-NOSVE-NEXT: ret 188 %res = shufflevector <4 x i16> %op1, <4 x i16> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 189 ret <8 x i16> %res 190} 191 192define void @concat_v16i16(ptr %a, ptr %b, ptr %c) { 193; CHECK-LABEL: concat_v16i16: 194; CHECK: // %bb.0: 195; CHECK-NEXT: ldr q0, [x1] 196; CHECK-NEXT: ldr q1, [x0] 197; CHECK-NEXT: stp q1, q0, [x2] 198; CHECK-NEXT: ret 199; 200; NONEON-NOSVE-LABEL: concat_v16i16: 201; NONEON-NOSVE: // %bb.0: 202; NONEON-NOSVE-NEXT: ldr q0, [x1] 203; NONEON-NOSVE-NEXT: ldr q1, [x0] 204; NONEON-NOSVE-NEXT: stp q1, q0, [x2] 205; NONEON-NOSVE-NEXT: ret 206 %op1 = load <8 x i16>, ptr %a 207 %op2 = load <8 x i16>, ptr %b 208 %res = shufflevector <8 x i16> %op1, <8 x i16> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 209 i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 210 store <16 x i16> %res, ptr %c 211 ret void 212} 213 214define void @concat_v32i16(ptr %a, ptr %b, ptr %c) { 215; CHECK-LABEL: concat_v32i16: 216; CHECK: // %bb.0: 217; CHECK-NEXT: ldp q0, q1, [x1] 218; CHECK-NEXT: ldp q3, q2, [x0] 219; CHECK-NEXT: stp q0, q1, [x2, #32] 220; CHECK-NEXT: stp q3, q2, [x2] 221; CHECK-NEXT: ret 222; 223; NONEON-NOSVE-LABEL: concat_v32i16: 224; NONEON-NOSVE: // %bb.0: 225; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] 226; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] 227; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32] 228; NONEON-NOSVE-NEXT: stp q3, q2, [x2] 229; NONEON-NOSVE-NEXT: ret 230 %op1 = load <16 x i16>, ptr %a 231 %op2 = load <16 x i16>, ptr %b 232 %res = shufflevector <16 x i16> %op1, <16 x i16> %op2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 233 i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, 234 i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, 235 i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 236 store <32 x i16> 
%res, ptr %c 237 ret void 238} 239 240; 241; i32 242; 243 244; The CHECK runs use SVE (zip1) for this 64-bit concat; only NONEON-NOSVE avoids SVE. 245define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2) { 246; CHECK-LABEL: concat_v2i32: 247; CHECK: // %bb.0: 248; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 249; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 250; CHECK-NEXT: zip1 z0.s, z0.s, z1.s 251; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 252; CHECK-NEXT: ret 253; 254; NONEON-NOSVE-LABEL: concat_v2i32: 255; NONEON-NOSVE: // %bb.0: 256; NONEON-NOSVE-NEXT: sub sp, sp, #32 257; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 258; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] 259; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] 260; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] 261; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] 262; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] 263; NONEON-NOSVE-NEXT: add sp, sp, #32 264; NONEON-NOSVE-NEXT: ret 265 %res = shufflevector <1 x i32> %op1, <1 x i32> %op2, <2 x i32> <i32 0, i32 1> 266 ret <2 x i32> %res 267} 268 269; The CHECK runs use SVE (splice) for this 128-bit concat; only NONEON-NOSVE avoids SVE. 270define <4 x i32> @concat_v4i32(<2 x i32> %op1, <2 x i32> %op2) { 271; CHECK-LABEL: concat_v4i32: 272; CHECK: // %bb.0: 273; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1 274; CHECK-NEXT: ptrue p0.s, vl2 275; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1 276; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s } 277; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 278; CHECK-NEXT: ret 279; 280; NONEON-NOSVE-LABEL: concat_v4i32: 281; NONEON-NOSVE: // %bb.0: 282; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-16]! 
283; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 284; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 285; NONEON-NOSVE-NEXT: ret 286 %res = shufflevector <2 x i32> %op1, <2 x i32> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 287 ret <4 x i32> %res 288} 289 290define void @concat_v8i32(ptr %a, ptr %b, ptr %c) { 291; CHECK-LABEL: concat_v8i32: 292; CHECK: // %bb.0: 293; CHECK-NEXT: ldr q0, [x1] 294; CHECK-NEXT: ldr q1, [x0] 295; CHECK-NEXT: stp q1, q0, [x2] 296; CHECK-NEXT: ret 297; 298; NONEON-NOSVE-LABEL: concat_v8i32: 299; NONEON-NOSVE: // %bb.0: 300; NONEON-NOSVE-NEXT: ldr q0, [x1] 301; NONEON-NOSVE-NEXT: ldr q1, [x0] 302; NONEON-NOSVE-NEXT: stp q1, q0, [x2] 303; NONEON-NOSVE-NEXT: ret 304 %op1 = load <4 x i32>, ptr %a 305 %op2 = load <4 x i32>, ptr %b 306 %res = shufflevector <4 x i32> %op1, <4 x i32> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 307 store <8 x i32> %res, ptr %c 308 ret void 309} 310 311define void @concat_v16i32(ptr %a, ptr %b, ptr %c) { 312; CHECK-LABEL: concat_v16i32: 313; CHECK: // %bb.0: 314; CHECK-NEXT: ldp q0, q1, [x1] 315; CHECK-NEXT: ldp q3, q2, [x0] 316; CHECK-NEXT: stp q0, q1, [x2, #32] 317; CHECK-NEXT: stp q3, q2, [x2] 318; CHECK-NEXT: ret 319; 320; NONEON-NOSVE-LABEL: concat_v16i32: 321; NONEON-NOSVE: // %bb.0: 322; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] 323; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] 324; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32] 325; NONEON-NOSVE-NEXT: stp q3, q2, [x2] 326; NONEON-NOSVE-NEXT: ret 327 %op1 = load <8 x i32>, ptr %a 328 %op2 = load <8 x i32>, ptr %b 329 %res = shufflevector <8 x i32> %op1, <8 x i32> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 330 i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 331 store <16 x i32> %res, ptr %c 332 ret void 333} 334 335; 336; i64 337; 338 339; The CHECK runs use SVE (splice) for this 128-bit concat; only NONEON-NOSVE avoids SVE. 
340define <2 x i64> @concat_v2i64(<1 x i64> %op1, <1 x i64> %op2) { 341; CHECK-LABEL: concat_v2i64: 342; CHECK: // %bb.0: 343; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1 344; CHECK-NEXT: ptrue p0.d, vl1 345; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1 346; CHECK-NEXT: splice z0.d, p0, { z0.d, z1.d } 347; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 348; CHECK-NEXT: ret 349; 350; NONEON-NOSVE-LABEL: concat_v2i64: 351; NONEON-NOSVE: // %bb.0: 352; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-16]! 353; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 354; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 355; NONEON-NOSVE-NEXT: ret 356 %res = shufflevector <1 x i64> %op1, <1 x i64> %op2, <2 x i32> <i32 0, i32 1> 357 ret <2 x i64> %res 358} 359 360define void @concat_v4i64(ptr %a, ptr %b, ptr %c) { 361; CHECK-LABEL: concat_v4i64: 362; CHECK: // %bb.0: 363; CHECK-NEXT: ldr q0, [x1] 364; CHECK-NEXT: ldr q1, [x0] 365; CHECK-NEXT: stp q1, q0, [x2] 366; CHECK-NEXT: ret 367; 368; NONEON-NOSVE-LABEL: concat_v4i64: 369; NONEON-NOSVE: // %bb.0: 370; NONEON-NOSVE-NEXT: ldr q0, [x1] 371; NONEON-NOSVE-NEXT: ldr q1, [x0] 372; NONEON-NOSVE-NEXT: stp q1, q0, [x2] 373; NONEON-NOSVE-NEXT: ret 374 %op1 = load <2 x i64>, ptr %a 375 %op2 = load <2 x i64>, ptr %b 376 %res = shufflevector <2 x i64> %op1, <2 x i64> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 377 store <4 x i64> %res, ptr %c 378 ret void 379} 380 381define void @concat_v8i64(ptr %a, ptr %b, ptr %c) { 382; CHECK-LABEL: concat_v8i64: 383; CHECK: // %bb.0: 384; CHECK-NEXT: ldp q0, q1, [x1] 385; CHECK-NEXT: ldp q3, q2, [x0] 386; CHECK-NEXT: stp q0, q1, [x2, #32] 387; CHECK-NEXT: stp q3, q2, [x2] 388; CHECK-NEXT: ret 389; 390; NONEON-NOSVE-LABEL: concat_v8i64: 391; NONEON-NOSVE: // %bb.0: 392; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] 393; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] 394; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32] 395; NONEON-NOSVE-NEXT: stp q3, q2, [x2] 396; NONEON-NOSVE-NEXT: ret 397 %op1 = load <4 x i64>, 
ptr %a 398 %op2 = load <4 x i64>, ptr %b 399 %res = shufflevector <4 x i64> %op1, <4 x i64> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 400 store <8 x i64> %res, ptr %c 401 ret void 402} 403 404; 405; f16 406; 407 408define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) { 409; CHECK-LABEL: concat_v4f16: 410; CHECK: // %bb.0: 411; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 412; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 413; CHECK-NEXT: zip1 z0.s, z0.s, z1.s 414; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 415; CHECK-NEXT: ret 416; 417; NONEON-NOSVE-LABEL: concat_v4f16: 418; NONEON-NOSVE: // %bb.0: 419; NONEON-NOSVE-NEXT: sub sp, sp, #32 420; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 421; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] 422; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] 423; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] 424; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] 425; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] 426; NONEON-NOSVE-NEXT: add sp, sp, #32 427; NONEON-NOSVE-NEXT: ret 428 %res = shufflevector <2 x half> %op1, <2 x half> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 429 ret <4 x half> %res 430} 431 432define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2) { 433; CHECK-LABEL: concat_v8f16: 434; CHECK: // %bb.0: 435; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1 436; CHECK-NEXT: ptrue p0.h, vl4 437; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1 438; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } 439; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 440; CHECK-NEXT: ret 441; 442; NONEON-NOSVE-LABEL: concat_v8f16: 443; NONEON-NOSVE: // %bb.0: 444; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-16]! 
445; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 446; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 447; NONEON-NOSVE-NEXT: ret 448 %res = shufflevector <4 x half> %op1, <4 x half> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 449 ret <8 x half> %res 450} 451 452define void @concat_v16f16(ptr %a, ptr %b, ptr %c) { 453; CHECK-LABEL: concat_v16f16: 454; CHECK: // %bb.0: 455; CHECK-NEXT: ldr q0, [x1] 456; CHECK-NEXT: ldr q1, [x0] 457; CHECK-NEXT: stp q1, q0, [x2] 458; CHECK-NEXT: ret 459; 460; NONEON-NOSVE-LABEL: concat_v16f16: 461; NONEON-NOSVE: // %bb.0: 462; NONEON-NOSVE-NEXT: ldr q0, [x1] 463; NONEON-NOSVE-NEXT: ldr q1, [x0] 464; NONEON-NOSVE-NEXT: stp q1, q0, [x2] 465; NONEON-NOSVE-NEXT: ret 466 %op1 = load <8 x half>, ptr %a 467 %op2 = load <8 x half>, ptr %b 468 %res = shufflevector <8 x half> %op1, <8 x half> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 469 i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 470 store <16 x half> %res, ptr %c 471 ret void 472} 473 474define void @concat_v32f16(ptr %a, ptr %b, ptr %c) { 475; CHECK-LABEL: concat_v32f16: 476; CHECK: // %bb.0: 477; CHECK-NEXT: ldp q0, q1, [x1] 478; CHECK-NEXT: ldp q3, q2, [x0] 479; CHECK-NEXT: stp q0, q1, [x2, #32] 480; CHECK-NEXT: stp q3, q2, [x2] 481; CHECK-NEXT: ret 482; 483; NONEON-NOSVE-LABEL: concat_v32f16: 484; NONEON-NOSVE: // %bb.0: 485; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] 486; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] 487; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32] 488; NONEON-NOSVE-NEXT: stp q3, q2, [x2] 489; NONEON-NOSVE-NEXT: ret 490 %op1 = load <16 x half>, ptr %a 491 %op2 = load <16 x half>, ptr %b 492 %res = shufflevector <16 x half> %op1, <16 x half> %op2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 493 i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, 494 i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, 495 i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 496 store <32 
x half> %res, ptr %c 497 ret void 498} 499 500; 501; f32 502; 503 504; The CHECK runs use SVE (zip1) for this 64-bit concat; only NONEON-NOSVE avoids SVE. 505define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2) { 506; CHECK-LABEL: concat_v2f32: 507; CHECK: // %bb.0: 508; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 509; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 510; CHECK-NEXT: zip1 z0.s, z0.s, z1.s 511; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 512; CHECK-NEXT: ret 513; 514; NONEON-NOSVE-LABEL: concat_v2f32: 515; NONEON-NOSVE: // %bb.0: 516; NONEON-NOSVE-NEXT: sub sp, sp, #32 517; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 518; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] 519; NONEON-NOSVE-NEXT: ldr w9, [sp, #16] 520; NONEON-NOSVE-NEXT: ldr w8, [sp, #8] 521; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] 522; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] 523; NONEON-NOSVE-NEXT: add sp, sp, #32 524; NONEON-NOSVE-NEXT: ret 525 %res = shufflevector <1 x float> %op1, <1 x float> %op2, <2 x i32> <i32 0, i32 1> 526 ret <2 x float> %res 527} 528 529; The CHECK runs use SVE (splice) for this 128-bit concat; only NONEON-NOSVE avoids SVE. 530define <4 x float> @concat_v4f32(<2 x float> %op1, <2 x float> %op2) { 531; CHECK-LABEL: concat_v4f32: 532; CHECK: // %bb.0: 533; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1 534; CHECK-NEXT: ptrue p0.s, vl2 535; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1 536; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s } 537; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 538; CHECK-NEXT: ret 539; 540; NONEON-NOSVE-LABEL: concat_v4f32: 541; NONEON-NOSVE: // %bb.0: 542; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-16]! 
543; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 544; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 545; NONEON-NOSVE-NEXT: ret 546 %res = shufflevector <2 x float> %op1, <2 x float> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 547 ret <4 x float> %res 548} 549 550define void @concat_v8f32(ptr %a, ptr %b, ptr %c) { 551; CHECK-LABEL: concat_v8f32: 552; CHECK: // %bb.0: 553; CHECK-NEXT: ldr q0, [x1] 554; CHECK-NEXT: ldr q1, [x0] 555; CHECK-NEXT: stp q1, q0, [x2] 556; CHECK-NEXT: ret 557; 558; NONEON-NOSVE-LABEL: concat_v8f32: 559; NONEON-NOSVE: // %bb.0: 560; NONEON-NOSVE-NEXT: ldr q0, [x1] 561; NONEON-NOSVE-NEXT: ldr q1, [x0] 562; NONEON-NOSVE-NEXT: stp q1, q0, [x2] 563; NONEON-NOSVE-NEXT: ret 564 %op1 = load <4 x float>, ptr %a 565 %op2 = load <4 x float>, ptr %b 566 %res = shufflevector <4 x float> %op1, <4 x float> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 567 store <8 x float> %res, ptr %c 568 ret void 569} 570 571define void @concat_v16f32(ptr %a, ptr %b, ptr %c) { 572; CHECK-LABEL: concat_v16f32: 573; CHECK: // %bb.0: 574; CHECK-NEXT: ldp q0, q1, [x1] 575; CHECK-NEXT: ldp q3, q2, [x0] 576; CHECK-NEXT: stp q0, q1, [x2, #32] 577; CHECK-NEXT: stp q3, q2, [x2] 578; CHECK-NEXT: ret 579; 580; NONEON-NOSVE-LABEL: concat_v16f32: 581; NONEON-NOSVE: // %bb.0: 582; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] 583; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] 584; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32] 585; NONEON-NOSVE-NEXT: stp q3, q2, [x2] 586; NONEON-NOSVE-NEXT: ret 587 %op1 = load <8 x float>, ptr %a 588 %op2 = load <8 x float>, ptr %b 589 %res = shufflevector <8 x float> %op1, <8 x float> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 590 i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 591 store <16 x float> %res, ptr %c 592 ret void 593} 594 595; 596; f64 597; 598 599; The CHECK runs use SVE (splice) for this 128-bit concat; only NONEON-NOSVE avoids SVE. 
600define <2 x double> @concat_v2f64(<1 x double> %op1, <1 x double> %op2) { 601; CHECK-LABEL: concat_v2f64: 602; CHECK: // %bb.0: 603; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1 604; CHECK-NEXT: ptrue p0.d, vl1 605; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1 606; CHECK-NEXT: splice z0.d, p0, { z0.d, z1.d } 607; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 608; CHECK-NEXT: ret 609; 610; NONEON-NOSVE-LABEL: concat_v2f64: 611; NONEON-NOSVE: // %bb.0: 612; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #-16]! 613; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 614; NONEON-NOSVE-NEXT: ldr q0, [sp], #16 615; NONEON-NOSVE-NEXT: ret 616 %res = shufflevector <1 x double> %op1, <1 x double> %op2, <2 x i32> <i32 0, i32 1> 617 ret <2 x double> %res 618} 619 620define void @concat_v4f64(ptr %a, ptr %b, ptr %c) { 621; CHECK-LABEL: concat_v4f64: 622; CHECK: // %bb.0: 623; CHECK-NEXT: ldr q0, [x1] 624; CHECK-NEXT: ldr q1, [x0] 625; CHECK-NEXT: stp q1, q0, [x2] 626; CHECK-NEXT: ret 627; 628; NONEON-NOSVE-LABEL: concat_v4f64: 629; NONEON-NOSVE: // %bb.0: 630; NONEON-NOSVE-NEXT: ldr q0, [x1] 631; NONEON-NOSVE-NEXT: ldr q1, [x0] 632; NONEON-NOSVE-NEXT: stp q1, q0, [x2] 633; NONEON-NOSVE-NEXT: ret 634 %op1 = load <2 x double>, ptr %a 635 %op2 = load <2 x double>, ptr %b 636 %res = shufflevector <2 x double> %op1, <2 x double> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 637 store <4 x double> %res, ptr %c 638 ret void 639} 640 641define void @concat_v8f64(ptr %a, ptr %b, ptr %c) { 642; CHECK-LABEL: concat_v8f64: 643; CHECK: // %bb.0: 644; CHECK-NEXT: ldp q0, q1, [x1] 645; CHECK-NEXT: ldp q3, q2, [x0] 646; CHECK-NEXT: stp q0, q1, [x2, #32] 647; CHECK-NEXT: stp q3, q2, [x2] 648; CHECK-NEXT: ret 649; 650; NONEON-NOSVE-LABEL: concat_v8f64: 651; NONEON-NOSVE: // %bb.0: 652; NONEON-NOSVE-NEXT: ldp q0, q1, [x1] 653; NONEON-NOSVE-NEXT: ldp q3, q2, [x0] 654; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32] 655; NONEON-NOSVE-NEXT: stp q3, q2, [x2] 656; 
NONEON-NOSVE-NEXT: ret 657 %op1 = load <4 x double>, ptr %a 658 %op2 = load <4 x double>, ptr %b 659 %res = shufflevector <4 x double> %op1, <4 x double> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 660 store <8 x double> %res, ptr %c 661 ret void 662} 663 664; 665; concat with an undef second operand 666; 667 668define void @concat_v32i8_undef(ptr %a, ptr %b) { 669; CHECK-LABEL: concat_v32i8_undef: 670; CHECK: // %bb.0: 671; CHECK-NEXT: ldr q0, [x0] 672; CHECK-NEXT: str q0, [x1] 673; CHECK-NEXT: ret 674; 675; NONEON-NOSVE-LABEL: concat_v32i8_undef: 676; NONEON-NOSVE: // %bb.0: 677; NONEON-NOSVE-NEXT: ldr q0, [x0] 678; NONEON-NOSVE-NEXT: str q0, [x1] 679; NONEON-NOSVE-NEXT: ret 680 %op1 = load <16 x i8>, ptr %a 681 %res = shufflevector <16 x i8> %op1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 682 i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, 683 i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, 684 i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 685 store <32 x i8> %res, ptr %b 686 ret void 687} 688 689define void @concat_v16i16_undef(ptr %a, ptr %b) { 690; CHECK-LABEL: concat_v16i16_undef: 691; CHECK: // %bb.0: 692; CHECK-NEXT: ldr q0, [x0] 693; CHECK-NEXT: str q0, [x1] 694; CHECK-NEXT: ret 695; 696; NONEON-NOSVE-LABEL: concat_v16i16_undef: 697; NONEON-NOSVE: // %bb.0: 698; NONEON-NOSVE-NEXT: ldr q0, [x0] 699; NONEON-NOSVE-NEXT: str q0, [x1] 700; NONEON-NOSVE-NEXT: ret 701 %op1 = load <8 x i16>, ptr %a 702 %res = shufflevector <8 x i16> %op1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 703 i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 704 store <16 x i16> %res, ptr %b 705 ret void 706} 707 708define void @concat_v8i32_undef(ptr %a, ptr %b) { 709; CHECK-LABEL: concat_v8i32_undef: 710; CHECK: // %bb.0: 711; CHECK-NEXT: ldr q0, [x0] 712; CHECK-NEXT: str q0, [x1] 713; CHECK-NEXT: ret 714; 715; NONEON-NOSVE-LABEL: 
concat_v8i32_undef: 716; NONEON-NOSVE: // %bb.0: 717; NONEON-NOSVE-NEXT: ldr q0, [x0] 718; NONEON-NOSVE-NEXT: str q0, [x1] 719; NONEON-NOSVE-NEXT: ret 720 %op1 = load <4 x i32>, ptr %a 721 %res = shufflevector <4 x i32> %op1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 722 store <8 x i32> %res, ptr %b 723 ret void 724} 725 726define void @concat_v4i64_undef(ptr %a, ptr %b) { 727; CHECK-LABEL: concat_v4i64_undef: 728; CHECK: // %bb.0: 729; CHECK-NEXT: ldr q0, [x0] 730; CHECK-NEXT: str q0, [x1] 731; CHECK-NEXT: ret 732; 733; NONEON-NOSVE-LABEL: concat_v4i64_undef: 734; NONEON-NOSVE: // %bb.0: 735; NONEON-NOSVE-NEXT: ldr q0, [x0] 736; NONEON-NOSVE-NEXT: str q0, [x1] 737; NONEON-NOSVE-NEXT: ret 738 %op1 = load <2 x i64>, ptr %a 739 %res = shufflevector <2 x i64> %op1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 740 store <4 x i64> %res, ptr %b 741 ret void 742} 743 744; 745; > 2 operands (two nested concats, each with an undef second operand) 746; 747 748define void @concat_v32i8_4op(ptr %a, ptr %b) { 749; CHECK-LABEL: concat_v32i8_4op: 750; CHECK: // %bb.0: 751; CHECK-NEXT: ldr d0, [x0] 752; CHECK-NEXT: str q0, [x1] 753; CHECK-NEXT: ret 754; 755; NONEON-NOSVE-LABEL: concat_v32i8_4op: 756; NONEON-NOSVE: // %bb.0: 757; NONEON-NOSVE-NEXT: ldr d0, [x0] 758; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
759; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 760; NONEON-NOSVE-NEXT: ldr q0, [sp] 761; NONEON-NOSVE-NEXT: str q0, [x1] 762; NONEON-NOSVE-NEXT: add sp, sp, #16 763; NONEON-NOSVE-NEXT: ret 764 %op1 = load <8 x i8>, ptr %a 765 %shuffle = shufflevector <8 x i8> %op1, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 766 i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 767 %res = shufflevector <16 x i8> %shuffle, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 768 i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, 769 i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, 770 i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 771 store <32 x i8> %res, ptr %b 772 ret void 773} 774 775define void @concat_v16i16_4op(ptr %a, ptr %b) { 776; CHECK-LABEL: concat_v16i16_4op: 777; CHECK: // %bb.0: 778; CHECK-NEXT: ldr d0, [x0] 779; CHECK-NEXT: str q0, [x1] 780; CHECK-NEXT: ret 781; 782; NONEON-NOSVE-LABEL: concat_v16i16_4op: 783; NONEON-NOSVE: // %bb.0: 784; NONEON-NOSVE-NEXT: ldr d0, [x0] 785; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
786; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 787; NONEON-NOSVE-NEXT: ldr q0, [sp] 788; NONEON-NOSVE-NEXT: str q0, [x1] 789; NONEON-NOSVE-NEXT: add sp, sp, #16 790; NONEON-NOSVE-NEXT: ret 791 %op1 = load <4 x i16>, ptr %a 792 %shuffle = shufflevector <4 x i16> %op1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 793 %res = shufflevector <8 x i16> %shuffle, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 794 i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 795 store <16 x i16> %res, ptr %b 796 ret void 797} 798 799define void @concat_v8i32_4op(ptr %a, ptr %b) { 800; CHECK-LABEL: concat_v8i32_4op: 801; CHECK: // %bb.0: 802; CHECK-NEXT: ldr d0, [x0] 803; CHECK-NEXT: str q0, [x1] 804; CHECK-NEXT: ret 805; 806; NONEON-NOSVE-LABEL: concat_v8i32_4op: 807; NONEON-NOSVE: // %bb.0: 808; NONEON-NOSVE-NEXT: ldr d0, [x0] 809; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 810; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 811; NONEON-NOSVE-NEXT: ldr q0, [sp] 812; NONEON-NOSVE-NEXT: str q0, [x1] 813; NONEON-NOSVE-NEXT: add sp, sp, #16 814; NONEON-NOSVE-NEXT: ret 815 %op1 = load <2 x i32>, ptr %a 816 %shuffle = shufflevector <2 x i32> %op1, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 817 %res = shufflevector <4 x i32> %shuffle, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 818 store <8 x i32> %res, ptr %b 819 ret void 820} 821 822define void @concat_v4i64_4op(ptr %a, ptr %b) { 823; CHECK-LABEL: concat_v4i64_4op: 824; CHECK: // %bb.0: 825; CHECK-NEXT: ldr d0, [x0] 826; CHECK-NEXT: str q0, [x1] 827; CHECK-NEXT: ret 828; 829; NONEON-NOSVE-LABEL: concat_v4i64_4op: 830; NONEON-NOSVE: // %bb.0: 831; NONEON-NOSVE-NEXT: ldr d0, [x0] 832; NONEON-NOSVE-NEXT: str d0, [sp, #-16]! 
833; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 834; NONEON-NOSVE-NEXT: ldr q0, [sp] 835; NONEON-NOSVE-NEXT: str q0, [x1] 836; NONEON-NOSVE-NEXT: add sp, sp, #16 837; NONEON-NOSVE-NEXT: ret 838 %op1 = load <1 x i64>, ptr %a 839 %shuffle = shufflevector <1 x i64> %op1, <1 x i64> undef, <2 x i32> <i32 0, i32 1> 840 %res = shufflevector <2 x i64> %shuffle, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 841 store <4 x i64> %res, ptr %b 842 ret void 843} 844