; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; Vector concatenation written as shufflevector, compiled for aarch64 twice:
; once with the default instruction selector and once with -global-isel.
; Assertions shared by both runs use the common prefix; diverging output is
; checked under the per-run prefixes.
;
; Shuffle-mask lanes that do not matter are spelled "poison" throughout.

; Concatenate two <2 x i8> vectors into one <4 x i8>.
define <4 x i8> @concat1(<2 x i8> %A, <2 x i8> %B) {
; CHECK-SD-LABEL: concat1:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat1:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov w8, v0.s[1]
; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT:    mov w9, v1.s[1]
; CHECK-GI-NEXT:    mov v0.h[1], w8
; CHECK-GI-NEXT:    fmov w8, s1
; CHECK-GI-NEXT:    mov v0.h[2], w8
; CHECK-GI-NEXT:    mov v0.h[3], w9
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %v4i8 = shufflevector <2 x i8> %A, <2 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i8> %v4i8
}

; Concatenate two <4 x i8> vectors into one <8 x i8>.
define <8 x i8> @concat2(<4 x i8> %A, <4 x i8> %B) {
; CHECK-SD-LABEL: concat2:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat2:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT:    uzp1 v1.8b, v1.8b, v0.8b
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    mov v0.s[0], w8
; CHECK-GI-NEXT:    fmov w8, s1
; CHECK-GI-NEXT:    mov v0.s[1], w8
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %v8i8 = shufflevector <4 x i8> %A, <4 x i8> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %v8i8
}

; Concatenate two <8 x i8> vectors into one <16 x i8>.
define <16 x i8> @concat3(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: concat3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %v16i8 = shufflevector <8 x i8> %A, <8 x i8> %B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %v16i8
}

; Concatenate two <2 x i16> vectors into one <4 x i16>.
define <4 x i16> @concat4(<2 x i16> %A, <2 x i16> %B) {
; CHECK-SD-LABEL: concat4:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat4:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-NEXT:    uzp1 v1.4h, v1.4h, v0.4h
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    mov v0.s[0], w8
; CHECK-GI-NEXT:    fmov w8, s1
; CHECK-GI-NEXT:    mov v0.s[1], w8
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %v4i16 = shufflevector <2 x i16> %A, <2 x i16> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i16> %v4i16
}

; Concatenate two <4 x i16> vectors into one <8 x i16>.
define <8 x i16> @concat5(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: concat5:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %v8i16 = shufflevector <4 x i16> %A, <4 x i16> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %v8i16
}

; Load two <8 x i16> vectors and concatenate them into a <16 x i16> result.
define <16 x i16> @concat6(ptr %A, ptr %B) {
; CHECK-LABEL: concat6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %v16i16 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %v16i16
}

; Concatenate two <2 x i32> vectors into one <4 x i32>.
define <4 x i32> @concat7(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: concat7:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %v4i32 = shufflevector <2 x i32> %A, <2 x i32> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %v4i32
}

; Load two <4 x i32> vectors and concatenate them into a <8 x i32> result.
define <8 x i32> @concat8(ptr %A, ptr %B) {
; CHECK-LABEL: concat8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %v8i32 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %v8i32
}

; Concatenate two <2 x half> vectors into one <4 x half>.
define <4 x half> @concat9(<2 x half> %A, <2 x half> %B) {
; CHECK-SD-LABEL: concat9:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    zip1 v0.2s, v0.2s, v1.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat9:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    mov v0.s[0], w8
; CHECK-GI-NEXT:    fmov w8, s1
; CHECK-GI-NEXT:    mov v0.s[1], w8
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %v4half = shufflevector <2 x half> %A, <2 x half> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x half> %v4half
}

; Concatenate two <4 x half> vectors into one <8 x half>.
define <8 x half> @concat10(<4 x half> %A, <4 x half> %B) {
; CHECK-LABEL: concat10:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %v8half = shufflevector <4 x half> %A, <4 x half> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %v8half
}

; Concatenate two <8 x half> arguments into a <16 x half> result.
define <16 x half> @concat11(<8 x half> %A, <8 x half> %B) {
; CHECK-LABEL: concat11:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %v16half = shufflevector <8 x half> %A, <8 x half> %B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x half> %v16half
}

; Widen a loaded <2 x i16> to <8 x i16>: lanes 0-1 are the loaded elements,
; lanes 2-7 are poison.
define <8 x i16> @concat_v8s16_v2s16(ptr %ptr) {
; CHECK-SD-LABEL: concat_v8s16_v2s16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldr s0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_v8s16_v2s16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldrh w8, [x0]
; CHECK-GI-NEXT:    ldrh w9, [x0, #2]
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    mov v1.h[1], w9
; CHECK-GI-NEXT:    mov v0.s[0], v1.s[0]
; CHECK-GI-NEXT:    ret
  %a = load <2 x i16>, ptr %ptr
  %b = shufflevector <2 x i16> %a, <2 x i16> %a, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  ret <8 x i16> %b
}

; Widen a loaded <4 x i8> to <16 x i8>: lanes 0-3 are the loaded elements,
; lanes 4-15 are poison.
define <16 x i8> @concat_v16s8_v4s8(ptr %ptr) {
; CHECK-LABEL: concat_v16s8_v4s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %ptr
  %b = shufflevector <4 x i8> %a, <4 x i8> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  ret <16 x i8> %b
}

; Concatenate four loaded <4 x i8> vectors into one <16 x i8>: each pair is
; first joined into the low 8 lanes of a <16 x i8> (upper lanes poison), then
; the two low halves are combined.
define <16 x i8> @concat_v16s8_v4s8_load(ptr %ptrA, ptr %ptrB, ptr %ptrC, ptr %ptrD) {
; CHECK-LABEL: concat_v16s8_v4s8_load:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ld1 { v0.s }[1], [x1]
; CHECK-NEXT:    ld1 { v0.s }[2], [x2]
; CHECK-NEXT:    ld1 { v0.s }[3], [x3]
; CHECK-NEXT:    ret
  %A = load <4 x i8>, ptr %ptrA
  %B = load <4 x i8>, ptr %ptrB
  %C = load <4 x i8>, ptr %ptrC
  %D = load <4 x i8>, ptr %ptrD
  %b = shufflevector <4 x i8> %A, <4 x i8> %B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %c = shufflevector <4 x i8> %C, <4 x i8> %D, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %d = shufflevector <16 x i8> %b, <16 x i8> %c, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %d
}


; Same three-shuffle pattern as the load variant above, but the four <4 x i8>
; inputs arrive as function arguments.
define <16 x i8> @concat_v16s8_v4s8_reg(<4 x i8> %A, <4 x i8> %B, <4 x i8> %C, <4 x i8> %D) {
; CHECK-SD-LABEL: concat_v16s8_v4s8_reg:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT:    mov v2.d[1], v3.d[0]
; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_v16s8_v4s8_reg:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT:    uzp1 v1.8b, v1.8b, v0.8b
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    mov v0.s[0], w8
; CHECK-GI-NEXT:    fmov w8, s1
; CHECK-GI-NEXT:    uzp1 v2.8b, v2.8b, v0.8b
; CHECK-GI-NEXT:    mov v0.s[1], w8
; CHECK-GI-NEXT:    uzp1 v1.8b, v3.8b, v0.8b
; CHECK-GI-NEXT:    fmov w8, s2
; CHECK-GI-NEXT:    mov v0.s[2], w8
; CHECK-GI-NEXT:    fmov w8, s1
; CHECK-GI-NEXT:    mov v0.s[3], w8
; CHECK-GI-NEXT:    ret
  %b = shufflevector <4 x i8> %A, <4 x i8> %B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %c = shufflevector <4 x i8> %C, <4 x i8> %D, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %d = shufflevector <16 x i8> %b, <16 x i8> %c, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %d
}

; Concatenate four <2 x i16> arguments into one <8 x i16>: each pair is joined
; into the low 4 lanes of a <8 x i16> (upper lanes poison), then the two low
; halves are combined.
define <8 x i16> @concat_v8s16_v2s16_reg(<2 x i16> %A, <2 x i16> %B, <2 x i16> %C, <2 x i16> %D) {
; CHECK-SD-LABEL: concat_v8s16_v2s16_reg:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    adrp x8, .LCPI15_0
; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI15_0]
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_v8s16_v2s16_reg:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-NEXT:    uzp1 v1.4h, v1.4h, v0.4h
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    mov v0.s[0], w8
; CHECK-GI-NEXT:    fmov w8, s1
; CHECK-GI-NEXT:    uzp1 v2.4h, v2.4h, v0.4h
; CHECK-GI-NEXT:    mov v0.s[1], w8
; CHECK-GI-NEXT:    uzp1 v1.4h, v3.4h, v0.4h
; CHECK-GI-NEXT:    fmov w8, s2
; CHECK-GI-NEXT:    mov v0.s[2], w8
; CHECK-GI-NEXT:    fmov w8, s1
; CHECK-GI-NEXT:    mov v0.s[3], w8
; CHECK-GI-NEXT:    ret
  %b = shufflevector <2 x i16> %A, <2 x i16> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
  %c = shufflevector <2 x i16> %C, <2 x i16> %D, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
  %d = shufflevector <8 x i16> %b, <8 x i16> %c, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %d
}

; Result lanes 0-1 are poison; lanes 2-3 take elements 0-1 of the first
; shuffle operand (%ext1).
; NOTE(review): %l2 is loaded but never used; %ext2 is built from %l1. Confirm
; this is intentional before changing it, since the assertions depend on it.
define <4 x i16> @concat_undef_first_use_first(ptr %p1, ptr %p2) {
; CHECK-SD-LABEL: concat_undef_first_use_first:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ld1r { v0.2s }, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_undef_first_use_first:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldrh w8, [x0]
; CHECK-GI-NEXT:    ldrh w9, [x0, #2]
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    mov v1.h[1], w9
; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %l1 = load <2 x i16>, ptr %p1
  %l2 = load <2 x i16>, ptr %p2
  %ext1 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %ext2 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %t = shufflevector <8 x i16> %ext1, <8 x i16> %ext2, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
  ret <4 x i16> %t
}

; Result lanes 0-1 are poison; lanes 2-3 take elements 0-1 of the second
; shuffle operand (%ext2, mask indices 8-9).
; NOTE(review): %l2 is loaded but never used; %ext2 is built from %l1. Confirm
; this is intentional before changing it, since the assertions depend on it.
define <4 x i16> @concat_undef_first_use_second(ptr %p1, ptr %p2) {
; CHECK-SD-LABEL: concat_undef_first_use_second:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ld1r { v0.2s }, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_undef_first_use_second:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldrh w8, [x0]
; CHECK-GI-NEXT:    ldrh w9, [x0, #2]
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    mov v1.h[1], w9
; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %l1 = load <2 x i16>, ptr %p1
  %l2 = load <2 x i16>, ptr %p2
  %ext1 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %ext2 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %t = shufflevector <8 x i16> %ext1, <8 x i16> %ext2, <4 x i32> <i32 poison, i32 poison, i32 8, i32 9>
  ret <4 x i16> %t
}

; Result lanes 0-1 are poison; lanes 2-3 select elements 2-3 of %ext1, which
; are themselves poison lanes of that shuffle.
; NOTE(review): %l2 is loaded but never used; %ext2 is built from %l1. Confirm
; this is intentional before changing it, since the assertions depend on it.
define <4 x i16> @concat_undef_first_use_undef(ptr %p1, ptr %p2) {
; CHECK-SD-LABEL: concat_undef_first_use_undef:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ldr s0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_undef_first_use_undef:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ret
  %l1 = load <2 x i16>, ptr %p1
  %l2 = load <2 x i16>, ptr %p2
  %ext1 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %ext2 = shufflevector <2 x i16> %l1, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %t = shufflevector <8 x i16> %ext1, <8 x i16> %ext2, <4 x i32> <i32 poison, i32 poison, i32 2, i32 3>
  ret <4 x i16> %t
}

; Concatenate the low half of %a_vec with the low half of %b_vec.
define <8 x i16> @concat_low_low_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) {
; CHECK-LABEL: concat_low_low_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shuffle.i4 = shufflevector <4 x i16> %shuffle.i3, <4 x i16> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle.i4
}

; Concatenate the high half of %a_vec with the low half of %b_vec.
define <8 x i16> @concat_high_low_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) {
; CHECK-SD-LABEL: concat_high_low_v8i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v1.16b, #8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_high_low_v8i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov d0, v0.d[1]
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shuffle.i4 = shufflevector <4 x i16> %shuffle.i3, <4 x i16> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle.i4
}

; Concatenate the low half of %a_vec with the high half of %b_vec.
define <8 x i16> @concat_low_high_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) {
; CHECK-SD-LABEL: concat_low_high_v8i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_low_high_v8i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov d1, v1.d[1]
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i4 = shufflevector <4 x i16> %shuffle.i3, <4 x i16> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle.i4
}

; Concatenate the high half of %a_vec with the high half of %b_vec (i16).
define <8 x i16> @concat_high_high_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) {
; CHECK-LABEL: concat_high_high_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v1.d[0], v0.d[1]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i4 = shufflevector <4 x i16> %shuffle.i3, <4 x i16> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle.i4
}

; Concatenate the high half of %a_vec with the high half of %b_vec (half).
define <8 x half> @concat_high_high_v8f16(<8 x half> %a_vec, <8 x half> %b_vec) {
; CHECK-LABEL: concat_high_high_v8f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v1.d[0], v0.d[1]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <8 x half> %a_vec, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i = shufflevector <8 x half> %b_vec, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i4 = shufflevector <4 x half> %shuffle.i3, <4 x half> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %shuffle.i4
}

; Concatenate the high half of %a_vec with the high half of %b_vec (bfloat).
define <8 x bfloat> @concat_high_high_v8bf16(<8 x bfloat> %a_vec, <8 x bfloat> %b_vec) {
; CHECK-LABEL: concat_high_high_v8bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v1.d[0], v0.d[1]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <8 x bfloat> %a_vec, <8 x bfloat> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i = shufflevector <8 x bfloat> %b_vec, <8 x bfloat> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %shuffle.i4 = shufflevector <4 x bfloat> %shuffle.i3, <4 x bfloat> %shuffle.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x bfloat> %shuffle.i4
}

; Concatenate the high half of %a_vec with the high half of %b_vec (i32).
define <4 x i32> @concat_high_high_v4i32(<4 x i32> %a_vec, <4 x i32> %b_vec) {
; CHECK-SD-LABEL: concat_high_high_v4i32:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    zip2 v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_high_high_v4i32:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov v1.d[0], v0.d[1]
; CHECK-GI-NEXT:    mov v0.16b, v1.16b
; CHECK-GI-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <4 x i32> %a_vec, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
  %shuffle.i = shufflevector <4 x i32> %b_vec, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
  %shuffle.i4 = shufflevector <2 x i32> %shuffle.i3, <2 x i32> %shuffle.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %shuffle.i4
}

; Concatenate the high half of %a_vec with the high half of %b_vec (float).
define <4 x float> @concat_high_high_v4f32(<4 x float> %a_vec, <4 x float> %b_vec) {
; CHECK-SD-LABEL: concat_high_high_v4f32:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    zip2 v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_high_high_v4f32:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov v1.d[0], v0.d[1]
; CHECK-GI-NEXT:    mov v0.16b, v1.16b
; CHECK-GI-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <4 x float> %a_vec, <4 x float> poison, <2 x i32> <i32 2, i32 3>
  %shuffle.i = shufflevector <4 x float> %b_vec, <4 x float> poison, <2 x i32> <i32 2, i32 3>
  %shuffle.i4 = shufflevector <2 x float> %shuffle.i3, <2 x float> %shuffle.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuffle.i4
}

; Concatenate the high half of %a_vec with the high half of %b_vec (i8).
define <16 x i8> @concat_high_high_v16i8(<16 x i8> %a_vec, <16 x i8> %b_vec) {
; CHECK-LABEL: concat_high_high_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v1.d[0], v0.d[1]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <16 x i8> %a_vec, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i = shufflevector <16 x i8> %b_vec, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %shuffle.i4 = shufflevector <8 x i8> %shuffle.i3, <8 x i8> %shuffle.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %shuffle.i4
}

; Concatenate element 1 of %a_vec with element 1 of %b_vec (i64).
define <2 x i64> @concat_high_high_v2i64(<2 x i64> %a_vec, <2 x i64> %b_vec) {
; CHECK-SD-LABEL: concat_high_high_v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    zip2 v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_high_high_v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov v0.d[0], v0.d[1]
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[1]
; CHECK-GI-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <2 x i64> %a_vec, <2 x i64> poison, <1 x i32> <i32 1>
  %shuffle.i = shufflevector <2 x i64> %b_vec, <2 x i64> poison, <1 x i32> <i32 1>
  %shuffle.i4 = shufflevector <1 x i64> %shuffle.i3, <1 x i64> %shuffle.i, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuffle.i4
}

; Concatenate element 1 of %a_vec with element 1 of %b_vec (double).
define <2 x double> @concat_high_high_v2f64(<2 x double> %a_vec, <2 x double> %b_vec) {
; CHECK-SD-LABEL: concat_high_high_v2f64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    zip2 v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: concat_high_high_v2f64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov v0.d[0], v0.d[1]
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[1]
; CHECK-GI-NEXT:    ret
entry:
  %shuffle.i3 = shufflevector <2 x double> %a_vec, <2 x double> poison, <1 x i32> <i32 1>
  %shuffle.i = shufflevector <2 x double> %b_vec, <2 x double> poison, <1 x i32> <i32 1>
  %shuffle.i4 = shufflevector <1 x double> %shuffle.i3, <1 x double> %shuffle.i, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuffle.i4
}