; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; ===== Legal Vector Types ===== define <8 x i8> @shufflevector_v8i8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-SD-LABEL: shufflevector_v8i8: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-SD-NEXT: adrp x8, .LCPI0_0 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] ; CHECK-SD-NEXT: tbl v0.8b, { v0.16b }, v1.8b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v8i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: adrp x8, .LCPI0_0 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> ret <8 x i8> %c } define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) { ; CHECK-SD-LABEL: shufflevector_v16i8: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI1_0 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v16i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI1_0 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %c } define <4 x i16> @shufflevector_v4i16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: shufflevector_v4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: uzp2 v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> ret <4 x i16> %c } define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-SD-LABEL: shufflevector_v8i16: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI3_0 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v8i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI3_0 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %c } define <2 x i32> @shufflevector_v2i32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: shufflevector_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: zip2 v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %c = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %c } define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: shufflevector_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c } define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: shufflevector_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %c } define <2 x ptr> @shufflevector_v2p0(<2 x ptr> %a, <2 x ptr> %b) { ; CHECK-LABEL: shufflevector_v2p0: ; CHECK: // %bb.0: ; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret %c = shufflevector <2 x ptr> %a, <2 x ptr> %b, <2 x i32> ret <2 x ptr> %c } ; ===== Legal Vector Types with Zero Masks ===== define <8 x i8> @shufflevector_v8i8_zeroes(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: shufflevector_v8i8_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: dup v0.8b, v0.b[0] ; CHECK-NEXT: ret %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> ret <8 x i8> %c } define <16 x i8> @shufflevector_v16i8_zeroes(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: shufflevector_v16i8_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: dup v0.16b, v0.b[0] ; CHECK-NEXT: ret %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %c } define <4 x i16> @shufflevector_v4i16_zeroes(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: shufflevector_v4i16_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: dup v0.4h, v0.h[0] ; CHECK-NEXT: ret %c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> ret <4 x i16> %c } define <8 x i16> @shufflevector_v8i16_zeroes(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: shufflevector_v8i16_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: dup v0.8h, v0.h[0] ; CHECK-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %c } define <2 x i32> @shufflevector_v2i32_zeroes(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: shufflevector_v2i32_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: dup v0.2s, v0.s[0] ; CHECK-NEXT: ret %c = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> ret <2 x i32> %c } define <4 x i32> @shufflevector_v4i32_zeroes(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: shufflevector_v4i32_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: dup v0.4s, v0.s[0] ; CHECK-NEXT: ret %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %c } define <2 x i64> @shufflevector_v2i64_zeroes(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: shufflevector_v2i64_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: dup v0.2d, v0.d[0] ; CHECK-NEXT: ret %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> ret <2 x i64> %c } define <2 x ptr> @shufflevector_v2p0_zeroes(<2 x ptr> %a, <2 x ptr> %b) { ; CHECK-LABEL: shufflevector_v2p0_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: dup v0.2d, v0.d[0] ; CHECK-NEXT: ret %c = shufflevector <2 x ptr> %a, <2 x ptr> %b, <2 x i32> ret <2 x ptr> %c } ; ===== Smaller/Larger Width Vectors with Legal Element Sizes ===== define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){ ; CHECK-SD-LABEL: shufflevector_v2i1: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-SD-NEXT: mov v0.s[1], v1.s[1] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v2i1: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: mov w8, v1.s[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: mov w9, v0.s[1] ; CHECK-GI-NEXT: mov v1.b[1], w8 ; CHECK-GI-NEXT: mov v0.b[1], w9 ; CHECK-GI-NEXT: mov b1, v1.b[1] ; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] ; CHECK-GI-NEXT: umov w8, v0.b[0] ; CHECK-GI-NEXT: umov w9, v0.b[1] ; CHECK-GI-NEXT: mov v0.s[0], w8 ; CHECK-GI-NEXT: mov v0.s[1], w9 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> ret <2 x i1> %c } define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){ ; CHECK-SD-LABEL: shufflevector_v4i8: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #16 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 ; CHECK-SD-NEXT: ext v0.8b, v1.8b, v0.8b, #6 ; CHECK-SD-NEXT: zip1 v1.4h, v1.4h, v0.4h ; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4 ; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: add sp, sp, #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v4i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: adrp x8, .LCPI17_0 ; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI17_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> %d = bitcast <4 x i8> %c to i32 ret i32 %d } define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){ ; CHECK-SD-LABEL: shufflevector_v32i8: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2 ; CHECK-SD-NEXT: adrp x8, .LCPI18_0 ; CHECK-SD-NEXT: adrp x9, .LCPI18_1 ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI18_0] ; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI18_1] ; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b ; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v32i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov v3.16b, v0.16b ; CHECK-GI-NEXT: adrp x8, .LCPI18_1 ; CHECK-GI-NEXT: adrp x9, .LCPI18_0 ; CHECK-GI-NEXT: mov v4.16b, v2.16b ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI18_1] ; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI18_0] ; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b ; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b ; CHECK-GI-NEXT: ret %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %c } define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){ ; CHECK-SD-LABEL: shufflevector_v2i16: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #16 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 ; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4 ; CHECK-SD-NEXT: mov w8, v0.s[1] ; CHECK-SD-NEXT: fmov w9, s0 ; CHECK-SD-NEXT: strh w9, [sp, #12] ; CHECK-SD-NEXT: strh w8, [sp, #14] ; CHECK-SD-NEXT: ldr w0, [sp, #12] ; CHECK-SD-NEXT: add sp, sp, #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v2i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: adrp x8, .LCPI19_0 ; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI19_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> %d = bitcast <2 x i16> %c to i32 ret i32 %d } define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b){ ; CHECK-SD-LABEL: shufflevector_v16i16: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2 ; CHECK-SD-NEXT: adrp x8, .LCPI20_0 ; CHECK-SD-NEXT: adrp x9, .LCPI20_1 ; CHECK-SD-NEXT: mov v1.16b, v0.16b ; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI20_0] ; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI20_1] ; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b ; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v16i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov v3.16b, v0.16b ; CHECK-GI-NEXT: adrp x8, .LCPI20_1 ; CHECK-GI-NEXT: adrp x9, .LCPI20_0 ; CHECK-GI-NEXT: mov v4.16b, v2.16b ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI20_1] ; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI20_0] ; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b ; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b ; CHECK-GI-NEXT: ret %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %c } define <1 x i32> @shufflevector_v1i32(<1 x i32> %a, <1 x i32> %b) { ; CHECK-LABEL: shufflevector_v1i32: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> ret <1 x i32> %c } define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK-SD-LABEL: shufflevector_v8i32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s ; CHECK-SD-NEXT: uzp2 v0.4s, v0.4s, v1.4s ; CHECK-SD-NEXT: mov v2.s[3], v3.s[3] ; CHECK-SD-NEXT: mov v1.16b, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v8i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI22_0 ; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: uzp2 v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI22_0] ; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v4.16b ; CHECK-GI-NEXT: ret %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %c } define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK-SD-LABEL: shufflevector_v4i64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: zip2 v2.2d, v2.2d, v3.2d ; CHECK-SD-NEXT: zip2 v0.2d, v0.2d, v1.2d ; CHECK-SD-NEXT: mov v1.16b, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v4i64: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: zip2 v0.2d, v0.2d, v1.2d ; CHECK-GI-NEXT: zip2 v1.2d, v2.2d, v3.2d ; CHECK-GI-NEXT: ret %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ret <4 x i64> %c } define <3 x ptr> @shufflevector_v3p0(<3 x ptr> %a, <3 x ptr> %b) { ; CHECK-SD-LABEL: shufflevector_v3p0: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fmov d2, d5 ; CHECK-SD-NEXT: fmov d0, d1 ; CHECK-SD-NEXT: fmov d1, d3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v3p0: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: fmov x9, d3 ; CHECK-GI-NEXT: mov v0.d[0], x8 ; CHECK-GI-NEXT: mov v2.d[0], x9 ; CHECK-GI-NEXT: fmov x8, d1 ; CHECK-GI-NEXT: fmov x9, d4 ; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: mov v2.d[1], x9 ; CHECK-GI-NEXT: fmov x8, d5 ; CHECK-GI-NEXT: mov v1.d[0], x8 ; CHECK-GI-NEXT: ext v0.16b, v0.16b, v2.16b, #8 ; CHECK-GI-NEXT: fmov x10, d1 ; CHECK-GI-NEXT: mov d2, v0.d[1] ; CHECK-GI-NEXT: fmov d1, d2 ; CHECK-GI-NEXT: fmov d2, x10 ; CHECK-GI-NEXT: ret %c = shufflevector <3 x ptr> %a, <3 x ptr> %b, <3 x i32> ret <3 x ptr> %c } define <4 x ptr> @shufflevector_v4p0(<4 x ptr> %a, <4 x ptr> %b) { ; CHECK-SD-LABEL: shufflevector_v4p0: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: zip2 v2.2d, v2.2d, v3.2d ; CHECK-SD-NEXT: zip2 v0.2d, v0.2d, v1.2d ; CHECK-SD-NEXT: mov v1.16b, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v4p0: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: zip2 v0.2d, v0.2d, v1.2d ; CHECK-GI-NEXT: zip2 v1.2d, v2.2d, v3.2d ; CHECK-GI-NEXT: ret %c = shufflevector <4 x ptr> %a, <4 x ptr> %b, <4 x i32> ret <4 x ptr> %c } ; ===== Smaller/Larger Width Vectors with Zero Masks ===== define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){ ; CHECK-SD-LABEL: shufflevector_v2i1_zeroes: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: dup v0.2s, v0.s[0] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v2i1_zeroes: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: mov w8, v0.s[1] ; CHECK-GI-NEXT: mov v0.b[1], w8 ; CHECK-GI-NEXT: dup v0.8b, v0.b[0] ; CHECK-GI-NEXT: umov w8, v0.b[0] ; CHECK-GI-NEXT: umov w9, v0.b[1] ; CHECK-GI-NEXT: mov v0.s[0], w8 ; CHECK-GI-NEXT: mov v0.s[1], w9 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> ret <2 x i1> %c } define i32 @shufflevector_v4i8_zeroes(<4 x i8> %a, <4 x i8> %b){ ; CHECK-SD-LABEL: shufflevector_v4i8_zeroes: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #16 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: dup v0.4h, v0.h[0] ; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-SD-NEXT: fmov w0, s0 ; CHECK-SD-NEXT: add sp, sp, #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v4i8_zeroes: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: dup v0.8b, v0.b[0] ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> %d = bitcast <4 x i8> %c to i32 ret i32 %d } define <32 x i8> @shufflevector_v32i8_zeroes(<32 x i8> %a, <32 x i8> %b){ ; CHECK-LABEL: shufflevector_v32i8_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: dup v0.16b, v0.b[0] ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: ret %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %c } define i32 @shufflevector_v2i16_zeroes(<2 x i16> %a, <2 x i16> %b){ ; CHECK-SD-LABEL: shufflevector_v2i16_zeroes: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: sub sp, sp, #16 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: dup v1.2s, v0.s[0] ; CHECK-SD-NEXT: fmov w9, s0 ; CHECK-SD-NEXT: strh w9, [sp, #12] ; CHECK-SD-NEXT: mov w8, v1.s[1] ; CHECK-SD-NEXT: strh w8, [sp, #14] ; CHECK-SD-NEXT: ldr w0, [sp, #12] ; CHECK-SD-NEXT: add sp, sp, #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v2i16_zeroes: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h ; CHECK-GI-NEXT: dup v0.4h, v0.h[0] ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> %d = bitcast <2 x i16> %c to i32 ret i32 %d } define <16 x i16> @shufflevector_v16i16_zeroes(<16 x i16> %a, <16 x i16> %b){ ; CHECK-LABEL: shufflevector_v16i16_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: dup v0.8h, v0.h[0] ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: ret %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %c } define <1 x i32> @shufflevector_v1i32_zeroes(<1 x i32> %a, <1 x i32> %b) { ; CHECK-LABEL: shufflevector_v1i32_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: ret %c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> ret <1 x i32> %c } define <8 x i32> @shufflevector_v8i32_zeroes(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: shufflevector_v8i32_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: dup v0.4s, v0.s[0] ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: ret %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %c } define <4 x i64> @shufflevector_v4i64_zeroes(<4 x i64> %a, <4 x i64> %b) { ; CHECK-LABEL: shufflevector_v4i64_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: dup v0.2d, v0.d[0] ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: ret %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ret <4 x i64> %c } define <4 x ptr> @shufflevector_v4p0_zeroes(<4 x ptr> %a, <4 x ptr> %b) { ; CHECK-LABEL: shufflevector_v4p0_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: dup v0.2d, v0.d[0] ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: ret %c = shufflevector <4 x ptr> %a, <4 x ptr> %b, <4 x i32> ret <4 x ptr> %c } ; ===== Vectors with Non-Pow 2 Widths ===== define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) { ; CHECK-SD-LABEL: shufflevector_v3i8: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: mov w0, w1 ; CHECK-SD-NEXT: mov w1, w2 ; CHECK-SD-NEXT: mov w2, w4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v3i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov s0, w0 ; CHECK-GI-NEXT: fmov s1, w3 ; CHECK-GI-NEXT: adrp x8, .LCPI35_0 ; CHECK-GI-NEXT: mov v0.b[1], w1 ; CHECK-GI-NEXT: mov v1.b[1], w4 ; CHECK-GI-NEXT: mov v0.b[2], w2 ; CHECK-GI-NEXT: mov v1.b[2], w5 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI35_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b ; CHECK-GI-NEXT: umov w0, v0.b[0] ; CHECK-GI-NEXT: umov w1, v0.b[1] ; CHECK-GI-NEXT: umov w2, v0.b[2] ; CHECK-GI-NEXT: ret %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> ret <3 x i8> %c } define <7 x i8> @shufflevector_v7i8(<7 x i8> %a, <7 x i8> %b) { ; CHECK-SD-LABEL: shufflevector_v7i8: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-SD-NEXT: adrp x8, .LCPI36_0 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] ; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI36_0] ; CHECK-SD-NEXT: tbl v0.8b, { v0.16b }, v1.8b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v7i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: adrp x8, .LCPI36_0 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI36_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> ret <7 x i8> %c } define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) { ; CHECK-SD-LABEL: shufflevector_v3i16: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: zip1 v1.4h, v0.4h, v1.4h ; CHECK-SD-NEXT: zip2 v0.4h, v1.4h, v0.4h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v3i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: adrp x8, .LCPI37_0 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI37_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> ret <3 x i16> %c } define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) { ; CHECK-SD-LABEL: shufflevector_v7i16: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI38_0 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI38_0] ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v7i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI38_0 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_0] ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> ret <7 x i16> %c } define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, <3 x i32> %b) { ; CHECK-SD-LABEL: shufflevector_v3i32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: zip1 v1.4s, v0.4s, v1.4s ; CHECK-SD-NEXT: zip2 v0.4s, v1.4s, v0.4s ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v3i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI39_0 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI39_0] ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> ret <3 x i32> %c } ; ===== Vectors with Non-Pow 2 Widths with Zero Masks ===== define <3 x i8> @shufflevector_v3i8_zeroes(<3 x i8> %a, <3 x i8> %b) { ; CHECK-SD-LABEL: shufflevector_v3i8_zeroes: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: mov w1, w0 ; CHECK-SD-NEXT: mov w2, w0 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v3i8_zeroes: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov s0, w0 ; CHECK-GI-NEXT: mov v0.b[1], w1 ; CHECK-GI-NEXT: mov v0.b[2], w2 ; CHECK-GI-NEXT: dup v0.8b, v0.b[0] ; CHECK-GI-NEXT: umov w0, v0.b[0] ; CHECK-GI-NEXT: umov w1, v0.b[1] ; CHECK-GI-NEXT: umov w2, v0.b[2] ; CHECK-GI-NEXT: ret %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> ret <3 x i8> %c } define <7 x i8> @shufflevector_v7i8_zeroes(<7 x i8> %a, <7 x i8> %b) { ; CHECK-LABEL: shufflevector_v7i8_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: dup v0.8b, v0.b[0] ; CHECK-NEXT: ret %c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> ret <7 x i8> %c } define <3 x i16> @shufflevector_v3i16_zeroes(<3 x i16> %a, <3 x i16> %b) { ; CHECK-LABEL: shufflevector_v3i16_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: dup v0.4h, v0.h[0] ; CHECK-NEXT: ret %c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> ret <3 x i16> %c } define <7 x i16> @shufflevector_v7i16_zeroes(<7 x i16> %a, <7 x i16> %b) { ; CHECK-LABEL: shufflevector_v7i16_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: dup v0.8h, v0.h[0] ; CHECK-NEXT: ret %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> ret <7 x i16> %c } define <3 x i32> @shufflevector_v3i32_zeroes(<3 x i32> %a, <3 x i32> %b) { ; CHECK-LABEL: shufflevector_v3i32_zeroes: ; CHECK: // %bb.0: ; CHECK-NEXT: dup v0.4s, v0.s[0] ; CHECK-NEXT: ret %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> ret <3 x i32> %c }