; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; AArch64 codegen tests for the shufflevector instruction. Every function is
; compiled twice: once with the default instruction selector (SD prefix
; variant) and once with -global-isel (GI prefix variant). The plain prefix is
; used where both runs produce identical assembly.
; Mask indices below the element count select lanes of %a; larger indices
; select lanes of %b.

; ===== Legal Vector Types =====

; Lanes 1,3,5,7 of %a followed by lanes 0,2,4,7 of %b.
define <8 x i8> @shufflevector_v8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-SD-LABEL: shufflevector_v8i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT:    adrp x8, .LCPI0_0
; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v8i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT:    adrp x8, .LCPI0_0
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
    %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
    ret <8 x i8> %c
}

; Irregular mask: twelve lanes taken from %a, then lanes 9,14,15,15 of %b.
define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-SD-LABEL: shufflevector_v16i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI1_0
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI1_0]
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v16i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI1_0
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI1_0]
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT:    ret
    %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15, i32 2, i32 4, i32 6, i32 8, i32 25, i32 30, i32 31, i32 31>
    ret <16 x i8> %c
}

; Odd lanes of %a followed by odd lanes of %b.
define <4 x i16> @shufflevector_v4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: shufflevector_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
    %c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    ret <4 x i16> %c
}

; Lanes 1,3,5,7 of %a followed by lanes 0,2,4,7 of %b.
define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-SD-LABEL: shufflevector_v8i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI3_0
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v8i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI3_0
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT:    ret
    %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
    ret <8 x i16> %c
}

; Lane 1 of %a followed by lane 1 of %b.
define <2 x i32> @shufflevector_v2i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: shufflevector_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
    %c = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
    ret <2 x i32> %c
}

; Odd lanes of %a followed by odd lanes of %b.
define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shufflevector_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
    %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    ret <4 x i32> %c
}

; Lane 1 of %a followed by lane 1 of %b.
define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shufflevector_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
    %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
    ret <2 x i64> %c
}

; Pointer-element variant: lane 1 of %a followed by lane 1 of %b.
define <2 x ptr> @shufflevector_v2p0(<2 x ptr> %a, <2 x ptr> %b) {
; CHECK-LABEL: shufflevector_v2p0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
    %c = shufflevector <2 x ptr> %a, <2 x ptr> %b, <2 x i32> <i32 1, i32 3>
    ret <2 x ptr> %c
}

; ===== Legal Vector Types with Zero Masks =====

; All-zero mask: every result lane is lane 0 of %a.
define <8 x i8> @shufflevector_v8i8_zeroes(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: shufflevector_v8i8_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.8b, v0.b[0]
; CHECK-NEXT:    ret
    %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    ret <8 x i8> %c
}

; All-zero mask: every result lane is lane 0 of %a.
define <16 x i8> @shufflevector_v16i8_zeroes(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shufflevector_v16i8_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.16b, v0.b[0]
; CHECK-NEXT:    ret
    %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    ret <16 x i8> %c
}

; All-zero mask: every result lane is lane 0 of %a.
define <4 x i16> @shufflevector_v4i16_zeroes(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: shufflevector_v4i16_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.4h, v0.h[0]
; CHECK-NEXT:    ret
    %c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    ret <4 x i16> %c
}

; All-zero mask: every result lane is lane 0 of %a.
define <8 x i16> @shufflevector_v8i16_zeroes(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shufflevector_v8i16_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
    %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    ret <8 x i16> %c
}

; All-zero mask: every result lane is lane 0 of %a.
define <2 x i32> @shufflevector_v2i32_zeroes(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: shufflevector_v2i32_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.2s, v0.s[0]
; CHECK-NEXT:    ret
    %c = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 0>
    ret <2 x i32> %c
}

; All-zero mask: every result lane is lane 0 of %a.
define <4 x i32> @shufflevector_v4i32_zeroes(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shufflevector_v4i32_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.4s, v0.s[0]
; CHECK-NEXT:    ret
    %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    ret <4 x i32> %c
}

; All-zero mask: every result lane is lane 0 of %a.
define <2 x i64> @shufflevector_v2i64_zeroes(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shufflevector_v2i64_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.2d, v0.d[0]
; CHECK-NEXT:    ret
    %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
    ret <2 x i64> %c
}

; All-zero mask on pointer elements: every result lane is lane 0 of %a.
define <2 x ptr> @shufflevector_v2p0_zeroes(<2 x ptr> %a, <2 x ptr> %b) {
; CHECK-LABEL: shufflevector_v2p0_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.2d, v0.d[0]
; CHECK-NEXT:    ret
    %c = shufflevector <2 x ptr> %a, <2 x ptr> %b, <2 x i32> <i32 0, i32 0>
    ret <2 x ptr> %c
}

; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====

; Lane 0 of %a followed by lane 1 of %b.
define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){
; CHECK-SD-LABEL: shufflevector_v2i1:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v2i1:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT:    mov w8, v1.s[1]
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov w9, v0.s[1]
; CHECK-GI-NEXT:    mov v1.b[1], w8
; CHECK-GI-NEXT:    mov v0.b[1], w9
; CHECK-GI-NEXT:    mov b1, v1.b[1]
; CHECK-GI-NEXT:    mov v0.b[1], v1.b[0]
; CHECK-GI-NEXT:    umov w8, v0.b[0]
; CHECK-GI-NEXT:    umov w9, v0.b[1]
; CHECK-GI-NEXT:    mov v0.s[0], w8
; CHECK-GI-NEXT:    mov v0.s[1], w9
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
    %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> <i32 0, i32 3>
    ret <2 x i1> %c
}

; Lanes 1,2 of %a followed by lanes 0,3 of %b; the <4 x i8> result is
; bitcast to i32 for the return.
define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){
; CHECK-SD-LABEL: shufflevector_v4i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    ext v0.8b, v1.8b, v0.8b, #6
; CHECK-SD-NEXT:    zip1 v1.4h, v1.4h, v0.4h
; CHECK-SD-NEXT:    ext v0.8b, v0.8b, v1.8b, #4
; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v4i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT:    adrp x8, .LCPI17_0
; CHECK-GI-NEXT:    uzp1 v1.8b, v1.8b, v0.8b
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI17_0]
; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
    %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> <i32 1, i32 2, i32 4, i32 7>
    %d = bitcast <4 x i8> %c to i32
    ret i32 %d
}

; Repeating pattern for k = 0..7: lane k of %a followed by three copies of
; lane 0 of %b.
define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){
; CHECK-SD-LABEL: shufflevector_v32i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 def $q1_q2
; CHECK-SD-NEXT:    adrp x8, .LCPI18_0
; CHECK-SD-NEXT:    adrp x9, .LCPI18_1
; CHECK-SD-NEXT:    mov v1.16b, v0.16b
; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI18_0]
; CHECK-SD-NEXT:    ldr q4, [x9, :lo12:.LCPI18_1]
; CHECK-SD-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v3.16b
; CHECK-SD-NEXT:    tbl v1.16b, { v1.16b, v2.16b }, v4.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v32i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    mov v3.16b, v0.16b
; CHECK-GI-NEXT:    adrp x8, .LCPI18_1
; CHECK-GI-NEXT:    adrp x9, .LCPI18_0
; CHECK-GI-NEXT:    mov v4.16b, v2.16b
; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI18_1]
; CHECK-GI-NEXT:    ldr q1, [x9, :lo12:.LCPI18_0]
; CHECK-GI-NEXT:    tbl v0.16b, { v3.16b, v4.16b }, v0.16b
; CHECK-GI-NEXT:    tbl v1.16b, { v3.16b, v4.16b }, v1.16b
; CHECK-GI-NEXT:    ret
    %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
    ret <32 x i8> %c
}

; Lane 1 of %a followed by lane 0 of %b; the <2 x i16> result is bitcast to
; i32 for the return.
define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){
; CHECK-SD-LABEL: shufflevector_v2i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    ext v0.8b, v0.8b, v1.8b, #4
; CHECK-SD-NEXT:    mov w8, v0.s[1]
; CHECK-SD-NEXT:    fmov w9, s0
; CHECK-SD-NEXT:    strh w9, [sp, #12]
; CHECK-SD-NEXT:    strh w8, [sp, #14]
; CHECK-SD-NEXT:    ldr w0, [sp, #12]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v2i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-NEXT:    adrp x8, .LCPI19_0
; CHECK-GI-NEXT:    uzp1 v1.4h, v1.4h, v0.4h
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI19_0]
; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
    %c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> <i32 1, i32 2>
    %d = bitcast <2 x i16> %c to i32
    ret i32 %d
}

; Groups of one %a lane followed by three copies of lane 0 of %b; the %a
; lanes used are 0, 1, 1, 3.
; NOTE(review): lane 1 of %a appears twice and lane 2 never does, unlike the
; 0..7 progression in the v32i8 test - confirm this is intentional.
define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b){
; CHECK-SD-LABEL: shufflevector_v16i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 def $q1_q2
; CHECK-SD-NEXT:    adrp x8, .LCPI20_0
; CHECK-SD-NEXT:    adrp x9, .LCPI20_1
; CHECK-SD-NEXT:    mov v1.16b, v0.16b
; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI20_0]
; CHECK-SD-NEXT:    ldr q4, [x9, :lo12:.LCPI20_1]
; CHECK-SD-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v3.16b
; CHECK-SD-NEXT:    tbl v1.16b, { v1.16b, v2.16b }, v4.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v16i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    mov v3.16b, v0.16b
; CHECK-GI-NEXT:    adrp x8, .LCPI20_1
; CHECK-GI-NEXT:    adrp x9, .LCPI20_0
; CHECK-GI-NEXT:    mov v4.16b, v2.16b
; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI20_1]
; CHECK-GI-NEXT:    ldr q1, [x9, :lo12:.LCPI20_0]
; CHECK-GI-NEXT:    tbl v0.16b, { v3.16b, v4.16b }, v0.16b
; CHECK-GI-NEXT:    tbl v1.16b, { v3.16b, v4.16b }, v1.16b
; CHECK-GI-NEXT:    ret
    %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
    ret <16 x i16> %c
}

; Single-element vectors: the result is lane 0 of %b.
define <1 x i32> @shufflevector_v1i32(<1 x i32> %a, <1 x i32> %b) {
; CHECK-LABEL: shufflevector_v1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    ret
    %c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> <i32 1>
    ret <1 x i32> %c
}

; Lanes 1,3,5,7 of %a followed by lanes 0,2,4,7 of %b.
define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK-SD-LABEL: shufflevector_v8i32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    uzp1 v2.4s, v2.4s, v3.4s
; CHECK-SD-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT:    mov v2.s[3], v3.s[3]
; CHECK-SD-NEXT:    mov v1.16b, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v8i32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI22_0
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI22_0]
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    tbl v1.16b, { v2.16b, v3.16b }, v4.16b
; CHECK-GI-NEXT:    ret
    %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
    ret <8 x i32> %c
}

; Odd lanes of %a followed by odd lanes of %b.
define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-SD-LABEL: shufflevector_v4i64:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    zip2 v2.2d, v2.2d, v3.2d
; CHECK-SD-NEXT:    zip2 v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    mov v1.16b, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v4i64:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    zip2 v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT:    zip2 v1.2d, v2.2d, v3.2d
; CHECK-GI-NEXT:    ret
    %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    ret <4 x i64> %c
}

; Lane 1 of %a followed by lanes 0 and 2 of %b.
define <3 x ptr> @shufflevector_v3p0(<3 x ptr> %a, <3 x ptr> %b) {
; CHECK-SD-LABEL: shufflevector_v3p0:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    fmov d2, d5
; CHECK-SD-NEXT:    fmov d0, d1
; CHECK-SD-NEXT:    fmov d1, d3
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v3p0:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d3
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    mov v2.d[0], x9
; CHECK-GI-NEXT:    fmov x8, d1
; CHECK-GI-NEXT:    fmov x9, d4
; CHECK-GI-NEXT:    mov v0.d[1], x8
; CHECK-GI-NEXT:    mov v2.d[1], x9
; CHECK-GI-NEXT:    fmov x8, d5
; CHECK-GI-NEXT:    mov v1.d[0], x8
; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v2.16b, #8
; CHECK-GI-NEXT:    fmov x10, d1
; CHECK-GI-NEXT:    mov d2, v0.d[1]
; CHECK-GI-NEXT:    fmov d1, d2
; CHECK-GI-NEXT:    fmov d2, x10
; CHECK-GI-NEXT:    ret
    %c = shufflevector <3 x ptr> %a, <3 x ptr> %b, <3 x i32> <i32 1, i32 3, i32 5>
    ret <3 x ptr> %c
}

; Pointer-element variant: odd lanes of %a followed by odd lanes of %b.
define <4 x ptr> @shufflevector_v4p0(<4 x ptr> %a, <4 x ptr> %b) {
; CHECK-SD-LABEL: shufflevector_v4p0:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    zip2 v2.2d, v2.2d, v3.2d
; CHECK-SD-NEXT:    zip2 v0.2d, v0.2d, v1.2d
; CHECK-SD-NEXT:    mov v1.16b, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v4p0:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    zip2 v0.2d, v0.2d, v1.2d
; CHECK-GI-NEXT:    zip2 v1.2d, v2.2d, v3.2d
; CHECK-GI-NEXT:    ret
    %c = shufflevector <4 x ptr> %a, <4 x ptr> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    ret <4 x ptr> %c
}

; ===== Smaller/Larger Width Vectors with Zero Masks =====

; All-zero mask: every result lane is lane 0 of %a.
define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){
; CHECK-SD-LABEL: shufflevector_v2i1_zeroes:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    dup v0.2s, v0.s[0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v2i1_zeroes:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov w8, v0.s[1]
; CHECK-GI-NEXT:    mov v0.b[1], w8
; CHECK-GI-NEXT:    dup v0.8b, v0.b[0]
; CHECK-GI-NEXT:    umov w8, v0.b[0]
; CHECK-GI-NEXT:    umov w9, v0.b[1]
; CHECK-GI-NEXT:    mov v0.s[0], w8
; CHECK-GI-NEXT:    mov v0.s[1], w9
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
    %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> <i32 0, i32 0>
    ret <2 x i1> %c
}

; All-zero mask: every result lane is lane 0 of %a; the result is bitcast to
; i32 for the return.
define i32 @shufflevector_v4i8_zeroes(<4 x i8> %a, <4 x i8> %b){
; CHECK-SD-LABEL: shufflevector_v4i8_zeroes:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    dup v0.4h, v0.h[0]
; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v4i8_zeroes:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT:    dup v0.8b, v0.b[0]
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
    %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    %d = bitcast <4 x i8> %c to i32
    ret i32 %d
}

; All-zero mask: every result lane is lane 0 of %a.
define <32 x i8> @shufflevector_v32i8_zeroes(<32 x i8> %a, <32 x i8> %b){
; CHECK-LABEL: shufflevector_v32i8_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.16b, v0.b[0]
; CHECK-NEXT:    mov v1.16b, v0.16b
; CHECK-NEXT:    ret
    %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    ret <32 x i8> %c
}

; All-zero mask: every result lane is lane 0 of %a; the result is bitcast to
; i32 for the return.
define i32 @shufflevector_v2i16_zeroes(<2 x i16> %a, <2 x i16> %b){
; CHECK-SD-LABEL: shufflevector_v2i16_zeroes:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    dup v1.2s, v0.s[0]
; CHECK-SD-NEXT:    fmov w9, s0
; CHECK-SD-NEXT:    strh w9, [sp, #12]
; CHECK-SD-NEXT:    mov w8, v1.s[1]
; CHECK-SD-NEXT:    strh w8, [sp, #14]
; CHECK-SD-NEXT:    ldr w0, [sp, #12]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v2i16_zeroes:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
    %c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> <i32 0, i32 0>
    %d = bitcast <2 x i16> %c to i32
    ret i32 %d
}

; All-zero mask: every result lane is lane 0 of %a.
define <16 x i16> @shufflevector_v16i16_zeroes(<16 x i16> %a, <16 x i16> %b){
; CHECK-LABEL: shufflevector_v16i16_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    mov v1.16b, v0.16b
; CHECK-NEXT:    ret
    %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    ret <16 x i16> %c
}

; Single-element vectors with mask 0: the result is %a unchanged, so only a
; return is expected.
define <1 x i32> @shufflevector_v1i32_zeroes(<1 x i32> %a, <1 x i32> %b) {
; CHECK-LABEL: shufflevector_v1i32_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
    %c = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> <i32 0>
    ret <1 x i32> %c
}

; All-zero mask: every result lane is lane 0 of %a.
define <8 x i32> @shufflevector_v8i32_zeroes(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: shufflevector_v8i32_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.4s, v0.s[0]
; CHECK-NEXT:    mov v1.16b, v0.16b
; CHECK-NEXT:    ret
    %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    ret <8 x i32> %c
}

; All-zero mask: every result lane is lane 0 of %a.
define <4 x i64> @shufflevector_v4i64_zeroes(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: shufflevector_v4i64_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.2d, v0.d[0]
; CHECK-NEXT:    mov v1.16b, v0.16b
; CHECK-NEXT:    ret
    %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    ret <4 x i64> %c
}

; All-zero mask on pointer elements: every result lane is lane 0 of %a.
define <4 x ptr> @shufflevector_v4p0_zeroes(<4 x ptr> %a, <4 x ptr> %b) {
; CHECK-LABEL: shufflevector_v4p0_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.2d, v0.d[0]
; CHECK-NEXT:    mov v1.16b, v0.16b
; CHECK-NEXT:    ret
    %c = shufflevector <4 x ptr> %a, <4 x ptr> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    ret <4 x ptr> %c
}

; ===== Vectors with Non-Pow 2 Widths =====

; Lanes 1,2 of %a followed by lane 1 of %b.
define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) {
; CHECK-SD-LABEL: shufflevector_v3i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    mov w0, w1
; CHECK-SD-NEXT:    mov w1, w2
; CHECK-SD-NEXT:    mov w2, w4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v3i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov s0, w0
; CHECK-GI-NEXT:    fmov s1, w3
; CHECK-GI-NEXT:    adrp x8, .LCPI35_0
; CHECK-GI-NEXT:    mov v0.b[1], w1
; CHECK-GI-NEXT:    mov v1.b[1], w4
; CHECK-GI-NEXT:    mov v0.b[2], w2
; CHECK-GI-NEXT:    mov v1.b[2], w5
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI35_0]
; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT:    umov w0, v0.b[0]
; CHECK-GI-NEXT:    umov w1, v0.b[1]
; CHECK-GI-NEXT:    umov w2, v0.b[2]
; CHECK-GI-NEXT:    ret
    %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> <i32 1, i32 2, i32 4>
    ret <3 x i8> %c
}

; Lanes 1,3,5 of %a followed by lanes 0,1,3,5 of %b.
define <7 x i8> @shufflevector_v7i8(<7 x i8> %a, <7 x i8> %b) {
; CHECK-SD-LABEL: shufflevector_v7i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT:    adrp x8, .LCPI36_0
; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT:    ldr d1, [x8, :lo12:.LCPI36_0]
; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v7i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT:    adrp x8, .LCPI36_0
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI36_0]
; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
    %c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12>
    ret <7 x i8> %c
}

; Lanes 1,2 of %a followed by lane 1 of %b.
define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) {
; CHECK-SD-LABEL: shufflevector_v3i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    zip1 v1.4h, v0.4h, v1.4h
; CHECK-SD-NEXT:    zip2 v0.4h, v1.4h, v0.4h
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v3i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT:    adrp x8, .LCPI37_0
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI37_0]
; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
    %c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 1, i32 2, i32 4>
    ret <3 x i16> %c
}

; Lanes 1,3,5 of %a followed by lanes 0,1,3,5 of %b.
define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) {
; CHECK-SD-LABEL: shufflevector_v7i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI38_0
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI38_0]
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v7i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI38_0
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI38_0]
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT:    ret
    %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12>
    ret <7 x i16> %c
}

; Lanes 1,2 of %a followed by lane 1 of %b.
define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, <3 x i32> %b) {
; CHECK-SD-LABEL: shufflevector_v3i32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    zip1 v1.4s, v0.4s, v1.4s
; CHECK-SD-NEXT:    zip2 v0.4s, v1.4s, v0.4s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v3i32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI39_0
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI39_0]
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT:    ret
    %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> <i32 1, i32 2, i32 4>
    ret <3 x i32> %c
}

; ===== Vectors with Non-Pow 2 Widths with Zero Masks =====

; All-zero mask: every result lane is lane 0 of %a.
define <3 x i8> @shufflevector_v3i8_zeroes(<3 x i8> %a, <3 x i8> %b) {
; CHECK-SD-LABEL: shufflevector_v3i8_zeroes:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    mov w1, w0
; CHECK-SD-NEXT:    mov w2, w0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufflevector_v3i8_zeroes:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov s0, w0
; CHECK-GI-NEXT:    mov v0.b[1], w1
; CHECK-GI-NEXT:    mov v0.b[2], w2
; CHECK-GI-NEXT:    dup v0.8b, v0.b[0]
; CHECK-GI-NEXT:    umov w0, v0.b[0]
; CHECK-GI-NEXT:    umov w1, v0.b[1]
; CHECK-GI-NEXT:    umov w2, v0.b[2]
; CHECK-GI-NEXT:    ret
    %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> <i32 0, i32 0, i32 0>
    ret <3 x i8> %c
}

; All-zero mask: every result lane is lane 0 of %a.
define <7 x i8> @shufflevector_v7i8_zeroes(<7 x i8> %a, <7 x i8> %b) {
; CHECK-LABEL: shufflevector_v7i8_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.8b, v0.b[0]
; CHECK-NEXT:    ret
    %c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    ret <7 x i8> %c
}

; All-zero mask: every result lane is lane 0 of %a.
define <3 x i16> @shufflevector_v3i16_zeroes(<3 x i16> %a, <3 x i16> %b) {
; CHECK-LABEL: shufflevector_v3i16_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.4h, v0.h[0]
; CHECK-NEXT:    ret
    %c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 0, i32 0, i32 0>
    ret <3 x i16> %c
}

; All-zero mask: every result lane is lane 0 of %a.
define <7 x i16> @shufflevector_v7i16_zeroes(<7 x i16> %a, <7 x i16> %b) {
; CHECK-LABEL: shufflevector_v7i16_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
    %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    ret <7 x i16> %c
}

; All-zero mask: every result lane is lane 0 of %a.
define <3 x i32> @shufflevector_v3i32_zeroes(<3 x i32> %a, <3 x i32> %b) {
; CHECK-LABEL: shufflevector_v3i32_zeroes:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.4s, v0.s[0]
; CHECK-NEXT:    ret
    %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> <i32 0, i32 0, i32 0>
    ret <3 x i32> %c
}