; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; This file checks that interleaving shufflevector masks are lowered to the
; zip1/zip2 vector instructions. The first llc invocation uses the default
; instruction selector; the second one enables GlobalISel and merges stderr
; into stdout (2>&1) so that the fallback warnings listed below can be matched.
; Assertions shared by both invocations use the common prefix; diverging
; output is checked with the per-selector prefixes.

; CHECK-GI:       warning: Instruction selection used fallback path for shuffle_zip1
; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for shuffle_zip2
; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for shuffle_zip3

; Loads two <8 x i8> vectors, interleaves their low halves (mask 0,8,1,9,...)
; and their high halves (mask 4,12,5,13,...) and adds the two results.
; Expected: one zip1.8b and one zip2.8b.
define <8 x i8> @vzipi8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vzipi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    zip1.8b v2, v0, v1
; CHECK-NEXT:    zip2.8b v0, v0, v1
; CHECK-NEXT:    add.8b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; Same low/high interleave pattern for <4 x i16>; expected zip1.4h / zip2.4h.
define <4 x i16> @vzipi16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vzipi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    zip1.4h v2, v0, v1
; CHECK-NEXT:    zip2.4h v0, v0, v1
; CHECK-NEXT:    add.4h v0, v2, v0
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

; 128-bit variant for <16 x i8>; expected zip1.16b / zip2.16b on q registers.
define <16 x i8> @vzipQi8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vzipQi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    zip1.16b v2, v0, v1
; CHECK-NEXT:    zip2.16b v0, v0, v1
; CHECK-NEXT:    add.16b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

; 128-bit variant for <8 x i16>; expected zip1.8h / zip2.8h.
define <8 x i16> @vzipQi16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vzipQi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    zip1.8h v2, v0, v1
; CHECK-NEXT:    zip2.8h v0, v0, v1
; CHECK-NEXT:    add.8h v0, v2, v0
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

; 128-bit variant for <4 x i32>; expected zip1.4s / zip2.4s.
define <4 x i32> @vzipQi32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vzipQi32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    zip1.4s v2, v0, v1
; CHECK-NEXT:    zip2.4s v0, v0, v1
; CHECK-NEXT:    add.4s v0, v2, v0
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

; Floating-point variant: same masks as vzipQi32, combined with fadd.
; Expected zip1.4s / zip2.4s followed by fadd.4s.
define <4 x float> @vzipQf(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vzipQf:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    zip1.4s v2, v0, v1
; CHECK-NEXT:    zip2.4s v0, v0, v1
; CHECK-NEXT:    fadd.4s v0, v2, v0
; CHECK-NEXT:    ret
  %tmp1 = load <4 x float>, ptr %A
  %tmp2 = load <4 x float>, ptr %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}

; Undef shuffle indices should not prevent matching to VZIP:

; Both masks of the <8 x i8> test with several lanes replaced by undef; the
; expected output is unchanged (zip1.8b / zip2.8b).
define <8 x i8> @vzipi8_undef(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vzipi8_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    zip1.8b v2, v0, v1
; CHECK-NEXT:    zip2.8b v0, v0, v1
; CHECK-NEXT:    add.8b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; <16 x i8> masks with undef lanes (including three consecutive undef lanes
; in the first mask); still expected to select zip1.16b / zip2.16b.
define <16 x i8> @vzipQi8_undef(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vzipQi8_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    zip1.16b v2, v0, v1
; CHECK-NEXT:    zip2.16b v0, v0, v1
; CHECK-NEXT:    add.16b v0, v2, v0
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

; zip1 mask on register arguments with lanes 0 and 1 undef; expected a single
; zip1.8h.
define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vzip1_undef_01:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1.8h v0, v0, v1
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i16> %s
}

; zip1 mask with only lane 0 undef.
define <8 x i16> @vzip1_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vzip1_undef_0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1.8h v0, v0, v1
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i16> %s
}

; zip1 mask with only lane 1 undef.
define <8 x i16> @vzip1_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vzip1_undef_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1.8h v0, v0, v1
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i16> %s
}

; zip1 mask with lanes 0, 1 and 2 undef.
define <8 x i16> @vzip1_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vzip1_undef_012:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1.8h v0, v0, v1
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i16> %s
}

; zip2 mask (high halves) with lanes 0 and 1 undef; expected a single zip2.8h.
define <8 x i16> @vzip2_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vzip2_undef_01:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2.8h v0, v0, v1
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i16> %s
}

; zip2 mask with only lane 0 undef.
define <8 x i16> @vzip2_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vzip2_undef_0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2.8h v0, v0, v1
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i16> %s
}

; zip2 mask with only lane 1 undef.
define <8 x i16> @vzip2_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vzip2_undef_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2.8h v0, v0, v1
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 4, i32 undef, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i16> %s
}

; zip2 mask with lanes 0, 1 and 2 undef.
define <8 x i16> @vzip2_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind {
; CHECK-LABEL: vzip2_undef_012:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2.8h v0, v0, v1
; CHECK-NEXT:    ret
  %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i16> %s
}

; A single widening shuffle that fully interleaves two <8 x i8> arguments into
; a <16 x i8> result; expected one zip1.16b on the (implicitly widened)
; q registers.
define <16 x i8> @combine_v16i8(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.16b v0, v0, v1
; CHECK-NEXT:    ret
  %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i8> %3
}

; The same interleave expressed as a low-half shuffle, a high-half shuffle and
; a concatenating shuffle. The default selector is expected to fold this into
; one zip1.16b; the GlobalISel output keeps zip1.8b + zip2.8b and joins the
; halves with a lane move.
define <16 x i8> @combine2_v16i8(<8 x i8> %0, <8 x i8> %1) {
; CHECK-SD-LABEL: combine2_v16i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT:    zip1.16b v0, v0, v1
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine2_v16i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    zip1.8b v2, v0, v1
; CHECK-GI-NEXT:    zip2.8b v0, v0, v1
; CHECK-GI-NEXT:    mov.d v2[1], v0[0]
; CHECK-GI-NEXT:    mov.16b v0, v2
; CHECK-GI-NEXT:    ret
  %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %5
}

; Single widening interleave of two <4 x i16> arguments; expected zip1.8h.
define <8 x i16> @combine_v8i16(<4 x i16> %0, <4 x i16> %1) {
; CHECK-LABEL: combine_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.8h v0, v0, v1
; CHECK-NEXT:    ret
  %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i16> %3
}

; Three-shuffle form of combine_v8i16 (low interleave, high interleave,
; concatenate). The two selectors are expected to produce different code.
define <8 x i16> @combine2_v8i16(<4 x i16> %0, <4 x i16> %1) {
; CHECK-SD-LABEL: combine2_v8i16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT:    zip1.8h v0, v0, v1
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine2_v8i16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    zip1.4h v2, v0, v1
; CHECK-GI-NEXT:    zip2.4h v0, v0, v1
; CHECK-GI-NEXT:    mov.d v2[1], v0[0]
; CHECK-GI-NEXT:    mov.16b v0, v2
; CHECK-GI-NEXT:    ret
  %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %4 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %5
}

; Single widening interleave of two <2 x i32> arguments; expected zip1.4s.
define <4 x i32> @combine_v4i32(<2 x i32> %0, <2 x i32> %1) {
; CHECK-LABEL: combine_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.4s v0, v0, v1
; CHECK-NEXT:    ret
  %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x i32> %3
}

; Three-shuffle form of combine_v4i32 using <2 x i32> masks <0,2> and <1,3>
; followed by a concatenation.
define <4 x i32> @combine2_v4i32(<2 x i32> %0, <2 x i32> %1) {
; CHECK-SD-LABEL: combine2_v4i32:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT:    zip1.4s v0, v0, v1
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine2_v4i32:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    zip1.2s v2, v0, v1
; CHECK-GI-NEXT:    zip2.2s v0, v0, v1
; CHECK-GI-NEXT:    mov.d v2[1], v0[0]
; CHECK-GI-NEXT:    mov.16b v0, v2
; CHECK-GI-NEXT:    ret
  %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 0, i32 2>
  %4 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 1, i32 3>
  %5 = shufflevector <2 x i32> %3, <2 x i32> %4, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %5
}

; combine_v16i8 with mask lane 1 undef; still expected to give zip1.16b.
define <16 x i8> @combine_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine_v16i8_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.16b v0, v0, v1
; CHECK-NEXT:    ret
  %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i8> %3
}

; combine2_v16i8 with lane 1 of the low-half mask undef.
define <16 x i8> @combine2_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
; CHECK-SD-LABEL: combine2_v16i8_undef:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT:    zip1.16b v0, v0, v1
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine2_v16i8_undef:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    zip1.8b v2, v0, v1
; CHECK-GI-NEXT:    zip2.8b v0, v0, v1
; CHECK-GI-NEXT:    mov.d v2[1], v0[0]
; CHECK-GI-NEXT:    mov.16b v0, v2
; CHECK-GI-NEXT:    ret
  %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %5
}

; combine_v8i16 with mask lane 1 undef; still expected to give zip1.8h.
define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) {
; CHECK-LABEL: combine_v8i16_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1.8h v0, v0, v1
; CHECK-NEXT:    ret
  %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 undef, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i16> %3
}

; The shuffle operands are swapped (%1 first, %0 second) and the mask starts
; with the second operand (8,0,9,1,...). Both selectors are currently expected
; to emit a tbl with a constant-pool mask. Note that, despite the "v8i16" in
; the name, this test takes <8 x i8> arguments and returns <16 x i8>.
; FIXME: This could be zip1 too, 8,0,9,1... pattern is handled
define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) {
; CHECK-SD-LABEL: combine_v8i16_8first:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1_q2
; CHECK-SD-NEXT:    adrp x8, .LCPI25_0
; CHECK-SD-NEXT:    fmov d2, d0
; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI25_0]
; CHECK-SD-NEXT:    tbl.16b v0, { v1, v2 }, v3
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_v8i16_8first:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q31_q0
; CHECK-GI-NEXT:    adrp x8, .LCPI25_0
; CHECK-GI-NEXT:    fmov d31, d1
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI25_0]
; CHECK-GI-NEXT:    tbl.16b v0, { v31, v0 }, v2
; CHECK-GI-NEXT:    ret
  %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
  ret <16 x i8> %3
}


; Same swapped-operand pattern as combine_v8i16_8first, with the last mask
; lane undef; a tbl is currently expected here as well.
; FIXME: This could be zip1 too, 8,0,9,1... pattern is handled
define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) {
; CHECK-SD-LABEL: combine_v8i16_8firstundef:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1_q2
; CHECK-SD-NEXT:    adrp x8, .LCPI26_0
; CHECK-SD-NEXT:    fmov d2, d0
; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI26_0]
; CHECK-SD-NEXT:    tbl.16b v0, { v1, v2 }, v3
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: combine_v8i16_8firstundef:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q31_q0
; CHECK-GI-NEXT:    adrp x8, .LCPI26_0
; CHECK-GI-NEXT:    fmov d31, d1
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI26_0]
; CHECK-GI-NEXT:    tbl.16b v0, { v31, v0 }, v2
; CHECK-GI-NEXT:    ret
  %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 undef>
  ret <16 x i8> %3
}

; Shuffles of <4 x i1> compare results: lanes 2 and 0 of an fcmp are
; extracted into a <2 x i1>, each lane is then duplicated (mask 0,0,1,1) and
; the result drives a select between 1.0 and 0.0. The expected code contains
; both a zip2.4h and a zip1.4h. The file header expects the GlobalISel run to
; report a fallback for this function, so one set of assertions covers both
; runs.
define <4 x float> @shuffle_zip1(<4 x float> %arg) {
; CHECK-LABEL: shuffle_zip1:
; CHECK:       // %bb.0: // %bb
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    fcmgt.4s v0, v0, v1
; CHECK-NEXT:    uzp1.8h v1, v0, v0
; CHECK-NEXT:    xtn.4h v0, v0
; CHECK-NEXT:    xtn.4h v1, v1
; CHECK-NEXT:    zip2.4h v0, v0, v1
; CHECK-NEXT:    fmov.4s v1, #1.00000000
; CHECK-NEXT:    zip1.4h v0, v0, v0
; CHECK-NEXT:    sshll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v1, v0
; CHECK-NEXT:    ret
bb:
  %inst = fcmp olt <4 x float> zeroinitializer, %arg
  %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %inst3 = select <4 x i1> %inst2, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> zeroinitializer
  ret <4 x float> %inst3
}

; Integer variant of shuffle_zip1 using an unsigned compare (icmp ult 0, %arg)
; and selecting between 1 and 0.
define <4 x i32> @shuffle_zip2(<4 x i32> %arg) {
; CHECK-LABEL: shuffle_zip2:
; CHECK:       // %bb.0: // %bb
; CHECK-NEXT:    cmtst.4s v0, v0, v0
; CHECK-NEXT:    uzp1.8h v1, v0, v0
; CHECK-NEXT:    xtn.4h v0, v0
; CHECK-NEXT:    xtn.4h v1, v1
; CHECK-NEXT:    zip2.4h v0, v0, v1
; CHECK-NEXT:    movi.4s v1, #1
; CHECK-NEXT:    zip1.4h v0, v0, v0
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    ret
bb:
  %inst = icmp ult <4 x i32> zeroinitializer, %arg
  %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %inst3 = select <4 x i1> %inst2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer
  ret <4 x i32> %inst3
}

; Signed-compare variant of shuffle_zip2 (icmp slt 0, %arg).
define <4 x i32> @shuffle_zip3(<4 x i32> %arg) {
; CHECK-LABEL: shuffle_zip3:
; CHECK:       // %bb.0: // %bb
; CHECK-NEXT:    cmgt.4s v0, v0, #0
; CHECK-NEXT:    uzp1.8h v1, v0, v0
; CHECK-NEXT:    xtn.4h v0, v0
; CHECK-NEXT:    xtn.4h v1, v1
; CHECK-NEXT:    zip2.4h v0, v0, v1
; CHECK-NEXT:    movi.4s v1, #1
; CHECK-NEXT:    zip1.4h v0, v0, v0
; CHECK-NEXT:    sshll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    ret
bb:
  %inst = icmp slt <4 x i32> zeroinitializer, %arg
  %inst1 = shufflevector <4 x i1> %inst, <4 x i1> zeroinitializer, <2 x i32> <i32 2, i32 0>
  %inst2 = shufflevector <2 x i1> %inst1, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %inst3 = select <4 x i1> %inst2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> zeroinitializer
  ret <4 x i32> %inst3
}