; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

; Codegen tests for AArch64 NEON shuffle lowering. Each function below builds a
; shufflevector with a specific mask and the accompanying FileCheck directive
; states which permute instruction (uzp1, uzp2 or zip1) must, or must not,
; appear in the llc output for that function.

declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>)

; fun1: both shuffle operands are the same tbl1 result and the 16-entry mask
; picks every even index (0, 2, ..., 30) of their concatenation. The directive
; below requires a uzp1 with the .16b arrangement on all three operands.
; CHECK-LABEL: fun1:
; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
define i32 @fun1() {
entry:
  %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)
  %vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> %vtbl1.i.1, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  %scevgep = getelementptr <16 x i8>, ptr undef, i64 1
  store <16 x i8> %vuzp.i212.1, ptr %scevgep, align 1
  ret i32 undef
}

; fun2: the 8-entry mask picks every odd index (1, 3, ..., 15) of the tbl1
; result; the second shuffle operand is undef. The directive below requires a
; uzp2.
; NOTE(review): the shuffle result here is <8 x i8>, yet the directive expects
; the .16b arrangement -- confirm this matches the current llc output.
; CHECK-LABEL: fun2:
; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
define i32 @fun2() {
entry:
  %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)
  %vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %scevgep = getelementptr <8 x i8>, ptr undef, i64 1
  store <8 x i8> %vuzp.i212.1, ptr %scevgep, align 1
  ret i32 undef
}

; fun3: negative test. The mask follows the even-index pattern except for its
; last entry, which is 15 rather than 14, so it is not a pure "even elements"
; selection. The directive below requires that no uzp1 is emitted.
; CHECK-LABEL: fun3:
; CHECK-NOT: uzp1
define i32 @fun3() {
entry:
  %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)
  %vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 15>
  %scevgep = getelementptr <8 x i8>, ptr undef, i64 1
  store <8 x i8> %vuzp.i212.1, ptr %scevgep, align 1
  ret i32 undef
}

; fun4: negative test. The mask follows the odd-index pattern except for its
; first entry, which is 3 rather than 1. The directive below requires that no
; uzp2 is emitted.
; CHECK-LABEL: fun4:
; CHECK-NOT: uzp2
define i32 @fun4() {
entry:
  %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)
  %vuzp.i212.1 = shufflevector <16 x i8> %vtbl1.i.1, <16 x i8> undef, <8 x i32> <i32 3, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %scevgep = getelementptr <8 x i8>, ptr undef, i64 1
  store <8 x i8> %vuzp.i212.1, ptr %scevgep, align 1
  ret i32 undef
}

; pr36582: loads <8 x i8> from %p1, keeps the even-indexed elements as a
; <4 x i8>, zero-extends them to <4 x i32> and stores the result to %p2. Only
; the function label is matched, so the test passes as long as llc completes.
; (The name presumably refers to an LLVM bug report number -- not verifiable
; from this file.)
; CHECK-LABEL: pr36582:
; Check that this does not ICE.
define void @pr36582(ptr %p1, ptr %p2) {
entry:
  %wide.vec = load <8 x i8>, ptr %p1, align 1
  %strided.vec = shufflevector <8 x i8> %wide.vec, <8 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %y = zext <4 x i8> %strided.vec to <4 x i32>
  store <4 x i32> %y, ptr %p2, align 4
  ret void
}

; Check that this pattern is recognized as a VZIP and
; that the vector blend transform does not scramble the pattern.
; The mask interleaves elements 0..3 of the loaded vector with lanes 0..3 of
; the constant operand (all zero in those lanes). %B is not used.
; CHECK-LABEL: vzipNoBlend:
; CHECK: zip1
define <8 x i8> @vzipNoBlend(ptr %A, ptr %B) nounwind {
  %t = load <8 x i8>, ptr %A
  %vzip = shufflevector <8 x i8> %t, <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i8> %vzip
}

; Same interleave as vzipNoBlend, but with the two shuffle operands swapped and
; the mask indices adjusted accordingly (8..11 now address the loaded vector,
; 0..3 the constant). zip1 is still required. %B is not used.
; CHECK-LABEL: vzipNoBlendCommutted:
; CHECK: zip1
define <8 x i8> @vzipNoBlendCommutted(ptr %A, ptr %B) nounwind {
  %t = load <8 x i8>, ptr %A
  %vzip = shufflevector <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i8> %t, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
  ret <8 x i8> %vzip
}

; The constant operand defines only lane 1 (value 0); the mask alternates
; elements 0..3 of the loaded vector with that single lane (index 9), i.e. each
; loaded byte is followed by a zero byte. zip1 is required. %B is not used.
; CHECK-LABEL: vzipStillZExt:
; CHECK: zip1
define <8 x i8> @vzipStillZExt(ptr %A, ptr %B) nounwind {
  %t = load <8 x i8>, ptr %A
  %vzip = shufflevector <8 x i8> %t, <8 x i8> <i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i32> <i32 0, i32 9, i32 1, i32 9, i32 2, i32 9, i32 3, i32 9>
  ret <8 x i8> %vzip
}

; Same as vzipStillZExt with the shuffle operands swapped: indices 8..11 address
; elements 0..3 of the loaded vector and index 1 addresses the only defined
; lane of the constant. zip1 is still required. %B is not used.
; CHECK-LABEL: vzipStillZExtCommutted:
; CHECK: zip1
define <8 x i8> @vzipStillZExtCommutted(ptr %A, ptr %B) nounwind {
  %t = load <8 x i8>, ptr %A
  %vzip = shufflevector <8 x i8> <i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <8 x i8> %t, <8 x i32> <i32 8, i32 1, i32 9, i32 1, i32 10, i32 1, i32 11, i32 1>
  ret <8 x i8> %vzip
}