; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
; RUN: grep {vqadd\\.s8} %t | count 2
; RUN: grep {vqadd\\.s16} %t | count 2
; RUN: grep {vqadd\\.s32} %t | count 2
; RUN: grep {vqadd\\.s64} %t | count 2
; RUN: grep {vqadd\\.u8} %t | count 2
; RUN: grep {vqadd\\.u16} %t | count 2
; RUN: grep {vqadd\\.u32} %t | count 2
; RUN: grep {vqadd\\.u64} %t | count 2

define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
	ret <1 x i64> %tmp3
}

define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
	%tmp1 = load <8 x i8>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
	ret <8 x i8> %tmp3
}

define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
	%tmp1 = load <4 x i16>* %A
	%tmp2 = load <4 x i16>* %B
	%tmp3 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
	ret <4 x i16> %tmp3
}

define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
	%tmp1 = load <2 x i32>* %A
	%tmp2 = load <2 x i32>* %B
	%tmp3 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
	ret <2 x i32> %tmp3
}

define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
	%tmp1 = load <1 x i64>* %A
	%tmp2 = load <1 x i64>* %B
	%tmp3 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
	ret <1 x i64> %tmp3
}

define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i64> %tmp3
}

define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
	%tmp1 = load <16 x i8>* %A
	%tmp2 = load <16 x i8>* %B
	%tmp3 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
	ret <16 x i8> %tmp3
}

define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i16>* %B
	%tmp3 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
	ret <8 x i16> %tmp3
}

define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
	%tmp1 = load <4 x i32>* %A
	%tmp2 = load <4 x i32>* %B
	%tmp3 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
	ret <4 x i32> %tmp3
}

define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
	%tmp1 = load <2 x i64>* %A
	%tmp2 = load <2 x i64>* %B
	%tmp3 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
	ret <2 x i64> %tmp3
}

declare <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone