; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=armv7a-eabihf -mattr=+neon %s -o - | FileCheck %s

; Instruction-selection test for the NEON absolute-difference operations.
; Each function loads two vectors, applies llvm.arm.neon.vabds (signed / float)
; or llvm.arm.neon.vabdu (unsigned), and the expected assembly below pins the
; single vabd / vabdl instruction that llc should emit for it.

; ---------------------------------------------------------------------------
; vabd on 64-bit vectors (result expected in d0).
; ---------------------------------------------------------------------------

define <8 x i8> @vabds8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabds8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.s8 d0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @vabds16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabds16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.s16 d0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @vabds32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabds32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.s32 d0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <8 x i8> @vabdu8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdu8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.u8 d0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @vabdu16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdu16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.u16 d0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @vabdu32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.u32 d0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

; The floating-point form goes through the "vabds" intrinsic name and is
; expected to select vabd.f32.
define <2 x float> @vabdf32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.f32 d0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x float>, ptr %A
  %tmp2 = load <2 x float>, ptr %B
  %tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

; ---------------------------------------------------------------------------
; vabd on 128-bit vectors (result expected in q0).
; ---------------------------------------------------------------------------

define <16 x i8> @vabdQs8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQs8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.s8 q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @vabdQs16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQs16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.s16 q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @vabdQs32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQs32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.s32 q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <16 x i8> @vabdQu8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQu8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.u8 q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @vabdQu16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQu16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.u16 q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @vabdQu32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.u32 q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <4 x float> @vabdQf32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.f32 q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x float>, ptr %A
  %tmp2 = load <4 x float>, ptr %B
  %tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

; ---------------------------------------------------------------------------
; Declarations of the intrinsics used in this file.
; ---------------------------------------------------------------------------

declare <8 x i8>  @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwind readnone

; ---------------------------------------------------------------------------
; Widening vabdl (64-bit inputs, result expected in q0).
; In every case below the narrow intrinsic result is zero-extended, and that
; pair is expected to be selected as one vabdl whose .s / .u suffix follows the
; intrinsic that was called.
; ---------------------------------------------------------------------------

define <8 x i16> @vabdls8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdls8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabdl.s8 q0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @vabdls16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdls16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabdl.s16 q0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <2 x i64> @vabdls32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdls32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabdl.s32 q0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}

define <8 x i16> @vabdlu8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdlu8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabdl.u8 q0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @vabdlu16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdlu16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabdl.u16 q0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <2 x i64> @vabdlu32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdlu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabdl.u32 q0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}