; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=armv7a-eabihf -mattr=+neon %s -o - | FileCheck %s

; Tests that instruction selection fuses a NEON absolute-difference intrinsic
; (llvm.arm.neon.vabds / llvm.arm.neon.vabdu) whose result feeds an add of an
; accumulator into a single VABA instruction, and the zero-extended form into
; the widening VABAL instruction.

; --- VABA, 64-bit (D-register) operands, signed element types ---

define <8 x i8> @vabas8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabas8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d0, [r0]
; CHECK-NEXT: vaba.s8 d0, d17, d16
; CHECK-NEXT: bx lr
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = load <8 x i8>, ptr %C
  %tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
  %tmp5 = add <8 x i8> %tmp1, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @vabas16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabas16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d0, [r0]
; CHECK-NEXT: vaba.s16 d0, d17, d16
; CHECK-NEXT: bx lr
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = load <4 x i16>, ptr %C
  %tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
  %tmp5 = add <4 x i16> %tmp1, %tmp4
  ret <4 x i16> %tmp5
}

define <2 x i32> @vabas32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabas32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d0, [r0]
; CHECK-NEXT: vaba.s32 d0, d17, d16
; CHECK-NEXT: bx lr
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = load <2 x i32>, ptr %C
  %tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
  %tmp5 = add <2 x i32> %tmp1, %tmp4
  ret <2 x i32> %tmp5
}

; --- VABA, 64-bit (D-register) operands, unsigned element types ---

define <8 x i8> @vabau8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabau8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d0, [r0]
; CHECK-NEXT: vaba.u8 d0, d17, d16
; CHECK-NEXT: bx lr
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = load <8 x i8>, ptr %C
  %tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
  %tmp5 = add <8 x i8> %tmp1, %tmp4
  ret <8 x i8> %tmp5
}

define <4 x i16> @vabau16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabau16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d0, [r0]
; CHECK-NEXT: vaba.u16 d0, d17, d16
; CHECK-NEXT: bx lr
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = load <4 x i16>, ptr %C
  %tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
  %tmp5 = add <4 x i16> %tmp1, %tmp4
  ret <4 x i16> %tmp5
}

define <2 x i32> @vabau32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabau32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vldr d0, [r0]
; CHECK-NEXT: vaba.u32 d0, d17, d16
; CHECK-NEXT: bx lr
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = load <2 x i32>, ptr %C
  %tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
  %tmp5 = add <2 x i32> %tmp1, %tmp4
  ret <2 x i32> %tmp5
}

; --- VABA, 128-bit (Q-register) operands, signed element types ---

define <16 x i8> @vabaQs8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabaQs8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-NEXT: vaba.s8 q0, q9, q8
; CHECK-NEXT: bx lr
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = load <16 x i8>, ptr %C
  %tmp4 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3)
  %tmp5 = add <16 x i8> %tmp1, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @vabaQs16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabaQs16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-NEXT: vaba.s16 q0, q9, q8
; CHECK-NEXT: bx lr
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = load <8 x i16>, ptr %C
  %tmp4 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3)
  %tmp5 = add <8 x i16> %tmp1, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @vabaQs32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabaQs32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-NEXT: vaba.s32 q0, q9, q8
; CHECK-NEXT: bx lr
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = load <4 x i32>, ptr %C
  %tmp4 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3)
  %tmp5 = add <4 x i32> %tmp1, %tmp4
  ret <4 x i32> %tmp5
}

; --- VABA, 128-bit (Q-register) operands, unsigned element types ---

define <16 x i8> @vabaQu8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabaQu8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-NEXT: vaba.u8 q0, q9, q8
; CHECK-NEXT: bx lr
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = load <16 x i8>, ptr %C
  %tmp4 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3)
  %tmp5 = add <16 x i8> %tmp1, %tmp4
  ret <16 x i8> %tmp5
}

define <8 x i16> @vabaQu16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabaQu16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-NEXT: vaba.u16 q0, q9, q8
; CHECK-NEXT: bx lr
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = load <8 x i16>, ptr %C
  %tmp4 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3)
  %tmp5 = add <8 x i16> %tmp1, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @vabaQu32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabaQu32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-NEXT: vaba.u32 q0, q9, q8
; CHECK-NEXT: bx lr
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = load <4 x i32>, ptr %C
  %tmp4 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3)
  %tmp5 = add <4 x i32> %tmp1, %tmp4
  ret <4 x i32> %tmp5
}

; Intrinsic declarations for the absolute-difference operations used above.

declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

; --- VABAL: vabd on D-register operands, zero-extended, accumulated into a
; --- Q-register result; should select the widening accumulate instruction.

define <8 x i16> @vabals8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabals8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-NEXT: vabal.s8 q0, d17, d16
; CHECK-NEXT: bx lr
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = load <8 x i8>, ptr %C
  %tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
  %tmp5 = zext <8 x i8> %tmp4 to <8 x i16>
  %tmp6 = add <8 x i16> %tmp1, %tmp5
  ret <8 x i16> %tmp6
}

define <4 x i32> @vabals16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabals16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-NEXT: vabal.s16 q0, d17, d16
; CHECK-NEXT: bx lr
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = load <4 x i16>, ptr %C
  %tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
  %tmp5 = zext <4 x i16> %tmp4 to <4 x i32>
  %tmp6 = add <4 x i32> %tmp1, %tmp5
  ret <4 x i32> %tmp6
}

define <2 x i64> @vabals32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabals32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-NEXT: vabal.s32 q0, d17, d16
; CHECK-NEXT: bx lr
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = load <2 x i32>, ptr %C
  %tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
  %tmp5 = zext <2 x i32> %tmp4 to <2 x i64>
  %tmp6 = add <2 x i64> %tmp1, %tmp5
  ret <2 x i64> %tmp6
}

define <8 x i16> @vabalu8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabalu8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-NEXT: vabal.u8 q0, d17, d16
; CHECK-NEXT: bx lr
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = load <8 x i8>, ptr %C
  %tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
  %tmp5 = zext <8 x i8> %tmp4 to <8 x i16>
  %tmp6 = add <8 x i16> %tmp1, %tmp5
  ret <8 x i16> %tmp6
}

define <4 x i32> @vabalu16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabalu16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-NEXT: vabal.u16 q0, d17, d16
; CHECK-NEXT: bx lr
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = load <4 x i16>, ptr %C
  %tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
  %tmp5 = zext <4 x i16> %tmp4 to <4 x i32>
  %tmp6 = add <4 x i32> %tmp1, %tmp5
  ret <4 x i32> %tmp6
}

define <2 x i64> @vabalu32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: vabalu32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r2]
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-NEXT: vabal.u32 q0, d17, d16
; CHECK-NEXT: bx lr
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = load <2 x i32>, ptr %C
  %tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
  %tmp5 = zext <2 x i32> %tmp4 to <2 x i64>
  %tmp6 = add <2 x i64> %tmp1, %tmp5
  ret <2 x i64> %tmp6
}