; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s

;; Direct lowering of the @llvm.aarch64.neon.{s,u}hadd intrinsics.
;; 64-bit vectors load/store through d registers, 128-bit through q registers.

define <8 x i8> @shadd8b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    shadd.8b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A, align 8
  %tmp2 = load <8 x i8>, ptr %B, align 8
  %tmp3 = tail call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @shadd16b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    shadd.16b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A, align 16
  %tmp2 = load <16 x i8>, ptr %B, align 16
  %tmp3 = tail call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @shadd4h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    shadd.4h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A, align 8
  %tmp2 = load <4 x i16>, ptr %B, align 8
  %tmp3 = tail call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @shadd8h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    shadd.8h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A, align 16
  %tmp2 = load <8 x i16>, ptr %B, align 16
  %tmp3 = tail call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @shadd2s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    shadd.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A, align 8
  %tmp2 = load <2 x i32>, ptr %B, align 8
  %tmp3 = tail call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @shadd4s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: shadd4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    shadd.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A, align 16
  %tmp2 = load <4 x i32>, ptr %B, align 16
  %tmp3 = tail call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <8 x i8> @uhadd8b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    uhadd.8b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A, align 8
  %tmp2 = load <8 x i8>, ptr %B, align 8
  %tmp3 = tail call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @uhadd16b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    uhadd.16b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A, align 16
  %tmp2 = load <16 x i8>, ptr %B, align 16
  %tmp3 = tail call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @uhadd4h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    uhadd.4h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A, align 8
  %tmp2 = load <4 x i16>, ptr %B, align 8
  %tmp3 = tail call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @uhadd8h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    uhadd.8h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A, align 16
  %tmp2 = load <8 x i16>, ptr %B, align 16
  %tmp3 = tail call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @uhadd2s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    uhadd.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A, align 8
  %tmp2 = load <2 x i32>, ptr %B, align 8
  %tmp3 = tail call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @uhadd4s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: uhadd4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    uhadd.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A, align 16
  %tmp2 = load <4 x i32>, ptr %B, align 16
  %tmp3 = tail call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)
declare <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32>, <2 x i32>)
declare <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>)

;; Direct lowering of the @llvm.aarch64.neon.{s,u}rhadd intrinsics.

define <8 x i8> @srhadd8b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    srhadd.8b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A, align 8
  %tmp2 = load <8 x i8>, ptr %B, align 8
  %tmp3 = tail call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @srhadd16b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    srhadd.16b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A, align 16
  %tmp2 = load <16 x i8>, ptr %B, align 16
  %tmp3 = tail call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @srhadd4h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    srhadd.4h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A, align 8
  %tmp2 = load <4 x i16>, ptr %B, align 8
  %tmp3 = tail call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @srhadd8h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    srhadd.8h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A, align 16
  %tmp2 = load <8 x i16>, ptr %B, align 16
  %tmp3 = tail call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @srhadd2s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    srhadd.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A, align 8
  %tmp2 = load <2 x i32>, ptr %B, align 8
  %tmp3 = tail call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @srhadd4s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: srhadd4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    srhadd.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A, align 16
  %tmp2 = load <4 x i32>, ptr %B, align 16
  %tmp3 = tail call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <8 x i8> @urhadd8b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    urhadd.8b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A, align 8
  %tmp2 = load <8 x i8>, ptr %B, align 8
  %tmp3 = tail call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @urhadd16b(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    urhadd.16b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A, align 16
  %tmp2 = load <16 x i8>, ptr %B, align 16
  %tmp3 = tail call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @urhadd4h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    urhadd.4h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A, align 8
  %tmp2 = load <4 x i16>, ptr %B, align 8
  %tmp3 = tail call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @urhadd8h(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    urhadd.8h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A, align 16
  %tmp2 = load <8 x i16>, ptr %B, align 16
  %tmp3 = tail call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @urhadd2s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    urhadd.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A, align 8
  %tmp2 = load <2 x i32>, ptr %B, align 8
  %tmp3 = tail call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @urhadd4s(ptr nocapture readonly %A, ptr nocapture readonly %B) {
; CHECK-LABEL: urhadd4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    urhadd.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A, align 16
  %tmp2 = load <4 x i32>, ptr %B, align 16
  %tmp3 = tail call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

;; Pattern-matched lowering: sext + (a + 1 + b) >> 1 should select SRHADD.

define void @testLowerToSRHADD8b(<8 x i8> %src1, <8 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSRHADD8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd.8b v0, v0, v1
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %sextsrc1 = sext <8 x i8> %src1 to <8 x i16>
  %sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
  %add1 = add nsw <8 x i16> %sextsrc1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %add2 = add nsw <8 x i16> %add1, %sextsrc2
  %resulti16 = lshr <8 x i16> %add2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %result = trunc <8 x i16> %resulti16 to <8 x i8>
  store <8 x i8> %result, ptr %dest, align 8
  ret void
}

define void @testLowerToSRHADD4h(<4 x i16> %src1, <4 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSRHADD4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd.4h v0, v0, v1
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %sextsrc1 = sext <4 x i16> %src1 to <4 x i32>
  %sextsrc2 = sext <4 x i16> %src2 to <4 x i32>
  %add1 = add nsw <4 x i32> %sextsrc1, <i32 1, i32 1, i32 1, i32 1>
  %add2 = add nsw <4 x i32> %add1, %sextsrc2
  %resulti16 = lshr <4 x i32> %add2, <i32 1, i32 1, i32 1, i32 1>
  %result = trunc <4 x i32> %resulti16 to <4 x i16>
  store <4 x i16> %result, ptr %dest, align 8
  ret void
}

define void @testLowerToSRHADD2s(<2 x i32> %src1, <2 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSRHADD2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd.2s v0, v0, v1
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %sextsrc1 = sext <2 x i32> %src1 to <2 x i64>
  %sextsrc2 = sext <2 x i32> %src2 to <2 x i64>
  %add1 = add nsw <2 x i64> %sextsrc1, <i64 1, i64 1>
  %add2 = add nsw <2 x i64> %add1, %sextsrc2
  %resulti16 = lshr <2 x i64> %add2, <i64 1, i64 1>
  %result = trunc <2 x i64> %resulti16 to <2 x i32>
  store <2 x i32> %result, ptr %dest, align 8
  ret void
}

define void @testLowerToSRHADD16b(<16 x i8> %src1, <16 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSRHADD16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd.16b v0, v0, v1
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %sextsrc1 = sext <16 x i8> %src1 to <16 x i16>
  %sextsrc2 = sext <16 x i8> %src2 to <16 x i16>
  %add1 = add nsw <16 x i16> %sextsrc1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %add2 = add nsw <16 x i16> %add1, %sextsrc2
  %resulti16 = lshr <16 x i16> %add2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %result = trunc <16 x i16> %resulti16 to <16 x i8>
  store <16 x i8> %result, ptr %dest, align 16
  ret void
}

define void @testLowerToSRHADD8h(<8 x i16> %src1, <8 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSRHADD8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd.8h v0, v0, v1
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %sextsrc2 = sext <8 x i16> %src2 to <8 x i32>
  %add1 = add nsw <8 x i32> %sextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %add2 = add nsw <8 x i32> %add1, %sextsrc2
  %resulti16 = lshr <8 x i32> %add2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  store <8 x i16> %result, ptr %dest, align 16
  ret void
}

define void @testLowerToSRHADD4s(<4 x i32> %src1, <4 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSRHADD4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd.4s v0, v0, v1
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %sextsrc1 = sext <4 x i32> %src1 to <4 x i64>
  %sextsrc2 = sext <4 x i32> %src2 to <4 x i64>
  %add1 = add nsw <4 x i64> %sextsrc1, <i64 1, i64 1, i64 1, i64 1>
  %add2 = add nsw <4 x i64> %add1, %sextsrc2
  %resulti16 = lshr <4 x i64> %add2, <i64 1, i64 1, i64 1, i64 1>
  %result = trunc <4 x i64> %resulti16 to <4 x i32>
  store <4 x i32> %result, ptr %dest, align 16
  ret void
}

;; Pattern-matched lowering: sext + (a + b) >> 1 should select SHADD.

define void @testLowerToSHADD8b(<8 x i8> %src1, <8 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd.8b v0, v0, v1
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %sextsrc1 = sext <8 x i8> %src1 to <8 x i16>
  %sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
  %add = add nsw <8 x i16> %sextsrc1, %sextsrc2
  %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %result = trunc <8 x i16> %resulti16 to <8 x i8>
  store <8 x i8> %result, ptr %dest, align 8
  ret void
}

define void @testLowerToSHADD4h(<4 x i16> %src1, <4 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd.4h v0, v0, v1
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %sextsrc1 = sext <4 x i16> %src1 to <4 x i32>
  %sextsrc2 = sext <4 x i16> %src2 to <4 x i32>
  %add = add nsw <4 x i32> %sextsrc1, %sextsrc2
  %resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %result = trunc <4 x i32> %resulti16 to <4 x i16>
  store <4 x i16> %result, ptr %dest, align 8
  ret void
}

define void @testLowerToSHADD2s(<2 x i32> %src1, <2 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd.2s v0, v0, v1
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %sextsrc1 = sext <2 x i32> %src1 to <2 x i64>
  %sextsrc2 = sext <2 x i32> %src2 to <2 x i64>
  %add = add nsw <2 x i64> %sextsrc1, %sextsrc2
  %resulti16 = lshr <2 x i64> %add, <i64 1, i64 1>
  %result = trunc <2 x i64> %resulti16 to <2 x i32>
  store <2 x i32> %result, ptr %dest, align 8
  ret void
}

define void @testLowerToSHADD16b(<16 x i8> %src1, <16 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd.16b v0, v0, v1
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %sextsrc1 = sext <16 x i8> %src1 to <16 x i16>
  %sextsrc2 = sext <16 x i8> %src2 to <16 x i16>
  %add = add nsw <16 x i16> %sextsrc1, %sextsrc2
  %resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %result = trunc <16 x i16> %resulti16 to <16 x i8>
  store <16 x i8> %result, ptr %dest, align 16
  ret void
}

define void @testLowerToSHADD8h(<8 x i16> %src1, <8 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd.8h v0, v0, v1
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %sextsrc2 = sext <8 x i16> %src2 to <8 x i32>
  %add = add nsw <8 x i32> %sextsrc1, %sextsrc2
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  store <8 x i16> %result, ptr %dest, align 16
  ret void
}

define void @testLowerToSHADD4s(<4 x i32> %src1, <4 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToSHADD4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd.4s v0, v0, v1
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %sextsrc1 = sext <4 x i32> %src1 to <4 x i64>
  %sextsrc2 = sext <4 x i32> %src2 to <4 x i64>
  %add = add nsw <4 x i64> %sextsrc1, %sextsrc2
  %resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
  %result = trunc <4 x i64> %resulti16 to <4 x i32>
  store <4 x i32> %result, ptr %dest, align 16
  ret void
}

;; Pattern-matched lowering: zext + (a + 1 + b) >> 1 should select URHADD.

define void @testLowerToURHADD8b(<8 x i8> %src1, <8 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToURHADD8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd.8b v0, v0, v1
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i8> %src1 to <8 x i16>
  %zextsrc2 = zext <8 x i8> %src2 to <8 x i16>
  %add1 = add nuw nsw <8 x i16> %zextsrc1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %add2 = add nuw nsw <8 x i16> %add1, %zextsrc2
  %resulti16 = lshr <8 x i16> %add2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %result = trunc <8 x i16> %resulti16 to <8 x i8>
  store <8 x i8> %result, ptr %dest, align 8
  ret void
}

define void @testLowerToURHADD4h(<4 x i16> %src1, <4 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToURHADD4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd.4h v0, v0, v1
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
  %zextsrc2 = zext <4 x i16> %src2 to <4 x i32>
  %add1 = add nuw nsw <4 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1>
  %add2 = add nuw nsw <4 x i32> %add1, %zextsrc2
  %resulti16 = lshr <4 x i32> %add2, <i32 1, i32 1, i32 1, i32 1>
  %result = trunc <4 x i32> %resulti16 to <4 x i16>
  store <4 x i16> %result, ptr %dest, align 8
  ret void
}

define void @testLowerToURHADD2s(<2 x i32> %src1, <2 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToURHADD2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd.2s v0, v0, v1
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %zextsrc1 = zext <2 x i32> %src1 to <2 x i64>
  %zextsrc2 = zext <2 x i32> %src2 to <2 x i64>
  %add1 = add nuw nsw <2 x i64> %zextsrc1, <i64 1, i64 1>
  %add2 = add nuw nsw <2 x i64> %add1, %zextsrc2
  %resulti16 = lshr <2 x i64> %add2, <i64 1, i64 1>
  %result = trunc <2 x i64> %resulti16 to <2 x i32>
  store <2 x i32> %result, ptr %dest, align 8
  ret void
}

define void @testLowerToURHADD16b(<16 x i8> %src1, <16 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToURHADD16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd.16b v0, v0, v1
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %zextsrc1 = zext <16 x i8> %src1 to <16 x i16>
  %zextsrc2 = zext <16 x i8> %src2 to <16 x i16>
  %add1 = add nuw nsw <16 x i16> %zextsrc1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %add2 = add nuw nsw <16 x i16> %add1, %zextsrc2
  %resulti16 = lshr <16 x i16> %add2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %result = trunc <16 x i16> %resulti16 to <16 x i8>
  store <16 x i8> %result, ptr %dest, align 16
  ret void
}

define void @testLowerToURHADD8h(<8 x i16> %src1, <8 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToURHADD8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd.8h v0, v0, v1
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
  %add1 = add nuw nsw <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %add2 = add nuw nsw <8 x i32> %add1, %zextsrc2
  %resulti16 = lshr <8 x i32> %add2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  store <8 x i16> %result, ptr %dest, align 16
  ret void
}

define void @testLowerToURHADD4s(<4 x i32> %src1, <4 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToURHADD4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd.4s v0, v0, v1
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
  %zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
  %add1 = add nuw nsw <4 x i64> %zextsrc1, <i64 1, i64 1, i64 1, i64 1>
  %add2 = add nuw nsw <4 x i64> %add1, %zextsrc2
  %resulti16 = lshr <4 x i64> %add2, <i64 1, i64 1, i64 1, i64 1>
  %result = trunc <4 x i64> %resulti16 to <4 x i32>
  store <4 x i32> %result, ptr %dest, align 16
  ret void
}

;; Pattern-matched lowering: zext + (a + b) >> 1 should select UHADD.

define void @testLowerToUHADD8b(<8 x i8> %src1, <8 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd.8b v0, v0, v1
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i8> %src1 to <8 x i16>
  %zextsrc2 = zext <8 x i8> %src2 to <8 x i16>
  %add = add nuw nsw <8 x i16> %zextsrc1, %zextsrc2
  %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %result = trunc <8 x i16> %resulti16 to <8 x i8>
  store <8 x i8> %result, ptr %dest, align 8
  ret void
}

define void @testLowerToUHADD4h(<4 x i16> %src1, <4 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd.4h v0, v0, v1
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
  %zextsrc2 = zext <4 x i16> %src2 to <4 x i32>
  %add = add nuw nsw <4 x i32> %zextsrc1, %zextsrc2
  %resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %result = trunc <4 x i32> %resulti16 to <4 x i16>
  store <4 x i16> %result, ptr %dest, align 8
  ret void
}

define void @testLowerToUHADD2s(<2 x i32> %src1, <2 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd.2s v0, v0, v1
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %zextsrc1 = zext <2 x i32> %src1 to <2 x i64>
  %zextsrc2 = zext <2 x i32> %src2 to <2 x i64>
  %add = add nuw nsw <2 x i64> %zextsrc1, %zextsrc2
  %resulti16 = lshr <2 x i64> %add, <i64 1, i64 1>
  %result = trunc <2 x i64> %resulti16 to <2 x i32>
  store <2 x i32> %result, ptr %dest, align 8
  ret void
}

define void @testLowerToUHADD16b(<16 x i8> %src1, <16 x i8> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd.16b v0, v0, v1
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %zextsrc1 = zext <16 x i8> %src1 to <16 x i16>
  %zextsrc2 = zext <16 x i8> %src2 to <16 x i16>
  %add = add nuw nsw <16 x i16> %zextsrc1, %zextsrc2
  %resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %result = trunc <16 x i16> %resulti16 to <16 x i8>
  store <16 x i8> %result, ptr %dest, align 16
  ret void
}

define void @testLowerToUHADD8h(<8 x i16> %src1, <8 x i16> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd.8h v0, v0, v1
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
  %add = add nuw nsw <8 x i32> %zextsrc1, %zextsrc2
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  store <8 x i16> %result, ptr %dest, align 16
  ret void
}

define void @testLowerToUHADD4s(<4 x i32> %src1, <4 x i32> %src2, ptr nocapture writeonly %dest) {
; CHECK-LABEL: testLowerToUHADD4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd.4s v0, v0, v1
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
  %zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
  %add = add nuw nsw <4 x i64> %zextsrc1, %zextsrc2
  %resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
  %result = trunc <4 x i64> %resulti16 to <4 x i32>
  store <4 x i32> %result, ptr %dest, align 16
  ret void
}

;; Widen-then-shift variants: the halving add is only selectable when the
;; shift kind matches the extension kind (ashr/sext, lshr/zext).

define <4 x i32> @hadd16_sext_asr(<4 x i16> %src1, <4 x i16> %src2) {
; CHECK-LABEL: hadd16_sext_asr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd.4h v0, v0, v1
; CHECK-NEXT:    sshll.4s v0, v0, #0
; CHECK-NEXT:    ret
  %zextsrc1 = sext <4 x i16> %src1 to <4 x i32>
  %zextsrc2 = sext <4 x i16> %src2 to <4 x i32>
  %add = add nsw <4 x i32> %zextsrc1, %zextsrc2
  %resulti16 = ashr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %resulti16
}

define <4 x i32> @hadd16_zext_asr(<4 x i16> %src1, <4 x i16> %src2) {
; CHECK-LABEL: hadd16_zext_asr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd.4h v0, v0, v1
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    ret
  %zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
  %zextsrc2 = zext <4 x i16> %src2 to <4 x i32>
  %add = add nuw nsw <4 x i32> %zextsrc1, %zextsrc2
  %resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %resulti16
}

define <4 x i32> @hadd16_sext_lsr(<4 x i16> %src1, <4 x i16> %src2) {
; CHECK-LABEL: hadd16_sext_lsr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saddl.4s v0, v0, v1
; CHECK-NEXT:    ushr.4s v0, v0, #1
; CHECK-NEXT:    ret
  %zextsrc1 = sext <4 x i16> %src1 to <4 x i32>
  %zextsrc2 = sext <4 x i16> %src2 to <4 x i32>
  %add = add nsw <4 x i32> %zextsrc1, %zextsrc2
  %resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %resulti16
}

define <4 x i32> @hadd16_zext_lsr(<4 x i16> %src1, <4 x i16> %src2) {
; CHECK-LABEL: hadd16_zext_lsr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd.4h v0, v0, v1
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    ret
  %zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
  %zextsrc2 = zext <4 x i16> %src2 to <4 x i32>
  %add = add nuw nsw <4 x i32> %zextsrc1, %zextsrc2
  %resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %resulti16
}

define <4 x i64> @hadd32_sext_asr(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: hadd32_sext_asr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd.4s v0, v0, v1
; CHECK-NEXT:    sshll2.2d v1, v0, #0
; CHECK-NEXT:    sshll.2d v0, v0, #0
; CHECK-NEXT:    ret
  %zextsrc1 = sext <4 x i32> %src1 to <4 x i64>
  %zextsrc2 = sext <4 x i32> %src2 to <4 x i64>
  %add = add nsw <4 x i64> %zextsrc1, %zextsrc2
  %resulti32 = ashr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %resulti32
}

define <4 x i64> @hadd32_zext_asr(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: hadd32_zext_asr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd.4s v0, v0, v1
; CHECK-NEXT:    ushll2.2d v1, v0, #0
; CHECK-NEXT:    ushll.2d v0, v0, #0
; CHECK-NEXT:    ret
  %zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
  %zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
  %add = add nuw nsw <4 x i64> %zextsrc1, %zextsrc2
  %resulti32 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %resulti32
}

define <4 x i64> @hadd32_sext_lsr(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: hadd32_sext_lsr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    saddl.2d v2, v0, v1
; CHECK-NEXT:    saddl2.2d v0, v0, v1
; CHECK-NEXT:    ushr.2d v1, v0, #1
; CHECK-NEXT:    ushr.2d v0, v2, #1
; CHECK-NEXT:    ret
  %zextsrc1 = sext <4 x i32> %src1 to <4 x i64>
  %zextsrc2 = sext <4 x i32> %src2 to <4 x i64>
  %add = add nsw <4 x i64> %zextsrc1, %zextsrc2
  %resulti32 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %resulti32
}

define <4 x i64> @hadd32_zext_lsr(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: hadd32_zext_lsr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd.4s v0, v0, v1
; CHECK-NEXT:    ushll2.2d v1, v0, #0
; CHECK-NEXT:    ushll.2d v0, v0, #0
; CHECK-NEXT:    ret
  %zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
  %zextsrc2 = zext <4 x i32> %src2 to <4 x i64>
  %add = add nuw nsw <4 x i64> %zextsrc1, %zextsrc2
  %resulti32 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %resulti32
}

;; Sub-register-width element types (i8 in 16-bit lanes, i8 in 32-bit lanes):
;; the inputs must first be re-extended/masked in-register.

define <4 x i16> @hadd8_sext_asr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-LABEL: hadd8_sext_asr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl.4h v1, v1, #8
; CHECK-NEXT:    shl.4h v0, v0, #8
; CHECK-NEXT:    sshr.4h v1, v1, #8
; CHECK-NEXT:    sshr.4h v0, v0, #8
; CHECK-NEXT:    shadd.4h v0, v0, v1
; CHECK-NEXT:    ret
  %zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
  %zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
  %add = add nsw <4 x i16> %zextsrc1, %zextsrc2
  %resulti8 = ashr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
  ret <4 x i16> %resulti8
}

define <4 x i16> @hadd8_zext_asr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-LABEL: hadd8_zext_asr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic.4h v1, #255, lsl #8
; CHECK-NEXT:    bic.4h v0, #255, lsl #8
; CHECK-NEXT:    uhadd.4h v0, v0, v1
; CHECK-NEXT:    ret
  %zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
  %zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
  %add = add nuw nsw <4 x i16> %zextsrc1, %zextsrc2
  %resulti8 = lshr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
  ret <4 x i16> %resulti8
}

define <4 x i16> @hadd8_sext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-LABEL: hadd8_sext_lsr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl.4h v0, v0, #8
; CHECK-NEXT:    shl.4h v1, v1, #8
; CHECK-NEXT:    sshr.4h v0, v0, #8
; CHECK-NEXT:    ssra.4h v0, v1, #8
; CHECK-NEXT:    ushr.4h v0, v0, #1
; CHECK-NEXT:    ret
  %zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
  %zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
  %add = add nsw <4 x i16> %zextsrc1, %zextsrc2
  %resulti8 = lshr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
  ret <4 x i16> %resulti8
}

define <4 x i16> @hadd8_zext_lsr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-LABEL: hadd8_zext_lsr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic.4h v1, #255, lsl #8
; CHECK-NEXT:    bic.4h v0, #255, lsl #8
; CHECK-NEXT:    uhadd.4h v0, v0, v1
; CHECK-NEXT:    ret
  %zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
  %zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
  %add = add nuw nsw <4 x i16> %zextsrc1, %zextsrc2
  %resulti8 = lshr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
  ret <4 x i16> %resulti8
}

define <2 x i16> @hadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-LABEL: hadd8x2_sext_asr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl.2s v1, v1, #24
; CHECK-NEXT:    shl.2s v0, v0, #24
; CHECK-NEXT:    sshr.2s v1, v1, #24
; CHECK-NEXT:    sshr.2s v0, v0, #24
; CHECK-NEXT:    shadd.2s v0, v0, v1
; CHECK-NEXT:    ret
  %zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
  %zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
  %add = add nsw <2 x i16> %zextsrc1, %zextsrc2
  %resulti8 = ashr <2 x i16> %add, <i16 1, i16 1>
  ret <2 x i16> %resulti8
}

define <2 x i16> @hadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-LABEL: hadd8x2_zext_asr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d2, #0x0000ff000000ff
; CHECK-NEXT:    and.8b v1, v1, v2
; CHECK-NEXT:    and.8b v0, v0, v2
; CHECK-NEXT:    uhadd.2s v0, v0, v1
; CHECK-NEXT:    ret
  %zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
  %zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
  %add = add nuw nsw <2 x i16> %zextsrc1, %zextsrc2
  %resulti8 = lshr <2 x i16> %add, <i16 1, i16 1>
  ret <2 x i16> %resulti8
}

define <2 x i16> @hadd8x2_sext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-LABEL: hadd8x2_sext_lsr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl.2s v0, v0, #24
; CHECK-NEXT:    shl.2s v1, v1, #24
; CHECK-NEXT:    movi d2, #0x00ffff0000ffff
; CHECK-NEXT:    sshr.2s v0, v0, #24
; CHECK-NEXT:    ssra.2s v0, v1, #24
; CHECK-NEXT:    and.8b v0, v0, v2
; CHECK-NEXT:    ushr.2s v0, v0, #1
; CHECK-NEXT:    ret
  %zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
  %zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
  %add = add nsw <2 x i16> %zextsrc1, %zextsrc2
  %resulti8 = lshr <2 x i16> %add, <i16 1, i16 1>
  ret <2 x i16> %resulti8
}

define <2 x i16> @hadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-LABEL: hadd8x2_zext_lsr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d2, #0x0000ff000000ff
; CHECK-NEXT:    and.8b v1, v1, v2
; CHECK-NEXT:    and.8b v0, v0, v2
; CHECK-NEXT:    uhadd.2s v0, v0, v1
; CHECK-NEXT:    ret
  %zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
  %zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
  %add = add nuw nsw <2 x i16> %zextsrc1, %zextsrc2
  %resulti8 = lshr <2 x i16> %add, <i16 1, i16 1>
  ret <2 x i16> %resulti8
}

define <4 x i16> @rhadd8_sext_asr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-LABEL: rhadd8_sext_asr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl.4h v1, v1, #8
; CHECK-NEXT:    shl.4h v0, v0, #8
; CHECK-NEXT:    sshr.4h v1, v1, #8
; CHECK-NEXT:    sshr.4h v0, v0, #8
; CHECK-NEXT:    srhadd.4h v0, v0, v1
; CHECK-NEXT:    ret
  %zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
  %zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
  %add = add nsw <4 x i16> %zextsrc1, %zextsrc2
  %add2 = add nsw <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
  %resulti8 = ashr <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
  ret <4 x i16> %resulti8
}

define <4 x i16> @rhadd8_zext_asr(<4 x i8> %src1, <4 x i8> %src2) {
; CHECK-LABEL: rhadd8_zext_asr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic.4h v1, #255, lsl #8
; CHECK-NEXT:    bic.4h v0, #255, lsl #8
; CHECK-NEXT:    urhadd.4h v0, v0, v1
; CHECK-NEXT:    ret
  %zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
  %zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
  %add = add nuw nsw <4 x i16> %zextsrc1, %zextsrc2
  %add2 = add nuw nsw <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
  %resulti8 = lshr <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
  ret <4 x i16> %resulti8
}
966define <4 x i16> @rhadd8_sext_lsr(<4 x i8> %src1, <4 x i8> %src2) { 967; CHECK-LABEL: rhadd8_sext_lsr: 968; CHECK: // %bb.0: 969; CHECK-NEXT: shl.4h v0, v0, #8 970; CHECK-NEXT: shl.4h v1, v1, #8 971; CHECK-NEXT: movi.4h v2, #1 972; CHECK-NEXT: sshr.4h v0, v0, #8 973; CHECK-NEXT: ssra.4h v0, v1, #8 974; CHECK-NEXT: add.4h v0, v0, v2 975; CHECK-NEXT: ushr.4h v0, v0, #1 976; CHECK-NEXT: ret 977 %zextsrc1 = sext <4 x i8> %src1 to <4 x i16> 978 %zextsrc2 = sext <4 x i8> %src2 to <4 x i16> 979 %add = add nsw <4 x i16> %zextsrc1, %zextsrc2 980 %add2 = add nsw <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1> 981 %resulti8 = lshr <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1> 982 ret <4 x i16> %resulti8 983} 984 985define <4 x i16> @rhadd8_zext_lsr(<4 x i8> %src1, <4 x i8> %src2) { 986; CHECK-LABEL: rhadd8_zext_lsr: 987; CHECK: // %bb.0: 988; CHECK-NEXT: bic.4h v1, #255, lsl #8 989; CHECK-NEXT: bic.4h v0, #255, lsl #8 990; CHECK-NEXT: urhadd.4h v0, v0, v1 991; CHECK-NEXT: ret 992 %zextsrc1 = zext <4 x i8> %src1 to <4 x i16> 993 %zextsrc2 = zext <4 x i8> %src2 to <4 x i16> 994 %add = add nuw nsw <4 x i16> %zextsrc1, %zextsrc2 995 %add2 = add nuw nsw <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1> 996 %resulti8 = lshr <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1> 997 ret <4 x i16> %resulti8 998} 999 1000define <2 x i16> @rhadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) { 1001; CHECK-LABEL: rhadd8x2_sext_asr: 1002; CHECK: // %bb.0: 1003; CHECK-NEXT: shl.2s v1, v1, #24 1004; CHECK-NEXT: shl.2s v0, v0, #24 1005; CHECK-NEXT: sshr.2s v1, v1, #24 1006; CHECK-NEXT: sshr.2s v0, v0, #24 1007; CHECK-NEXT: srhadd.2s v0, v0, v1 1008; CHECK-NEXT: ret 1009 %zextsrc1 = sext <2 x i8> %src1 to <2 x i16> 1010 %zextsrc2 = sext <2 x i8> %src2 to <2 x i16> 1011 %add = add nsw <2 x i16> %zextsrc1, %zextsrc2 1012 %add2 = add nsw <2 x i16> %add, <i16 1, i16 1> 1013 %resulti8 = ashr <2 x i16> %add2, <i16 1, i16 1> 1014 ret <2 x i16> %resulti8 1015} 1016 1017define <2 x i16> @rhadd8x2_zext_asr(<2 x i8> 
%src1, <2 x i8> %src2) { 1018; CHECK-LABEL: rhadd8x2_zext_asr: 1019; CHECK: // %bb.0: 1020; CHECK-NEXT: movi d2, #0x0000ff000000ff 1021; CHECK-NEXT: and.8b v1, v1, v2 1022; CHECK-NEXT: and.8b v0, v0, v2 1023; CHECK-NEXT: urhadd.2s v0, v0, v1 1024; CHECK-NEXT: ret 1025 %zextsrc1 = zext <2 x i8> %src1 to <2 x i16> 1026 %zextsrc2 = zext <2 x i8> %src2 to <2 x i16> 1027 %add = add nuw nsw <2 x i16> %zextsrc1, %zextsrc2 1028 %add2 = add nuw nsw <2 x i16> %add, <i16 1, i16 1> 1029 %resulti8 = lshr <2 x i16> %add2, <i16 1, i16 1> 1030 ret <2 x i16> %resulti8 1031} 1032 1033define <2 x i16> @rhadd8x2_sext_lsr(<2 x i8> %src1, <2 x i8> %src2) { 1034; CHECK-LABEL: rhadd8x2_sext_lsr: 1035; CHECK: // %bb.0: 1036; CHECK-NEXT: shl.2s v0, v0, #24 1037; CHECK-NEXT: shl.2s v1, v1, #24 1038; CHECK-NEXT: movi d2, #0x00ffff0000ffff 1039; CHECK-NEXT: sshr.2s v0, v0, #24 1040; CHECK-NEXT: sshr.2s v1, v1, #24 1041; CHECK-NEXT: mvn.8b v0, v0 1042; CHECK-NEXT: sub.2s v0, v1, v0 1043; CHECK-NEXT: and.8b v0, v0, v2 1044; CHECK-NEXT: ushr.2s v0, v0, #1 1045; CHECK-NEXT: ret 1046 %zextsrc1 = sext <2 x i8> %src1 to <2 x i16> 1047 %zextsrc2 = sext <2 x i8> %src2 to <2 x i16> 1048 %add = add nsw <2 x i16> %zextsrc1, %zextsrc2 1049 %add2 = add nsw <2 x i16> %add, <i16 1, i16 1> 1050 %resulti8 = lshr <2 x i16> %add2, <i16 1, i16 1> 1051 ret <2 x i16> %resulti8 1052} 1053 1054define <2 x i16> @rhadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) { 1055; CHECK-LABEL: rhadd8x2_zext_lsr: 1056; CHECK: // %bb.0: 1057; CHECK-NEXT: movi d2, #0x0000ff000000ff 1058; CHECK-NEXT: and.8b v1, v1, v2 1059; CHECK-NEXT: and.8b v0, v0, v2 1060; CHECK-NEXT: urhadd.2s v0, v0, v1 1061; CHECK-NEXT: ret 1062 %zextsrc1 = zext <2 x i8> %src1 to <2 x i16> 1063 %zextsrc2 = zext <2 x i8> %src2 to <2 x i16> 1064 %add = add nuw nsw <2 x i16> %zextsrc1, %zextsrc2 1065 %add2 = add nuw nsw <2 x i16> %add, <i16 1, i16 1> 1066 %resulti8 = lshr <2 x i16> %add2, <i16 1, i16 1> 1067 ret <2 x i16> %resulti8 1068} 1069 1070 1071define void 
@testLowerToSHADD8b_c(<8 x i8> %src1, ptr nocapture writeonly %dest) { 1072; CHECK-LABEL: testLowerToSHADD8b_c: 1073; CHECK: // %bb.0: 1074; CHECK-NEXT: movi.8b v1, #10 1075; CHECK-NEXT: shadd.8b v0, v0, v1 1076; CHECK-NEXT: str d0, [x0] 1077; CHECK-NEXT: ret 1078 %sextsrc1 = sext <8 x i8> %src1 to <8 x i16> 1079 %add = add nsw <8 x i16> %sextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10> 1080 %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1081 %result = trunc <8 x i16> %resulti16 to <8 x i8> 1082 store <8 x i8> %result, ptr %dest, align 8 1083 ret void 1084} 1085 1086define void @testLowerToSHADD4h_c(<4 x i16> %src1, ptr nocapture writeonly %dest) { 1087; CHECK-LABEL: testLowerToSHADD4h_c: 1088; CHECK: // %bb.0: 1089; CHECK-NEXT: movi.4h v1, #10 1090; CHECK-NEXT: shadd.4h v0, v0, v1 1091; CHECK-NEXT: str d0, [x0] 1092; CHECK-NEXT: ret 1093 %sextsrc1 = sext <4 x i16> %src1 to <4 x i32> 1094 %add = add nsw <4 x i32> %sextsrc1, <i32 10, i32 10, i32 10, i32 10> 1095 %resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1> 1096 %result = trunc <4 x i32> %resulti16 to <4 x i16> 1097 store <4 x i16> %result, ptr %dest, align 8 1098 ret void 1099} 1100 1101define void @testLowerToSHADD2s_c(<2 x i32> %src1, ptr nocapture writeonly %dest) { 1102; CHECK-LABEL: testLowerToSHADD2s_c: 1103; CHECK: // %bb.0: 1104; CHECK-NEXT: movi.2s v1, #10 1105; CHECK-NEXT: shadd.2s v0, v0, v1 1106; CHECK-NEXT: str d0, [x0] 1107; CHECK-NEXT: ret 1108 %sextsrc1 = sext <2 x i32> %src1 to <2 x i64> 1109 %add = add nsw <2 x i64> %sextsrc1, <i64 10, i64 10> 1110 %resulti16 = lshr <2 x i64> %add, <i64 1, i64 1> 1111 %result = trunc <2 x i64> %resulti16 to <2 x i32> 1112 store <2 x i32> %result, ptr %dest, align 8 1113 ret void 1114} 1115 1116define void @testLowerToSHADD16b_c(<16 x i8> %src1, ptr nocapture writeonly %dest) { 1117; CHECK-LABEL: testLowerToSHADD16b_c: 1118; CHECK: // %bb.0: 1119; CHECK-NEXT: movi.16b v1, #10 
1120; CHECK-NEXT: shadd.16b v0, v0, v1 1121; CHECK-NEXT: str q0, [x0] 1122; CHECK-NEXT: ret 1123 %sextsrc1 = sext <16 x i8> %src1 to <16 x i16> 1124 %add = add nsw <16 x i16> %sextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10> 1125 %resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1126 %result = trunc <16 x i16> %resulti16 to <16 x i8> 1127 store <16 x i8> %result, ptr %dest, align 16 1128 ret void 1129} 1130 1131define void @testLowerToSHADD8h_c(<8 x i16> %src1, ptr nocapture writeonly %dest) { 1132; CHECK-LABEL: testLowerToSHADD8h_c: 1133; CHECK: // %bb.0: 1134; CHECK-NEXT: movi.8h v1, #10 1135; CHECK-NEXT: shadd.8h v0, v0, v1 1136; CHECK-NEXT: str q0, [x0] 1137; CHECK-NEXT: ret 1138 %sextsrc1 = sext <8 x i16> %src1 to <8 x i32> 1139 %add = add nsw <8 x i32> %sextsrc1, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10> 1140 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1141 %result = trunc <8 x i32> %resulti16 to <8 x i16> 1142 store <8 x i16> %result, ptr %dest, align 16 1143 ret void 1144} 1145 1146define void @testLowerToSHADD4s_c(<4 x i32> %src1, ptr nocapture writeonly %dest) { 1147; CHECK-LABEL: testLowerToSHADD4s_c: 1148; CHECK: // %bb.0: 1149; CHECK-NEXT: movi.4s v1, #10 1150; CHECK-NEXT: shadd.4s v0, v0, v1 1151; CHECK-NEXT: str q0, [x0] 1152; CHECK-NEXT: ret 1153 %sextsrc1 = sext <4 x i32> %src1 to <4 x i64> 1154 %add = add nsw <4 x i64> %sextsrc1, <i64 10, i64 10, i64 10, i64 10> 1155 %resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1> 1156 %result = trunc <4 x i64> %resulti16 to <4 x i32> 1157 store <4 x i32> %result, ptr %dest, align 16 1158 ret void 1159} 1160 1161define void @testLowerToUHADD8b_c(<8 x i8> %src1, ptr nocapture writeonly %dest) { 1162; CHECK-LABEL: testLowerToUHADD8b_c: 1163; CHECK: 
// %bb.0: 1164; CHECK-NEXT: movi.8b v1, #10 1165; CHECK-NEXT: uhadd.8b v0, v0, v1 1166; CHECK-NEXT: str d0, [x0] 1167; CHECK-NEXT: ret 1168 %zextsrc1 = zext <8 x i8> %src1 to <8 x i16> 1169 %add = add nuw nsw <8 x i16> %zextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10> 1170 %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1171 %result = trunc <8 x i16> %resulti16 to <8 x i8> 1172 store <8 x i8> %result, ptr %dest, align 8 1173 ret void 1174} 1175 1176define void @testLowerToUHADD4h_c(<4 x i16> %src1, ptr nocapture writeonly %dest) { 1177; CHECK-LABEL: testLowerToUHADD4h_c: 1178; CHECK: // %bb.0: 1179; CHECK-NEXT: movi.4h v1, #10 1180; CHECK-NEXT: uhadd.4h v0, v0, v1 1181; CHECK-NEXT: str d0, [x0] 1182; CHECK-NEXT: ret 1183 %zextsrc1 = zext <4 x i16> %src1 to <4 x i32> 1184 %add = add nuw nsw <4 x i32> %zextsrc1, <i32 10, i32 10, i32 10, i32 10> 1185 %resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1> 1186 %result = trunc <4 x i32> %resulti16 to <4 x i16> 1187 store <4 x i16> %result, ptr %dest, align 8 1188 ret void 1189} 1190 1191define void @testLowerToUHADD2s_c(<2 x i32> %src1, ptr nocapture writeonly %dest) { 1192; CHECK-LABEL: testLowerToUHADD2s_c: 1193; CHECK: // %bb.0: 1194; CHECK-NEXT: movi.2s v1, #10 1195; CHECK-NEXT: uhadd.2s v0, v0, v1 1196; CHECK-NEXT: str d0, [x0] 1197; CHECK-NEXT: ret 1198 %zextsrc1 = zext <2 x i32> %src1 to <2 x i64> 1199 %add = add nuw nsw <2 x i64> %zextsrc1, <i64 10, i64 10> 1200 %resulti16 = lshr <2 x i64> %add, <i64 1, i64 1> 1201 %result = trunc <2 x i64> %resulti16 to <2 x i32> 1202 store <2 x i32> %result, ptr %dest, align 8 1203 ret void 1204} 1205 1206define void @testLowerToUHADD16b_c(<16 x i8> %src1, ptr nocapture writeonly %dest) { 1207; CHECK-LABEL: testLowerToUHADD16b_c: 1208; CHECK: // %bb.0: 1209; CHECK-NEXT: movi.16b v1, #10 1210; CHECK-NEXT: uhadd.16b v0, v0, v1 1211; CHECK-NEXT: str q0, [x0] 1212; CHECK-NEXT: ret 1213 %zextsrc1 = zext 
<16 x i8> %src1 to <16 x i16> 1214 %add = add nuw nsw <16 x i16> %zextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10> 1215 %resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1216 %result = trunc <16 x i16> %resulti16 to <16 x i8> 1217 store <16 x i8> %result, ptr %dest, align 16 1218 ret void 1219} 1220 1221define void @testLowerToUHADD8h_c(<8 x i16> %src1, ptr nocapture writeonly %dest) { 1222; CHECK-LABEL: testLowerToUHADD8h_c: 1223; CHECK: // %bb.0: 1224; CHECK-NEXT: movi.8h v1, #10 1225; CHECK-NEXT: uhadd.8h v0, v0, v1 1226; CHECK-NEXT: str q0, [x0] 1227; CHECK-NEXT: ret 1228 %zextsrc1 = zext <8 x i16> %src1 to <8 x i32> 1229 %add = add nuw nsw <8 x i32> %zextsrc1, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10> 1230 %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1231 %result = trunc <8 x i32> %resulti16 to <8 x i16> 1232 store <8 x i16> %result, ptr %dest, align 16 1233 ret void 1234} 1235 1236define void @testLowerToUHADD4s_c(<4 x i32> %src1, ptr nocapture writeonly %dest) { 1237; CHECK-LABEL: testLowerToUHADD4s_c: 1238; CHECK: // %bb.0: 1239; CHECK-NEXT: movi.4s v1, #10 1240; CHECK-NEXT: uhadd.4s v0, v0, v1 1241; CHECK-NEXT: str q0, [x0] 1242; CHECK-NEXT: ret 1243 %zextsrc1 = zext <4 x i32> %src1 to <4 x i64> 1244 %add = add nuw nsw <4 x i64> %zextsrc1, <i64 10, i64 10, i64 10, i64 10> 1245 %resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1> 1246 %result = trunc <4 x i64> %resulti16 to <4 x i32> 1247 store <4 x i32> %result, ptr %dest, align 16 1248 ret void 1249} 1250 1251define <8 x i8> @andmaskv8i8(<8 x i16> %src1, <8 x i8> %src2) { 1252; CHECK-LABEL: andmaskv8i8: 1253; CHECK: // %bb.0: 1254; CHECK-NEXT: movi.8b v2, #7 1255; CHECK-NEXT: xtn.8b v0, v0 1256; CHECK-NEXT: and.8b v0, v0, v2 1257; CHECK-NEXT: 
uhadd.8b v0, v0, v1 1258; CHECK-NEXT: ret 1259 %zextsrc1 = and <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> 1260 %zextsrc2 = zext <8 x i8> %src2 to <8 x i16> 1261 %add = add nuw nsw <8 x i16> %zextsrc1, %zextsrc2 1262 %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1263 %result = trunc <8 x i16> %resulti16 to <8 x i8> 1264 ret <8 x i8> %result 1265} 1266 1267define <16 x i8> @andmaskv16i8(<16 x i16> %src1, <16 x i8> %src2) { 1268; CHECK-LABEL: andmaskv16i8: 1269; CHECK: // %bb.0: 1270; CHECK-NEXT: movi.16b v3, #7 1271; CHECK-NEXT: uzp1.16b v0, v0, v1 1272; CHECK-NEXT: and.16b v0, v0, v3 1273; CHECK-NEXT: uhadd.16b v0, v0, v2 1274; CHECK-NEXT: ret 1275 %zextsrc1 = and <16 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> 1276 %zextsrc2 = zext <16 x i8> %src2 to <16 x i16> 1277 %add = add nuw nsw <16 x i16> %zextsrc1, %zextsrc2 1278 %resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1279 %result = trunc <16 x i16> %resulti16 to <16 x i8> 1280 ret <16 x i8> %result 1281} 1282 1283define <16 x i8> @andmask2v16i8(<16 x i16> %src1, <16 x i16> %src2) { 1284; CHECK-LABEL: andmask2v16i8: 1285; CHECK: // %bb.0: 1286; CHECK-NEXT: uzp1.16b v2, v2, v3 1287; CHECK-NEXT: movi.16b v3, #3 1288; CHECK-NEXT: uzp1.16b v0, v0, v1 1289; CHECK-NEXT: movi.16b v1, #7 1290; CHECK-NEXT: and.16b v2, v2, v3 1291; CHECK-NEXT: and.16b v0, v0, v1 1292; CHECK-NEXT: uhadd.16b v0, v0, v2 1293; CHECK-NEXT: ret 1294 %zextsrc1 = and <16 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> 1295 %zextsrc2 = and <16 x i16> %src2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 1296 %add = add nuw nsw <16 x i16> %zextsrc1, 
%zextsrc2 1297 %resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1298 %result = trunc <16 x i16> %resulti16 to <16 x i8> 1299 ret <16 x i8> %result 1300} 1301 1302define <8 x i8> @andmask2v8i8(<8 x i16> %src1, <8 x i16> %src2) { 1303; CHECK-LABEL: andmask2v8i8: 1304; CHECK: // %bb.0: 1305; CHECK-NEXT: movi.8b v2, #7 1306; CHECK-NEXT: xtn.8b v0, v0 1307; CHECK-NEXT: xtn.8b v1, v1 1308; CHECK-NEXT: and.8b v0, v0, v2 1309; CHECK-NEXT: uhadd.8b v0, v0, v1 1310; CHECK-NEXT: ret 1311 %zextsrc1 = and <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> 1312 %zextsrc2 = and <8 x i16> %src2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> 1313 %add = add nuw nsw <8 x i16> %zextsrc1, %zextsrc2 1314 %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1315 %result = trunc <8 x i16> %resulti16 to <8 x i8> 1316 ret <8 x i8> %result 1317} 1318 1319define <8 x i16> @andmask3v8i8(<8 x i16> %src1, <8 x i16> %src2) { 1320; CHECK-LABEL: andmask3v8i8: 1321; CHECK: // %bb.0: 1322; CHECK-NEXT: movi.8h v2, #7 1323; CHECK-NEXT: bic.8h v1, #254, lsl #8 1324; CHECK-NEXT: and.16b v0, v0, v2 1325; CHECK-NEXT: uhadd.8h v0, v0, v1 1326; CHECK-NEXT: ret 1327 %zextsrc1 = and <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> 1328 %zextsrc2 = and <8 x i16> %src2, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511> 1329 %add = add nuw nsw <8 x i16> %zextsrc1, %zextsrc2 1330 %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1331 ret <8 x i16> %resulti16 1332} 1333 1334define <16 x i8> @sextmaskv16i8(<16 x i16> %src1, <16 x i8> %src2) { 1335; CHECK-LABEL: sextmaskv16i8: 1336; CHECK: // %bb.0: 1337; CHECK-NEXT: sshr.8h v1, v1, #11 1338; CHECK-NEXT: sshr.8h v0, v0, #11 1339; CHECK-NEXT: uzp1.16b v0, v0, v1 1340; CHECK-NEXT: shadd.16b v0, 
v0, v2 1341; CHECK-NEXT: ret 1342 %sextsrc1 = ashr <16 x i16> %src1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> 1343 %sextsrc2 = sext <16 x i8> %src2 to <16 x i16> 1344 %add = add nsw <16 x i16> %sextsrc1, %sextsrc2 1345 %1 = ashr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1346 %result = trunc <16 x i16> %1 to <16 x i8> 1347 ret <16 x i8> %result 1348} 1349 1350define <8 x i8> @sextmaskv8i8(<8 x i16> %src1, <8 x i8> %src2) { 1351; CHECK-LABEL: sextmaskv8i8: 1352; CHECK: // %bb.0: 1353; CHECK-NEXT: sshr.8h v0, v0, #11 1354; CHECK-NEXT: xtn.8b v0, v0 1355; CHECK-NEXT: shadd.8b v0, v0, v1 1356; CHECK-NEXT: ret 1357 %sextsrc1 = ashr <8 x i16> %src1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> 1358 %sextsrc2 = sext <8 x i8> %src2 to <8 x i16> 1359 %add = add nsw <8 x i16> %sextsrc1, %sextsrc2 1360 %1 = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1361 %result = trunc <8 x i16> %1 to <8 x i8> 1362 ret <8 x i8> %result 1363} 1364 1365define <8 x i8> @sextmask2v8i8(<8 x i16> %src1, <8 x i8> %src2) { 1366; CHECK-LABEL: sextmask2v8i8: 1367; CHECK: // %bb.0: 1368; CHECK-NEXT: shrn.8b v0, v0, #8 1369; CHECK-NEXT: shadd.8b v0, v0, v1 1370; CHECK-NEXT: ret 1371 %sextsrc1 = ashr <8 x i16> %src1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1372 %sextsrc2 = sext <8 x i8> %src2 to <8 x i16> 1373 %add = add nsw <8 x i16> %sextsrc1, %sextsrc2 1374 %1 = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1375 %result = trunc <8 x i16> %1 to <8 x i8> 1376 ret <8 x i8> %result 1377} 1378 1379define <8 x i8> @sextmask3v8i8(<8 x i16> %src1, <8 x i8> %src2) { 1380; CHECK-LABEL: sextmask3v8i8: 1381; CHECK: // %bb.0: 1382; CHECK-NEXT: ushr.8h v0, v0, #7 1383; CHECK-NEXT: sshll.8h v1, v1, #0 1384; CHECK-NEXT: shadd.8h v0, v0, v1 
1385; CHECK-NEXT: xtn.8b v0, v0 1386; CHECK-NEXT: ret 1387 %1 = ashr <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> 1388 %sextsrc2 = sext <8 x i8> %src2 to <8 x i16> 1389 %add = add nsw <8 x i16> %1, %sextsrc2 1390 %2 = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1391 %result = trunc <8 x i16> %2 to <8 x i8> 1392 ret <8 x i8> %result 1393} 1394 1395define <4 x i16> @ext_via_i19(<4 x i16> %a) { 1396; CHECK-LABEL: ext_via_i19: 1397; CHECK: // %bb.0: 1398; CHECK-NEXT: movi.4h v1, #1 1399; CHECK-NEXT: urhadd.4h v0, v0, v1 1400; CHECK-NEXT: ret 1401 %t3 = zext <4 x i16> %a to <4 x i32> 1402 %t4 = add <4 x i32> %t3, <i32 1, i32 1, i32 1, i32 1> 1403 %t5 = trunc <4 x i32> %t4 to <4 x i19> 1404 %new0 = add <4 x i19> %t5, <i19 1, i19 1, i19 1, i19 1> 1405 %new1 = lshr <4 x i19> %new0, <i19 1, i19 1, i19 1, i19 1> 1406 %last = zext <4 x i19> %new1 to <4 x i32> 1407 %t6 = trunc <4 x i32> %last to <4 x i16> 1408 ret <4 x i16> %t6 1409} 1410 1411declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>) 1412declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>) 1413declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>) 1414declare <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>) 1415declare <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16>, <4 x i16>) 1416declare <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32>, <2 x i32>) 1417declare <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8>, <16 x i8>) 1418declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>) 1419declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>) 1420declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>) 1421declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>) 1422declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>) 1423