; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s

define <8 x i16> @haddu_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: haddu_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
  %add = add <8 x i32> %zextsrc1, %zextsrc2
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @haddu_const(<8 x i16> %src1) {
; CHECK-LABEL: haddu_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @haddu_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: haddu_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @haddu_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: haddu_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v1.4s, v0.4h, #0
; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
; CHECK-NEXT:    shrn v0.4h, v1.4s, #1
; CHECK-NEXT:    shrn2 v0.8h, v2.4s, #1
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @haddu_const_both() {
; CHECK-LABEL: haddu_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %add = add <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @haddu_const_bothhigh() {
; CHECK-LABEL: haddu_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v0.8h, #1
; CHECK-NEXT:    ret
  %ext1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
  %ext2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
  %add = add <8 x i32> %ext1, %ext2
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @haddu_undef(<8 x i16> %src1) {
; CHECK-LABEL: haddu_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v1.4s, v0.4h, #0
; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
; CHECK-NEXT:    shrn v0.4h, v1.4s, #1
; CHECK-NEXT:    shrn2 v0.8h, v2.4s, #1
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> undef to <8 x i32>
  %add = add <8 x i32> %zextsrc2, %zextsrc1
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
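
; As above, but starting from the @llvm.aarch64.neon.uhadd intrinsic rather than the IR pattern.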
define <8 x i16> @haddu_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: haddu_i_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_const(<8 x i16> %src1) {
; CHECK-LABEL: haddu_i_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: haddu_i_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: haddu_i_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.8h, v0.8h, #1
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_const_both() {
; CHECK-LABEL: haddu_i_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_const_bothhigh() {
; CHECK-LABEL: haddu_i_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v0.8h, #1
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: haddu_i_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %result
}
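
; Signed halving add patterns: sext + add + ashr #1 + trunc, selected to shadd where possible.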
define <8 x i16> @hadds_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: hadds_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
  %add = add <8 x i32> %zextsrc1, %zextsrc2
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @hadds_const(<8 x i16> %src1) {
; CHECK-LABEL: hadds_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @hadds_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: hadds_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @hadds_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: hadds_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
; CHECK-NEXT:    shrn v0.4h, v1.4s, #1
; CHECK-NEXT:    shrn2 v0.8h, v2.4s, #1
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @hadds_const_both() {
; CHECK-LABEL: hadds_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %add = add <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @hadds_const_bothhigh() {
; CHECK-LABEL: hadds_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #32766 // =0x7ffe
; CHECK-NEXT:    dup v0.8h, w8
; CHECK-NEXT:    ret
  %ext1 = sext <8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766> to <8 x i32>
  %ext2 = sext <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767> to <8 x i32>
  %add = add <8 x i32> %ext1, %ext2
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @hadds_undef(<8 x i16> %src1) {
; CHECK-LABEL: hadds_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
; CHECK-NEXT:    shrn v0.4h, v1.4s, #1
; CHECK-NEXT:    shrn2 v0.8h, v2.4s, #1
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = sext <8 x i16> undef to <8 x i32>
  %add = add <8 x i32> %zextsrc2, %zextsrc1
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
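
; As above, but starting from the @llvm.aarch64.neon.shadd intrinsic rather than the IR pattern.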
define <8 x i16> @hadds_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: hadds_i_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_const(<8 x i16> %src1) {
; CHECK-LABEL: hadds_i_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: hadds_i_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: hadds_i_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshr v0.8h, v0.8h, #1
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_const_both() {
; CHECK-LABEL: hadds_i_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_const_bothhigh() {
; CHECK-LABEL: hadds_i_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #32766 // =0x7ffe
; CHECK-NEXT:    dup v0.8h, w8
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: hadds_i_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @sub_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: sub_fixedwidth_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %or = or <8 x i16> %a0, %a1
  %xor = xor <8 x i16> %a0, %a1
  %srl = lshr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %res = sub <8 x i16> %or, %srl
  ret <8 x i16> %res
}

define <8 x i16> @srhadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: srhadd_fixedwidth_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %or = or <8 x i16> %a0, %a1
  %xor = xor <8 x i16> %a0, %a1
  %srl = ashr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %res = sub <8 x i16> %or, %srl
  ret <8 x i16> %res
}
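
; Unsigned rounding halving add patterns: zext + add + add 1 + lshr #1 + trunc, selected to urhadd where possible.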
define <8 x i16> @rhaddu_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: rhaddu_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
  %add1 = add <8 x i32> %zextsrc1, %zextsrc2
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_const(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_const_both() {
; CHECK-LABEL: rhaddu_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %add1 = add <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_const_bothhigh() {
; CHECK-LABEL: rhaddu_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
; CHECK-NEXT:    ret
  %ext1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
  %ext2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
  %add1 = add <8 x i32> %ext1, %ext2
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_undef(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> undef to <8 x i32>
  %add1 = add <8 x i32> %zextsrc2, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
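
; As above, but starting from the @llvm.aarch64.neon.urhadd intrinsic rather than the IR pattern.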
define <8 x i16> @rhaddu_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: rhaddu_i_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_const(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_i_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_i_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_i_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_const_both() {
; CHECK-LABEL: rhaddu_i_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_const_bothhigh() {
; CHECK-LABEL: rhaddu_i_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: rhaddu_i_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %result
}
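
; Signed rounding halving add patterns: sext + add + add 1 + ashr #1 + trunc, selected to srhadd where possible.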
define <8 x i16> @rhadds_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: rhadds_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
  %add1 = add <8 x i32> %zextsrc1, %zextsrc2
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_const(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_const_both() {
; CHECK-LABEL: rhadds_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %add1 = add <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_const_bothhigh() {
; CHECK-LABEL: rhadds_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v0.8h, #128, lsl #8
; CHECK-NEXT:    ret
  %ext1 = sext <8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766> to <8 x i32>
  %ext2 = sext <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767> to <8 x i32>
  %add1 = add <8 x i32> %ext1, %ext2
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_undef(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = sext <8 x i16> undef to <8 x i32>
  %add1 = add <8 x i32> %zextsrc2, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
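
; As above, but starting from the @llvm.aarch64.neon.srhadd intrinsic rather than the IR pattern.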
define <8 x i16> @rhadds_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: rhadds_i_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_const(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_i_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_i_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_i_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_const_both() {
; CHECK-LABEL: rhadds_i_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_const_bothhigh() {
; CHECK-LABEL: rhadds_i_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v0.8h, #128, lsl #8
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: rhadds_i_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %result
}
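
; hadd(x, x) and rhadd(x, x) are both equal to x, so these calls should fold away completely.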
define <8 x i8> @shadd_v8i8(<8 x i8> %x) {
; CHECK-LABEL: shadd_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %x, <8 x i8> %x)
  ret <8 x i8> %r
}

define <4 x i16> @shadd_v4i16(<4 x i16> %x) {
; CHECK-LABEL: shadd_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %x, <4 x i16> %x)
  ret <4 x i16> %r
}

define <2 x i32> @shadd_v2i32(<2 x i32> %x) {
; CHECK-LABEL: shadd_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %x, <2 x i32> %x)
  ret <2 x i32> %r
}

define <16 x i8> @shadd_v16i8(<16 x i8> %x) {
; CHECK-LABEL: shadd_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %x, <16 x i8> %x)
  ret <16 x i8> %r
}

define <8 x i16> @shadd_v8i16(<8 x i16> %x) {
; CHECK-LABEL: shadd_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x, <8 x i16> %x)
  ret <8 x i16> %r
}

define <4 x i32> @shadd_v4i32(<4 x i32> %x) {
; CHECK-LABEL: shadd_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %x, <4 x i32> %x)
  ret <4 x i32> %r
}

define <8 x i8> @uhadd_v8i8(<8 x i8> %x) {
; CHECK-LABEL: uhadd_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %x, <8 x i8> %x)
  ret <8 x i8> %r
}

define <4 x i16> @uhadd_v4i16(<4 x i16> %x) {
; CHECK-LABEL: uhadd_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %x, <4 x i16> %x)
  ret <4 x i16> %r
}

define <2 x i32> @uhadd_v2i32(<2 x i32> %x) {
; CHECK-LABEL: uhadd_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %x, <2 x i32> %x)
  ret <2 x i32> %r
}

define <16 x i8> @uhadd_v16i8(<16 x i8> %x) {
; CHECK-LABEL: uhadd_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %x, <16 x i8> %x)
  ret <16 x i8> %r
}

define <8 x i16> @uhadd_v8i16(<8 x i16> %x) {
; CHECK-LABEL: uhadd_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x, <8 x i16> %x)
  ret <8 x i16> %r
}

define <4 x i32> @uhadd_v4i32(<4 x i32> %x) {
; CHECK-LABEL: uhadd_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %x, <4 x i32> %x)
  ret <4 x i32> %r
}
define <8 x i8> @srhadd_v8i8(<8 x i8> %x) {
; CHECK-LABEL: srhadd_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %x, <8 x i8> %x)
  ret <8 x i8> %r
}

define <4 x i16> @srhadd_v4i16(<4 x i16> %x) {
; CHECK-LABEL: srhadd_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %x, <4 x i16> %x)
  ret <4 x i16> %r
}

define <2 x i32> @srhadd_v2i32(<2 x i32> %x) {
; CHECK-LABEL: srhadd_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %x, <2 x i32> %x)
  ret <2 x i32> %r
}

define <16 x i8> @srhadd_v16i8(<16 x i8> %x) {
; CHECK-LABEL: srhadd_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %x, <16 x i8> %x)
  ret <16 x i8> %r
}

define <8 x i16> @srhadd_v8i16(<8 x i16> %x) {
; CHECK-LABEL: srhadd_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x, <8 x i16> %x)
  ret <8 x i16> %r
}

define <4 x i32> @srhadd_v4i32(<4 x i32> %x) {
; CHECK-LABEL: srhadd_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %x, <4 x i32> %x)
  ret <4 x i32> %r
}

define <8 x i8> @urhadd_v8i8(<8 x i8> %x) {
; CHECK-LABEL: urhadd_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %x, <8 x i8> %x)
  ret <8 x i8> %r
}

define <4 x i16> @urhadd_v4i16(<4 x i16> %x) {
; CHECK-LABEL: urhadd_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %x, <4 x i16> %x)
  ret <4 x i16> %r
}

define <2 x i32> @urhadd_v2i32(<2 x i32> %x) {
; CHECK-LABEL: urhadd_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %x, <2 x i32> %x)
  ret <2 x i32> %r
}

define <16 x i8> @urhadd_v16i8(<16 x i8> %x) {
; CHECK-LABEL: urhadd_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %x, <16 x i8> %x)
  ret <16 x i8> %r
}

define <8 x i16> @urhadd_v8i16(<8 x i16> %x) {
; CHECK-LABEL: urhadd_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x, <8 x i16> %x)
  ret <8 x i16> %r
}

define <4 x i32> @urhadd_v4i32(<4 x i32> %x) {
; CHECK-LABEL: urhadd_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %x, <4 x i32> %x)
  ret <4 x i32> %r
}

define <8 x i16> @uhadd_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: uhadd_fixedwidth_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %and = and <8 x i16> %a0, %a1
  %xor = xor <8 x i16> %a0, %a1
  %srl = lshr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %res = add <8 x i16> %and, %srl
  ret <8 x i16> %res
}

define <8 x i16> @shadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: shadd_fixedwidth_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %and = and <8 x i16> %a0, %a1
  %xor = xor <8 x i16> %a0, %a1
  %srl = ashr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %res = add <8 x i16> %and, %srl
  ret <8 x i16> %res
}
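
; Check that demanded-elements analysis is propagated through the hadd/rhadd intrinsics.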
define <8 x i16> @shadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: shadd_demandedelts:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
  %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
  %op = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
  %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r0
}

define <8 x i16> @srhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: srhadd_demandedelts:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
  %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
  %op = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
  %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r0
}

define <8 x i16> @uhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: uhadd_demandedelts:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
  %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
  %op = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
  %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r0
}

define <8 x i16> @urhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: urhadd_demandedelts:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
  %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
  %op = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
  %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r0
}

; Remove unnecessary sign_extend_inreg after shadd
define <2 x i32> @shadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
; CHECK-LABEL: shadd_signbits_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshr v0.2s, v0.2s, #17
; CHECK-NEXT:    sshr v1.2s, v1.2s, #17
; CHECK-NEXT:    shadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
  %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
  %m = and <2 x i32> %x0, %x1
  %s = xor <2 x i32> %x0, %x1
  %x = ashr <2 x i32> %s, <i32 1, i32 1>
  %avg = add <2 x i32> %m, %x
  %avg1 = shl <2 x i32> %avg, <i32 17, i32 17>
  %avg2 = ashr <2 x i32> %avg1, <i32 17, i32 17>
  store <2 x i32> %avg, ptr %p2 ; extra use
  ret <2 x i32> %avg2
}

; Remove unnecessary sign_extend_inreg after srhadd
define <2 x i32> @srhadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
; CHECK-LABEL: srhadd_signbits_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshr v0.2s, v0.2s, #17
; CHECK-NEXT:    sshr v1.2s, v1.2s, #17
; CHECK-NEXT:    srhadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
  %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
  %m = or <2 x i32> %x0, %x1
  %s = xor <2 x i32> %x0, %x1
  %x = ashr <2 x i32> %s, <i32 1, i32 1>
  %avg = sub <2 x i32> %m, %x
  %avg1 = shl <2 x i32> %avg, <i32 17, i32 17>
  %avg2 = ashr <2 x i32> %avg1, <i32 17, i32 17>
  store <2 x i32> %avg, ptr %p2 ; extra use
  ret <2 x i32> %avg2
}

; negative test - not enough signbits to remove sign_extend_inreg after srhadd
define <2 x i32> @srhadd_signbits_v2i32_negative(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
; CHECK-LABEL: srhadd_signbits_v2i32_negative:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshr v0.2s, v0.2s, #17
; CHECK-NEXT:    sshr v1.2s, v1.2s, #17
; CHECK-NEXT:    srhadd v1.2s, v0.2s, v1.2s
; CHECK-NEXT:    shl v0.2s, v1.2s, #22
; CHECK-NEXT:    str d1, [x0]
; CHECK-NEXT:    sshr v0.2s, v0.2s, #22
; CHECK-NEXT:    ret
  %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
  %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
  %m = or <2 x i32> %x0, %x1
  %s = xor <2 x i32> %x0, %x1
  %x = ashr <2 x i32> %s, <i32 1, i32 1>
  %avg = sub <2 x i32> %m, %x
  %avg1 = shl <2 x i32> %avg, <i32 22, i32 22>
  %avg2 = ashr <2 x i32> %avg1, <i32 22, i32 22>
  store <2 x i32> %avg, ptr %p2 ; extra use
  ret <2 x i32> %avg2
}

declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)
declare <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32>, <2 x i32>)
declare <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>)

declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>)
declare <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32>, <2 x i32>)
declare <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)