; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s

; Codegen tests for unsigned (uabd) and signed (sabd) absolute-difference
; selection on <8 x i16> vectors for AArch64. Each function states an input
; pattern; the autogenerated assertion lines inside it record the assembly
; that llc is expected to emit for that pattern.

; --- Unsigned: trunc(abs(sub(zext, zext))) patterns ---

; Both operands are zero-extended to i32, subtracted, passed through llvm.abs
; and truncated back to i16. Expected output is a single uabd.
define <8 x i16> @abdu_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: abdu_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; Same shape as abdu_base, but the right-hand operand is an i32 splat of 1
; rather than a zext. Expected output is the widened ushll/sub/abs/uzp1
; sequence; no uabd is expected here.
define <8 x i16> @abdu_const(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    sub v1.4s, v2.4s, v1.4s
; CHECK-NEXT:    abs v1.4s, v1.4s
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; The i32 splat of 1 is the left-hand operand of the subtraction. Expected
; output uses usubw/usubw2 followed by abs and uzp1.
define <8 x i16> @abdu_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    usubw v2.4s, v1.4s, v0.4h
; CHECK-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    abs v1.4s, v2.4s
; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; The zero-extended source is subtracted from an all-zero i32 vector.
; Expected output keeps the widened neg/usubw2 + abs + uzp1 sequence.
define <8 x i16> @abdu_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: abdu_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
; CHECK-NEXT:    usubw2 v0.4s, v1.4s, v0.8h
; CHECK-NEXT:    neg v1.4s, v2.4s
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    abs v1.4s, v1.4s
; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; Both operands are i32 constant splats (3 and 1); there is no extension.
; Expected output is the folded constant splat of 2.
define <8 x i16> @abdu_const_both() {
; CHECK-LABEL: abdu_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; Both operands are zero-extended i16 constant splats near the top of the
; unsigned range (65534 and 65535). Expected output is a constant splat of 1.
define <8 x i16> @abdu_const_bothhigh() {
; CHECK-LABEL: abdu_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #1
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
  %zextsrc2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; The second operand is a zext of undef. Expected output is a bare ret with
; no other instructions.
define <8 x i16> @abdu_undef(<8 x i16> %src1) {
; CHECK-LABEL: abdu_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> undef to <8 x i32>
  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; --- Compare + select patterns (operands are the unnamed arguments %0, %1) ---

; select(%0 ugt %1, %0 - %1, %1 - %0). Expected output is a single uabd.
define <8 x i16> @abdu_ugt(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abdu_ugt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %3 = icmp ugt <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
  ret <8 x i16> %6
}

; select(%0 uge %1, %0 - %1, %1 - %0). Expected output is a single uabd.
define <8 x i16> @abdu_uge(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abdu_uge:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %3 = icmp uge <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
  ret <8 x i16> %6
}

; select(%0 ult %1, %1 - %0, %0 - %1): the select arms are swapped relative
; to the ugt/uge variants. Expected output is a single uabd.
define <8 x i16> @abdu_ult(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abdu_ult:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %3 = icmp ult <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
  ret <8 x i16> %6
}

; select(%0 ule %1, %1 - %0, %0 - %1). Expected output is a single uabd.
define <8 x i16> @abdu_ule(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abdu_ule:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %3 = icmp ule <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
  ret <8 x i16> %6
}

; select(%0 sgt %1, %0 - %1, %1 - %0). Expected output is a single sabd.
define <8 x i16> @abds_sgt(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abds_sgt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %3 = icmp sgt <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
  ret <8 x i16> %6
}

; select(%0 sge %1, %0 - %1, %1 - %0). Expected output is a single sabd.
define <8 x i16> @abds_sge(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abds_sge:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %3 = icmp sge <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
  ret <8 x i16> %6
}

; select(%0 slt %1, %1 - %0, %0 - %1). Expected output is a single sabd.
define <8 x i16> @abds_slt(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abds_slt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %3 = icmp slt <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
  ret <8 x i16> %6
}

; select(%0 sle %1, %1 - %0, %0 - %1). Expected output is a single sabd.
define <8 x i16> @abds_sle(<8 x i16>, <8 x i16>) {
; CHECK-LABEL: abds_sle:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %3 = icmp sle <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
  ret <8 x i16> %6
}


; --- Unsigned: direct calls to the llvm.aarch64.neon.uabd intrinsic ---

; Intrinsic call on two variable operands. Expected output is a single uabd.
define <8 x i16> @abdu_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: abdu_i_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %result
}

; Right-hand operand is a splat of 1. Expected output materializes the splat
; with movi and then issues uabd.
define <8 x i16> @abdu_i_const(<8 x i16> %src1) {
; CHECK-LABEL: abdu_i_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

; Left-hand operand is a splat of 1. The expected output is the same
; movi + uabd sequence as for abdu_i_const, with the variable operand first.
define <8 x i16> @abdu_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abdu_i_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %result
}

; uabd(0, %src1). Expected output is only a register move of v1 into v0, i.e.
; no uabd is emitted. %t is unused in the body; presumably it exists so that
; %src1 does not arrive in the return register - TODO confirm.
define <8 x i16> @abdu_i_const_zero(float %t, <8 x i16> %src1) {
; CHECK-LABEL: abdu_i_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

; uabd of constant splats 3 and 1. Expected output is a constant splat of 2.
define <8 x i16> @abdu_i_const_both() {
; CHECK-LABEL: abdu_i_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

; uabd of constant splats 65534 and 65535. Expected output is a constant
; splat of 1.
define <8 x i16> @abdu_i_const_bothhigh() {
; CHECK-LABEL: abdu_i_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #1
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
  ret <8 x i16> %result
}

; uabd of constant splats 32766 and 1. Expected output is a constant splat of
; 32765 (0x7ffd), materialized through a GPR mov followed by dup.
define <8 x i16> @abdu_i_const_onehigh() {
; CHECK-LABEL: abdu_i_const_onehigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #32765 // =0x7ffd
; CHECK-NEXT:    dup v0.8h, w8
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

; uabd of constant splats 32766 and 65534. Expected output is a constant
; splat encoded as movi #128, lsl #8.
define <8 x i16> @abdu_i_const_oneneg() {
; CHECK-LABEL: abdu_i_const_oneneg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #128, lsl #8
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>)
  ret <8 x i16> %result
}

; uabd(0, %src1) with an unused leading vector argument %t. Expected output
; is only a register move of v1 into v0.
define <8 x i16> @abdu_i_zero(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: abdu_i_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

; uabd(undef, %src1). Expected output is an all-zero vector.
define <8 x i16> @abdu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: abdu_i_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %result
}

; Two chained uabd calls with constant right-hand operands (3, then 1).
; Expected output keeps both uabd instructions; the constants are not merged.
define <8 x i16> @abdu_i_reassoc(<8 x i16> %src1) {
; CHECK-LABEL: abdu_i_reassoc:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #3
; CHECK-NEXT:    movi v2.8h, #1
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    uabd v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    ret
  %r1 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %result = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %r1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}


; --- Signed: trunc(abs(sub(sext, sext))) patterns ---
; The value names %zextsrc1/%zextsrc2 are reused from the unsigned tests, but
; in this section they hold the results of sext instructions.


; Both operands are sign-extended to i32, subtracted, passed through llvm.abs
; and truncated back to i16. Expected output is a single sabd.
define <8 x i16> @abds_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: abds_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; Right-hand operand is an i32 splat of 1 rather than a sext. Expected output
; is the widened sshll/sub/abs/uzp1 sequence; no sabd is expected here.
define <8 x i16> @abds_const(<8 x i16> %src1) {
; CHECK-LABEL: abds_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    sub v1.4s, v2.4s, v1.4s
; CHECK-NEXT:    abs v1.4s, v1.4s
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %sub = sub <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; The i32 splat of 1 is the left-hand operand of the subtraction. Expected
; output uses ssubw/ssubw2 followed by abs and uzp1.
define <8 x i16> @abds_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abds_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    ssubw v2.4s, v1.4s, v0.4h
; CHECK-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    abs v1.4s, v2.4s
; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; The sign-extended source is subtracted from an all-zero i32 vector.
; Expected output keeps the widened neg/ssubw2 + abs + uzp1 sequence.
define <8 x i16> @abds_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: abds_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
; CHECK-NEXT:    ssubw2 v0.4s, v1.4s, v0.8h
; CHECK-NEXT:    neg v1.4s, v2.4s
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    abs v1.4s, v1.4s
; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %sub = sub <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; Both operands are i32 constant splats (3 and 1); there is no extension.
; Expected output is the folded constant splat of 2.
define <8 x i16> @abds_const_both() {
; CHECK-LABEL: abds_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %sub = sub <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; Both operands are sign-extended i16 constant splats written as 65534 and
; 65535. Expected output is a constant splat of 1.
define <8 x i16> @abds_const_bothhigh() {
; CHECK-LABEL: abds_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #1
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
  %zextsrc2 = sext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; The second operand is a sext of undef. Unlike abdu_undef, the expected
; output still sign-extends %src1 and takes abs of both halves before
; narrowing with uzp1; no subtraction instruction is expected.
define <8 x i16> @abds_undef(<8 x i16> %src1) {
; CHECK-LABEL: abds_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    abs v1.4s, v1.4s
; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = sext <8 x i16> undef to <8 x i32>
  %sub = sub <8 x i32> %zextsrc1, %zextsrc2
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 0)
  %result = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %result
}

; --- Signed: direct calls to the llvm.aarch64.neon.sabd intrinsic ---


; Intrinsic call on two variable operands. Expected output is a single sabd.
define <8 x i16> @abds_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: abds_i_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %result
}

; Right-hand operand is a splat of 1. Expected output materializes the splat
; with movi and then issues sabd.
define <8 x i16> @abds_i_const(<8 x i16> %src1) {
; CHECK-LABEL: abds_i_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

; Left-hand operand is a splat of 1. The expected output is the same
; movi + sabd sequence as for abds_i_const, with the variable operand first.
define <8 x i16> @abds_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: abds_i_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %result
}

; sabd(0, %src1). Expected output is a single vector abs of the source.
define <8 x i16> @abds_i_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: abds_i_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    abs v0.8h, v0.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

; sabd of constant splats 3 and 1. Expected output is a constant splat of 2.
define <8 x i16> @abds_i_const_both() {
; CHECK-LABEL: abds_i_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

; sabd of constant splats 32766 and 32767. Expected output is a constant
; splat of 1.
define <8 x i16> @abds_i_const_bothhigh() {
; CHECK-LABEL: abds_i_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #1
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
  ret <8 x i16> %result
}

; sabd of constant splats 32766 and 1. Expected output is a constant splat of
; 32765 (0x7ffd), materialized through a GPR mov followed by dup.
define <8 x i16> @abds_i_const_onehigh() {
; CHECK-LABEL: abds_i_const_onehigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #32765 // =0x7ffd
; CHECK-NEXT:    dup v0.8h, w8
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

; sabd of constant splats 32766 and 65534. Expected output is a constant
; splat encoded as movi #128, lsl #8.
define <8 x i16> @abds_i_const_oneneg() {
; CHECK-LABEL: abds_i_const_oneneg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #128, lsl #8
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>)
  ret <8 x i16> %result
}

; sabd(0, %src1) with an unused leading vector argument %t. Expected output
; is a single abs reading v1 and writing v0.
define <8 x i16> @abds_i_zero(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: abds_i_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    abs v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

; sabd(undef, %src1). Expected output is an all-zero vector.
define <8 x i16> @abds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: abds_i_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %result
}

; Two chained sabd calls with constant right-hand operands (3, then 1).
; Expected output keeps both sabd instructions; the constants are not merged.
define <8 x i16> @abds_i_reassoc(<8 x i16> %src1) {
; CHECK-LABEL: abds_i_reassoc:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #3
; CHECK-NEXT:    movi v2.8h, #1
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    sabd v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    ret
  %r1 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %src1, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %result = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %r1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

; An all-constant chain: umax(0, -1) feeds uabd(.., 1) on <8 x i8>; the result
; is then used twice, once via zext/bitcast/shufflevector into saddlp and once
; via a lane-0 splat shuffle, and the two <1 x i64> values are or'ed together.
; Expected output starts from a materialized splat of 254, so the umax/uabd
; pair is expected to be folded away.
; NOTE(review): presumably a regression test for the combine being re-applied
; to its own folded result - confirm against the commit that added it.
define <1 x i64> @recursive() {
; CHECK-LABEL: recursive:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8b, #254
; CHECK-NEXT:    ushll v1.8h, v0.8b, #0
; CHECK-NEXT:    dup v0.8b, v0.b[0]
; CHECK-NEXT:    saddlp v1.1d, v1.2s
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %1 = tail call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> zeroinitializer, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %2 = tail call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
  %3 = zext <8 x i8> %2 to <8 x i16>
  %4 = bitcast <8 x i16> %3 to <4 x i32>
  %5 = shufflevector <4 x i32> %4, <4 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
  %6 = shufflevector <8 x i8> %2, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %7 = bitcast <16 x i8> %6 to <2 x i64>
  %8 = shufflevector <2 x i64> %7, <2 x i64> zeroinitializer, <1 x i32> zeroinitializer
  %9 = tail call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %5)
  %10 = or <1 x i64> %8, %9
  ret <1 x i64> %10
}

; Declarations of the intrinsics called by the functions in this file.
declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>)
declare <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32>)
declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)