; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

;
; SABD
;

define <8 x i8> @sabd_8b(<8 x i8> %a, <8 x i8> %b) #0 {
; CHECK-LABEL: sabd_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %a.sext = sext <8 x i8> %a to <8 x i16>
  %b.sext = sext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.sext, %b.sext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  %trunc = trunc <8 x i16> %abs to <8 x i8>
  ret <8 x i8> %trunc
}

define <16 x i8> @sabd_16b(<16 x i8> %a, <16 x i8> %b) #0 {
; CHECK-LABEL: sabd_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %a.sext = sext <16 x i8> %a to <16 x i16>
  %b.sext = sext <16 x i8> %b to <16 x i16>
  %sub = sub <16 x i16> %a.sext, %b.sext
  %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
  %trunc = trunc <16 x i16> %abs to <16 x i8>
  ret <16 x i8> %trunc
}

define <4 x i16> @sabd_4h(<4 x i16> %a, <4 x i16> %b) #0 {
; CHECK-LABEL: sabd_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %a.sext = sext <4 x i16> %a to <4 x i32>
  %b.sext = sext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.sext, %b.sext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  %trunc = trunc <4 x i32> %abs to <4 x i16>
  ret <4 x i16> %trunc
}

; Operands narrower than a legal element type are sign-extended in
; registers (shl + sshr) before the sabd.
define <4 x i16> @sabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-LABEL: sabd_4h_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v1.4h, v1.4h, #8
; CHECK-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-NEXT:    sshr v1.4h, v1.4h, #8
; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-NEXT:    sabd v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %a.sext = sext <4 x i8> %a to <4 x i16>
  %b.sext = sext <4 x i8> %b to <4 x i16>
  %sub = sub <4 x i16> %a.sext, %b.sext
  %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
  ret <4 x i16> %abs
}

define <8 x i16> @sabd_8h(<8 x i16> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: sabd_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %a.sext = sext <8 x i16> %a to <8 x i32>
  %b.sext = sext <8 x i16> %b to <8 x i32>
  %sub = sub <8 x i32> %a.sext, %b.sext
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
  %trunc = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %trunc
}

; When the promoted operands have a legal narrow type, the widening
; sabdl form is selected instead of extend + sabd.
define <8 x i16> @sabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) #0 {
; CHECK-LABEL: sabd_8h_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabdl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %a.sext = sext <8 x i8> %a to <8 x i16>
  %b.sext = sext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.sext, %b.sext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

define <2 x i32> @sabd_2s(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: sabd_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %a.sext = sext <2 x i32> %a to <2 x i64>
  %b.sext = sext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.sext, %b.sext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  %trunc = trunc <2 x i64> %abs to <2 x i32>
  ret <2 x i32> %trunc
}

define <2 x i32> @sabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) #0 {
; CHECK-LABEL: sabd_2s_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v1.2s, v1.2s, #16
; CHECK-NEXT:    shl v0.2s, v0.2s, #16
; CHECK-NEXT:    sshr v1.2s, v1.2s, #16
; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
; CHECK-NEXT:    sabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %a.sext = sext <2 x i16> %a to <2 x i32>
  %b.sext = sext <2 x i16> %b to <2 x i32>
  %sub = sub <2 x i32> %a.sext, %b.sext
  %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
  ret <2 x i32> %abs
}

define <4 x i32> @sabd_4s(<4 x i32> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: sabd_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %a.sext = sext <4 x i32> %a to <4 x i64>
  %b.sext = sext <4 x i32> %b to <4 x i64>
  %sub = sub <4 x i64> %a.sext, %b.sext
  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
  %trunc = trunc <4 x i64> %abs to <4 x i32>
  ret <4 x i32> %trunc
}

define <4 x i32> @sabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) #0 {
; CHECK-LABEL: sabd_4s_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabdl v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %a.sext = sext <4 x i16> %a to <4 x i32>
  %b.sext = sext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.sext, %b.sext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

; No 2d sabd instruction exists, so the i64 elements are expanded
; (cmgt/sub/eor/sub sequence).
define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-LABEL: sabd_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmgt v2.2d, v0.2d, v1.2d
; CHECK-NEXT:    sub v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    eor v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    sub v0.2d, v2.2d, v0.2d
; CHECK-NEXT:    ret
  %a.sext = sext <2 x i64> %a to <2 x i128>
  %b.sext = sext <2 x i64> %b to <2 x i128>
  %sub = sub <2 x i128> %a.sext, %b.sext
  %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
  %trunc = trunc <2 x i128> %abs to <2 x i64>
  ret <2 x i64> %trunc
}

define <2 x i64> @sabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: sabd_2d_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabdl v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %a.sext = sext <2 x i32> %a to <2 x i64>
  %b.sext = sext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.sext, %b.sext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

;
; UABD
;

define <8 x i8> @uabd_8b(<8 x i8> %a, <8 x i8> %b) #0 {
; CHECK-LABEL: uabd_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %a.zext = zext <8 x i8> %a to <8 x i16>
  %b.zext = zext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.zext, %b.zext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  %trunc = trunc <8 x i16> %abs to <8 x i8>
  ret <8 x i8> %trunc
}

define <16 x i8> @uabd_16b(<16 x i8> %a, <16 x i8> %b) #0 {
; CHECK-LABEL: uabd_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %a.zext = zext <16 x i8> %a to <16 x i16>
  %b.zext = zext <16 x i8> %b to <16 x i16>
  %sub = sub <16 x i16> %a.zext, %b.zext
  %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
  %trunc = trunc <16 x i16> %abs to <16 x i8>
  ret <16 x i8> %trunc
}

define <4 x i16> @uabd_4h(<4 x i16> %a, <4 x i16> %b) #0 {
; CHECK-LABEL: uabd_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %a.zext = zext <4 x i16> %a to <4 x i32>
  %b.zext = zext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.zext, %b.zext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  %trunc = trunc <4 x i32> %abs to <4 x i16>
  ret <4 x i16> %trunc
}

; Unsigned promoted operands are zero-extended with bic/and masks
; rather than the shl/sshr pairs used for the signed cases.
define <4 x i16> @uabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-LABEL: uabd_4h_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic v1.4h, #255, lsl #8
; CHECK-NEXT:    bic v0.4h, #255, lsl #8
; CHECK-NEXT:    uabd v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %a.zext = zext <4 x i8> %a to <4 x i16>
  %b.zext = zext <4 x i8> %b to <4 x i16>
  %sub = sub <4 x i16> %a.zext, %b.zext
  %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
  ret <4 x i16> %abs
}

define <8 x i16> @uabd_8h(<8 x i16> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: uabd_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %a.zext = zext <8 x i16> %a to <8 x i32>
  %b.zext = zext <8 x i16> %b to <8 x i32>
  %sub = sub <8 x i32> %a.zext, %b.zext
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
  %trunc = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %trunc
}

define <8 x i16> @uabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) #0 {
; CHECK-LABEL: uabd_8h_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabdl v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %a.zext = zext <8 x i8> %a to <8 x i16>
  %b.zext = zext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.zext, %b.zext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

define <2 x i32> @uabd_2s(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: uabd_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %a.zext = zext <2 x i32> %a to <2 x i64>
  %b.zext = zext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.zext, %b.zext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  %trunc = trunc <2 x i64> %abs to <2 x i32>
  ret <2 x i32> %trunc
}

define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) #0 {
; CHECK-LABEL: uabd_2s_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d2, #0x00ffff0000ffff
; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
; CHECK-NEXT:    uabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %a.zext = zext <2 x i16> %a to <2 x i32>
  %b.zext = zext <2 x i16> %b to <2 x i32>
  %sub = sub <2 x i32> %a.zext, %b.zext
  %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
  ret <2 x i32> %abs
}

define <4 x i32> @uabd_4s(<4 x i32> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: uabd_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %a.zext = zext <4 x i32> %a to <4 x i64>
  %b.zext = zext <4 x i32> %b to <4 x i64>
  %sub = sub <4 x i64> %a.zext, %b.zext
  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
  %trunc = trunc <4 x i64> %abs to <4 x i32>
  ret <4 x i32> %trunc
}

define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) #0 {
; CHECK-LABEL: uabd_4s_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabdl v0.4s, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %a.zext = zext <4 x i16> %a to <4 x i32>
  %b.zext = zext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.zext, %b.zext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

; No 2d uabd instruction exists; the i64 elements are expanded with
; saturating subtracts (uqsub/orr sequence).
define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-LABEL: uabd_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub v2.2d, v1.2d, v0.2d
; CHECK-NEXT:    uqsub v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ret
  %a.zext = zext <2 x i64> %a to <2 x i128>
  %b.zext = zext <2 x i64> %b to <2 x i128>
  %sub = sub <2 x i128> %a.zext, %b.zext
  %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
  %trunc = trunc <2 x i128> %abs to <2 x i64>
  ret <2 x i64> %trunc
}

define <2 x i64> @uabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: uabd_2d_promoted_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabdl v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %a.zext = zext <2 x i32> %a to <2 x i64>
  %b.zext = zext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.zext, %b.zext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

define <16 x i8> @uabd_v16i8_nuw(<16 x i8> %a, <16 x i8> %b) #0 {
; CHECK-LABEL: uabd_v16i8_nuw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    abs v0.16b, v0.16b
; CHECK-NEXT:    ret
  %sub = sub nuw <16 x i8> %a, %b
  %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
  ret <16 x i8> %abs
}

define <8 x i16> @uabd_v8i16_nuw(<8 x i16> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: uabd_v8i16_nuw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    abs v0.8h, v0.8h
; CHECK-NEXT:    ret
  %sub = sub nuw <8 x i16> %a, %b
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

define <4 x i32> @uabd_v4i32_nuw(<4 x i32> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: uabd_v4i32_nuw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    abs v0.4s, v0.4s
; CHECK-NEXT:    ret
  %sub = sub nuw <4 x i32> %a, %b
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

define <2 x i64> @uabd_v2i64_nuw(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-LABEL: uabd_v2i64_nuw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    abs v0.2d, v0.2d
; CHECK-NEXT:    ret
  %sub = sub nuw <2 x i64> %a, %b
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

define <16 x i8> @sabd_v16i8_nsw(<16 x i8> %a, <16 x i8> %b) #0 {
; CHECK-LABEL: sabd_v16i8_nsw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %sub = sub nsw <16 x i8> %a, %b
  %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
  ret <16 x i8> %abs
}

define <8 x i16> @sabd_v8i16_nsw(<8 x i16> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: sabd_v8i16_nsw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %sub = sub nsw <8 x i16> %a, %b
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

define <4 x i32> @sabd_v4i32_nsw(<4 x i32> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: sabd_v4i32_nsw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %sub = sub nsw <4 x i32> %a, %b
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

define <2 x i64> @sabd_v2i64_nsw(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-LABEL: sabd_v2i64_nsw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    abs v0.2d, v0.2d
; CHECK-NEXT:    ret
  %sub = sub nsw <2 x i64> %a, %b
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

define <16 x i8> @smaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: smaxmin_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %a = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> %0, <16 x i8> %1)
  %b = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> %0, <16 x i8> %1)
  %sub = sub <16 x i8> %a, %b
  ret <16 x i8> %sub
}

define <8 x i16> @smaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
; CHECK-LABEL: smaxmin_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %a = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> %0, <8 x i16> %1)
  %b = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> %0, <8 x i16> %1)
  %sub = sub <8 x i16> %a, %b
  ret <8 x i16> %sub
}

define <4 x i32> @smaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
; CHECK-LABEL: smaxmin_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %a = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %0, <4 x i32> %1)
  %b = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %0, <4 x i32> %1)
  %sub = sub <4 x i32> %a, %b
  ret <4 x i32> %sub
}

define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: smaxmin_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmgt v2.2d, v0.2d, v1.2d
; CHECK-NEXT:    sub v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    eor v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    sub v0.2d, v2.2d, v0.2d
; CHECK-NEXT:    ret
  %a = tail call <2 x i64> @llvm.smax.v2i64(<2 x i64> %0, <2 x i64> %1)
  %b = tail call <2 x i64> @llvm.smin.v2i64(<2 x i64> %0, <2 x i64> %1)
  %sub = sub <2 x i64> %a, %b
  ret <2 x i64> %sub
}

define <16 x i8> @umaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: umaxmin_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
  %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %0, <16 x i8> %1)
  %sub = sub <16 x i8> %a, %b
  ret <16 x i8> %sub
}

define <8 x i16> @umaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
; CHECK-LABEL: umaxmin_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %a = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> %0, <8 x i16> %1)
  %b = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %0, <8 x i16> %1)
  %sub = sub <8 x i16> %a, %b
  ret <8 x i16> %sub
}

define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
; CHECK-LABEL: umaxmin_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %a = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %0, <4 x i32> %1)
  %b = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %0, <4 x i32> %1)
  %sub = sub <4 x i32> %a, %b
  ret <4 x i32> %sub
}

define <2 x i64> @umaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: umaxmin_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub v2.2d, v1.2d, v0.2d
; CHECK-NEXT:    uqsub v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ret
  %a = tail call <2 x i64> @llvm.umax.v2i64(<2 x i64> %0, <2 x i64> %1)
  %b = tail call <2 x i64> @llvm.umin.v2i64(<2 x i64> %0, <2 x i64> %1)
  %sub = sub <2 x i64> %a, %b
  ret <2 x i64> %sub
}

define <16 x i8> @umaxmin_v16i8_com1(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: umaxmin_v16i8_com1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uabd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
  %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> %0)
  %sub = sub <16 x i8> %a, %b
  ret <16 x i8> %sub
}

; (abds x, y) upper bits are known zero if x and y have extra sign bits
define <4 x i16> @combine_sabd_4h_zerosign(<4 x i16> %a, <4 x i16> %b) #0 {
; CHECK-LABEL: combine_sabd_4h_zerosign:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %a.ext = ashr <4 x i16> %a, <i16 7, i16 8, i16 9, i16 10>
  %b.ext = ashr <4 x i16> %b, <i16 11, i16 12, i16 13, i16 14>
  %max = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %a.ext, <4 x i16> %b.ext)
  %min = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %a.ext, <4 x i16> %b.ext)
  %sub = sub <4 x i16> %max, %min
  %mask = and <4 x i16> %sub, <i16 32768, i16 32768, i16 32768, i16 32768>
  ret <4 x i16> %mask
}

; negative test - mask extends beyond known zero bits
define <2 x i32> @combine_sabd_2s_zerosign_negative(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: combine_sabd_2s_zerosign_negative:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshr v0.2s, v0.2s, #3
; CHECK-NEXT:    sshr v1.2s, v1.2s, #15
; CHECK-NEXT:    mvni v2.2s, #7, msl #16
; CHECK-NEXT:    sabd v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
; CHECK-NEXT:    ret
  %a.ext = ashr <2 x i32> %a, <i32 3, i32 3>
  %b.ext = ashr <2 x i32> %b, <i32 15, i32 15>
  %max = tail call <2 x i32> @llvm.smax.v2i32(<2 x i32> %a.ext, <2 x i32> %b.ext)
  %min = tail call <2 x i32> @llvm.smin.v2i32(<2 x i32> %a.ext, <2 x i32> %b.ext)
  %sub = sub <2 x i32> %max, %min
  %mask = and <2 x i32> %sub, <i32 -524288, i32 -524288> ; 0xFFF80000
  ret <2 x i32> %mask
}

declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)

declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1)
declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)

declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)

declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)

declare <2 x i128> @llvm.abs.v2i128(<2 x i128>, i1)

declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>)
declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>)
declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)

attributes #0 = { "target-features"="+neon" }