; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ

;
; trunc(abs(sub(sext(a),sext(b)))) -> abds(a,b)
;

define <64 x i8> @abd_ext_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-LABEL: abd_ext_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpminsb %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_ext_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpminsb %ymm2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpmaxsb %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubb %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpminsb %ymm1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %aext = sext <64 x i8> %a to <64 x i64>
  %bext = sext <64 x i8> %b to <64 x i64>
  %sub = sub <64 x i64> %aext, %bext
  %abs = call <64 x i64> @llvm.abs.v64i64(<64 x i64> %sub, i1 false)
  %trunc = trunc <64 x i64> %abs to <64 x i8>
  ret <64 x i8> %trunc
}

define <64 x i8> @abd_ext_v64i8_undef(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-LABEL: abd_ext_v64i8_undef:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpminsb %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_ext_v64i8_undef:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpminsb %ymm2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpmaxsb %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubb %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpminsb %ymm1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %aext = sext <64 x i8> %a to <64 x i64>
  %bext = sext <64 x i8> %b to <64 x i64>
  %sub = sub <64 x i64> %aext, %bext
  %abs = call <64 x i64> @llvm.abs.v64i64(<64 x i64> %sub, i1 true)
  %trunc = trunc <64 x i64> %abs to <64 x i8>
  ret <64 x i8> %trunc
}

define <32 x i16> @abd_ext_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512BW-LABEL: abd_ext_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpminsw %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_ext_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpminsw %ymm2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpmaxsw %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubw %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpminsw %ymm1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %aext = sext <32 x i16> %a to <32 x i64>
  %bext = sext <32 x i16> %b to <32 x i64>
  %sub = sub <32 x i64> %aext, %bext
  %abs = call <32 x i64> @llvm.abs.v32i64(<32 x i64> %sub, i1 false)
  %trunc = trunc <32 x i64> %abs to <32 x i16>
  ret <32 x i16> %trunc
}

define <32 x i16> @abd_ext_v32i16_undef(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512BW-LABEL: abd_ext_v32i16_undef:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpminsw %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_ext_v32i16_undef:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpminsw %ymm2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpmaxsw %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubw %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpminsw %ymm1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %aext = sext <32 x i16> %a to <32 x i64>
  %bext = sext <32 x i16> %b to <32 x i64>
  %sub = sub <32 x i64> %aext, %bext
  %abs = call <32 x i64> @llvm.abs.v32i64(<32 x i64> %sub, i1 true)
  %trunc = trunc <32 x i64> %abs to <32 x i16>
  ret <32 x i16> %trunc
}

define <16 x i32> @abd_ext_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; AVX512-LABEL: abd_ext_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsubd %zmm2, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %aext = sext <16 x i32> %a to <16 x i64>
  %bext = sext <16 x i32> %b to <16 x i64>
  %sub = sub <16 x i64> %aext, %bext
  %abs = call <16 x i64> @llvm.abs.v16i64(<16 x i64> %sub, i1 false)
  %trunc = trunc <16 x i64> %abs to <16 x i32>
  ret <16 x i32> %trunc
}

define <16 x i32> @abd_ext_v16i32_undef(<16 x i32> %a, <16 x i32> %b) nounwind {
; AVX512-LABEL: abd_ext_v16i32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsubd %zmm2, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %aext = sext <16 x i32> %a to <16 x i64>
  %bext = sext <16 x i32> %b to <16 x i64>
  %sub = sub <16 x i64> %aext, %bext
  %abs = call <16 x i64> @llvm.abs.v16i64(<16 x i64> %sub, i1 true)
  %trunc = trunc <16 x i64> %abs to <16 x i32>
  ret <16 x i32> %trunc
}

define <8 x i64> @abd_ext_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; AVX512-LABEL: abd_ext_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsubq %zmm2, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %aext = sext <8 x i64> %a to <8 x i128>
  %bext = sext <8 x i64> %b to <8 x i128>
  %sub = sub <8 x i128> %aext, %bext
  %abs = call <8 x i128> @llvm.abs.v8i128(<8 x i128> %sub, i1 false)
  %trunc = trunc <8 x i128> %abs to <8 x i64>
  ret <8 x i64> %trunc
}

define <8 x i64> @abd_ext_v8i64_undef(<8 x i64> %a, <8 x i64> %b) nounwind {
; AVX512-LABEL: abd_ext_v8i64_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsubq %zmm2, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %aext = sext <8 x i64> %a to <8 x i128>
  %bext = sext <8 x i64> %b to <8 x i128>
  %sub = sub <8 x i128> %aext, %bext
  %abs = call <8 x i128> @llvm.abs.v8i128(<8 x i128> %sub, i1 true)
  %trunc = trunc <8 x i128> %abs to <8 x i64>
  ret <8 x i64> %trunc
}
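
; A worked example of the fold above (comment only, not part of the
; autogenerated checks): for i8 lanes a = 100, b = -100, the sign-extended
; difference is 100 - (-100) = 200, abs(200) = 200, and truncating back to
; i8 gives the bit pattern 0xC8. Computing smax(a,b) - smin(a,b) directly
; in i8 also yields 100 - (-100) == 0xC8 (mod 256), so the widening, abs
; and truncation collapse to a single abds node.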

;
; sub(smax(a,b),smin(a,b)) -> abds(a,b)
;

define <64 x i8> @abd_minmax_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-LABEL: abd_minmax_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpminsb %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_minmax_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpminsb %ymm2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpmaxsb %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubb %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpminsb %ymm1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %min = call <64 x i8> @llvm.smin.v64i8(<64 x i8> %a, <64 x i8> %b)
  %max = call <64 x i8> @llvm.smax.v64i8(<64 x i8> %a, <64 x i8> %b)
  %sub = sub <64 x i8> %max, %min
  ret <64 x i8> %sub
}

define <32 x i16> @abd_minmax_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512BW-LABEL: abd_minmax_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpminsw %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_minmax_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpminsw %ymm2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpmaxsw %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubw %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpminsw %ymm1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %min = call <32 x i16> @llvm.smin.v32i16(<32 x i16> %a, <32 x i16> %b)
  %max = call <32 x i16> @llvm.smax.v32i16(<32 x i16> %a, <32 x i16> %b)
  %sub = sub <32 x i16> %max, %min
  ret <32 x i16> %sub
}

define <16 x i32> @abd_minmax_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; AVX512-LABEL: abd_minmax_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsubd %zmm2, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %min = call <16 x i32> @llvm.smin.v16i32(<16 x i32> %a, <16 x i32> %b)
  %max = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %a, <16 x i32> %b)
  %sub = sub <16 x i32> %max, %min
  ret <16 x i32> %sub
}

define <8 x i64> @abd_minmax_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; AVX512-LABEL: abd_minmax_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsubq %zmm2, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %min = call <8 x i64> @llvm.smin.v8i64(<8 x i64> %a, <8 x i64> %b)
  %max = call <8 x i64> @llvm.smax.v8i64(<8 x i64> %a, <8 x i64> %b)
  %sub = sub <8 x i64> %max, %min
  ret <8 x i64> %sub
}
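
; A minimal sketch of the same smax/smin pattern at 128-bit width, assuming
; a v16i8 spot check is useful for illustration alongside the 512-bit cases
; above. This helper is hypothetical and carries no autogenerated CHECK
; lines, so FileCheck does not inspect its output.
define <16 x i8> @abd_minmax_v16i8_example(<16 x i8> %a, <16 x i8> %b) nounwind {
  %min = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> %b)
  %max = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
  %sub = sub <16 x i8> %max, %min
  ret <16 x i8> %sub
}
declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)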

;
; select(icmp(a,b),sub(a,b),sub(b,a)) -> abds(a,b)
;

define <64 x i8> @abd_cmp_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-LABEL: abd_cmp_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpminsb %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_cmp_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpminsb %ymm2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpmaxsb %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubb %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpminsb %ymm1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %cmp = icmp sgt <64 x i8> %a, %b
  %ab = sub <64 x i8> %a, %b
  %ba = sub <64 x i8> %b, %a
  %sel = select <64 x i1> %cmp, <64 x i8> %ab, <64 x i8> %ba
  ret <64 x i8> %sel
}

define <32 x i16> @abd_cmp_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512BW-LABEL: abd_cmp_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpminsw %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_cmp_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpminsw %ymm2, %ymm3, %ymm4
; AVX512DQ-NEXT:    vpmaxsw %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubw %ymm4, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpminsw %ymm1, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %cmp = icmp sge <32 x i16> %a, %b
  %ab = sub <32 x i16> %a, %b
  %ba = sub <32 x i16> %b, %a
  %sel = select <32 x i1> %cmp, <32 x i16> %ab, <32 x i16> %ba
  ret <32 x i16> %sel
}

define <16 x i32> @abd_cmp_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; AVX512-LABEL: abd_cmp_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsd %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsubd %zmm2, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %cmp = icmp slt <16 x i32> %a, %b
  %ab = sub <16 x i32> %a, %b
  %ba = sub <16 x i32> %b, %a
  %sel = select <16 x i1> %cmp, <16 x i32> %ba, <16 x i32> %ab
  ret <16 x i32> %sel
}

define <8 x i64> @abd_cmp_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; AVX512-LABEL: abd_cmp_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsq %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsubq %zmm2, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %cmp = icmp sge <8 x i64> %a, %b
  %ab = sub <8 x i64> %a, %b
  %ba = sub <8 x i64> %b, %a
  %sel = select <8 x i1> %cmp, <8 x i64> %ab, <8 x i64> %ba
  ret <8 x i64> %sel
}
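
; Note that abd_cmp_v16i32 above inverts the pattern: it selects sub(b,a)
; when a < b, which is equivalent to selecting sub(a,b) when a >= b, so the
; sgt, sge and commuted slt forms all reduce to the same abds node.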

;
; abs(sub_nsw(x, y)) -> abds(a,b)
;

define <64 x i8> @abd_subnsw_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512BW-LABEL: abd_subnsw_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsubb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpabsb %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_subnsw_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpsubb %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpabsb %ymm0, %ymm0
; AVX512DQ-NEXT:    vpabsb %ymm2, %ymm1
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %sub = sub nsw <64 x i8> %a, %b
  %abs = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %sub, i1 false)
  ret <64 x i8> %abs
}

define <32 x i16> @abd_subnsw_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512BW-LABEL: abd_subnsw_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsubw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpabsw %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: abd_subnsw_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512DQ-NEXT:    vpsubw %ymm2, %ymm3, %ymm2
; AVX512DQ-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpabsw %ymm0, %ymm0
; AVX512DQ-NEXT:    vpabsw %ymm2, %ymm1
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
  %sub = sub nsw <32 x i16> %a, %b
  %abs = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %sub, i1 false)
  ret <32 x i16> %abs
}

define <16 x i32> @abd_subnsw_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; AVX512-LABEL: abd_subnsw_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpabsd %zmm0, %zmm0
; AVX512-NEXT:    retq
  %sub = sub nsw <16 x i32> %a, %b
  %abs = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %sub, i1 false)
  ret <16 x i32> %abs
}

define <8 x i64> @abd_subnsw_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; AVX512-LABEL: abd_subnsw_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpabsq %zmm0, %zmm0
; AVX512-NEXT:    retq
  %sub = sub nsw <8 x i64> %a, %b
  %abs = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %sub, i1 false)
  ret <8 x i64> %abs
}
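
; The nsw flag is what makes the fold above legal: without it, i8 lanes such
; as a = 127, b = -128 give sub(a,b) = 255, which wraps to -1, and abs(-1)
; is 1, whereas abds(a,b) = smax(a,b) - smin(a,b) produces the bit pattern
; 0xFF. With nsw that wrapping subtraction is poison, so lowering
; abs(sub nsw (a, b)) directly to abds(a, b) is sound.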

declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1)
declare <32 x i16> @llvm.abs.v32i16(<32 x i16>, i1)
declare <16 x i32> @llvm.abs.v16i32(<16 x i32>, i1)
declare <8 x i64> @llvm.abs.v8i64(<8 x i64>, i1)
declare <16 x i64> @llvm.abs.v16i64(<16 x i64>, i1)
declare <32 x i64> @llvm.abs.v32i64(<32 x i64>, i1)
declare <64 x i64> @llvm.abs.v64i64(<64 x i64>, i1)
declare <8 x i128> @llvm.abs.v8i128(<8 x i128>, i1)

declare <64 x i8> @llvm.smax.v64i8(<64 x i8>, <64 x i8>)
declare <32 x i16> @llvm.smax.v32i16(<32 x i16>, <32 x i16>)
declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)
declare <8 x i64> @llvm.smax.v8i64(<8 x i64>, <8 x i64>)

declare <64 x i8> @llvm.smin.v64i8(<64 x i8>, <64 x i8>)
declare <32 x i16> @llvm.smin.v32i16(<32 x i16>, <32 x i16>)
declare <16 x i32> @llvm.smin.v16i32(<16 x i32>, <16 x i32>)
declare <8 x i64> @llvm.smin.v8i64(<8 x i64>, <8 x i64>)