; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx | FileCheck %s --check-prefixes=AVX1
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512

;
; trunc(abs(sub(sext(a),sext(b)))) -> abds(a,b)
;
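; Widening through i64/i128 keeps the subtraction exact, and the truncated
; absolute difference is bit-identical to sub(smax(a,b),smin(a,b)), so the
; pattern can be lowered on the original element type without any widening.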

define <32 x i8> @abd_ext_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: abd_ext_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminsb %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpmaxsb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_ext_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminsb %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_ext_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsb %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %aext = sext <32 x i8> %a to <32 x i64>
  %bext = sext <32 x i8> %b to <32 x i64>
  %sub = sub <32 x i64> %aext, %bext
  %abs = call <32 x i64> @llvm.abs.v32i64(<32 x i64> %sub, i1 false)
  %trunc = trunc <32 x i64> %abs to <32 x i8>
  ret <32 x i8> %trunc
}

define <32 x i8> @abd_ext_v32i8_undef(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: abd_ext_v32i8_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminsb %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpmaxsb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_ext_v32i8_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminsb %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_ext_v32i8_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsb %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %aext = sext <32 x i8> %a to <32 x i64>
  %bext = sext <32 x i8> %b to <32 x i64>
  %sub = sub <32 x i64> %aext, %bext
  %abs = call <32 x i64> @llvm.abs.v32i64(<32 x i64> %sub, i1 true)
  %trunc = trunc <32 x i64> %abs to <32 x i8>
  ret <32 x i8> %trunc
}

define <16 x i16> @abd_ext_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: abd_ext_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpmaxsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsubw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_ext_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_ext_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsw %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %aext = sext <16 x i16> %a to <16 x i64>
  %bext = sext <16 x i16> %b to <16 x i64>
  %sub = sub <16 x i64> %aext, %bext
  %abs = call <16 x i64> @llvm.abs.v16i64(<16 x i64> %sub, i1 false)
  %trunc = trunc <16 x i64> %abs to <16 x i16>
  ret <16 x i16> %trunc
}

define <16 x i16> @abd_ext_v16i16_undef(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: abd_ext_v16i16_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpmaxsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsubw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_ext_v16i16_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_ext_v16i16_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsw %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %aext = sext <16 x i16> %a to <16 x i64>
  %bext = sext <16 x i16> %b to <16 x i64>
  %sub = sub <16 x i64> %aext, %bext
  %abs = call <16 x i64> @llvm.abs.v16i64(<16 x i64> %sub, i1 true)
  %trunc = trunc <16 x i64> %abs to <16 x i16>
  ret <16 x i16> %trunc
}

define <8 x i32> @abd_ext_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: abd_ext_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminsd %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpmaxsd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubd %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsubd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_ext_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_ext_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsubd %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %aext = sext <8 x i32> %a to <8 x i64>
  %bext = sext <8 x i32> %b to <8 x i64>
  %sub = sub <8 x i64> %aext, %bext
  %abs = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %sub, i1 false)
  %trunc = trunc <8 x i64> %abs to <8 x i32>
  ret <8 x i32> %trunc
}

define <8 x i32> @abd_ext_v8i32_undef(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: abd_ext_v8i32_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminsd %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpmaxsd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubd %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsubd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_ext_v8i32_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_ext_v8i32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsubd %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %aext = sext <8 x i32> %a to <8 x i64>
  %bext = sext <8 x i32> %b to <8 x i64>
  %sub = sub <8 x i64> %aext, %bext
  %abs = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %sub, i1 true)
  %trunc = trunc <8 x i64> %abs to <8 x i32>
  ret <8 x i32> %trunc
}
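
; v4i64 would need extension to i128; only AVX512VL provides native 64-bit
; vpminsq/vpmaxsq, so AVX1/AVX2 instead expand abs(sub) with a
; compare/subtract/xor/subtract sequence.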

define <4 x i64> @abd_ext_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: abd_ext_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpsubq %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsubq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_ext_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpsubq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_ext_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsq %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsubq %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %aext = sext <4 x i64> %a to <4 x i128>
  %bext = sext <4 x i64> %b to <4 x i128>
  %sub = sub <4 x i128> %aext, %bext
  %abs = call <4 x i128> @llvm.abs.v4i128(<4 x i128> %sub, i1 false)
  %trunc = trunc <4 x i128> %abs to <4 x i64>
  ret <4 x i64> %trunc
}

define <4 x i64> @abd_ext_v4i64_undef(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: abd_ext_v4i64_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpsubq %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsubq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_ext_v4i64_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpsubq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_ext_v4i64_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsq %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsubq %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %aext = sext <4 x i64> %a to <4 x i128>
  %bext = sext <4 x i64> %b to <4 x i128>
  %sub = sub <4 x i128> %aext, %bext
  %abs = call <4 x i128> @llvm.abs.v4i128(<4 x i128> %sub, i1 true)
  %trunc = trunc <4 x i128> %abs to <4 x i64>
  ret <4 x i64> %trunc
}

;
; sub(smax(a,b),smin(a,b)) -> abds(a,b)
;
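; For i8/i16/i32 elements the min/max pair maps onto single vpmins*/vpmaxs*
; instructions (split into 128-bit halves on AVX1); v4i64 again needs
; AVX512VL or the compare-based expansion.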

define <32 x i8> @abd_minmax_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: abd_minmax_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminsb %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpmaxsb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_minmax_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminsb %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_minmax_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsb %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %min = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %a, <32 x i8> %b)
  %max = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %a, <32 x i8> %b)
  %sub = sub <32 x i8> %max, %min
  ret <32 x i8> %sub
}

define <16 x i16> @abd_minmax_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: abd_minmax_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpmaxsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsubw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_minmax_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_minmax_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsw %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %min = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %a, <16 x i16> %b)
  %max = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %a, <16 x i16> %b)
  %sub = sub <16 x i16> %max, %min
  ret <16 x i16> %sub
}

define <8 x i32> @abd_minmax_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: abd_minmax_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminsd %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpmaxsd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubd %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsubd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_minmax_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_minmax_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsubd %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %min = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %a, <8 x i32> %b)
  %max = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %a, <8 x i32> %b)
  %sub = sub <8 x i32> %max, %min
  ret <8 x i32> %sub
}

define <4 x i64> @abd_minmax_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: abd_minmax_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpsubq %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsubq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_minmax_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpsubq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_minmax_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsq %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsubq %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %min = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %a, <4 x i64> %b)
  %max = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %a, <4 x i64> %b)
  %sub = sub <4 x i64> %max, %min
  ret <4 x i64> %sub
}

;
; select(icmp(a,b),sub(a,b),sub(b,a)) -> abds(a,b)
;
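; The select form is matched for sgt/sge/slt comparisons alike (the slt test
; swaps the select arms); all of them reduce to the same lowering as above.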

define <32 x i8> @abd_cmp_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: abd_cmp_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminsb %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpmaxsb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_cmp_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminsb %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_cmp_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsb %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp sgt <32 x i8> %a, %b
  %ab = sub <32 x i8> %a, %b
  %ba = sub <32 x i8> %b, %a
  %sel = select <32 x i1> %cmp, <32 x i8> %ab, <32 x i8> %ba
  ret <32 x i8> %sel
}

define <16 x i16> @abd_cmp_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: abd_cmp_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminsw %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpmaxsw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsubw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_cmp_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminsw %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_cmp_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsw %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp sge <16 x i16> %a, %b
  %ab = sub <16 x i16> %a, %b
  %ba = sub <16 x i16> %b, %a
  %sel = select <16 x i1> %cmp, <16 x i16> %ab, <16 x i16> %ba
  ret <16 x i16> %sel
}

define <8 x i32> @abd_cmp_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: abd_cmp_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminsd %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpmaxsd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubd %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsubd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_cmp_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminsd %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_cmp_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsd %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsubd %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp slt <8 x i32> %a, %b
  %ab = sub <8 x i32> %a, %b
  %ba = sub <8 x i32> %b, %a
  %sel = select <8 x i1> %cmp, <8 x i32> %ba, <8 x i32> %ab
  ret <8 x i32> %sel
}

define <4 x i64> @abd_cmp_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: abd_cmp_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm4
; AVX1-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpsubq %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsubq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_cmp_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpsubq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_cmp_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsq %ymm1, %ymm0, %ymm2
; AVX512-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsubq %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp sge <4 x i64> %a, %b
  %ab = sub <4 x i64> %a, %b
  %ba = sub <4 x i64> %b, %a
  %sel = select <4 x i1> %cmp, <4 x i64> %ab, <4 x i64> %ba
  ret <4 x i64> %sel
}

;
; abs(sub_nsw(a,b)) -> abds(a,b)
;
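; The nsw flag guarantees the difference cannot wrap, so the abs is already
; exact on the narrow type and lowers to a plain vpsub*/vpabs* pair.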

define <32 x i8> @abd_subnsw_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: abd_subnsw_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpsubb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpabsb %xmm0, %xmm0
; AVX1-NEXT:    vpabsb %xmm2, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_subnsw_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpabsb %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_subnsw_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpabsb %ymm0, %ymm0
; AVX512-NEXT:    retq
  %sub = sub nsw <32 x i8> %a, %b
  %abs = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %sub, i1 false)
  ret <32 x i8> %abs
}

define <16 x i16> @abd_subnsw_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: abd_subnsw_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpsubw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpabsw %xmm0, %xmm0
; AVX1-NEXT:    vpabsw %xmm2, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_subnsw_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpabsw %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_subnsw_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpabsw %ymm0, %ymm0
; AVX512-NEXT:    retq
  %sub = sub nsw <16 x i16> %a, %b
  %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 false)
  ret <16 x i16> %abs
}

define <8 x i32> @abd_subnsw_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: abd_subnsw_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpsubd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpabsd %xmm0, %xmm0
; AVX1-NEXT:    vpabsd %xmm2, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_subnsw_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpabsd %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_subnsw_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpabsd %ymm0, %ymm0
; AVX512-NEXT:    retq
  %sub = sub nsw <8 x i32> %a, %b
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 false)
  ret <8 x i32> %abs
}
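
; 256-bit vpabsq requires AVX512VL, so AVX1/AVX2 select between the
; difference and its negation with vblendvpd keyed on the sign bit.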
define <4 x i64> @abd_subnsw_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: abd_subnsw_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm1
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vblendvpd %ymm1, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_subnsw_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsubq %ymm0, %ymm1, %ymm1
; AVX2-NEXT:    vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_subnsw_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpabsq %ymm0, %ymm0
; AVX512-NEXT:    retq
  %sub = sub nsw <4 x i64> %a, %b
  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 false)
  ret <4 x i64> %abs
}

declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1)
declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)
declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
declare <8 x i64> @llvm.abs.v8i64(<8 x i64>, i1)
declare <16 x i64> @llvm.abs.v16i64(<16 x i64>, i1)
declare <32 x i64> @llvm.abs.v32i64(<32 x i64>, i1)
declare <4 x i128> @llvm.abs.v4i128(<4 x i128>, i1)

declare <32 x i8> @llvm.smax.v32i8(<32 x i8>, <32 x i8>)
declare <16 x i16> @llvm.smax.v16i16(<16 x i16>, <16 x i16>)
declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)

declare <32 x i8> @llvm.smin.v32i8(<32 x i8>, <32 x i8>)
declare <16 x i16> @llvm.smin.v16i16(<16 x i16>, <16 x i16>)
declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)