; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx | FileCheck %s --check-prefixes=AVX1
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512

;
; trunc(abs(sub(zext(a),zext(b)))) -> abdu(a,b)
;

define <32 x i8> @abd_ext_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: abd_ext_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_ext_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_ext_v32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %aext = zext <32 x i8> %a to <32 x i64>
  %bext = zext <32 x i8> %b to <32 x i64>
  %sub = sub <32 x i64> %aext, %bext
  %abs = call <32 x i64> @llvm.abs.v32i64(<32 x i64> %sub, i1 false)
  %trunc = trunc <32 x i64> %abs to <32 x i8>
  ret <32 x i8> %trunc
}

define <32 x i8> @abd_ext_v32i8_undef(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: abd_ext_v32i8_undef:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_ext_v32i8_undef:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_ext_v32i8_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %aext = zext <32 x i8> %a to <32 x i64>
  %bext = zext <32 x i8> %b to <32 x i64>
  %sub = sub <32 x i64> %aext, %bext
  %abs = call <32 x i64> @llvm.abs.v32i64(<32 x i64> %sub, i1 true)
  %trunc = trunc <32 x i64> %abs to <32 x i8>
  ret <32 x i8> %trunc
}

define <16 x i16> @abd_ext_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: abd_ext_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubw %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_ext_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_ext_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubw %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %aext = zext <16 x i16> %a to <16 x i64>
  %bext = zext <16 x i16> %b to <16 x i64>
  %sub = sub <16 x i64> %aext, %bext
  %abs = call <16 x i64> @llvm.abs.v16i64(<16 x i64> %sub, i1 false)
  %trunc = trunc <16 x i64> %abs to <16 x i16>
  ret <16 x i16> %trunc
}

define <16 x i16> @abd_ext_v16i16_undef(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: abd_ext_v16i16_undef:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubw %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_ext_v16i16_undef:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_ext_v16i16_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubw %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %aext = zext <16 x i16> %a to <16 x i64>
  %bext = zext <16 x i16> %b to <16 x i64>
  %sub = sub <16 x i64> %aext, %bext
  %abs = call <16 x i64> @llvm.abs.v16i64(<16 x i64> %sub, i1 true)
  %trunc = trunc <16 x i64> %abs to <16 x i16>
  ret <16 x i16> %trunc
}

define <8 x i32> @abd_ext_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: abd_ext_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubd %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_ext_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_ext_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubd %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %aext = zext <8 x i32> %a to <8 x i64>
  %bext = zext <8 x i32> %b to <8 x i64>
  %sub = sub <8 x i64> %aext, %bext
  %abs = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %sub, i1 false)
  %trunc = trunc <8 x i64> %abs to <8 x i32>
  ret <8 x i32> %trunc
}

define <8 x i32> @abd_ext_v8i32_undef(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: abd_ext_v8i32_undef:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubd %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_ext_v8i32_undef:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_ext_v8i32_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubd %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %aext = zext <8 x i32> %a to <8 x i64>
  %bext = zext <8 x i32> %b to <8 x i64>
  %sub = sub <8 x i64> %aext, %bext
  %abs = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %sub, i1 true)
  %trunc = trunc <8 x i64> %abs to <8 x i32>
  ret <8 x i32> %trunc
}

define <4 x i64> @abd_ext_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: abd_ext_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT: vpxor %xmm3, %xmm5, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
; AVX1-NEXT: vpsubq %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsubq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_ext_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_ext_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminuq %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %aext = zext <4 x i64> %a to <4 x i128>
  %bext = zext <4 x i64> %b to <4 x i128>
  %sub = sub <4 x i128> %aext, %bext
  %abs = call <4 x i128> @llvm.abs.v4i128(<4 x i128> %sub, i1 false)
  %trunc = trunc <4 x i128> %abs to <4 x i64>
  ret <4 x i64> %trunc
}

define <4 x i64> @abd_ext_v4i64_undef(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: abd_ext_v4i64_undef:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT: vpxor %xmm3, %xmm5, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
; AVX1-NEXT: vpsubq %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsubq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_ext_v4i64_undef:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_ext_v4i64_undef:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminuq %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %aext = zext <4 x i64> %a to <4 x i128>
  %bext = zext <4 x i64> %b to <4 x i128>
  %sub = sub <4 x i128> %aext, %bext
  %abs = call <4 x i128> @llvm.abs.v4i128(<4 x i128> %sub, i1 true)
  %trunc = trunc <4 x i128> %abs to <4 x i64>
  ret <4 x i64> %trunc
}

;
; sub(umax(a,b),umin(a,b)) -> abdu(a,b)
;

define <32 x i8> @abd_minmax_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: abd_minmax_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_minmax_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_minmax_v32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %min = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %a, <32 x i8> %b)
  %max = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %a, <32 x i8> %b)
  %sub = sub <32 x i8> %max, %min
  ret <32 x i8> %sub
}

define <16 x i16> @abd_minmax_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: abd_minmax_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubw %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_minmax_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_minmax_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubw %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %min = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %a, <16 x i16> %b)
  %max = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %a, <16 x i16> %b)
  %sub = sub <16 x i16> %max, %min
  ret <16 x i16> %sub
}

define <8 x i32> @abd_minmax_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: abd_minmax_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubd %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_minmax_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_minmax_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubd %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %min = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %a, <8 x i32> %b)
  %max = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %a, <8 x i32> %b)
  %sub = sub <8 x i32> %max, %min
  ret <8 x i32> %sub
}

define <4 x i64> @abd_minmax_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: abd_minmax_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT: vpxor %xmm3, %xmm5, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
; AVX1-NEXT: vpsubq %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsubq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_minmax_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_minmax_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminuq %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %min = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %a, <4 x i64> %b)
  %max = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %a, <4 x i64> %b)
  %sub = sub <4 x i64> %max, %min
  ret <4 x i64> %sub
}

;
; select(icmp(a,b),sub(a,b),sub(b,a)) -> abdu(a,b)
;

define <32 x i8> @abd_cmp_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: abd_cmp_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubb %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %cmp = icmp ugt <32 x i8> %a, %b
  %ab = sub <32 x i8> %a, %b
  %ba = sub <32 x i8> %b, %a
  %sel = select <32 x i1> %cmp, <32 x i8> %ab, <32 x i8> %ba
  ret <32 x i8> %sel
}

define <16 x i16> @abd_cmp_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: abd_cmp_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubw %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubw %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %cmp = icmp uge <16 x i16> %a, %b
  %ab = sub <16 x i16> %a, %b
  %ba = sub <16 x i16> %b, %a
  %sel = select <16 x i1> %cmp, <16 x i16> %ab, <16 x i16> %ba
  ret <16 x i16> %sel
}

define <8 x i32> @abd_cmp_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: abd_cmp_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubd %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsubd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubd %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %cmp = icmp ult <8 x i32> %a, %b
  %ab = sub <8 x i32> %a, %b
  %ba = sub <8 x i32> %b, %a
  %sel = select <8 x i1> %cmp, <8 x i32> %ba, <8 x i32> %ab
  ret <8 x i32> %sel
}

define <4 x i64> @abd_cmp_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: abd_cmp_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT: vpxor %xmm3, %xmm5, %xmm6
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
; AVX1-NEXT: vpsubq %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsubq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpminuq %ymm1, %ymm0, %ymm2
; AVX512-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX512-NEXT: retq
  %cmp = icmp uge <4 x i64> %a, %b
  %ab = sub <4 x i64> %a, %b
  %ba = sub <4 x i64> %b, %a
  %sel = select <4 x i1> %cmp, <4 x i64> %ab, <4 x i64> %ba
  ret <4 x i64> %sel
}

declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1)
declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)
declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
declare <8 x i64> @llvm.abs.v8i64(<8 x i64>, i1)
declare <16 x i64> @llvm.abs.v16i64(<16 x i64>, i1)
declare <32 x i64> @llvm.abs.v32i64(<32 x i64>, i1)
declare <4 x i128> @llvm.abs.v4i128(<4 x i128>, i1)

declare <32 x i8> @llvm.umax.v32i8(<32 x i8>, <32 x i8>)
declare <16 x i16> @llvm.umax.v16i16(<16 x i16>, <16 x i16>)
declare <8 x i32> @llvm.umax.v8i32(<8 x i32>, <8 x i32>)
declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>)

declare <32 x i8> @llvm.umin.v32i8(<32 x i8>, <32 x i8>)
declare <16 x i16> @llvm.umin.v16i16(<16 x i16>, <16 x i16>)
declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)