; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512

;
; trunc(abs(sub(sext(a),sext(b)))) -> abds(a,b)
;

define <16 x i8> @abd_ext_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE2-LABEL: abd_ext_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubb %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_ext_v16i8:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pminsb %xmm1, %xmm2
; SSE42-NEXT:    pmaxsb %xmm1, %xmm0
; SSE42-NEXT:    psubb %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_ext_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %aext = sext <16 x i8> %a to <16 x i64>
  %bext = sext <16 x i8> %b to <16 x i64>
  %sub = sub <16 x i64> %aext, %bext
  %abs = call <16 x i64> @llvm.abs.v16i64(<16 x i64> %sub, i1 false)
  %trunc = trunc <16 x i64> %abs to <16 x i8>
  ret <16 x i8> %trunc
}

define <16 x i8> @abd_ext_v16i8_undef(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE2-LABEL: abd_ext_v16i8_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubb %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_ext_v16i8_undef:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pminsb %xmm1, %xmm2
; SSE42-NEXT:    pmaxsb %xmm1, %xmm0
; SSE42-NEXT:    psubb %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_ext_v16i8_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %aext = sext <16 x i8> %a to <16 x i64>
  %bext = sext <16 x i8> %b to <16 x i64>
  %sub = sub <16 x i64> %aext, %bext
  %abs = call <16 x i64> @llvm.abs.v16i64(<16 x i64> %sub, i1 true)
  %trunc = trunc <16 x i64> %abs to <16 x i8>
  ret <16 x i8> %trunc
}

define <8 x i16> @abd_ext_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE-LABEL: abd_ext_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pminsw %xmm1, %xmm2
; SSE-NEXT:    pmaxsw %xmm1, %xmm0
; SSE-NEXT:    psubw %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: abd_ext_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %aext = sext <8 x i16> %a to <8 x i64>
  %bext = sext <8 x i16> %b to <8 x i64>
  %sub = sub <8 x i64> %aext, %bext
  %abs = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %sub, i1 false)
  %trunc = trunc <8 x i64> %abs to <8 x i16>
  ret <8 x i16> %trunc
}

define <8 x i16> @abd_ext_v8i16_undef(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE-LABEL: abd_ext_v8i16_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pminsw %xmm1, %xmm2
; SSE-NEXT:    pmaxsw %xmm1, %xmm0
; SSE-NEXT:    psubw %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: abd_ext_v8i16_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %aext = sext <8 x i16> %a to <8 x i64>
  %bext = sext <8 x i16> %b to <8 x i64>
  %sub = sub <8 x i64> %aext, %bext
  %abs = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %sub, i1 true)
  %trunc = trunc <8 x i64> %abs to <8 x i16>
  ret <8 x i16> %trunc
}

define <4 x i32> @abd_ext_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: abd_ext_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    psubd %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubd %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_ext_v4i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pminsd %xmm1, %xmm2
; SSE42-NEXT:    pmaxsd %xmm1, %xmm0
; SSE42-NEXT:    psubd %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_ext_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %aext = sext <4 x i32> %a to <4 x i64>
  %bext = sext <4 x i32> %b to <4 x i64>
  %sub = sub <4 x i64> %aext, %bext
  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 false)
  %trunc = trunc <4 x i64> %abs to <4 x i32>
  ret <4 x i32> %trunc
}

define <4 x i32> @abd_ext_v4i32_undef(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: abd_ext_v4i32_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    psubd %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubd %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_ext_v4i32_undef:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pminsd %xmm1, %xmm2
; SSE42-NEXT:    pmaxsd %xmm1, %xmm0
; SSE42-NEXT:    psubd %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_ext_v4i32_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %aext = sext <4 x i32> %a to <4 x i64>
  %bext = sext <4 x i32> %b to <4 x i64>
  %sub = sub <4 x i64> %aext, %bext
  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
  %trunc = trunc <4 x i64> %abs to <4 x i32>
  ret <4 x i32> %trunc
}

define <2 x i64> @abd_ext_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-LABEL: abd_ext_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    psubq %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubq %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_ext_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm2
; SSE42-NEXT:    psubq %xmm1, %xmm0
; SSE42-NEXT:    pxor %xmm2, %xmm0
; SSE42-NEXT:    psubq %xmm0, %xmm2
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: abd_ext_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsubq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_ext_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsubq %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_ext_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %aext = sext <2 x i64> %a to <2 x i128>
  %bext = sext <2 x i64> %b to <2 x i128>
  %sub = sub <2 x i128> %aext, %bext
  %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 false)
  %trunc = trunc <2 x i128> %abs to <2 x i64>
  ret <2 x i64> %trunc
}

define <2 x i64> @abd_ext_v2i64_undef(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-LABEL: abd_ext_v2i64_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    psubq %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubq %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_ext_v2i64_undef:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm2
; SSE42-NEXT:    psubq %xmm1, %xmm0
; SSE42-NEXT:    pxor %xmm2, %xmm0
; SSE42-NEXT:    psubq %xmm0, %xmm2
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: abd_ext_v2i64_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsubq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_ext_v2i64_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsubq %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_ext_v2i64_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %aext = sext <2 x i64> %a to <2 x i128>
  %bext = sext <2 x i64> %b to <2 x i128>
  %sub = sub <2 x i128> %aext, %bext
  %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
  %trunc = trunc <2 x i128> %abs to <2 x i64>
  ret <2 x i64> %trunc
}

;
; sub(smax(a,b),smin(a,b)) -> abds(a,b)
;

define <16 x i8> @abd_minmax_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE2-LABEL: abd_minmax_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubb %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_minmax_v16i8:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pminsb %xmm1, %xmm2
; SSE42-NEXT:    pmaxsb %xmm1, %xmm0
; SSE42-NEXT:    psubb %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_minmax_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %min = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> %b)
  %max = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
  %sub = sub <16 x i8> %max, %min
  ret <16 x i8> %sub
}

define <8 x i16> @abd_minmax_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE-LABEL: abd_minmax_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pminsw %xmm1, %xmm2
; SSE-NEXT:    pmaxsw %xmm1, %xmm0
; SSE-NEXT:    psubw %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: abd_minmax_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %min = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %b)
  %max = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
  %sub = sub <8 x i16> %max, %min
  ret <8 x i16> %sub
}

define <4 x i32> @abd_minmax_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: abd_minmax_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    psubd %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubd %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_minmax_v4i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pminsd %xmm1, %xmm2
; SSE42-NEXT:    pmaxsd %xmm1, %xmm0
; SSE42-NEXT:    psubd %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_minmax_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %min = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> %b)
  %max = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
  %sub = sub <4 x i32> %max, %min
  ret <4 x i32> %sub
}

define <2 x i64> @abd_minmax_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-LABEL: abd_minmax_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    psubq %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubq %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_minmax_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm2
; SSE42-NEXT:    psubq %xmm1, %xmm0
; SSE42-NEXT:    pxor %xmm2, %xmm0
; SSE42-NEXT:    psubq %xmm0, %xmm2
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: abd_minmax_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsubq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_minmax_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsubq %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_minmax_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %min = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b)
  %max = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b)
  %sub = sub <2 x i64> %max, %min
  ret <2 x i64> %sub
}

;
; select(icmp(a,b),sub(a,b),sub(b,a)) -> abds(a,b)
;

define <16 x i8> @abd_cmp_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE2-LABEL: abd_cmp_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubb %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_cmp_v16i8:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pminsb %xmm1, %xmm2
; SSE42-NEXT:    pmaxsb %xmm1, %xmm0
; SSE42-NEXT:    psubb %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_cmp_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <16 x i8> %a, %b
  %ab = sub <16 x i8> %a, %b
  %ba = sub <16 x i8> %b, %a
  %sel = select <16 x i1> %cmp, <16 x i8> %ab, <16 x i8> %ba
  ret <16 x i8> %sel
}

define <8 x i16> @abd_cmp_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE-LABEL: abd_cmp_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pminsw %xmm1, %xmm2
; SSE-NEXT:    pmaxsw %xmm1, %xmm0
; SSE-NEXT:    psubw %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: abd_cmp_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sge <8 x i16> %a, %b
  %ab = sub <8 x i16> %a, %b
  %ba = sub <8 x i16> %b, %a
  %sel = select <8 x i1> %cmp, <8 x i16> %ab, <8 x i16> %ba
  ret <8 x i16> %sel
}

define <4 x i32> @abd_cmp_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: abd_cmp_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    psubd %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubd %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_cmp_v4i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pminsd %xmm1, %xmm2
; SSE42-NEXT:    pmaxsd %xmm1, %xmm0
; SSE42-NEXT:    psubd %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_cmp_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp slt <4 x i32> %a, %b
  %ab = sub <4 x i32> %a, %b
  %ba = sub <4 x i32> %b, %a
  %sel = select <4 x i1> %cmp, <4 x i32> %ba, <4 x i32> %ab
  ret <4 x i32> %sel
}

define <2 x i64> @abd_cmp_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-LABEL: abd_cmp_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    psubq %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psubq %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_cmp_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm2
; SSE42-NEXT:    psubq %xmm1, %xmm0
; SSE42-NEXT:    pxor %xmm2, %xmm0
; SSE42-NEXT:    psubq %xmm0, %xmm2
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: abd_cmp_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsubq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_cmp_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsubq %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_cmp_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cmp = icmp sge <2 x i64> %a, %b
  %ab = sub <2 x i64> %a, %b
  %ba = sub <2 x i64> %b, %a
  %sel = select <2 x i1> %cmp, <2 x i64> %ab, <2 x i64> %ba
  ret <2 x i64> %sel
}

;
; abs(sub_nsw(x, y)) -> abds(a,b)
;

define <16 x i8> @abd_subnsw_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE2-LABEL: abd_subnsw_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psubb %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    psubb %xmm0, %xmm1
; SSE2-NEXT:    pminub %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_subnsw_v16i8:
; SSE42:       # %bb.0:
; SSE42-NEXT:    psubb %xmm1, %xmm0
; SSE42-NEXT:    pabsb %xmm0, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_subnsw_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpabsb %xmm0, %xmm0
; AVX-NEXT:    retq
  %sub = sub nsw <16 x i8> %a, %b
  %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 false)
  ret <16 x i8> %abs
}

define <8 x i16> @abd_subnsw_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: abd_subnsw_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psubw %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    psubw %xmm0, %xmm1
; SSE2-NEXT:    pmaxsw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_subnsw_v8i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    psubw %xmm1, %xmm0
; SSE42-NEXT:    pabsw %xmm0, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_subnsw_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpabsw %xmm0, %xmm0
; AVX-NEXT:    retq
  %sub = sub nsw <8 x i16> %a, %b
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 false)
  ret <8 x i16> %abs
}

define <4 x i32> @abd_subnsw_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: abd_subnsw_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psubd %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    psubd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_subnsw_v4i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    psubd %xmm1, %xmm0
; SSE42-NEXT:    pabsd %xmm0, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: abd_subnsw_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpabsd %xmm0, %xmm0
; AVX-NEXT:    retq
  %sub = sub nsw <4 x i32> %a, %b
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 false)
  ret <4 x i32> %abs
}

define <2 x i64> @abd_subnsw_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-LABEL: abd_subnsw_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psubq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    psubq %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_subnsw_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    psubq %xmm1, %xmm0
; SSE42-NEXT:    pxor %xmm1, %xmm1
; SSE42-NEXT:    psubq %xmm0, %xmm1
; SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: abd_subnsw_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpsubq %xmm0, %xmm1, %xmm1
; AVX1-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_subnsw_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsubq %xmm0, %xmm1, %xmm1
; AVX2-NEXT:    vblendvpd %xmm0, %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_subnsw_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpabsq %xmm0, %xmm0
; AVX512-NEXT:    retq
  %sub = sub nsw <2 x i64> %a, %b
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 false)
  ret <2 x i64> %abs
}

;
; Special cases
;

define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-LABEL: abd_cmp_v2i64_multiuse_cmp:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    movdqa %xmm2, %xmm6
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm5, %xmm4
; SSE2-NEXT:    psubq %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm4, %xmm0
; SSE2-NEXT:    psubq %xmm0, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE2-NEXT:    pand %xmm6, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    paddq %xmm4, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: abd_cmp_v2i64_multiuse_cmp:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm2
; SSE42-NEXT:    movdqa %xmm0, %xmm3
; SSE42-NEXT:    psubq %xmm1, %xmm3
; SSE42-NEXT:    pxor %xmm2, %xmm3
; SSE42-NEXT:    psubq %xmm3, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm0, %xmm1
; SSE42-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE42-NEXT:    pxor %xmm1, %xmm0
; SSE42-NEXT:    paddq %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: abd_cmp_v2i64_multiuse_cmp:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm3
; AVX1-NEXT:    vpxor %xmm2, %xmm3, %xmm3
; AVX1-NEXT:    vpsubq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: abd_cmp_v2i64_multiuse_cmp:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpsubq %xmm1, %xmm0, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm3, %xmm3
; AVX2-NEXT:    vpsubq %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: abd_cmp_v2i64_multiuse_cmp:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminsq %xmm1, %xmm0, %xmm2
; AVX512-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm3
; AVX512-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
; AVX512-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cmp = icmp sge <2 x i64> %a, %b
  %ab = sub <2 x i64> %a, %b
  %ba = sub <2 x i64> %b, %a
  %sel = select <2 x i1> %cmp, <2 x i64> %ab, <2 x i64> %ba
  %ext = sext <2 x i1> %cmp to <2 x i64>
  %res = add <2 x i64> %ext, %sel
  ret <2 x i64> %res
}

define <8 x i16> @abd_cmp_v8i16_multiuse_sub(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE-LABEL: abd_cmp_v8i16_multiuse_sub:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psubw %xmm1, %xmm2
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    pminsw %xmm1, %xmm3
; SSE-NEXT:    pmaxsw %xmm1, %xmm0
; SSE-NEXT:    psubw %xmm3, %xmm0
; SSE-NEXT:    paddw %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: abd_cmp_v8i16_multiuse_sub:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubw %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubw %xmm3, %xmm0, %xmm0
; AVX-NEXT:    vpaddw %xmm0, %xmm2, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <8 x i16> %a, %b
  %ab = sub <8 x i16> %a, %b
  %ba = sub <8 x i16> %b, %a
  %sel = select <8 x i1> %cmp, <8 x i16> %ab, <8 x i16> %ba
  %res = add <8 x i16> %ab, %sel
  ret <8 x i16> %res
}

declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
declare <8 x i64> @llvm.abs.v8i64(<8 x i64>, i1)
declare <16 x i64> @llvm.abs.v16i64(<16 x i64>, i1)
declare <2 x i128> @llvm.abs.v2i128(<2 x i128>, i1)

declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>)

declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>)