; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl,+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=AVX512,AVX512BW

; Each function below builds integer abs() as a compare+select of a value and
; its negation (sub 0, %a), varying the predicate (sgt/sge/slt/sle), the
; comparison constant (0 or -1) and the select operand order.  The CHECK lines
; verify that X86 codegen matches these idioms to pabs*/vpabs* where the
; target supports them.  Do not edit the CHECK lines by hand; regenerate them
; with utils/update_llc_test_checks.py.

; abs(<4 x i32>) via: select (a sgt -1) ? a : (0 - a).
define <4 x i32> @test_abs_gt_v4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: psubd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_gt_v4i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pabsd %xmm0, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_gt_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pabsd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_gt_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpabsd %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_gt_v4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpabsd %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_abs_gt_v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpabsd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0]
; AVX512-NEXT: retq # encoding: [0xc3]
  %tmp1neg = sub <4 x i32> zeroinitializer, %a
  %b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
  %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
  ret <4 x i32> %abs
}

; abs(<4 x i32>) via: select (a sge 0) ? a : (0 - a).
define <4 x i32> @test_abs_ge_v4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_ge_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: psubd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_ge_v4i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pabsd %xmm0, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_ge_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pabsd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_ge_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpabsd %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_ge_v4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpabsd %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_abs_ge_v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpabsd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0]
; AVX512-NEXT: retq # encoding: [0xc3]
  %tmp1neg = sub <4 x i32> zeroinitializer, %a
  %b = icmp sge <4 x i32> %a, zeroinitializer
  %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
  ret <4 x i32> %abs
}

; abs(<8 x i16>) via: select (a sgt 0) ? a : (0 - a).
define <8 x i16> @test_abs_gt_v8i16(<8 x i16> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: psubw %xmm0, %xmm1
; SSE2-NEXT: pmaxsw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_gt_v8i16:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pabsw %xmm0, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_gt_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pabsw %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_gt_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpabsw %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_gt_v8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpabsw %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_abs_gt_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpabsw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x1d,0xc0]
; AVX512F-NEXT: retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_gt_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpabsw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0]
; AVX512BW-NEXT: retq # encoding: [0xc3]
  %tmp1neg = sub <8 x i16> zeroinitializer, %a
  %b = icmp sgt <8 x i16> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg
  ret <8 x i16> %abs
}

; abs(<16 x i8>) via: select (a slt 0) ? (0 - a) : a  (negation selected first).
define <16 x i8> @test_abs_lt_v16i8(<16 x i8> %a) nounwind {
; SSE2-LABEL: test_abs_lt_v16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: psubb %xmm0, %xmm1
; SSE2-NEXT: pminub %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_lt_v16i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pabsb %xmm0, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_lt_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pabsb %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_lt_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpabsb %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_lt_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpabsb %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_abs_lt_v16i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpabsb %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x1c,0xc0]
; AVX512F-NEXT: retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_lt_v16i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpabsb %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0]
; AVX512BW-NEXT: retq # encoding: [0xc3]
  %tmp1neg = sub <16 x i8> zeroinitializer, %a
  %b = icmp slt <16 x i8> %a, zeroinitializer
  %abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a
  ret <16 x i8> %abs
}

; abs(<4 x i32>) via: select (a sle 0) ? (0 - a) : a.
define <4 x i32> @test_abs_le_v4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_le_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: psubd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_le_v4i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pabsd %xmm0, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_le_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pabsd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_le_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpabsd %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_le_v4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpabsd %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_abs_le_v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpabsd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0]
; AVX512-NEXT: retq # encoding: [0xc3]
  %tmp1neg = sub <4 x i32> zeroinitializer, %a
  %b = icmp sle <4 x i32> %a, zeroinitializer
  %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a
  ret <4 x i32> %abs
}

; 256-bit abs(<8 x i32>), sgt -1 form; AVX1 must split into two 128-bit halves.
define <8 x i32> @test_abs_gt_v8i32(<8 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v8i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: psubd %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: psubd %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_gt_v8i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pabsd %xmm0, %xmm0
; SSSE3-NEXT: pabsd %xmm1, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_gt_v8i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pabsd %xmm0, %xmm0
; SSE41-NEXT: pabsd %xmm1, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_gt_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpabsd %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpabsd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_gt_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpabsd %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_abs_gt_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpabsd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xc0]
; AVX512-NEXT: retq # encoding: [0xc3]
  %tmp1neg = sub <8 x i32> zeroinitializer, %a
  %b = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg
  ret <8 x i32> %abs
}

; 256-bit abs(<8 x i32>), sge 0 form.
define <8 x i32> @test_abs_ge_v8i32(<8 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_ge_v8i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: psubd %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: psubd %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_ge_v8i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pabsd %xmm0, %xmm0
; SSSE3-NEXT: pabsd %xmm1, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_ge_v8i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pabsd %xmm0, %xmm0
; SSE41-NEXT: pabsd %xmm1, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_ge_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpabsd %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpabsd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_ge_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpabsd %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_abs_ge_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpabsd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xc0]
; AVX512-NEXT: retq # encoding: [0xc3]
  %tmp1neg = sub <8 x i32> zeroinitializer, %a
  %b = icmp sge <8 x i32> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg
  ret <8 x i32> %abs
}

; 256-bit abs(<16 x i16>), sgt 0 form.
define <16 x i16> @test_abs_gt_v16i16(<16 x i16> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v16i16:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: psubw %xmm0, %xmm3
; SSE2-NEXT: pmaxsw %xmm3, %xmm0
; SSE2-NEXT: psubw %xmm1, %xmm2
; SSE2-NEXT: pmaxsw %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_gt_v16i16:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pabsw %xmm0, %xmm0
; SSSE3-NEXT: pabsw %xmm1, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_gt_v16i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pabsw %xmm0, %xmm0
; SSE41-NEXT: pabsw %xmm1, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_gt_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpabsw %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpabsw %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_gt_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpabsw %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_abs_gt_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpabsw %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1d,0xc0]
; AVX512F-NEXT: retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_gt_v16i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpabsw %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xc0]
; AVX512BW-NEXT: retq # encoding: [0xc3]
  %tmp1neg = sub <16 x i16> zeroinitializer, %a
  %b = icmp sgt <16 x i16> %a, zeroinitializer
  %abs = select <16 x i1> %b, <16 x i16> %a, <16 x i16> %tmp1neg
  ret <16 x i16> %abs
}

; 256-bit abs(<32 x i8>), slt 0 form with negation selected first.
define <32 x i8> @test_abs_lt_v32i8(<32 x i8> %a) nounwind {
; SSE2-LABEL: test_abs_lt_v32i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: psubb %xmm0, %xmm3
; SSE2-NEXT: pminub %xmm3, %xmm0
; SSE2-NEXT: psubb %xmm1, %xmm2
; SSE2-NEXT: pminub %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_lt_v32i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pabsb %xmm0, %xmm0
; SSSE3-NEXT: pabsb %xmm1, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_lt_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pabsb %xmm0, %xmm0
; SSE41-NEXT: pabsb %xmm1, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_lt_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpabsb %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpabsb %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_lt_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpabsb %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_abs_lt_v32i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpabsb %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1c,0xc0]
; AVX512F-NEXT: retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_lt_v32i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpabsb %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xc0]
; AVX512BW-NEXT: retq # encoding: [0xc3]
  %tmp1neg = sub <32 x i8> zeroinitializer, %a
  %b = icmp slt <32 x i8> %a, zeroinitializer
  %abs = select <32 x i1> %b, <32 x i8> %tmp1neg, <32 x i8> %a
  ret <32 x i8> %abs
}

; 256-bit abs(<8 x i32>), sle 0 form.
define <8 x i32> @test_abs_le_v8i32(<8 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_le_v8i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: psubd %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: psubd %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_le_v8i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pabsd %xmm0, %xmm0
; SSSE3-NEXT: pabsd %xmm1, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_le_v8i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pabsd %xmm0, %xmm0
; SSE41-NEXT: pabsd %xmm1, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_le_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpabsd %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpabsd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_le_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpabsd %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_abs_le_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpabsd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xc0]
; AVX512-NEXT: retq # encoding: [0xc3]
  %tmp1neg = sub <8 x i32> zeroinitializer, %a
  %b = icmp sle <8 x i32> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i32> %tmp1neg, <8 x i32> %a
  ret <8 x i32> %abs
}

; 512-bit abs(<16 x i32>); AVX512 uses a single EVEX vpabsd on zmm0.
define <16 x i32> @test_abs_le_16i32(<16 x i32> %a) nounwind {
; SSE2-LABEL: test_abs_le_16i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pxor %xmm4, %xmm0
; SSE2-NEXT: psubd %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: psubd %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pxor %xmm4, %xmm2
; SSE2-NEXT: psubd %xmm4, %xmm2
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pxor %xmm4, %xmm3
; SSE2-NEXT: psubd %xmm4, %xmm3
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_le_16i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pabsd %xmm0, %xmm0
; SSSE3-NEXT: pabsd %xmm1, %xmm1
; SSSE3-NEXT: pabsd %xmm2, %xmm2
; SSSE3-NEXT: pabsd %xmm3, %xmm3
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_le_16i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pabsd %xmm0, %xmm0
; SSE41-NEXT: pabsd %xmm1, %xmm1
; SSE41-NEXT: pabsd %xmm2, %xmm2
; SSE41-NEXT: pabsd %xmm3, %xmm3
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_le_16i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpabsd %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpabsd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vpabsd %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpabsd %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_le_16i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpabsd %ymm0, %ymm0
; AVX2-NEXT: vpabsd %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_abs_le_16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpabsd %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0]
; AVX512-NEXT: retq # encoding: [0xc3]
  %tmp1neg = sub <16 x i32> zeroinitializer, %a
  %b = icmp sle <16 x i32> %a, zeroinitializer
  %abs = select <16 x i1> %b, <16 x i32> %tmp1neg, <16 x i32> %a
  ret <16 x i32> %abs
}

; abs(<2 x i64>): no pabsq before AVX-512, so pre-AVX512 targets emit
; shift/xor/sub or psubq+blendvpd sequences; AVX512 matches vpabsq.
define <2 x i64> @test_abs_ge_v2i64(<2 x i64> %a) nounwind {
; SSE2-LABEL: test_abs_ge_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: psubq %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_ge_v2i64:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: pxor %xmm1, %xmm0
; SSSE3-NEXT: psubq %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_ge_v2i64:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: psubq %xmm0, %xmm1
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_ge_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm1
; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_ge_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsubq %xmm0, %xmm1, %xmm1
; AVX2-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_abs_ge_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpabsq %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xc0]
; AVX512-NEXT: retq # encoding: [0xc3]
  %tmp1neg = sub <2 x i64> zeroinitializer, %a
  %b = icmp sge <2 x i64> %a, zeroinitializer
  %abs = select <2 x i1> %b, <2 x i64> %a, <2 x i64> %tmp1neg
  ret <2 x i64> %abs
}

; 256-bit abs(<4 x i64>), sgt -1 form; AVX512 matches vpabsq on ymm0.
define <4 x i64> @test_abs_gt_v4i64(<4 x i64> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v4i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: psubq %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: psubq %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_gt_v4i64:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: pxor %xmm2, %xmm0
; SSSE3-NEXT: psubq %xmm2, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: pxor %xmm2, %xmm1
; SSSE3-NEXT: psubq %xmm2, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_gt_v4i64:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pxor %xmm3, %xmm3
; SSE41-NEXT: pxor %xmm4, %xmm4
; SSE41-NEXT: psubq %xmm0, %xmm4
; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2
; SSE41-NEXT: psubq %xmm1, %xmm3
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
; SSE41-NEXT: movapd %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_gt_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpsubq %xmm0, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_gt_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1
; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_abs_gt_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpabsq %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xc0]
; AVX512-NEXT: retq # encoding: [0xc3]
  %tmp1neg = sub <4 x i64> zeroinitializer, %a
  %b = icmp sgt <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
  %abs = select <4 x i1> %b, <4 x i64> %a, <4 x i64> %tmp1neg
  ret <4 x i64> %abs
}

; 512-bit abs(<8 x i64>); AVX512 matches a single vpabsq on zmm0.
define <8 x i64> @test_abs_le_v8i64(<8 x i64> %a) nounwind {
; SSE2-LABEL: test_abs_le_v8i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pxor %xmm4, %xmm0
; SSE2-NEXT: psubq %xmm4, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: psubq %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pxor %xmm4, %xmm2
; SSE2-NEXT: psubq %xmm4, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pxor %xmm4, %xmm3
; SSE2-NEXT: psubq %xmm4, %xmm3
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_le_v8i64:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pxor %xmm4, %xmm0
; SSSE3-NEXT: psubq %xmm4, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pxor %xmm4, %xmm1
; SSSE3-NEXT: psubq %xmm4, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pxor %xmm4, %xmm2
; SSSE3-NEXT: psubq %xmm4, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pxor %xmm4, %xmm3
; SSSE3-NEXT: psubq %xmm4, %xmm3
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_le_v8i64:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: pxor %xmm5, %xmm5
; SSE41-NEXT: pxor %xmm6, %xmm6
; SSE41-NEXT: psubq %xmm0, %xmm6
; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm4
; SSE41-NEXT: pxor %xmm6, %xmm6
; SSE41-NEXT: psubq %xmm1, %xmm6
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm1
; SSE41-NEXT: pxor %xmm6, %xmm6
; SSE41-NEXT: psubq %xmm2, %xmm6
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm2
; SSE41-NEXT: psubq %xmm3, %xmm5
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm3
; SSE41-NEXT: movapd %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_le_v8i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubq %xmm0, %xmm3, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm4, %ymm2
; AVX1-NEXT: vblendvpd %ymm0, %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpsubq %xmm1, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvpd %ymm1, %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_le_v8i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpsubq %ymm0, %ymm2, %ymm3
; AVX2-NEXT: vblendvpd %ymm0, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm1, %ymm2, %ymm2
; AVX2-NEXT: vblendvpd %ymm1, %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_abs_le_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpabsq %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0]
; AVX512-NEXT: retq # encoding: [0xc3]
  %tmp1neg = sub <8 x i64> zeroinitializer, %a
  %b = icmp sle <8 x i64> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i64> %tmp1neg, <8 x i64> %a
  ret <8 x i64> %abs
}

; Same as test_abs_le_v8i64 but with the operand loaded from memory; AVX512
; folds the load into vpabsq (%rdi), %zmm0.
define <8 x i64> @test_abs_le_v8i64_fold(ptr %a.ptr) nounwind {
; SSE2-LABEL: test_abs_le_v8i64_fold:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqu (%rdi), %xmm0
; SSE2-NEXT: movdqu 16(%rdi), %xmm1
; SSE2-NEXT: movdqu 32(%rdi), %xmm2
; SSE2-NEXT: movdqu 48(%rdi), %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pxor %xmm4, %xmm0
; SSE2-NEXT: psubq %xmm4, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: psubq %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pxor %xmm4, %xmm2
; SSE2-NEXT: psubq %xmm4, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pxor %xmm4, %xmm3
; SSE2-NEXT: psubq %xmm4, %xmm3
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_le_v8i64_fold:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqu (%rdi), %xmm0
; SSSE3-NEXT: movdqu 16(%rdi), %xmm1
; SSSE3-NEXT: movdqu 32(%rdi), %xmm2
; SSSE3-NEXT: movdqu 48(%rdi), %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pxor %xmm4, %xmm0
; SSSE3-NEXT: psubq %xmm4, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pxor %xmm4, %xmm1
; SSSE3-NEXT: psubq %xmm4, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pxor %xmm4, %xmm2
; SSSE3-NEXT: psubq %xmm4, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pxor %xmm4, %xmm3
; SSSE3-NEXT: psubq %xmm4, %xmm3
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_le_v8i64_fold:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqu (%rdi), %xmm1
; SSE41-NEXT: movdqu 16(%rdi), %xmm2
; SSE41-NEXT: movdqu 32(%rdi), %xmm3
; SSE41-NEXT: movdqu 48(%rdi), %xmm4
; SSE41-NEXT: pxor %xmm5, %xmm5
; SSE41-NEXT: pxor %xmm6, %xmm6
; SSE41-NEXT: psubq %xmm1, %xmm6
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm1
; SSE41-NEXT: pxor %xmm6, %xmm6
; SSE41-NEXT: psubq %xmm2, %xmm6
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm2
; SSE41-NEXT: pxor %xmm6, %xmm6
; SSE41-NEXT: psubq %xmm3, %xmm6
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm3
; SSE41-NEXT: psubq %xmm4, %xmm5
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm4
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: movapd %xmm2, %xmm1
; SSE41-NEXT: movapd %xmm3, %xmm2
; SSE41-NEXT: movapd %xmm4, %xmm3
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_le_v8i64_fold:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqu (%rdi), %ymm0
; AVX1-NEXT: vmovdqu 32(%rdi), %ymm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubq 16(%rdi), %xmm2, %xmm3
; AVX1-NEXT: vpsubq %xmm0, %xmm2, %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
; AVX1-NEXT: vblendvpd %ymm0, %ymm3, %ymm0, %ymm0
; AVX1-NEXT: vpsubq 48(%rdi), %xmm2, %xmm3
; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm1, %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_le_v8i64_fold:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqu (%rdi), %ymm0
; AVX2-NEXT: vmovdqu 32(%rdi), %ymm1
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpsubq %ymm0, %ymm2, %ymm3
; AVX2-NEXT: vblendvpd %ymm0, %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsubq %ymm1, %ymm2, %ymm2
; AVX2-NEXT: vblendvpd %ymm1, %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_abs_le_v8i64_fold:
; AVX512: # %bb.0:
; AVX512-NEXT: vpabsq (%rdi), %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x1f,0x07]
; AVX512-NEXT: retq # encoding: [0xc3]
  %a = load <8 x i64>, ptr %a.ptr, align 8
  %tmp1neg = sub <8 x i64> zeroinitializer, %a
  %b = icmp sle <8 x i64> %a, zeroinitializer
  %abs = select <8 x i1> %b, <8 x i64> %tmp1neg, <8 x i64> %a
  ret <8 x i64> %abs
}

; 512-bit abs(<64 x i8>): needs AVX512BW for a single zmm vpabsb; plain
; AVX512F splits into two ymm halves via vextracti64x4/vinserti64x4.
define <64 x i8> @test_abs_lt_v64i8(<64 x i8> %a) nounwind {
; SSE2-LABEL: test_abs_lt_v64i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pxor %xmm5, %xmm5
; SSE2-NEXT: psubb %xmm0, %xmm5
; SSE2-NEXT: pminub %xmm5, %xmm0
; SSE2-NEXT: pxor %xmm5, %xmm5
; SSE2-NEXT: psubb %xmm1, %xmm5
; SSE2-NEXT: pminub %xmm5, %xmm1
; SSE2-NEXT: pxor %xmm5, %xmm5
; SSE2-NEXT: psubb %xmm2, %xmm5
; SSE2-NEXT: pminub %xmm5, %xmm2
; SSE2-NEXT: psubb %xmm3, %xmm4
; SSE2-NEXT: pminub %xmm4, %xmm3
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_lt_v64i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pabsb %xmm0, %xmm0
; SSSE3-NEXT: pabsb %xmm1, %xmm1
; SSSE3-NEXT: pabsb %xmm2, %xmm2
; SSSE3-NEXT: pabsb %xmm3, %xmm3
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_lt_v64i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pabsb %xmm0, %xmm0
; SSE41-NEXT: pabsb %xmm1, %xmm1
; SSE41-NEXT: pabsb %xmm2, %xmm2
; SSE41-NEXT: pabsb %xmm3, %xmm3
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_lt_v64i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpabsb %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpabsb %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vpabsb %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpabsb %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_lt_v64i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpabsb %ymm0, %ymm0
; AVX2-NEXT: vpabsb %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_abs_lt_v64i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpabsb %ymm0, %ymm1 # encoding: [0xc4,0xe2,0x7d,0x1c,0xc8]
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xc0,0x01]
; AVX512F-NEXT: vpabsb %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1c,0xc0]
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 # encoding: [0x62,0xf3,0xf5,0x48,0x3a,0xc0,0x01]
; AVX512F-NEXT: retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_lt_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpabsb %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x1c,0xc0]
; AVX512BW-NEXT: retq # encoding: [0xc3]
  %tmp1neg = sub <64 x i8> zeroinitializer, %a
  %b = icmp slt <64 x i8> %a, zeroinitializer
  %abs = select <64 x i1> %b, <64 x i8> %tmp1neg, <64 x i8> %a
  ret <64 x i8> %abs
}

; 512-bit abs(<32 x i16>): needs AVX512BW for a single zmm vpabsw; plain
; AVX512F splits into two ymm halves.
define <32 x i16> @test_abs_gt_v32i16(<32 x i16> %a) nounwind {
; SSE2-LABEL: test_abs_gt_v32i16:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pxor %xmm5, %xmm5
; SSE2-NEXT: psubw %xmm0, %xmm5
; SSE2-NEXT: pmaxsw %xmm5, %xmm0
; SSE2-NEXT: pxor %xmm5, %xmm5
; SSE2-NEXT: psubw %xmm1, %xmm5
; SSE2-NEXT: pmaxsw %xmm5, %xmm1
; SSE2-NEXT: pxor %xmm5, %xmm5
; SSE2-NEXT: psubw %xmm2, %xmm5
; SSE2-NEXT: pmaxsw %xmm5, %xmm2
; SSE2-NEXT: psubw %xmm3, %xmm4
; SSE2-NEXT: pmaxsw %xmm4, %xmm3
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_gt_v32i16:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pabsw %xmm0, %xmm0
; SSSE3-NEXT: pabsw %xmm1, %xmm1
; SSSE3-NEXT: pabsw %xmm2, %xmm2
; SSSE3-NEXT: pabsw %xmm3, %xmm3
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_gt_v32i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pabsw %xmm0, %xmm0
; SSE41-NEXT: pabsw %xmm1, %xmm1
; SSE41-NEXT: pabsw %xmm2, %xmm2
; SSE41-NEXT: pabsw %xmm3, %xmm3
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_abs_gt_v32i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpabsw %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpabsw %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vpabsw %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpabsw %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_abs_gt_v32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpabsw %ymm0, %ymm0
; AVX2-NEXT: vpabsw %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_abs_gt_v32i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpabsw %ymm0, %ymm1 # encoding: [0xc4,0xe2,0x7d,0x1d,0xc8]
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xc0,0x01]
; AVX512F-NEXT: vpabsw %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1d,0xc0]
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 # encoding: [0x62,0xf3,0xf5,0x48,0x3a,0xc0,0x01]
; AVX512F-NEXT: retq # encoding: [0xc3]
;
; AVX512BW-LABEL: test_abs_gt_v32i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpabsw %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x1d,0xc0]
; AVX512BW-NEXT: retq # encoding: [0xc3]
  %tmp1neg = sub <32 x i16> zeroinitializer, %a
  %b = icmp sgt <32 x i16> %a, zeroinitializer
  %abs = select <32 x i1> %b, <32 x i16> %a, <32 x i16> %tmp1neg
  ret <32 x i16> %abs
}