; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1OR2,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX1OR2,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX2,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX,AVX2,AVX512BW

; The lshr clears the sign bit, so smax(%x, 0) is known to be a no-op and only
; the shift survives.
define i8 @test_i8_knownbits(i8 %a) {
; CHECK-LABEL: test_i8_knownbits:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: shrb %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
  %x = lshr i8 %a, 1
  %1 = call i8 @llvm.smax.i8(i8 %x, i8 0)
  ret i8 %1
}

; Both operands are masked to their low 4 bits, so signed and unsigned
; comparisons agree: SSE2 can use pmaxub, while SSE4.1+ uses pmaxsb directly.
define <16 x i8> @test_v16i8_nosignbit(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: test_v16i8_nosignbit:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: pmaxub %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16i8_nosignbit:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: pmaxsb %xmm2, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: test_v16i8_nosignbit:
; SSE42: # %bb.0:
; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE42-NEXT: pand %xmm2, %xmm0
; SSE42-NEXT: pand %xmm1, %xmm2
; SSE42-NEXT: pmaxsb %xmm2, %xmm0
; SSE42-NEXT: retq
;
; AVX1-LABEL: test_v16i8_nosignbit:
; AVX1: # %bb.0:
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i8_nosignbit:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
  %1 = and <16 x i8> %a, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
  %2 = and <16 x i8> %b, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
  %3 = icmp sgt <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ret <16 x i8> %4
}

; smax(smax(a, 0), 0) folds to a single smax(a, 0).
define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
; SSE2-LABEL: test_v16i8_reassociation:
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16i8_reassociation:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pmaxsb %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: test_v16i8_reassociation:
; SSE42: # %bb.0:
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: pmaxsb %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX-LABEL: test_v16i8_reassociation:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
  %2 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %1, <16 x i8> zeroinitializer)
  ret <16 x i8> %2
}

; Only the sign bit of the smax result is demanded by the select; when the
; select lowers to pblendvb (which reads only the sign bit), the smax
; simplifies to an AND of the operands, since smax(x, y) is negative iff both
; x and y are negative.
define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: test_v16i8_demandedbits:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: pcmpgtb %xmm1, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm4
; SSE2-NEXT: por %xmm0, %xmm4
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: pcmpgtb %xmm4, %xmm0
; SSE2-NEXT: pand %xmm0, %xmm3
; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16i8_demandedbits:
; SSE41: # %bb.0:
; SSE41-NEXT: andps %xmm1, %xmm0
; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: test_v16i8_demandedbits:
; SSE42: # %bb.0:
; SSE42-NEXT: andps %xmm1, %xmm0
; SSE42-NEXT: pblendvb %xmm0, %xmm3, %xmm2
; SSE42-NEXT: movdqa %xmm2, %xmm0
; SSE42-NEXT: retq
;
; AVX1OR2-LABEL: test_v16i8_demandedbits:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1OR2-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
; AVX1OR2-NEXT: retq
;
; AVX512F-LABEL: test_v16i8_demandedbits:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_v16i8_demandedbits:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
; AVX512BW-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512BW-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpblendmb %zmm2, %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %smax = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> %x, <16 x i8> %y)
  %cmp = icmp sge <16 x i8> %smax, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %res
}

declare i8 @llvm.smax.i8(i8, i8)
declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)