; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1OR2,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX1OR2,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX2,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX,AVX2,AVX512BW

; The OR forces the sign bit of %x to be set, so %x is known negative and
; smin(%x, 0) folds to %x.
define i8 @test_i8_knownbits(i8 %a) {
; CHECK-LABEL: test_i8_knownbits:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    orb $-128, %al
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %x = or i8 %a, -128
  %1 = call i8 @llvm.smin.i8(i8 %x, i8 0)
  ret i8 %1
}

; Both operands are masked to [0,15], so the sign bits are known zero and the
; signed min can be lowered as an unsigned min (pminub) on SSE2.
define <16 x i8> @test_v16i8_nosignbit(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: test_v16i8_nosignbit:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pminub %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i8_nosignbit:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT:    pand %xmm2, %xmm0
; SSE41-NEXT:    pand %xmm1, %xmm2
; SSE41-NEXT:    pminsb %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: test_v16i8_nosignbit:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE42-NEXT:    pand %xmm2, %xmm0
; SSE42-NEXT:    pand %xmm1, %xmm2
; SSE42-NEXT:    pminsb %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: test_v16i8_nosignbit:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i8_nosignbit:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = and <16 x i8> %a, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
  %2 = and <16 x i8> %b, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
  %3 = icmp slt <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ret <16 x i8> %4
}

; The repeated clamp is redundant: smin(smin(a, 0), 0) folds to a single
; smin(a, 0).
define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) {
; SSE2-LABEL: test_v16i8_reassociation:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i8_reassociation:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pminsb %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: test_v16i8_reassociation:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pxor %xmm1, %xmm1
; SSE42-NEXT:    pminsb %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: test_v16i8_reassociation:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
  %2 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %1, <16 x i8> zeroinitializer)
  ret <16 x i8> %2
}

; Only the sign bit of the smin result is demanded (icmp sge 0 feeding a
; select), and sign(smin(x, y)) == sign(x) | sign(y), so most targets replace
; the smin with a plain OR feeding the blend.
define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: test_v16i8_demandedbits:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm4
; SSE2-NEXT:    pcmpgtb %xmm0, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm4
; SSE2-NEXT:    por %xmm0, %xmm4
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpgtb %xmm4, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm3
; SSE2-NEXT:    pandn %xmm2, %xmm0
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i8_demandedbits:
; SSE41:       # %bb.0:
; SSE41-NEXT:    orps %xmm1, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: test_v16i8_demandedbits:
; SSE42:       # %bb.0:
; SSE42-NEXT:    orps %xmm1, %xmm0
; SSE42-NEXT:    pblendvb %xmm0, %xmm3, %xmm2
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1OR2-LABEL: test_v16i8_demandedbits:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1OR2-NEXT:    vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
; AVX1OR2-NEXT:    retq
;
; AVX512F-LABEL: test_v16i8_demandedbits:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpblendvb %xmm0, %xmm3, %xmm2, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: test_v16i8_demandedbits:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm3 killed $xmm3 def $zmm3
; AVX512BW-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512BW-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT:    vpcmpnltb %zmm1, %zmm0, %k1
; AVX512BW-NEXT:    vpblendmb %zmm2, %zmm3, %zmm0 {%k1}
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
  %smin = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> %x, <16 x i8> %y)
  %cmp = icmp sge <16 x i8> %smin, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %res
}

declare i8 @llvm.smin.i8(i8, i8)
declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)