; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST,AVX2-FAST-ALL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST,AVX2-FAST-PERLANE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=SKX

;
; Unsigned saturation truncation to vXi32
;
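; All of the tests below clamp with the same three-instruction IR idiom before
; truncating (a sketch of the shape only; SAT is the all-ones value of the
; narrow element type, e.g. 4294967295 for vXi32):
;   %1 = icmp ult <N x i64> %a0, <splat SAT>
;   %2 = select <N x i1> %1, <N x i64> %a0, <N x i64> <splat SAT>
;   %3 = trunc <N x i64> %2 to <N x i32>
; AVX512 matches the idiom to a saturating truncate (vpmovusqd and friends).
; Pre-AVX512 targets have no unsigned vXi64 compare, so the inputs are first
; biased by 0x8000000080000000 (9223372039002259456) with pxor, after which a
; signed pcmpgtd/pcmpeqd pair (or pcmpgtq on AVX) emulates the unsigned compare.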

define <2 x i32> @trunc_usat_v2i64_v2i32(<2 x i64> %a0) {
; SSE2-SSSE3-LABEL: trunc_usat_v2i64_v2i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: por %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v2i64_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: pxor %xmm0, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pandn %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc_usat_v2i64_v2i32:
; AVX: # %bb.0:
; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v2i64_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v2i64_v2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovusqd %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v2i64_v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpmovusqd %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i32:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovusqd %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v2i64_v2i32:
; SKX: # %bb.0:
; SKX-NEXT: vpmovusqd %xmm0, %xmm0
; SKX-NEXT: retq
  %1 = icmp ult <2 x i64> %a0, <i64 4294967295, i64 4294967295>
  %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 4294967295, i64 4294967295>
  %3 = trunc <2 x i64> %2 to <2 x i32>
  ret <2 x i32> %3
}

define void @trunc_usat_v2i64_v2i32_store(<2 x i64> %a0, ptr %p1) {
; SSE2-SSSE3-LABEL: trunc_usat_v2i64_v2i32_store:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: por %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SSE2-SSSE3-NEXT: movq %xmm0, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v2i64_v2i32_store:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: pxor %xmm0, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pandn %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE41-NEXT: movq %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc_usat_v2i64_v2i32_store:
; AVX: # %bb.0:
; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vmovlpd %xmm0, (%rdi)
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v2i64_v2i32_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v2i64_v2i32_store:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovusqd %xmm0, (%rdi)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v2i64_v2i32_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpmovusqd %zmm0, %ymm0
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i32_store:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovusqd %xmm0, (%rdi)
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v2i64_v2i32_store:
; SKX: # %bb.0:
; SKX-NEXT: vpmovusqd %xmm0, (%rdi)
; SKX-NEXT: retq
  %1 = icmp ult <2 x i64> %a0, <i64 4294967295, i64 4294967295>
  %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 4294967295, i64 4294967295>
  %3 = trunc <2 x i64> %2 to <2 x i32>
  store <2 x i32> %3, ptr %p1
  ret void
}

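; Note: the last lane of the select constant below (429496729) deliberately
; does not match the icmp bound (4294967295), so this is not a uniform
; saturation. AVX512 therefore cannot fold it to vpmovusqd and instead emits
; an unsigned compare (vpcmpltuq) plus a masked move ahead of the plain
; vpmovqd truncate.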
define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
; SSE2-SSSE3-LABEL: trunc_usat_v4i64_v4i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147483647,2147483647,2147483647,2147483647]
; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm6
; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm6, %xmm3
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm4
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm5
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm4
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm4
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4
; SSE2-SSSE3-NEXT: por %xmm1, %xmm4
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT: por %xmm1, %xmm0
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm4[0,2]
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v4i64_v4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: movdqa %xmm0, %xmm5
; SSE41-NEXT: pxor %xmm4, %xmm5
; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [9223372039002259455,9223372039002259455]
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm6, %xmm5
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483647,2147483647,2147483647,2147483647]
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: pcmpgtd %xmm7, %xmm3
; SSE41-NEXT: pand %xmm5, %xmm3
; SSE41-NEXT: pxor %xmm1, %xmm4
; SSE41-NEXT: pcmpeqd %xmm4, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,0,2,2]
; SSE41-NEXT: pcmpgtd %xmm4, %xmm0
; SSE41-NEXT: pand %xmm6, %xmm0
; SSE41-NEXT: movapd {{.*#+}} xmm4 = [4294967295,4294967295]
; SSE41-NEXT: movapd {{.*#+}} xmm5 = [4294967295,429496729]
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4
; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,2],xmm5[0,2]
; SSE41-NEXT: movaps %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v4i64_v4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372041149743103,9223372041149743103]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpxor %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vmovapd {{.*#+}} xmm3 = [4294967295,429496729]
; AVX1-NEXT: vblendvpd %xmm1, %xmm4, %xmm3, %xmm1
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [4294967295,4294967295]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc_usat_v4i64_v4i32:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-SLOW-NEXT: vpxor %ymm1, %ymm0, %ymm1
; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372041149743102,9223372041149743102,9223372041149743102,9223372041149743102]
; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm1
; AVX2-SLOW-NEXT: vblendvpd %ymm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-ALL-LABEL: trunc_usat_v4i64_v4i32:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-FAST-ALL-NEXT: vpxor %ymm1, %ymm0, %ymm1
; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372041149743102,9223372041149743102,9223372041149743102,9223372041149743102]
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm1
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6]
; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-ALL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-FAST-ALL-NEXT: vzeroupper
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: trunc_usat_v4i64_v4i32:
; AVX2-FAST-PERLANE: # %bb.0:
; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-FAST-PERLANE-NEXT: vpxor %ymm1, %ymm0, %ymm1
; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372041149743102,9223372041149743102,9223372041149743102,9223372041149743102]
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm1
; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX2-FAST-PERLANE-NEXT: vzeroupper
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v4i64_v4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
; AVX512F-NEXT: vpmovzxdq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT: vpmovqd %zmm1, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v4i64_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; AVX512VL-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
; AVX512VL-NEXT: vpmovqd %ymm1, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v4i64_v4i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
; AVX512BW-NEXT: vpmovzxdq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v4i64_v4i32:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; AVX512BWVL-NEXT: vpmovzxdq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; AVX512BWVL-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
; AVX512BWVL-NEXT: vpmovqd %ymm1, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v4i64_v4i32:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpltuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; SKX-NEXT: vpmovzxdq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,429496729]
; SKX-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
; SKX-NEXT: vpmovqd %ymm1, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %1 = icmp ult <4 x i64> %a0, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
  %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 429496729>
  %3 = trunc <4 x i64> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i32> @trunc_usat_v8i64_v8i32(ptr %p0) {
; SSE2-SSSE3-LABEL: trunc_usat_v8i64_v8i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa (%rdi), %xmm2
; SSE2-SSSE3-NEXT: movdqa 16(%rdi), %xmm0
; SSE2-SSSE3-NEXT: movdqa 32(%rdi), %xmm6
; SSE2-SSSE3-NEXT: movdqa 48(%rdi), %xmm1
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm5
; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm5
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483647,2147483647,2147483647,2147483647]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm8
; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm8
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm5[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm7
; SSE2-SSSE3-NEXT: pand %xmm8, %xmm7
; SSE2-SSSE3-NEXT: pcmpeqd %xmm5, %xmm5
; SSE2-SSSE3-NEXT: pand %xmm7, %xmm1
; SSE2-SSSE3-NEXT: pxor %xmm5, %xmm7
; SSE2-SSSE3-NEXT: por %xmm1, %xmm7
; SSE2-SSSE3-NEXT: movdqa %xmm6, %xmm1
; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm9
; SSE2-SSSE3-NEXT: pcmpgtd %xmm8, %xmm9
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm9, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm6
; SSE2-SSSE3-NEXT: pxor %xmm5, %xmm1
; SSE2-SSSE3-NEXT: por %xmm6, %xmm1
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm7[0,2]
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm6
; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm6
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm8
; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm8
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm6
; SSE2-SSSE3-NEXT: pand %xmm8, %xmm6
; SSE2-SSSE3-NEXT: pand %xmm6, %xmm0
; SSE2-SSSE3-NEXT: pxor %xmm5, %xmm6
; SSE2-SSSE3-NEXT: por %xmm0, %xmm6
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm4
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm5
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: por %xmm5, %xmm0
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm6[0,2]
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v8i64_v8i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa (%rdi), %xmm4
; SSE41-NEXT: movdqa 16(%rdi), %xmm7
; SSE41-NEXT: movdqa 32(%rdi), %xmm8
; SSE41-NEXT: movdqa 48(%rdi), %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm3 = [4294967295,4294967295]
; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: movdqa %xmm1, %xmm9
; SSE41-NEXT: pxor %xmm6, %xmm9
; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259455,9223372039002259455]
; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm5, %xmm9
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: pcmpgtd %xmm10, %xmm0
; SSE41-NEXT: pand %xmm9, %xmm0
; SSE41-NEXT: movapd %xmm3, %xmm9
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm9
; SSE41-NEXT: movdqa %xmm8, %xmm1
; SSE41-NEXT: pxor %xmm6, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm1[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm5, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: pcmpgtd %xmm10, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: movapd %xmm3, %xmm1
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm9[0,2]
; SSE41-NEXT: movdqa %xmm7, %xmm8
; SSE41-NEXT: pxor %xmm6, %xmm8
; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm5, %xmm8
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: pcmpgtd %xmm9, %xmm0
; SSE41-NEXT: pand %xmm8, %xmm0
; SSE41-NEXT: movapd %xmm3, %xmm8
; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm8
; SSE41-NEXT: pxor %xmm4, %xmm6
; SSE41-NEXT: pcmpeqd %xmm6, %xmm5
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
; SSE41-NEXT: pand %xmm5, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2],xmm8[0,2]
; SSE41-NEXT: movaps %xmm3, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v8i64_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2
; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3
; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm4 = mem[0,0]
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm5
; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [9223372041149743103,9223372041149743103]
; AVX1-NEXT: # xmm6 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [4294967295,4294967295]
; AVX1-NEXT: # xmm7 = mem[0,0]
; AVX1-NEXT: vblendvpd %xmm5, %xmm2, %xmm7, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm7, %xmm0
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm5
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm7, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
; AVX1-NEXT: vblendvpd %xmm4, %xmm1, %xmm7, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc_usat_v8i64_v8i32:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-SLOW-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-SLOW-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-SLOW-NEXT: vpxor %ymm3, %ymm1, %ymm4
; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103]
; AVX2-SLOW-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4
; AVX2-SLOW-NEXT: vblendvpd %ymm4, %ymm1, %ymm2, %ymm1
; AVX2-SLOW-NEXT: vpxor %ymm3, %ymm0, %ymm3
; AVX2-SLOW-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3
; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-ALL-LABEL: trunc_usat_v8i64_v8i32:
; AVX2-FAST-ALL: # %bb.0:
; AVX2-FAST-ALL-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-FAST-ALL-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-FAST-ALL-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-FAST-ALL-NEXT: vpxor %ymm3, %ymm1, %ymm4
; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103]
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm4, %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vpxor %ymm3, %ymm0, %ymm3
; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3
; AVX2-FAST-ALL-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: trunc_usat_v8i64_v8i32:
; AVX2-FAST-PERLANE: # %bb.0:
; AVX2-FAST-PERLANE-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-FAST-PERLANE-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-FAST-PERLANE-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4294967295,4294967295,4294967295,4294967295]
; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-FAST-PERLANE-NEXT: vpxor %ymm3, %ymm1, %ymm4
; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372041149743103,9223372041149743103,9223372041149743103,9223372041149743103]
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4
; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm4, %ymm1, %ymm2, %ymm1
; AVX2-FAST-PERLANE-NEXT: vpxor %ymm3, %ymm0, %ymm3
; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3
; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX512-LABEL: trunc_usat_v8i64_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT: vpmovusqd %zmm0, %ymm0
; AVX512-NEXT: retq
;
; SKX-LABEL: trunc_usat_v8i64_v8i32:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 (%rdi), %zmm0
; SKX-NEXT: vpmovusqd %zmm0, %ymm0
; SKX-NEXT: retq
  %a0 = load <8 x i64>, ptr %p0
  %1 = icmp ult <8 x i64> %a0, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
  %3 = trunc <8 x i64> %2 to <8 x i32>
  ret <8 x i32> %3
}

;
; Unsigned saturation truncation to vXi16
;
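; Same clamp idiom with SAT = 65535. The biased unsigned-compare bound becomes
; 0x800000008000FFFF (9223372039002324991), i.e. 2147549183 in each 32-bit
; half, and SSE4.1+ can finish with packusdw once values are clamped to the
; i16 range.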

define <2 x i16> @trunc_usat_v2i64_v2i16(<2 x i64> %a0) {
; SSE2-SSSE3-LABEL: trunc_usat_v2i64_v2i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: por %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v2i64_v2i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [65535,65535]
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: pxor %xmm0, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pandn %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v2i64_v2i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc_usat_v2i64_v2i16:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-SLOW-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-SLOW-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_usat_v2i64_v2i16:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-FAST-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-FAST-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
; AVX2-FAST-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v2i64_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v2i64_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovusqw %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v2i64_v2i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovusqw %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v2i64_v2i16:
; SKX: # %bb.0:
; SKX-NEXT: vpmovusqw %xmm0, %xmm0
; SKX-NEXT: retq
  %1 = icmp ult <2 x i64> %a0, <i64 65535, i64 65535>
  %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 65535, i64 65535>
  %3 = trunc <2 x i64> %2 to <2 x i16>
  ret <2 x i16> %3
}

define void @trunc_usat_v2i64_v2i16_store(<2 x i64> %a0, ptr %p1) {
; SSE2-SSSE3-LABEL: trunc_usat_v2i64_v2i16_store:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: por %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: movd %xmm0, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v2i64_v2i16_store:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [65535,65535]
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: pxor %xmm0, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pandn %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE41-NEXT: movd %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX1-NEXT: vmovd %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-SLOW-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-SLOW-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX2-SLOW-NEXT: vmovd %xmm0, (%rdi)
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-FAST-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-FAST-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX2-FAST-NEXT: vmovd %xmm0, (%rdi)
; AVX2-FAST-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovusqw %xmm0, (%rdi)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovusqw %xmm0, (%rdi)
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v2i64_v2i16_store:
; SKX: # %bb.0:
; SKX-NEXT: vpmovusqw %xmm0, (%rdi)
; SKX-NEXT: retq
  %1 = icmp ult <2 x i64> %a0, <i64 65535, i64 65535>
  %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 65535, i64 65535>
  %3 = trunc <2 x i64> %2 to <2 x i16>
  store <2 x i16> %3, ptr %p1
  ret void
}

define <4 x i16> @trunc_usat_v4i64_v4i16(<4 x i64> %a0) {
; SSE2-SSSE3-LABEL: trunc_usat_v4i64_v4i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm3
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm6
; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm6, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm4
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm3
; SSE2-SSSE3-NEXT: por %xmm1, %xmm3
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm5
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm4
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: por %xmm4, %xmm0
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT: pslld $16, %xmm0
; SSE2-SSSE3-NEXT: psrad $16, %xmm0
; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm1
; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v4i64_v4i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [65535,65535]
; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: movdqa %xmm1, %xmm6
; SSE41-NEXT: pxor %xmm5, %xmm6
; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [9223372039002324991,9223372039002324991]
; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm7, %xmm6
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147549183,2147549183,2147549183,2147549183]
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: pcmpgtd %xmm8, %xmm0
; SSE41-NEXT: pand %xmm6, %xmm0
; SSE41-NEXT: movapd %xmm2, %xmm6
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm6
; SSE41-NEXT: pxor %xmm3, %xmm5
; SSE41-NEXT: pcmpeqd %xmm5, %xmm7
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
; SSE41-NEXT: pcmpgtd %xmm0, %xmm4
; SSE41-NEXT: pand %xmm7, %xmm4
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2
; SSE41-NEXT: packusdw %xmm6, %xmm2
; SSE41-NEXT: packusdw %xmm2, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v4i64_v4i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpxor %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [65535,65535]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vblendvpd %xmm1, %xmm4, %xmm3, %xmm1
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_usat_v4i64_v4i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [65535,65535,65535,65535]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854841342,9223372036854841342,9223372036854841342,9223372036854841342]
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v4i64_v4i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v4i64_v4i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovusqw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v4i64_v4i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v4i64_v4i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovusqw %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v4i64_v4i16:
; SKX: # %bb.0:
; SKX-NEXT: vpmovusqw %ymm0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %1 = icmp ult <4 x i64> %a0, <i64 65535, i64 65535, i64 65535, i64 65535>
  %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>
  %3 = trunc <4 x i64> %2 to <4 x i16>
  ret <4 x i16> %3
}

define void @trunc_usat_v4i64_v4i16_store(<4 x i64> %a0, ptr %p1) {
; SSE2-SSSE3-LABEL: trunc_usat_v4i64_v4i16_store:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm3
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm6
; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm6, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm4
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm3
; SSE2-SSSE3-NEXT: por %xmm1, %xmm3
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm5
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm4
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: por %xmm4, %xmm1
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT: pslld $16, %xmm1
; SSE2-SSSE3-NEXT: psrad $16, %xmm1
; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm1
; SSE2-SSSE3-NEXT: movq %xmm1, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v4i64_v4i16_store:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movapd {{.*#+}} xmm4 = [65535,65535]
; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: movdqa %xmm1, %xmm6
; SSE41-NEXT: pxor %xmm5, %xmm6
; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [9223372039002324991,9223372039002324991]
; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm7, %xmm6
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147549183,2147549183,2147549183,2147549183]
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: pcmpgtd %xmm8, %xmm0
; SSE41-NEXT: pand %xmm6, %xmm0
; SSE41-NEXT: movapd %xmm4, %xmm6
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm6
; SSE41-NEXT: pxor %xmm2, %xmm5
; SSE41-NEXT: pcmpeqd %xmm5, %xmm7
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
; SSE41-NEXT: pcmpgtd %xmm0, %xmm3
; SSE41-NEXT: pand %xmm7, %xmm3
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4
; SSE41-NEXT: packusdw %xmm6, %xmm4
; SSE41-NEXT: packusdw %xmm4, %xmm4
; SSE41-NEXT: movq %xmm4, (%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v4i64_v4i16_store:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpxor %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [65535,65535]
; AVX1-NEXT: # xmm3 = mem[0,0]
; AVX1-NEXT: vblendvpd %xmm1, %xmm4, %xmm3, %xmm1
; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_usat_v4i64_v4i16_store:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [65535,65535,65535,65535]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854841342,9223372036854841342,9223372036854841342,9223372036854841342]
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v4i64_v4i16_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v4i64_v4i16_store:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovusqw %ymm0, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v4i64_v4i16_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v4i64_v4i16_store:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovusqw %ymm0, (%rdi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v4i64_v4i16_store:
; SKX: # %bb.0:
; SKX-NEXT: vpmovusqw %ymm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %1 = icmp ult <4 x i64> %a0, <i64 65535, i64 65535, i64 65535, i64 65535>
  %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 65535, i64 65535, i64 65535, i64 65535>
  %3 = trunc <4 x i64> %2 to <4 x i16>
  store <4 x i16> %3, ptr %p1
  ret void
}

define <8 x i16> @trunc_usat_v8i64_v8i16(ptr %p0) {
; SSE2-SSSE3-LABEL: trunc_usat_v8i64_v8i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa (%rdi), %xmm6
; SSE2-SSSE3-NEXT: movdqa 16(%rdi), %xmm0
; SSE2-SSSE3-NEXT: movdqa 32(%rdi), %xmm1
; SSE2-SSSE3-NEXT: movdqa 48(%rdi), %xmm5
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm4
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm4
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm4[0,0,2,2]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147549183,2147549183,2147549183,2147549183]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm8
; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm8
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm4[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm7
; SSE2-SSSE3-NEXT: pand %xmm8, %xmm7
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm4
; SSE2-SSSE3-NEXT: pand %xmm7, %xmm0
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm7
; SSE2-SSSE3-NEXT: por %xmm0, %xmm7
; SSE2-SSSE3-NEXT: movdqa %xmm6, %xmm0
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm0[0,0,2,2]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm9
; SSE2-SSSE3-NEXT: pcmpgtd %xmm8, %xmm9
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm9, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm6
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm0
; SSE2-SSSE3-NEXT: por %xmm6, %xmm0
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm7[0,2]
; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm6
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm6
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm8
; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm8
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm6
; SSE2-SSSE3-NEXT: pand %xmm8, %xmm6
; SSE2-SSSE3-NEXT: pand %xmm6, %xmm5
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm6
; SSE2-SSSE3-NEXT: por %xmm5, %xmm6
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm5
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm5
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm5
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm5
; SSE2-SSSE3-NEXT: pxor %xmm5, %xmm4
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5
; SSE2-SSSE3-NEXT: por %xmm4, %xmm5
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,2],xmm6[0,2]
; SSE2-SSSE3-NEXT: pslld $16, %xmm5
; SSE2-SSSE3-NEXT: psrad $16, %xmm5
; SSE2-SSSE3-NEXT: pslld $16, %xmm0
; SSE2-SSSE3-NEXT: psrad $16, %xmm0
; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v8i64_v8i16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa (%rdi), %xmm8
; SSE41-NEXT: movdqa 16(%rdi), %xmm2
; SSE41-NEXT: movdqa 32(%rdi), %xmm4
; SSE41-NEXT: movdqa 48(%rdi), %xmm7
; SSE41-NEXT: movapd {{.*#+}} xmm3 = [65535,65535]
; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT: movdqa %xmm2, %xmm9
; SSE41-NEXT: pxor %xmm6, %xmm9
; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002324991,9223372039002324991]
; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm5, %xmm9
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147549183,2147549183,2147549183,2147549183]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pcmpgtd %xmm10, %xmm0
; SSE41-NEXT: pand %xmm9, %xmm0
; SSE41-NEXT: movapd %xmm3, %xmm9
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm9
; SSE41-NEXT: movdqa %xmm8, %xmm2
; SSE41-NEXT: pxor %xmm6, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm2[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm5, %xmm2
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pcmpgtd %xmm10, %xmm0
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: movapd %xmm3, %xmm2
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2
; SSE41-NEXT: packusdw %xmm9, %xmm2
; SSE41-NEXT: movdqa %xmm7, %xmm8
; SSE41-NEXT: pxor %xmm6, %xmm8
; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm5, %xmm8
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pcmpgtd %xmm9, %xmm0
; SSE41-NEXT: pand %xmm8, %xmm0
; SSE41-NEXT: movapd %xmm3, %xmm8
; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm8
; SSE41-NEXT: pxor %xmm4, %xmm6
; SSE41-NEXT: pcmpeqd %xmm6, %xmm5
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
; SSE41-NEXT: pand %xmm5, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3
; SSE41-NEXT: packusdw %xmm8, %xmm3
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v8i64_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2
; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3
; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm4 = mem[0,0]
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [9223372036854841343,9223372036854841343]
; AVX1-NEXT: # xmm6 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm7
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm7
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm8
; AVX1-NEXT: vpcmpgtq %xmm8, %xmm6, %xmm8
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [65535,65535]
; AVX1-NEXT: # xmm6 = mem[0,0]
; AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm6, %xmm3
; AVX1-NEXT: vblendvpd %xmm8, %xmm2, %xmm6, %xmm2
; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vblendvpd %xmm7, %xmm1, %xmm6, %xmm1
; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm6, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_usat_v8i64_v8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535]
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm4
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372036854841343,9223372036854841343,9223372036854841343,9223372036854841343]
; AVX2-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4
; AVX2-NEXT: vblendvpd %ymm4, %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpxor %ymm3, %ymm0, %ymm3
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3
; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_usat_v8i64_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; SKX-LABEL: trunc_usat_v8i64_v8i16:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 (%rdi), %zmm0
; SKX-NEXT: vpmovusqw %zmm0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %a0 = load <8 x i64>, ptr %p0
  %1 = icmp ult <8 x i64> %a0, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
  %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
  %3 = trunc <8 x i64> %2 to <8 x i16>
  ret <8 x i16> %3
}

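; For vXi32 -> vXi16 the clamp is much simpler: SSE4.1 has an unsigned i32 min
; (pminud), so a single pminud against the 65535 splat followed by packusdw
; suffices, while SSE2/SSSE3 still need the biased signed-compare select
; sequence.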
define <4 x i16> @trunc_usat_v4i32_v4i16(<4 x i32> %a0) {
; SSE2-LABEL: trunc_usat_v4i32_v4i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: por %xmm2, %xmm1
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc_usat_v4i32_v4i16:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; SSSE3-NEXT: pxor %xmm0, %xmm1
; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSSE3-NEXT: pandn %xmm0, %xmm2
; SSSE3-NEXT: psrld $16, %xmm1
; SSSE3-NEXT: por %xmm2, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v4i32_v4i16:
; SSE41: # %bb.0:
; SSE41-NEXT: pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: packusdw %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v4i32_v4i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_usat_v4i32_v4i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535]
; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v4i32_v4i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpmovusdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v4i32_v4i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovusdw %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v4i32_v4i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpmovusdw %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i16:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovusdw %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v4i32_v4i16:
; SKX: # %bb.0:
; SKX-NEXT: vpmovusdw %xmm0, %xmm0
; SKX-NEXT: retq
  %1 = icmp ult <4 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535>
  %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = trunc <4 x i32> %2 to <4 x i16>
  ret <4 x i16> %3
}

define void @trunc_usat_v4i32_v4i16_store(<4 x i32> %a0, ptr %p1) {
; SSE2-LABEL: trunc_usat_v4i32_v4i16_store:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: por %xmm2, %xmm1
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: movq %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc_usat_v4i32_v4i16_store:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; SSSE3-NEXT: pxor %xmm0, %xmm1
; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSSE3-NEXT: pandn %xmm0, %xmm2
; SSSE3-NEXT: psrld $16, %xmm1
; SSSE3-NEXT: por %xmm2, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: movq %xmm1, (%rdi)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v4i32_v4i16_store:
; SSE41: # %bb.0:
; SSE41-NEXT: pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: packusdw %xmm0, %xmm0
; SSE41-NEXT: movq %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v4i32_v4i16_store:
; AVX1: # %bb.0:
; AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_usat_v4i32_v4i16_store:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535]
; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, (%rdi)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v4i32_v4i16_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpmovusdw %zmm0, %ymm0
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v4i32_v4i16_store:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovusdw %xmm0, (%rdi)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v4i32_v4i16_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpmovusdw %zmm0, %ymm0
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i16_store:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovusdw %xmm0, (%rdi)
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v4i32_v4i16_store:
; SKX: # %bb.0:
; SKX-NEXT: vpmovusdw %xmm0, (%rdi)
; SKX-NEXT: retq
  %1 = icmp ult <4 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535>
  %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = trunc <4 x i32> %2 to <4 x i16>
  store <4 x i16> %3, ptr %p1
  ret void
}

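; In the SSE41 checks below the 65535 splat is materialized with a pmovsxbw
; load: sign-extending the byte pattern [-1,0,-1,0,...] gives words
; [0xFFFF,0,...], i.e. the dword splat 0x0000FFFF, from an 8-byte constant
; pool entry instead of a full 16-byte one.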
1356; AVX512BW-NEXT: retq 1357; 1358; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i16_store: 1359; AVX512BWVL: # %bb.0: 1360; AVX512BWVL-NEXT: vpmovusdw %xmm0, (%rdi) 1361; AVX512BWVL-NEXT: retq 1362; 1363; SKX-LABEL: trunc_usat_v4i32_v4i16_store: 1364; SKX: # %bb.0: 1365; SKX-NEXT: vpmovusdw %xmm0, (%rdi) 1366; SKX-NEXT: retq 1367 %1 = icmp ult <4 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535> 1368 %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535> 1369 %3 = trunc <4 x i32> %2 to <4 x i16> 1370 store <4 x i16> %3, ptr%p1 1371 ret void 1372} 1373 1374define <8 x i16> @trunc_usat_v8i32_v8i16(<8 x i32> %a0) { 1375; SSE2-LABEL: trunc_usat_v8i32_v8i16: 1376; SSE2: # %bb.0: 1377; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] 1378; SSE2-NEXT: movdqa %xmm0, %xmm3 1379; SSE2-NEXT: pxor %xmm2, %xmm3 1380; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147549183,2147549183,2147549183,2147549183] 1381; SSE2-NEXT: movdqa %xmm4, %xmm5 1382; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 1383; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 1384; SSE2-NEXT: pand %xmm5, %xmm0 1385; SSE2-NEXT: pxor %xmm3, %xmm5 1386; SSE2-NEXT: por %xmm5, %xmm0 1387; SSE2-NEXT: pxor %xmm1, %xmm2 1388; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 1389; SSE2-NEXT: pxor %xmm4, %xmm3 1390; SSE2-NEXT: pand %xmm1, %xmm4 1391; SSE2-NEXT: por %xmm3, %xmm4 1392; SSE2-NEXT: pslld $16, %xmm4 1393; SSE2-NEXT: psrad $16, %xmm4 1394; SSE2-NEXT: pslld $16, %xmm0 1395; SSE2-NEXT: psrad $16, %xmm0 1396; SSE2-NEXT: packssdw %xmm4, %xmm0 1397; SSE2-NEXT: retq 1398; 1399; SSSE3-LABEL: trunc_usat_v8i32_v8i16: 1400; SSSE3: # %bb.0: 1401; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] 1402; SSSE3-NEXT: movdqa %xmm0, %xmm3 1403; SSSE3-NEXT: pxor %xmm2, %xmm3 1404; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147549183,2147549183,2147549183,2147549183] 1405; SSSE3-NEXT: movdqa %xmm4, %xmm5 1406; SSSE3-NEXT: pcmpgtd %xmm3, %xmm5 1407; SSSE3-NEXT: pcmpeqd %xmm3, %xmm3 1408; SSSE3-NEXT: pand %xmm5, %xmm0 1409; SSSE3-NEXT: pxor %xmm3, %xmm5 1410; SSSE3-NEXT: por %xmm5, %xmm0 1411; SSSE3-NEXT: pxor %xmm1, %xmm2 1412; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4 1413; SSSE3-NEXT: pxor %xmm4, %xmm3 1414; SSSE3-NEXT: pand %xmm1, %xmm4 1415; SSSE3-NEXT: por %xmm3, %xmm4 1416; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 1417; SSSE3-NEXT: pshufb %xmm1, %xmm4 1418; SSSE3-NEXT: pshufb %xmm1, %xmm0 1419; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] 1420; SSSE3-NEXT: retq 1421; 1422; SSE41-LABEL: trunc_usat_v8i32_v8i16: 1423; SSE41: # %bb.0: 1424; SSE41-NEXT: pmovsxbw {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0] 1425; SSE41-NEXT: pminud %xmm2, %xmm1 1426; SSE41-NEXT: pminud %xmm2, %xmm0 1427; SSE41-NEXT: packusdw %xmm1, %xmm0 1428; SSE41-NEXT: retq 1429; 1430; AVX1-LABEL: trunc_usat_v8i32_v8i16: 1431; AVX1: # %bb.0: 1432; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1433; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [65535,65535,65535,65535] 1434; AVX1-NEXT: vpminud %xmm2, %xmm1, %xmm1 1435; AVX1-NEXT: vpminud %xmm2, %xmm0, %xmm0 1436; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 1437; AVX1-NEXT: vzeroupper 1438; AVX1-NEXT: retq 1439; 1440; AVX2-LABEL: trunc_usat_v8i32_v8i16: 1441; AVX2: # %bb.0: 1442; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535] 1443; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0 1444; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1445; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 1446; AVX2-NEXT: vzeroupper 1447; AVX2-NEXT: retq 1448; 1449; 
AVX512F-LABEL: trunc_usat_v8i32_v8i16: 1450; AVX512F: # %bb.0: 1451; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1452; AVX512F-NEXT: vpmovusdw %zmm0, %ymm0 1453; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1454; AVX512F-NEXT: vzeroupper 1455; AVX512F-NEXT: retq 1456; 1457; AVX512VL-LABEL: trunc_usat_v8i32_v8i16: 1458; AVX512VL: # %bb.0: 1459; AVX512VL-NEXT: vpmovusdw %ymm0, %xmm0 1460; AVX512VL-NEXT: vzeroupper 1461; AVX512VL-NEXT: retq 1462; 1463; AVX512BW-LABEL: trunc_usat_v8i32_v8i16: 1464; AVX512BW: # %bb.0: 1465; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1466; AVX512BW-NEXT: vpmovusdw %zmm0, %ymm0 1467; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1468; AVX512BW-NEXT: vzeroupper 1469; AVX512BW-NEXT: retq 1470; 1471; AVX512BWVL-LABEL: trunc_usat_v8i32_v8i16: 1472; AVX512BWVL: # %bb.0: 1473; AVX512BWVL-NEXT: vpmovusdw %ymm0, %xmm0 1474; AVX512BWVL-NEXT: vzeroupper 1475; AVX512BWVL-NEXT: retq 1476; 1477; SKX-LABEL: trunc_usat_v8i32_v8i16: 1478; SKX: # %bb.0: 1479; SKX-NEXT: vpmovusdw %ymm0, %xmm0 1480; SKX-NEXT: vzeroupper 1481; SKX-NEXT: retq 1482 %1 = icmp ult <8 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535> 1483 %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535> 1484 %3 = trunc <8 x i32> %2 to <8 x i16> 1485 ret <8 x i16> %3 1486} 1487 1488define <16 x i16> @trunc_usat_v16i32_v16i16(ptr %p0) { 1489; SSE2-LABEL: trunc_usat_v16i32_v16i16: 1490; SSE2: # %bb.0: 1491; SSE2-NEXT: movdqa (%rdi), %xmm5 1492; SSE2-NEXT: movdqa 16(%rdi), %xmm4 1493; SSE2-NEXT: movdqa 32(%rdi), %xmm0 1494; SSE2-NEXT: movdqa 48(%rdi), %xmm8 1495; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648] 1496; SSE2-NEXT: movdqa %xmm0, %xmm3 1497; SSE2-NEXT: pxor %xmm6, %xmm3 1498; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147549183,2147549183,2147549183,2147549183] 1499; SSE2-NEXT: movdqa %xmm2, %xmm1 1500; SSE2-NEXT: pcmpgtd %xmm3, %xmm1 1501; SSE2-NEXT: pcmpeqd %xmm7, %xmm7 1502; SSE2-NEXT: pand %xmm1, %xmm0 1503; SSE2-NEXT: pxor %xmm7, %xmm1 1504; SSE2-NEXT: por %xmm0, %xmm1 1505; SSE2-NEXT: movdqa %xmm8, %xmm0 1506; SSE2-NEXT: pxor %xmm6, %xmm0 1507; SSE2-NEXT: movdqa %xmm2, %xmm3 1508; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1509; SSE2-NEXT: pand %xmm3, %xmm8 1510; SSE2-NEXT: pxor %xmm7, %xmm3 1511; SSE2-NEXT: por %xmm8, %xmm3 1512; SSE2-NEXT: movdqa %xmm5, %xmm8 1513; SSE2-NEXT: pxor %xmm6, %xmm8 1514; SSE2-NEXT: movdqa %xmm2, %xmm0 1515; SSE2-NEXT: pcmpgtd %xmm8, %xmm0 1516; SSE2-NEXT: pand %xmm0, %xmm5 1517; SSE2-NEXT: pxor %xmm7, %xmm0 1518; SSE2-NEXT: por %xmm5, %xmm0 1519; SSE2-NEXT: pxor %xmm4, %xmm6 1520; SSE2-NEXT: pcmpgtd %xmm6, %xmm2 1521; SSE2-NEXT: pxor %xmm2, %xmm7 1522; SSE2-NEXT: pand %xmm4, %xmm2 1523; SSE2-NEXT: por %xmm7, %xmm2 1524; SSE2-NEXT: pslld $16, %xmm2 1525; SSE2-NEXT: psrad $16, %xmm2 1526; SSE2-NEXT: pslld $16, %xmm0 1527; SSE2-NEXT: psrad $16, %xmm0 1528; SSE2-NEXT: packssdw %xmm2, %xmm0 1529; SSE2-NEXT: pslld $16, %xmm3 1530; SSE2-NEXT: psrad $16, %xmm3 1531; SSE2-NEXT: pslld $16, %xmm1 1532; SSE2-NEXT: psrad $16, %xmm1 1533; SSE2-NEXT: packssdw %xmm3, %xmm1 1534; SSE2-NEXT: retq 1535; 1536; SSSE3-LABEL: trunc_usat_v16i32_v16i16: 1537; SSSE3: # %bb.0: 1538; SSSE3-NEXT: movdqa (%rdi), %xmm5 1539; SSSE3-NEXT: movdqa 16(%rdi), %xmm3 1540; SSSE3-NEXT: movdqa 32(%rdi), %xmm0 1541; SSSE3-NEXT: movdqa 48(%rdi), %xmm8 1542; SSSE3-NEXT: movdqa {{.*#+}} xmm6 = 
[2147483648,2147483648,2147483648,2147483648] 1543; SSSE3-NEXT: movdqa %xmm0, %xmm4 1544; SSSE3-NEXT: pxor %xmm6, %xmm4 1545; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147549183,2147549183,2147549183,2147549183] 1546; SSSE3-NEXT: movdqa %xmm2, %xmm1 1547; SSSE3-NEXT: pcmpgtd %xmm4, %xmm1 1548; SSSE3-NEXT: pcmpeqd %xmm7, %xmm7 1549; SSSE3-NEXT: pand %xmm1, %xmm0 1550; SSSE3-NEXT: pxor %xmm7, %xmm1 1551; SSSE3-NEXT: por %xmm0, %xmm1 1552; SSSE3-NEXT: movdqa %xmm8, %xmm0 1553; SSSE3-NEXT: pxor %xmm6, %xmm0 1554; SSSE3-NEXT: movdqa %xmm2, %xmm4 1555; SSSE3-NEXT: pcmpgtd %xmm0, %xmm4 1556; SSSE3-NEXT: pand %xmm4, %xmm8 1557; SSSE3-NEXT: pxor %xmm7, %xmm4 1558; SSSE3-NEXT: por %xmm8, %xmm4 1559; SSSE3-NEXT: movdqa %xmm5, %xmm8 1560; SSSE3-NEXT: pxor %xmm6, %xmm8 1561; SSSE3-NEXT: movdqa %xmm2, %xmm0 1562; SSSE3-NEXT: pcmpgtd %xmm8, %xmm0 1563; SSSE3-NEXT: pand %xmm0, %xmm5 1564; SSSE3-NEXT: pxor %xmm7, %xmm0 1565; SSSE3-NEXT: por %xmm5, %xmm0 1566; SSSE3-NEXT: pxor %xmm3, %xmm6 1567; SSSE3-NEXT: pcmpgtd %xmm6, %xmm2 1568; SSSE3-NEXT: pxor %xmm2, %xmm7 1569; SSSE3-NEXT: pand %xmm3, %xmm2 1570; SSSE3-NEXT: por %xmm7, %xmm2 1571; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 1572; SSSE3-NEXT: pshufb %xmm3, %xmm2 1573; SSSE3-NEXT: pshufb %xmm3, %xmm0 1574; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 1575; SSSE3-NEXT: pshufb %xmm3, %xmm4 1576; SSSE3-NEXT: pshufb %xmm3, %xmm1 1577; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0] 1578; SSSE3-NEXT: retq 1579; 1580; SSE41-LABEL: trunc_usat_v16i32_v16i16: 1581; SSE41: # %bb.0: 1582; SSE41-NEXT: pmovsxbw {{.*#+}} xmm0 = [65535,0,65535,0,65535,0,65535,0] 1583; SSE41-NEXT: movdqa 48(%rdi), %xmm2 1584; SSE41-NEXT: pminud %xmm0, %xmm2 1585; SSE41-NEXT: movdqa 32(%rdi), %xmm1 1586; SSE41-NEXT: pminud %xmm0, %xmm1 1587; SSE41-NEXT: packusdw %xmm2, %xmm1 1588; SSE41-NEXT: movdqa 16(%rdi), %xmm2 1589; SSE41-NEXT: pminud %xmm0, %xmm2 1590; SSE41-NEXT: pminud (%rdi), %xmm0 1591; SSE41-NEXT: packusdw %xmm2, %xmm0 1592; SSE41-NEXT: retq 1593; 1594; AVX1-LABEL: trunc_usat_v16i32_v16i16: 1595; AVX1: # %bb.0: 1596; AVX1-NEXT: vbroadcastss {{.*#+}} xmm0 = [65535,65535,65535,65535] 1597; AVX1-NEXT: vpminud 16(%rdi), %xmm0, %xmm1 1598; AVX1-NEXT: vpminud (%rdi), %xmm0, %xmm2 1599; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 1600; AVX1-NEXT: vpminud 48(%rdi), %xmm0, %xmm2 1601; AVX1-NEXT: vpminud 32(%rdi), %xmm0, %xmm0 1602; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 1603; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1604; AVX1-NEXT: retq 1605; 1606; AVX2-LABEL: trunc_usat_v16i32_v16i16: 1607; AVX2: # %bb.0: 1608; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [65535,65535,65535,65535,65535,65535,65535,65535] 1609; AVX2-NEXT: vpminud 32(%rdi), %ymm0, %ymm1 1610; AVX2-NEXT: vpminud (%rdi), %ymm0, %ymm0 1611; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 1612; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 1613; AVX2-NEXT: retq 1614; 1615; AVX512-LABEL: trunc_usat_v16i32_v16i16: 1616; AVX512: # %bb.0: 1617; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 1618; AVX512-NEXT: vpmovusdw %zmm0, %ymm0 1619; AVX512-NEXT: retq 1620; 1621; SKX-LABEL: trunc_usat_v16i32_v16i16: 1622; SKX: # %bb.0: 1623; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 1624; SKX-NEXT: vpmovusdw %zmm0, %ymm0 1625; SKX-NEXT: retq 1626 %a0 = load <16 x i32>, ptr %p0 1627 %1 = icmp ult <16 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535> 1628 %2 = select <16 x i1> %1, 
<16 x i32> %a0, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535> 1629 %3 = trunc <16 x i32> %2 to <16 x i16> 1630 ret <16 x i16> %3 1631} 1632 1633; 1634; Unsigned saturation truncation to vXi8 1635; 1636 1637define <2 x i8> @trunc_usat_v2i64_v2i8(<2 x i64> %a0) { 1638; SSE2-LABEL: trunc_usat_v2i64_v2i8: 1639; SSE2: # %bb.0: 1640; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] 1641; SSE2-NEXT: pxor %xmm0, %xmm1 1642; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1643; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 1644; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1645; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1646; SSE2-NEXT: pandn %xmm2, %xmm1 1647; SSE2-NEXT: pand %xmm1, %xmm0 1648; SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1649; SSE2-NEXT: por %xmm1, %xmm0 1650; SSE2-NEXT: packuswb %xmm0, %xmm0 1651; SSE2-NEXT: packuswb %xmm0, %xmm0 1652; SSE2-NEXT: packuswb %xmm0, %xmm0 1653; SSE2-NEXT: retq 1654; 1655; SSSE3-LABEL: trunc_usat_v2i64_v2i8: 1656; SSSE3: # %bb.0: 1657; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] 1658; SSSE3-NEXT: pxor %xmm0, %xmm1 1659; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1660; SSSE3-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 1661; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1662; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1663; SSSE3-NEXT: pandn %xmm2, %xmm1 1664; SSSE3-NEXT: pcmpeqd %xmm2, %xmm2 1665; SSSE3-NEXT: pxor %xmm1, %xmm2 1666; SSSE3-NEXT: pand %xmm1, %xmm0 1667; SSSE3-NEXT: por %xmm2, %xmm0 1668; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 1669; SSSE3-NEXT: retq 1670; 1671; SSE41-LABEL: trunc_usat_v2i64_v2i8: 1672; SSE41: # %bb.0: 1673; SSE41-NEXT: movdqa %xmm0, %xmm1 1674; SSE41-NEXT: movapd {{.*#+}} xmm2 = [255,255] 1675; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456] 1676; SSE41-NEXT: pxor %xmm0, %xmm3 1677; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 1678; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 1679; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1680; SSE41-NEXT: pandn %xmm3, %xmm0 1681; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 1682; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 1683; SSE41-NEXT: movdqa %xmm2, %xmm0 1684; SSE41-NEXT: retq 1685; 1686; AVX-LABEL: trunc_usat_v2i64_v2i8: 1687; AVX: # %bb.0: 1688; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1689; AVX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1690; AVX-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1691; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 1692; AVX-NEXT: retq 1693; 1694; AVX512F-LABEL: trunc_usat_v2i64_v2i8: 1695; AVX512F: # %bb.0: 1696; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1697; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0 1698; AVX512F-NEXT: vzeroupper 1699; AVX512F-NEXT: retq 1700; 1701; AVX512VL-LABEL: trunc_usat_v2i64_v2i8: 1702; AVX512VL: # %bb.0: 1703; AVX512VL-NEXT: vpmovusqb %xmm0, %xmm0 1704; AVX512VL-NEXT: retq 1705; 1706; AVX512BW-LABEL: trunc_usat_v2i64_v2i8: 1707; AVX512BW: # %bb.0: 1708; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1709; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0 1710; AVX512BW-NEXT: vzeroupper 1711; AVX512BW-NEXT: retq 1712; 1713; AVX512BWVL-LABEL: 
trunc_usat_v2i64_v2i8: 1714; AVX512BWVL: # %bb.0: 1715; AVX512BWVL-NEXT: vpmovusqb %xmm0, %xmm0 1716; AVX512BWVL-NEXT: retq 1717; 1718; SKX-LABEL: trunc_usat_v2i64_v2i8: 1719; SKX: # %bb.0: 1720; SKX-NEXT: vpmovusqb %xmm0, %xmm0 1721; SKX-NEXT: retq 1722 %1 = icmp ult <2 x i64> %a0, <i64 255, i64 255> 1723 %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 255, i64 255> 1724 %3 = trunc <2 x i64> %2 to <2 x i8> 1725 ret <2 x i8> %3 1726} 1727 1728define void @trunc_usat_v2i64_v2i8_store(<2 x i64> %a0, ptr %p1) { 1729; SSE2-LABEL: trunc_usat_v2i64_v2i8_store: 1730; SSE2: # %bb.0: 1731; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] 1732; SSE2-NEXT: pxor %xmm0, %xmm1 1733; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1734; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 1735; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1736; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1737; SSE2-NEXT: pandn %xmm2, %xmm1 1738; SSE2-NEXT: pand %xmm1, %xmm0 1739; SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1740; SSE2-NEXT: por %xmm0, %xmm1 1741; SSE2-NEXT: packuswb %xmm1, %xmm1 1742; SSE2-NEXT: packuswb %xmm1, %xmm1 1743; SSE2-NEXT: packuswb %xmm1, %xmm1 1744; SSE2-NEXT: movd %xmm1, %eax 1745; SSE2-NEXT: movw %ax, (%rdi) 1746; SSE2-NEXT: retq 1747; 1748; SSSE3-LABEL: trunc_usat_v2i64_v2i8_store: 1749; SSSE3: # %bb.0: 1750; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456] 1751; SSSE3-NEXT: pxor %xmm0, %xmm1 1752; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1753; SSSE3-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 1754; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1755; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1756; SSSE3-NEXT: pandn %xmm2, %xmm1 1757; SSSE3-NEXT: pcmpeqd %xmm2, %xmm2 1758; SSSE3-NEXT: pxor %xmm1, %xmm2 1759; SSSE3-NEXT: pand %xmm0, %xmm1 1760; SSSE3-NEXT: por %xmm2, %xmm1 1761; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 1762; SSSE3-NEXT: movd %xmm1, %eax 1763; SSSE3-NEXT: movw %ax, (%rdi) 1764; SSSE3-NEXT: retq 1765; 1766; SSE41-LABEL: trunc_usat_v2i64_v2i8_store: 1767; SSE41: # %bb.0: 1768; SSE41-NEXT: movdqa %xmm0, %xmm1 1769; SSE41-NEXT: movapd {{.*#+}} xmm2 = [255,255] 1770; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456] 1771; SSE41-NEXT: pxor %xmm0, %xmm3 1772; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 1773; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 1774; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1775; SSE41-NEXT: pandn %xmm3, %xmm0 1776; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 1777; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 1778; SSE41-NEXT: pextrw $0, %xmm2, (%rdi) 1779; SSE41-NEXT: retq 1780; 1781; AVX-LABEL: trunc_usat_v2i64_v2i8_store: 1782; AVX: # %bb.0: 1783; AVX-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1784; AVX-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1785; AVX-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1786; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 1787; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) 1788; AVX-NEXT: retq 1789; 1790; AVX512F-LABEL: trunc_usat_v2i64_v2i8_store: 1791; AVX512F: # %bb.0: 1792; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1793; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0 1794; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) 1795; AVX512F-NEXT: vzeroupper 1796; AVX512F-NEXT: retq 1797; 1798; AVX512VL-LABEL: 
trunc_usat_v2i64_v2i8_store: 1799; AVX512VL: # %bb.0: 1800; AVX512VL-NEXT: vpmovusqb %xmm0, (%rdi) 1801; AVX512VL-NEXT: retq 1802; 1803; AVX512BW-LABEL: trunc_usat_v2i64_v2i8_store: 1804; AVX512BW: # %bb.0: 1805; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1806; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0 1807; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) 1808; AVX512BW-NEXT: vzeroupper 1809; AVX512BW-NEXT: retq 1810; 1811; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i8_store: 1812; AVX512BWVL: # %bb.0: 1813; AVX512BWVL-NEXT: vpmovusqb %xmm0, (%rdi) 1814; AVX512BWVL-NEXT: retq 1815; 1816; SKX-LABEL: trunc_usat_v2i64_v2i8_store: 1817; SKX: # %bb.0: 1818; SKX-NEXT: vpmovusqb %xmm0, (%rdi) 1819; SKX-NEXT: retq 1820 %1 = icmp ult <2 x i64> %a0, <i64 255, i64 255> 1821 %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 255, i64 255> 1822 %3 = trunc <2 x i64> %2 to <2 x i8> 1823 store <2 x i8> %3, ptr %p1 1824 ret void 1825} 1826 1827define <4 x i8> @trunc_usat_v4i64_v4i8(<4 x i64> %a0) { 1828; SSE2-SSSE3-LABEL: trunc_usat_v4i64_v4i8: 1829; SSE2-SSSE3: # %bb.0: 1830; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255] 1831; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456] 1832; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm4 1833; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm4 1834; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 1835; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [2147483903,2147483903,2147483903,2147483903] 1836; SSE2-SSSE3-NEXT: movdqa %xmm6, %xmm7 1837; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm7 1838; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1839; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4 1840; SSE2-SSSE3-NEXT: pand %xmm7, %xmm4 1841; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 1842; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm4 1843; SSE2-SSSE3-NEXT: por %xmm1, %xmm4 1844; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm1 1845; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm1 1846; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2] 1847; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm6 1848; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1849; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm1 1850; SSE2-SSSE3-NEXT: pand %xmm6, %xmm1 1851; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0 1852; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm1 1853; SSE2-SSSE3-NEXT: por %xmm1, %xmm0 1854; SSE2-SSSE3-NEXT: packuswb %xmm4, %xmm0 1855; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0 1856; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0 1857; SSE2-SSSE3-NEXT: retq 1858; 1859; SSE41-LABEL: trunc_usat_v4i64_v4i8: 1860; SSE41: # %bb.0: 1861; SSE41-NEXT: movdqa %xmm0, %xmm3 1862; SSE41-NEXT: movapd {{.*#+}} xmm2 = [255,255] 1863; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456] 1864; SSE41-NEXT: movdqa %xmm1, %xmm6 1865; SSE41-NEXT: pxor %xmm5, %xmm6 1866; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [9223372039002259711,9223372039002259711] 1867; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2] 1868; SSE41-NEXT: pcmpeqd %xmm7, %xmm6 1869; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [2147483903,2147483903,2147483903,2147483903] 1870; SSE41-NEXT: movdqa %xmm4, %xmm0 1871; SSE41-NEXT: pcmpgtd %xmm8, %xmm0 1872; SSE41-NEXT: pand %xmm6, %xmm0 1873; SSE41-NEXT: movapd %xmm2, %xmm6 1874; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm6 1875; SSE41-NEXT: pxor %xmm3, %xmm5 1876; SSE41-NEXT: pcmpeqd %xmm5, %xmm7 1877; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 1878; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 1879; SSE41-NEXT: pand %xmm7, %xmm4 1880; SSE41-NEXT: movdqa %xmm4, %xmm0 1881; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 1882; SSE41-NEXT: packusdw %xmm6, %xmm2 1883; SSE41-NEXT: 
packusdw %xmm2, %xmm2 1884; SSE41-NEXT: packuswb %xmm2, %xmm2 1885; SSE41-NEXT: movdqa %xmm2, %xmm0 1886; SSE41-NEXT: retq 1887; 1888; AVX1-LABEL: trunc_usat_v4i64_v4i8: 1889; AVX1: # %bb.0: 1890; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] 1891; AVX1-NEXT: # xmm1 = mem[0,0] 1892; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2 1893; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063] 1894; AVX1-NEXT: # xmm3 = mem[0,0] 1895; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1896; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 1897; AVX1-NEXT: vpxor %xmm1, %xmm4, %xmm1 1898; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 1899; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [255,255] 1900; AVX1-NEXT: # xmm3 = mem[0,0] 1901; AVX1-NEXT: vblendvpd %xmm1, %xmm4, %xmm3, %xmm1 1902; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0 1903; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 1904; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 1905; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 1906; AVX1-NEXT: vzeroupper 1907; AVX1-NEXT: retq 1908; 1909; AVX2-LABEL: trunc_usat_v4i64_v4i8: 1910; AVX2: # %bb.0: 1911; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [255,255,255,255] 1912; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] 1913; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2 1914; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854776062,9223372036854776062,9223372036854776062,9223372036854776062] 1915; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 1916; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 1917; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1918; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 1919; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 1920; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 1921; AVX2-NEXT: vzeroupper 1922; AVX2-NEXT: retq 1923; 1924; AVX512F-LABEL: trunc_usat_v4i64_v4i8: 1925; AVX512F: # %bb.0: 1926; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1927; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0 1928; AVX512F-NEXT: vzeroupper 1929; AVX512F-NEXT: retq 1930; 1931; AVX512VL-LABEL: trunc_usat_v4i64_v4i8: 1932; AVX512VL: # %bb.0: 1933; AVX512VL-NEXT: vpmovusqb %ymm0, %xmm0 1934; AVX512VL-NEXT: vzeroupper 1935; AVX512VL-NEXT: retq 1936; 1937; AVX512BW-LABEL: trunc_usat_v4i64_v4i8: 1938; AVX512BW: # %bb.0: 1939; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1940; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0 1941; AVX512BW-NEXT: vzeroupper 1942; AVX512BW-NEXT: retq 1943; 1944; AVX512BWVL-LABEL: trunc_usat_v4i64_v4i8: 1945; AVX512BWVL: # %bb.0: 1946; AVX512BWVL-NEXT: vpmovusqb %ymm0, %xmm0 1947; AVX512BWVL-NEXT: vzeroupper 1948; AVX512BWVL-NEXT: retq 1949; 1950; SKX-LABEL: trunc_usat_v4i64_v4i8: 1951; SKX: # %bb.0: 1952; SKX-NEXT: vpmovusqb %ymm0, %xmm0 1953; SKX-NEXT: vzeroupper 1954; SKX-NEXT: retq 1955 %1 = icmp ult <4 x i64> %a0, <i64 255, i64 255, i64 255, i64 255> 1956 %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 255, i64 255, i64 255, i64 255> 1957 %3 = trunc <4 x i64> %2 to <4 x i8> 1958 ret <4 x i8> %3 1959} 1960 1961define void @trunc_usat_v4i64_v4i8_store(<4 x i64> %a0, ptr%p1) { 1962; SSE2-SSSE3-LABEL: trunc_usat_v4i64_v4i8_store: 1963; SSE2-SSSE3: # %bb.0: 1964; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255] 1965; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456] 1966; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm4 1967; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm4 1968; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 1969; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm6 = 
[2147483903,2147483903,2147483903,2147483903] 1970; SSE2-SSSE3-NEXT: movdqa %xmm6, %xmm7 1971; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm7 1972; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1973; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4 1974; SSE2-SSSE3-NEXT: pand %xmm7, %xmm4 1975; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 1976; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm4 1977; SSE2-SSSE3-NEXT: por %xmm1, %xmm4 1978; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm1 1979; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm1 1980; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2] 1981; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm6 1982; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1983; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm1 1984; SSE2-SSSE3-NEXT: pand %xmm6, %xmm1 1985; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0 1986; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm1 1987; SSE2-SSSE3-NEXT: por %xmm0, %xmm1 1988; SSE2-SSSE3-NEXT: packuswb %xmm4, %xmm1 1989; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm1 1990; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm1 1991; SSE2-SSSE3-NEXT: movd %xmm1, (%rdi) 1992; SSE2-SSSE3-NEXT: retq 1993; 1994; SSE41-LABEL: trunc_usat_v4i64_v4i8_store: 1995; SSE41: # %bb.0: 1996; SSE41-NEXT: movdqa %xmm0, %xmm2 1997; SSE41-NEXT: movapd {{.*#+}} xmm4 = [255,255] 1998; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456] 1999; SSE41-NEXT: movdqa %xmm1, %xmm6 2000; SSE41-NEXT: pxor %xmm5, %xmm6 2001; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [9223372039002259711,9223372039002259711] 2002; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2] 2003; SSE41-NEXT: pcmpeqd %xmm7, %xmm6 2004; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483903,2147483903,2147483903,2147483903] 2005; SSE41-NEXT: movdqa %xmm3, %xmm0 2006; SSE41-NEXT: pcmpgtd %xmm8, %xmm0 2007; SSE41-NEXT: pand %xmm6, %xmm0 2008; SSE41-NEXT: movapd %xmm4, %xmm6 2009; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm6 2010; SSE41-NEXT: pxor %xmm2, %xmm5 2011; SSE41-NEXT: pcmpeqd %xmm5, %xmm7 2012; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 2013; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 2014; SSE41-NEXT: pand %xmm7, %xmm3 2015; SSE41-NEXT: movdqa %xmm3, %xmm0 2016; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm4 2017; SSE41-NEXT: packusdw %xmm6, %xmm4 2018; SSE41-NEXT: packusdw %xmm4, %xmm4 2019; SSE41-NEXT: packuswb %xmm4, %xmm4 2020; SSE41-NEXT: movd %xmm4, (%rdi) 2021; SSE41-NEXT: retq 2022; 2023; AVX1-LABEL: trunc_usat_v4i64_v4i8_store: 2024; AVX1: # %bb.0: 2025; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] 2026; AVX1-NEXT: # xmm1 = mem[0,0] 2027; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2 2028; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063] 2029; AVX1-NEXT: # xmm3 = mem[0,0] 2030; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 2031; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 2032; AVX1-NEXT: vpxor %xmm1, %xmm4, %xmm1 2033; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1 2034; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [255,255] 2035; AVX1-NEXT: # xmm3 = mem[0,0] 2036; AVX1-NEXT: vblendvpd %xmm1, %xmm4, %xmm3, %xmm1 2037; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0 2038; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 2039; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2040; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2041; AVX1-NEXT: vmovd %xmm0, (%rdi) 2042; AVX1-NEXT: vzeroupper 2043; AVX1-NEXT: retq 2044; 2045; AVX2-LABEL: trunc_usat_v4i64_v4i8_store: 2046; AVX2: # %bb.0: 2047; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [255,255,255,255] 2048; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] 2049; 
AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2 2050; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854776062,9223372036854776062,9223372036854776062,9223372036854776062] 2051; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 2052; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 2053; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 2054; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 2055; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2056; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2057; AVX2-NEXT: vmovd %xmm0, (%rdi) 2058; AVX2-NEXT: vzeroupper 2059; AVX2-NEXT: retq 2060; 2061; AVX512F-LABEL: trunc_usat_v4i64_v4i8_store: 2062; AVX512F: # %bb.0: 2063; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2064; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0 2065; AVX512F-NEXT: vmovd %xmm0, (%rdi) 2066; AVX512F-NEXT: vzeroupper 2067; AVX512F-NEXT: retq 2068; 2069; AVX512VL-LABEL: trunc_usat_v4i64_v4i8_store: 2070; AVX512VL: # %bb.0: 2071; AVX512VL-NEXT: vpmovusqb %ymm0, (%rdi) 2072; AVX512VL-NEXT: vzeroupper 2073; AVX512VL-NEXT: retq 2074; 2075; AVX512BW-LABEL: trunc_usat_v4i64_v4i8_store: 2076; AVX512BW: # %bb.0: 2077; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2078; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0 2079; AVX512BW-NEXT: vmovd %xmm0, (%rdi) 2080; AVX512BW-NEXT: vzeroupper 2081; AVX512BW-NEXT: retq 2082; 2083; AVX512BWVL-LABEL: trunc_usat_v4i64_v4i8_store: 2084; AVX512BWVL: # %bb.0: 2085; AVX512BWVL-NEXT: vpmovusqb %ymm0, (%rdi) 2086; AVX512BWVL-NEXT: vzeroupper 2087; AVX512BWVL-NEXT: retq 2088; 2089; SKX-LABEL: trunc_usat_v4i64_v4i8_store: 2090; SKX: # %bb.0: 2091; SKX-NEXT: vpmovusqb %ymm0, (%rdi) 2092; SKX-NEXT: vzeroupper 2093; SKX-NEXT: retq 2094 %1 = icmp ult <4 x i64> %a0, <i64 255, i64 255, i64 255, i64 255> 2095 %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 255, i64 255, i64 255, i64 255> 2096 %3 = trunc <4 x i64> %2 to <4 x i8> 2097 store <4 x i8> %3, ptr%p1 2098 ret void 2099} 2100 2101define <8 x i8> @trunc_usat_v8i64_v8i8(ptr %p0) { 2102; SSE2-SSSE3-LABEL: trunc_usat_v8i64_v8i8: 2103; SSE2-SSSE3: # %bb.0: 2104; SSE2-SSSE3-NEXT: movdqa (%rdi), %xmm6 2105; SSE2-SSSE3-NEXT: movdqa 16(%rdi), %xmm0 2106; SSE2-SSSE3-NEXT: movdqa 32(%rdi), %xmm1 2107; SSE2-SSSE3-NEXT: movdqa 48(%rdi), %xmm5 2108; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255] 2109; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456] 2110; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm7 2111; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm7 2112; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 2113; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483903,2147483903,2147483903,2147483903] 2114; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm9 2115; SSE2-SSSE3-NEXT: pcmpgtd %xmm8, %xmm9 2116; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3] 2117; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm7 2118; SSE2-SSSE3-NEXT: pand %xmm9, %xmm7 2119; SSE2-SSSE3-NEXT: pand %xmm7, %xmm0 2120; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm7 2121; SSE2-SSSE3-NEXT: por %xmm0, %xmm7 2122; SSE2-SSSE3-NEXT: movdqa %xmm6, %xmm0 2123; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm0 2124; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm0[0,0,2,2] 2125; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm9 2126; SSE2-SSSE3-NEXT: pcmpgtd %xmm8, %xmm9 2127; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 2128; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm0 2129; SSE2-SSSE3-NEXT: pand %xmm9, %xmm0 2130; SSE2-SSSE3-NEXT: pand %xmm0, %xmm6 2131; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm0 2132; SSE2-SSSE3-NEXT: por %xmm6, %xmm0 2133; SSE2-SSSE3-NEXT: packuswb %xmm7, %xmm0 2134; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm6 2135; 
SSE2-SSSE3-NEXT: pxor %xmm3, %xmm6 2136; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 2137; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm8 2138; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm8 2139; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 2140; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm6 2141; SSE2-SSSE3-NEXT: pand %xmm8, %xmm6 2142; SSE2-SSSE3-NEXT: pand %xmm6, %xmm5 2143; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm6 2144; SSE2-SSSE3-NEXT: por %xmm5, %xmm6 2145; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm5 2146; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm5 2147; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2] 2148; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm4 2149; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 2150; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm5 2151; SSE2-SSSE3-NEXT: pand %xmm4, %xmm5 2152; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1 2153; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm5 2154; SSE2-SSSE3-NEXT: por %xmm1, %xmm5 2155; SSE2-SSSE3-NEXT: packuswb %xmm6, %xmm5 2156; SSE2-SSSE3-NEXT: packuswb %xmm5, %xmm0 2157; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0 2158; SSE2-SSSE3-NEXT: retq 2159; 2160; SSE41-LABEL: trunc_usat_v8i64_v8i8: 2161; SSE41: # %bb.0: 2162; SSE41-NEXT: movdqa (%rdi), %xmm8 2163; SSE41-NEXT: movdqa 16(%rdi), %xmm2 2164; SSE41-NEXT: movdqa 32(%rdi), %xmm4 2165; SSE41-NEXT: movdqa 48(%rdi), %xmm7 2166; SSE41-NEXT: movapd {{.*#+}} xmm3 = [255,255] 2167; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [9223372039002259456,9223372039002259456] 2168; SSE41-NEXT: movdqa %xmm2, %xmm9 2169; SSE41-NEXT: pxor %xmm6, %xmm9 2170; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259711,9223372039002259711] 2171; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2] 2172; SSE41-NEXT: pcmpeqd %xmm5, %xmm9 2173; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483903,2147483903,2147483903,2147483903] 2174; SSE41-NEXT: movdqa %xmm1, %xmm0 2175; SSE41-NEXT: pcmpgtd %xmm10, %xmm0 2176; SSE41-NEXT: pand %xmm9, %xmm0 2177; SSE41-NEXT: movapd %xmm3, %xmm9 2178; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm9 2179; SSE41-NEXT: movdqa %xmm8, %xmm2 2180; SSE41-NEXT: pxor %xmm6, %xmm2 2181; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm2[0,0,2,2] 2182; SSE41-NEXT: pcmpeqd %xmm5, %xmm2 2183; SSE41-NEXT: movdqa %xmm1, %xmm0 2184; SSE41-NEXT: pcmpgtd %xmm10, %xmm0 2185; SSE41-NEXT: pand %xmm2, %xmm0 2186; SSE41-NEXT: movapd %xmm3, %xmm2 2187; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm2 2188; SSE41-NEXT: packusdw %xmm9, %xmm2 2189; SSE41-NEXT: movdqa %xmm7, %xmm8 2190; SSE41-NEXT: pxor %xmm6, %xmm8 2191; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2] 2192; SSE41-NEXT: pcmpeqd %xmm5, %xmm8 2193; SSE41-NEXT: movdqa %xmm1, %xmm0 2194; SSE41-NEXT: pcmpgtd %xmm9, %xmm0 2195; SSE41-NEXT: pand %xmm8, %xmm0 2196; SSE41-NEXT: movapd %xmm3, %xmm8 2197; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm8 2198; SSE41-NEXT: pxor %xmm4, %xmm6 2199; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 2200; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 2201; SSE41-NEXT: pcmpgtd %xmm0, %xmm1 2202; SSE41-NEXT: pand %xmm5, %xmm1 2203; SSE41-NEXT: movdqa %xmm1, %xmm0 2204; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3 2205; SSE41-NEXT: packusdw %xmm8, %xmm3 2206; SSE41-NEXT: packusdw %xmm3, %xmm2 2207; SSE41-NEXT: packuswb %xmm2, %xmm2 2208; SSE41-NEXT: movdqa %xmm2, %xmm0 2209; SSE41-NEXT: retq 2210; 2211; AVX1-LABEL: trunc_usat_v8i64_v8i8: 2212; AVX1: # %bb.0: 2213; AVX1-NEXT: vmovdqa (%rdi), %xmm0 2214; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 2215; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2 2216; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3 2217; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] 2218; AVX1-NEXT: # 
xmm4 = mem[0,0] 2219; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5 2220; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [9223372036854776063,9223372036854776063] 2221; AVX1-NEXT: # xmm6 = mem[0,0] 2222; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5 2223; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm7 2224; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm7 2225; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm8 2226; AVX1-NEXT: vpcmpgtq %xmm8, %xmm6, %xmm8 2227; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm4 2228; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4 2229; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [255,255] 2230; AVX1-NEXT: # xmm6 = mem[0,0] 2231; AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm6, %xmm3 2232; AVX1-NEXT: vblendvpd %xmm8, %xmm2, %xmm6, %xmm2 2233; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 2234; AVX1-NEXT: vblendvpd %xmm7, %xmm1, %xmm6, %xmm1 2235; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm6, %xmm0 2236; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 2237; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 2238; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2239; AVX1-NEXT: retq 2240; 2241; AVX2-LABEL: trunc_usat_v8i64_v8i8: 2242; AVX2: # %bb.0: 2243; AVX2-NEXT: vmovdqa (%rdi), %ymm0 2244; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1 2245; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [255,255,255,255] 2246; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] 2247; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm4 2248; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372036854776063,9223372036854776063,9223372036854776063,9223372036854776063] 2249; AVX2-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 2250; AVX2-NEXT: vblendvpd %ymm4, %ymm1, %ymm2, %ymm1 2251; AVX2-NEXT: vpxor %ymm3, %ymm0, %ymm3 2252; AVX2-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 2253; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 2254; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 2255; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2256; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 2257; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 2258; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2259; AVX2-NEXT: vzeroupper 2260; AVX2-NEXT: retq 2261; 2262; AVX512-LABEL: trunc_usat_v8i64_v8i8: 2263; AVX512: # %bb.0: 2264; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 2265; AVX512-NEXT: vpmovusqb %zmm0, %xmm0 2266; AVX512-NEXT: vzeroupper 2267; AVX512-NEXT: retq 2268; 2269; SKX-LABEL: trunc_usat_v8i64_v8i8: 2270; SKX: # %bb.0: 2271; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 2272; SKX-NEXT: vpmovusqb %zmm0, %xmm0 2273; SKX-NEXT: vzeroupper 2274; SKX-NEXT: retq 2275 %a0 = load <8 x i64>, ptr %p0 2276 %1 = icmp ult <8 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255> 2277 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255> 2278 %3 = trunc <8 x i64> %2 to <8 x i8> 2279 ret <8 x i8> %3 2280} 2281 2282define void @trunc_usat_v8i64_v8i8_store(ptr %p0, ptr%p1) { 2283; SSE2-SSSE3-LABEL: trunc_usat_v8i64_v8i8_store: 2284; SSE2-SSSE3: # %bb.0: 2285; SSE2-SSSE3-NEXT: movdqa (%rdi), %xmm6 2286; SSE2-SSSE3-NEXT: movdqa 16(%rdi), %xmm5 2287; SSE2-SSSE3-NEXT: movdqa 32(%rdi), %xmm0 2288; SSE2-SSSE3-NEXT: movdqa 48(%rdi), %xmm4 2289; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255] 2290; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456] 2291; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm7 2292; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm7 2293; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 2294; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483903,2147483903,2147483903,2147483903] 2295; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm9 2296; 
SSE2-SSSE3-NEXT: pcmpgtd %xmm8, %xmm9 2297; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3] 2298; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm7 2299; SSE2-SSSE3-NEXT: pand %xmm9, %xmm7 2300; SSE2-SSSE3-NEXT: pand %xmm7, %xmm5 2301; SSE2-SSSE3-NEXT: pandn %xmm1, %xmm7 2302; SSE2-SSSE3-NEXT: por %xmm5, %xmm7 2303; SSE2-SSSE3-NEXT: movdqa %xmm6, %xmm5 2304; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm5 2305; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm5[0,0,2,2] 2306; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm9 2307; SSE2-SSSE3-NEXT: pcmpgtd %xmm8, %xmm9 2308; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 2309; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm5 2310; SSE2-SSSE3-NEXT: pand %xmm9, %xmm5 2311; SSE2-SSSE3-NEXT: pand %xmm5, %xmm6 2312; SSE2-SSSE3-NEXT: pandn %xmm1, %xmm5 2313; SSE2-SSSE3-NEXT: por %xmm6, %xmm5 2314; SSE2-SSSE3-NEXT: packuswb %xmm7, %xmm5 2315; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm6 2316; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm6 2317; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 2318; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm8 2319; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm8 2320; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 2321; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm6 2322; SSE2-SSSE3-NEXT: pand %xmm8, %xmm6 2323; SSE2-SSSE3-NEXT: pand %xmm6, %xmm4 2324; SSE2-SSSE3-NEXT: pandn %xmm1, %xmm6 2325; SSE2-SSSE3-NEXT: por %xmm4, %xmm6 2326; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm4 2327; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm4 2328; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm4[0,0,2,2] 2329; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm3 2330; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 2331; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm4 2332; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4 2333; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 2334; SSE2-SSSE3-NEXT: pandn %xmm1, %xmm4 2335; SSE2-SSSE3-NEXT: por %xmm0, %xmm4 2336; SSE2-SSSE3-NEXT: packuswb %xmm6, %xmm4 2337; SSE2-SSSE3-NEXT: packuswb %xmm4, %xmm5 2338; SSE2-SSSE3-NEXT: packuswb %xmm5, %xmm5 2339; SSE2-SSSE3-NEXT: movq %xmm5, (%rsi) 2340; SSE2-SSSE3-NEXT: retq 2341; 2342; SSE41-LABEL: trunc_usat_v8i64_v8i8_store: 2343; SSE41: # %bb.0: 2344; SSE41-NEXT: movdqa (%rdi), %xmm8 2345; SSE41-NEXT: movdqa 16(%rdi), %xmm7 2346; SSE41-NEXT: movdqa 32(%rdi), %xmm3 2347; SSE41-NEXT: movdqa 48(%rdi), %xmm6 2348; SSE41-NEXT: movapd {{.*#+}} xmm2 = [255,255] 2349; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456] 2350; SSE41-NEXT: movdqa %xmm7, %xmm9 2351; SSE41-NEXT: pxor %xmm5, %xmm9 2352; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259711,9223372039002259711] 2353; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2] 2354; SSE41-NEXT: pcmpeqd %xmm4, %xmm9 2355; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483903,2147483903,2147483903,2147483903] 2356; SSE41-NEXT: movdqa %xmm1, %xmm0 2357; SSE41-NEXT: pcmpgtd %xmm10, %xmm0 2358; SSE41-NEXT: pand %xmm9, %xmm0 2359; SSE41-NEXT: movapd %xmm2, %xmm9 2360; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm9 2361; SSE41-NEXT: movdqa %xmm8, %xmm7 2362; SSE41-NEXT: pxor %xmm5, %xmm7 2363; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2] 2364; SSE41-NEXT: pcmpeqd %xmm4, %xmm7 2365; SSE41-NEXT: movdqa %xmm1, %xmm0 2366; SSE41-NEXT: pcmpgtd %xmm10, %xmm0 2367; SSE41-NEXT: pand %xmm7, %xmm0 2368; SSE41-NEXT: movapd %xmm2, %xmm7 2369; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm7 2370; SSE41-NEXT: packusdw %xmm9, %xmm7 2371; SSE41-NEXT: movdqa %xmm6, %xmm8 2372; SSE41-NEXT: pxor %xmm5, %xmm8 2373; SSE41-NEXT: pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2] 2374; SSE41-NEXT: pcmpeqd %xmm4, %xmm8 2375; SSE41-NEXT: movdqa %xmm1, %xmm0 2376; SSE41-NEXT: 
pcmpgtd %xmm9, %xmm0 2377; SSE41-NEXT: pand %xmm8, %xmm0 2378; SSE41-NEXT: movapd %xmm2, %xmm8 2379; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm8 2380; SSE41-NEXT: pxor %xmm3, %xmm5 2381; SSE41-NEXT: pcmpeqd %xmm5, %xmm4 2382; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 2383; SSE41-NEXT: pcmpgtd %xmm0, %xmm1 2384; SSE41-NEXT: pand %xmm4, %xmm1 2385; SSE41-NEXT: movdqa %xmm1, %xmm0 2386; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 2387; SSE41-NEXT: packusdw %xmm8, %xmm2 2388; SSE41-NEXT: packusdw %xmm2, %xmm7 2389; SSE41-NEXT: packuswb %xmm7, %xmm7 2390; SSE41-NEXT: movq %xmm7, (%rsi) 2391; SSE41-NEXT: retq 2392; 2393; AVX1-LABEL: trunc_usat_v8i64_v8i8_store: 2394; AVX1: # %bb.0: 2395; AVX1-NEXT: vmovdqa (%rdi), %xmm0 2396; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 2397; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2 2398; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3 2399; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] 2400; AVX1-NEXT: # xmm4 = mem[0,0] 2401; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5 2402; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [9223372036854776063,9223372036854776063] 2403; AVX1-NEXT: # xmm6 = mem[0,0] 2404; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5 2405; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm7 2406; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm7 2407; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm8 2408; AVX1-NEXT: vpcmpgtq %xmm8, %xmm6, %xmm8 2409; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm4 2410; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4 2411; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [255,255] 2412; AVX1-NEXT: # xmm6 = mem[0,0] 2413; AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm6, %xmm3 2414; AVX1-NEXT: vblendvpd %xmm8, %xmm2, %xmm6, %xmm2 2415; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 2416; AVX1-NEXT: vblendvpd %xmm7, %xmm1, %xmm6, %xmm1 2417; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm6, %xmm0 2418; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 2419; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 2420; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2421; AVX1-NEXT: vmovq %xmm0, (%rsi) 2422; AVX1-NEXT: retq 2423; 2424; AVX2-LABEL: trunc_usat_v8i64_v8i8_store: 2425; AVX2: # %bb.0: 2426; AVX2-NEXT: vmovdqa (%rdi), %ymm0 2427; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1 2428; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [255,255,255,255] 2429; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] 2430; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm4 2431; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372036854776063,9223372036854776063,9223372036854776063,9223372036854776063] 2432; AVX2-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 2433; AVX2-NEXT: vblendvpd %ymm4, %ymm1, %ymm2, %ymm1 2434; AVX2-NEXT: vpxor %ymm3, %ymm0, %ymm3 2435; AVX2-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 2436; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 2437; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 2438; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2439; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 2440; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 2441; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2442; AVX2-NEXT: vmovq %xmm0, (%rsi) 2443; AVX2-NEXT: vzeroupper 2444; AVX2-NEXT: retq 2445; 2446; AVX512-LABEL: trunc_usat_v8i64_v8i8_store: 2447; AVX512: # %bb.0: 2448; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 2449; AVX512-NEXT: vpmovusqb %zmm0, (%rsi) 2450; AVX512-NEXT: vzeroupper 2451; AVX512-NEXT: retq 2452; 2453; SKX-LABEL: trunc_usat_v8i64_v8i8_store: 2454; SKX: # %bb.0: 2455; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 2456; SKX-NEXT: vpmovusqb %zmm0, (%rsi) 2457; SKX-NEXT: vzeroupper 2458; SKX-NEXT: retq 2459 %a0 = load <8 x i64>, ptr %p0 2460 %1 = icmp ult 
<8 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255> 2461 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255> 2462 %3 = trunc <8 x i64> %2 to <8 x i8> 2463 store <8 x i8> %3, ptr%p1 2464 ret void 2465} 2466 2467define <16 x i8> @trunc_usat_v16i64_v16i8(ptr %p0) { 2468; SSE2-SSSE3-LABEL: trunc_usat_v16i64_v16i8: 2469; SSE2-SSSE3: # %bb.0: 2470; SSE2-SSSE3-NEXT: movdqa 96(%rdi), %xmm1 2471; SSE2-SSSE3-NEXT: movdqa 112(%rdi), %xmm4 2472; SSE2-SSSE3-NEXT: movdqa 64(%rdi), %xmm6 2473; SSE2-SSSE3-NEXT: movdqa 80(%rdi), %xmm7 2474; SSE2-SSSE3-NEXT: movdqa (%rdi), %xmm10 2475; SSE2-SSSE3-NEXT: movdqa 16(%rdi), %xmm0 2476; SSE2-SSSE3-NEXT: movdqa 32(%rdi), %xmm8 2477; SSE2-SSSE3-NEXT: movdqa 48(%rdi), %xmm9 2478; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255] 2479; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456] 2480; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm11 2481; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm11 2482; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2] 2483; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147483903,2147483903,2147483903,2147483903] 2484; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm13 2485; SSE2-SSSE3-NEXT: pcmpgtd %xmm12, %xmm13 2486; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm11[1,1,3,3] 2487; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm11 2488; SSE2-SSSE3-NEXT: pand %xmm13, %xmm11 2489; SSE2-SSSE3-NEXT: pand %xmm11, %xmm0 2490; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm11 2491; SSE2-SSSE3-NEXT: por %xmm0, %xmm11 2492; SSE2-SSSE3-NEXT: movdqa %xmm10, %xmm0 2493; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm0 2494; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm12 = xmm0[0,0,2,2] 2495; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm13 2496; SSE2-SSSE3-NEXT: pcmpgtd %xmm12, %xmm13 2497; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 2498; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm0 2499; SSE2-SSSE3-NEXT: pand %xmm13, %xmm0 2500; SSE2-SSSE3-NEXT: pand %xmm0, %xmm10 2501; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm0 2502; SSE2-SSSE3-NEXT: por %xmm10, %xmm0 2503; SSE2-SSSE3-NEXT: packuswb %xmm11, %xmm0 2504; SSE2-SSSE3-NEXT: movdqa %xmm9, %xmm10 2505; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm10 2506; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 2507; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm12 2508; SSE2-SSSE3-NEXT: pcmpgtd %xmm11, %xmm12 2509; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3] 2510; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm10 2511; SSE2-SSSE3-NEXT: pand %xmm12, %xmm10 2512; SSE2-SSSE3-NEXT: pand %xmm10, %xmm9 2513; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm10 2514; SSE2-SSSE3-NEXT: por %xmm9, %xmm10 2515; SSE2-SSSE3-NEXT: movdqa %xmm8, %xmm9 2516; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm9 2517; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm9[0,0,2,2] 2518; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm12 2519; SSE2-SSSE3-NEXT: pcmpgtd %xmm11, %xmm12 2520; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm9[1,1,3,3] 2521; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm9 2522; SSE2-SSSE3-NEXT: pand %xmm12, %xmm9 2523; SSE2-SSSE3-NEXT: pand %xmm9, %xmm8 2524; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm9 2525; SSE2-SSSE3-NEXT: por %xmm8, %xmm9 2526; SSE2-SSSE3-NEXT: packuswb %xmm10, %xmm9 2527; SSE2-SSSE3-NEXT: packuswb %xmm9, %xmm0 2528; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm8 2529; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm8 2530; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2] 2531; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm10 2532; SSE2-SSSE3-NEXT: pcmpgtd %xmm9, %xmm10 2533; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm8[1,1,3,3] 2534; SSE2-SSSE3-NEXT: pcmpeqd 
%xmm3, %xmm8 2535; SSE2-SSSE3-NEXT: pand %xmm10, %xmm8 2536; SSE2-SSSE3-NEXT: pand %xmm8, %xmm7 2537; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm8 2538; SSE2-SSSE3-NEXT: por %xmm7, %xmm8 2539; SSE2-SSSE3-NEXT: movdqa %xmm6, %xmm7 2540; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm7 2541; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm7[0,0,2,2] 2542; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm10 2543; SSE2-SSSE3-NEXT: pcmpgtd %xmm9, %xmm10 2544; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3] 2545; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm7 2546; SSE2-SSSE3-NEXT: pand %xmm10, %xmm7 2547; SSE2-SSSE3-NEXT: pand %xmm7, %xmm6 2548; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm7 2549; SSE2-SSSE3-NEXT: por %xmm6, %xmm7 2550; SSE2-SSSE3-NEXT: packuswb %xmm8, %xmm7 2551; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm6 2552; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm6 2553; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2] 2554; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm9 2555; SSE2-SSSE3-NEXT: pcmpgtd %xmm8, %xmm9 2556; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 2557; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm6 2558; SSE2-SSSE3-NEXT: pand %xmm9, %xmm6 2559; SSE2-SSSE3-NEXT: pand %xmm6, %xmm4 2560; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm6 2561; SSE2-SSSE3-NEXT: por %xmm4, %xmm6 2562; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm4 2563; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm4 2564; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm4[0,0,2,2] 2565; SSE2-SSSE3-NEXT: pcmpgtd %xmm8, %xmm5 2566; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 2567; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4 2568; SSE2-SSSE3-NEXT: pand %xmm5, %xmm4 2569; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 2570; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm4 2571; SSE2-SSSE3-NEXT: por %xmm1, %xmm4 2572; SSE2-SSSE3-NEXT: packuswb %xmm6, %xmm4 2573; SSE2-SSSE3-NEXT: packuswb %xmm4, %xmm7 2574; SSE2-SSSE3-NEXT: packuswb %xmm7, %xmm0 2575; SSE2-SSSE3-NEXT: retq 2576; 2577; SSE41-LABEL: trunc_usat_v16i64_v16i8: 2578; SSE41: # %bb.0: 2579; SSE41-NEXT: movdqa 96(%rdi), %xmm3 2580; SSE41-NEXT: movdqa 112(%rdi), %xmm5 2581; SSE41-NEXT: movdqa 64(%rdi), %xmm8 2582; SSE41-NEXT: movdqa 80(%rdi), %xmm9 2583; SSE41-NEXT: movdqa (%rdi), %xmm12 2584; SSE41-NEXT: movdqa 16(%rdi), %xmm2 2585; SSE41-NEXT: movdqa 32(%rdi), %xmm10 2586; SSE41-NEXT: movdqa 48(%rdi), %xmm11 2587; SSE41-NEXT: movapd {{.*#+}} xmm4 = [255,255] 2588; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [9223372039002259456,9223372039002259456] 2589; SSE41-NEXT: movdqa %xmm2, %xmm13 2590; SSE41-NEXT: pxor %xmm7, %xmm13 2591; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [9223372039002259711,9223372039002259711] 2592; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2] 2593; SSE41-NEXT: pcmpeqd %xmm6, %xmm13 2594; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2147483903,2147483903,2147483903,2147483903] 2595; SSE41-NEXT: movdqa %xmm1, %xmm0 2596; SSE41-NEXT: pcmpgtd %xmm14, %xmm0 2597; SSE41-NEXT: pand %xmm13, %xmm0 2598; SSE41-NEXT: movapd %xmm4, %xmm13 2599; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm13 2600; SSE41-NEXT: movdqa %xmm12, %xmm2 2601; SSE41-NEXT: pxor %xmm7, %xmm2 2602; SSE41-NEXT: pshufd {{.*#+}} xmm14 = xmm2[0,0,2,2] 2603; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 2604; SSE41-NEXT: movdqa %xmm1, %xmm0 2605; SSE41-NEXT: pcmpgtd %xmm14, %xmm0 2606; SSE41-NEXT: pand %xmm2, %xmm0 2607; SSE41-NEXT: movapd %xmm4, %xmm2 2608; SSE41-NEXT: blendvpd %xmm0, %xmm12, %xmm2 2609; SSE41-NEXT: packusdw %xmm13, %xmm2 2610; SSE41-NEXT: movdqa %xmm11, %xmm12 2611; SSE41-NEXT: pxor %xmm7, %xmm12 2612; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2] 2613; SSE41-NEXT: pcmpeqd %xmm6, %xmm12 2614; SSE41-NEXT: movdqa %xmm1, %xmm0 2615; 
SSE41-NEXT: pcmpgtd %xmm13, %xmm0 2616; SSE41-NEXT: pand %xmm12, %xmm0 2617; SSE41-NEXT: movapd %xmm4, %xmm12 2618; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm12 2619; SSE41-NEXT: movdqa %xmm10, %xmm11 2620; SSE41-NEXT: pxor %xmm7, %xmm11 2621; SSE41-NEXT: pshufd {{.*#+}} xmm13 = xmm11[0,0,2,2] 2622; SSE41-NEXT: pcmpeqd %xmm6, %xmm11 2623; SSE41-NEXT: movdqa %xmm1, %xmm0 2624; SSE41-NEXT: pcmpgtd %xmm13, %xmm0 2625; SSE41-NEXT: pand %xmm11, %xmm0 2626; SSE41-NEXT: movapd %xmm4, %xmm11 2627; SSE41-NEXT: blendvpd %xmm0, %xmm10, %xmm11 2628; SSE41-NEXT: packusdw %xmm12, %xmm11 2629; SSE41-NEXT: packusdw %xmm11, %xmm2 2630; SSE41-NEXT: movdqa %xmm9, %xmm10 2631; SSE41-NEXT: pxor %xmm7, %xmm10 2632; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 2633; SSE41-NEXT: pcmpeqd %xmm6, %xmm10 2634; SSE41-NEXT: movdqa %xmm1, %xmm0 2635; SSE41-NEXT: pcmpgtd %xmm11, %xmm0 2636; SSE41-NEXT: pand %xmm10, %xmm0 2637; SSE41-NEXT: movapd %xmm4, %xmm10 2638; SSE41-NEXT: blendvpd %xmm0, %xmm9, %xmm10 2639; SSE41-NEXT: movdqa %xmm8, %xmm9 2640; SSE41-NEXT: pxor %xmm7, %xmm9 2641; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm9[0,0,2,2] 2642; SSE41-NEXT: pcmpeqd %xmm6, %xmm9 2643; SSE41-NEXT: movdqa %xmm1, %xmm0 2644; SSE41-NEXT: pcmpgtd %xmm11, %xmm0 2645; SSE41-NEXT: pand %xmm9, %xmm0 2646; SSE41-NEXT: movapd %xmm4, %xmm9 2647; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm9 2648; SSE41-NEXT: packusdw %xmm10, %xmm9 2649; SSE41-NEXT: movdqa %xmm5, %xmm8 2650; SSE41-NEXT: pxor %xmm7, %xmm8 2651; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm8[0,0,2,2] 2652; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 2653; SSE41-NEXT: movdqa %xmm1, %xmm0 2654; SSE41-NEXT: pcmpgtd %xmm10, %xmm0 2655; SSE41-NEXT: pand %xmm8, %xmm0 2656; SSE41-NEXT: movapd %xmm4, %xmm8 2657; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm8 2658; SSE41-NEXT: pxor %xmm3, %xmm7 2659; SSE41-NEXT: pcmpeqd %xmm7, %xmm6 2660; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 2661; SSE41-NEXT: pcmpgtd %xmm0, %xmm1 2662; SSE41-NEXT: pand %xmm6, %xmm1 2663; SSE41-NEXT: movdqa %xmm1, %xmm0 2664; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm4 2665; SSE41-NEXT: packusdw %xmm8, %xmm4 2666; SSE41-NEXT: packusdw %xmm4, %xmm9 2667; SSE41-NEXT: packuswb %xmm9, %xmm2 2668; SSE41-NEXT: movdqa %xmm2, %xmm0 2669; SSE41-NEXT: retq 2670; 2671; AVX1-LABEL: trunc_usat_v16i64_v16i8: 2672; AVX1: # %bb.0: 2673; AVX1-NEXT: vmovdqa (%rdi), %xmm0 2674; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2675; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 2676; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2 2677; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3 2678; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808] 2679; AVX1-NEXT: # xmm4 = mem[0,0] 2680; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5 2681; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [9223372036854776063,9223372036854776063] 2682; AVX1-NEXT: # xmm6 = mem[0,0] 2683; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5 2684; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm7 2685; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm7 2686; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm8 2687; AVX1-NEXT: vpcmpgtq %xmm8, %xmm6, %xmm8 2688; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm9 2689; AVX1-NEXT: vpcmpgtq %xmm9, %xmm6, %xmm9 2690; AVX1-NEXT: vmovdqa 64(%rdi), %xmm10 2691; AVX1-NEXT: vpxor %xmm4, %xmm10, %xmm11 2692; AVX1-NEXT: vpcmpgtq %xmm11, %xmm6, %xmm11 2693; AVX1-NEXT: vmovdqa 80(%rdi), %xmm12 2694; AVX1-NEXT: vpxor %xmm4, %xmm12, %xmm13 2695; AVX1-NEXT: vpcmpgtq %xmm13, %xmm6, %xmm13 2696; AVX1-NEXT: vmovdqa 96(%rdi), %xmm14 2697; AVX1-NEXT: vpxor %xmm4, %xmm14, %xmm15 2698; AVX1-NEXT: vpcmpgtq %xmm15, %xmm6, %xmm15 2699; 
AVX1-NEXT: vmovdqa 112(%rdi), %xmm0 2700; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm4 2701; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4 2702; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [255,255] 2703; AVX1-NEXT: # xmm6 = mem[0,0] 2704; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm6, %xmm0 2705; AVX1-NEXT: vblendvpd %xmm15, %xmm14, %xmm6, %xmm4 2706; AVX1-NEXT: vblendvpd %xmm13, %xmm12, %xmm6, %xmm12 2707; AVX1-NEXT: vblendvpd %xmm11, %xmm10, %xmm6, %xmm10 2708; AVX1-NEXT: vblendvpd %xmm9, %xmm3, %xmm6, %xmm3 2709; AVX1-NEXT: vblendvpd %xmm8, %xmm2, %xmm6, %xmm2 2710; AVX1-NEXT: vblendvpd %xmm7, %xmm1, %xmm6, %xmm1 2711; AVX1-NEXT: vblendvpd %xmm5, {{[-0-9]+}}(%r{{[sb]}}p), %xmm6, %xmm5 # 16-byte Folded Reload 2712; AVX1-NEXT: vpackusdw %xmm0, %xmm4, %xmm0 2713; AVX1-NEXT: vpackusdw %xmm12, %xmm10, %xmm4 2714; AVX1-NEXT: vpackusdw %xmm0, %xmm4, %xmm0 2715; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 2716; AVX1-NEXT: vpackusdw %xmm1, %xmm5, %xmm1 2717; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 2718; AVX1-NEXT: vpackuswb %xmm0, %xmm1, %xmm0 2719; AVX1-NEXT: retq 2720; 2721; AVX2-LABEL: trunc_usat_v16i64_v16i8: 2722; AVX2: # %bb.0: 2723; AVX2-NEXT: vmovdqa (%rdi), %ymm0 2724; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1 2725; AVX2-NEXT: vmovdqa 64(%rdi), %ymm2 2726; AVX2-NEXT: vmovdqa 96(%rdi), %ymm3 2727; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm4 = [255,255,255,255] 2728; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm5 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] 2729; AVX2-NEXT: vpxor %ymm5, %ymm1, %ymm6 2730; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm7 = [9223372036854776063,9223372036854776063,9223372036854776063,9223372036854776063] 2731; AVX2-NEXT: vpcmpgtq %ymm6, %ymm7, %ymm6 2732; AVX2-NEXT: vblendvpd %ymm6, %ymm1, %ymm4, %ymm1 2733; AVX2-NEXT: vpxor %ymm5, %ymm0, %ymm6 2734; AVX2-NEXT: vpcmpgtq %ymm6, %ymm7, %ymm6 2735; AVX2-NEXT: vblendvpd %ymm6, %ymm0, %ymm4, %ymm0 2736; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 2737; AVX2-NEXT: vpxor %ymm5, %ymm3, %ymm1 2738; AVX2-NEXT: vpcmpgtq %ymm1, %ymm7, %ymm1 2739; AVX2-NEXT: vblendvpd %ymm1, %ymm3, %ymm4, %ymm1 2740; AVX2-NEXT: vpxor %ymm5, %ymm2, %ymm3 2741; AVX2-NEXT: vpcmpgtq %ymm3, %ymm7, %ymm3 2742; AVX2-NEXT: vblendvpd %ymm3, %ymm2, %ymm4, %ymm2 2743; AVX2-NEXT: vpackusdw %ymm1, %ymm2, %ymm1 2744; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3] 2745; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 2746; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 2747; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2748; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 2749; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 2750; AVX2-NEXT: vzeroupper 2751; AVX2-NEXT: retq 2752; 2753; AVX512-LABEL: trunc_usat_v16i64_v16i8: 2754; AVX512: # %bb.0: 2755; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 2756; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm1 2757; AVX512-NEXT: vpmovusqb %zmm1, %xmm1 2758; AVX512-NEXT: vpmovusqb %zmm0, %xmm0 2759; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2760; AVX512-NEXT: vzeroupper 2761; AVX512-NEXT: retq 2762; 2763; SKX-LABEL: trunc_usat_v16i64_v16i8: 2764; SKX: # %bb.0: 2765; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 2766; SKX-NEXT: vmovdqa64 64(%rdi), %zmm1 2767; SKX-NEXT: vpmovusqb %zmm1, %xmm1 2768; SKX-NEXT: vpmovusqb %zmm0, %xmm0 2769; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2770; SKX-NEXT: vzeroupper 2771; SKX-NEXT: retq 2772 %a0 = load <16 x i64>, ptr %p0 2773 %1 = icmp ult <16 x i64> %a0, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255> 2774 %2 = select 
  %2 = select <16 x i1> %1, <16 x i64> %a0, <16 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
  %3 = trunc <16 x i64> %2 to <16 x i8>
  ret <16 x i8> %3
}

define <4 x i8> @trunc_usat_v4i32_v4i8(<4 x i32> %a0) {
; SSE2-SSSE3-LABEL: trunc_usat_v4i32_v4i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pandn %xmm0, %xmm2
; SSE2-SSSE3-NEXT: psrld $24, %xmm1
; SSE2-SSSE3-NEXT: por %xmm2, %xmm1
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm1
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm1
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v4i32_v4i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: packusdw %xmm0, %xmm0
; SSE41-NEXT: packuswb %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v4i32_v4i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc_usat_v4i32_v4i8:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
; AVX2-SLOW-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-SLOW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX2-SLOW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_usat_v4i32_v4i8:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
; AVX2-FAST-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12]
; AVX2-FAST-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v4i32_v4i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v4i32_v4i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovusdb %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v4i32_v4i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovusdb %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v4i32_v4i8:
; SKX: # %bb.0:
; SKX-NEXT: vpmovusdb %xmm0, %xmm0
; SKX-NEXT: retq
  %1 = icmp ult <4 x i32> %a0, <i32 255, i32 255, i32 255, i32 255>
  %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 255, i32 255, i32 255, i32 255>
  %3 = trunc <4 x i32> %2 to <4 x i8>
  ret <4 x i8> %3
}

define void @trunc_usat_v4i32_v4i8_store(<4 x i32> %a0, ptr %p1) {
; SSE2-SSSE3-LABEL: trunc_usat_v4i32_v4i8_store:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
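; NOTE: the pcmpgtd mask above is all-ones in lanes that must saturate; the
; psrld $24 below turns that mask into the clamp value 255, which por then
; merges with the unsaturated lanes kept by pandn.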
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pandn %xmm0, %xmm2
; SSE2-SSSE3-NEXT: psrld $24, %xmm1
; SSE2-SSSE3-NEXT: por %xmm2, %xmm1
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm1
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm1
; SSE2-SSSE3-NEXT: movd %xmm1, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v4i32_v4i8_store:
; SSE41: # %bb.0:
; SSE41-NEXT: pminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: packusdw %xmm0, %xmm0
; SSE41-NEXT: packuswb %xmm0, %xmm0
; SSE41-NEXT: movd %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v4i32_v4i8_store:
; AVX1: # %bb.0:
; AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc_usat_v4i32_v4i8_store:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
; AVX2-SLOW-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-SLOW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX2-SLOW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX2-SLOW-NEXT: vmovd %xmm0, (%rdi)
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: trunc_usat_v4i32_v4i8_store:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
; AVX2-FAST-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX2-FAST-NEXT: vmovd %xmm0, (%rdi)
; AVX2-FAST-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v4i32_v4i8_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v4i32_v4i8_store:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovusdb %xmm0, (%rdi)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v4i32_v4i8_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i8_store:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovusdb %xmm0, (%rdi)
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v4i32_v4i8_store:
; SKX: # %bb.0:
; SKX-NEXT: vpmovusdb %xmm0, (%rdi)
; SKX-NEXT: retq
  %1 = icmp ult <4 x i32> %a0, <i32 255, i32 255, i32 255, i32 255>
  %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 255, i32 255, i32 255, i32 255>
  %3 = trunc <4 x i32> %2 to <4 x i8>
  store <4 x i8> %3, ptr %p1
  ret void
}

define <8 x i8> @trunc_usat_v8i32_v8i8(<8 x i32> %a0) {
; SSE2-SSSE3-LABEL: trunc_usat_v8i32_v8i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm4
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147483903,2147483903,2147483903,2147483903]
; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm6
; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
; SSE2-SSSE3-NEXT: pand %xmm6, %xmm1
; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm6
; SSE2-SSSE3-NEXT: por %xmm1, %xmm6
; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm3
; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm5
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm5
; SSE2-SSSE3-NEXT: por %xmm5, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm6, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v8i32_v8i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = [255,255,255,255]
; SSE41-NEXT: pminud %xmm2, %xmm1
; SSE41-NEXT: pminud %xmm2, %xmm0
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packuswb %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v8i32_v8i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255]
; AVX1-NEXT: vpminud %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpminud %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_usat_v8i32_v8i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v8i32_v8i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v8i32_v8i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovusdb %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v8i32_v8i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v8i32_v8i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovusdb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v8i32_v8i8:
; SKX: # %bb.0:
; SKX-NEXT: vpmovusdb %ymm0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %1 = icmp ult <8 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %3 = trunc <8 x i32> %2 to <8 x i8>
  ret <8 x i8> %3
}

define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, ptr %p1) {
; SSE2-SSSE3-LABEL: trunc_usat_v8i32_v8i8_store:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm4
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147483903,2147483903,2147483903,2147483903]
; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm6
; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
; SSE2-SSSE3-NEXT: pand %xmm6, %xmm1
; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm6
; SSE2-SSSE3-NEXT: por %xmm1, %xmm6
; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm3
; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm5
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm5
; SSE2-SSSE3-NEXT: por %xmm0, %xmm5
; SSE2-SSSE3-NEXT: packuswb %xmm6, %xmm5
; SSE2-SSSE3-NEXT: packuswb %xmm5, %xmm5
; SSE2-SSSE3-NEXT: movq %xmm5, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v8i32_v8i8_store:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = [255,255,255,255]
; SSE41-NEXT: pminud %xmm2, %xmm1
; SSE41-NEXT: pminud %xmm2, %xmm0
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packuswb %xmm0, %xmm0
; SSE41-NEXT: movq %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v8i32_v8i8_store:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255]
; AVX1-NEXT: vpminud %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpminud %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_usat_v8i32_v8i8_store:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v8i32_v8i8_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v8i32_v8i8_store:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovusdb %ymm0, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v8i32_v8i8_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v8i32_v8i8_store:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovusdb %ymm0, (%rdi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v8i32_v8i8_store:
; SKX: # %bb.0:
; SKX-NEXT: vpmovusdb %ymm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %1 = icmp ult <8 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %3 = trunc <8 x i32> %2 to <8 x i8>
  store <8 x i8> %3, ptr %p1
  ret void
}

define <16 x i8> @trunc_usat_v16i32_v16i8(ptr %p0) {
; SSE2-SSSE3-LABEL: trunc_usat_v16i32_v16i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa (%rdi), %xmm6
; SSE2-SSSE3-NEXT: movdqa 16(%rdi), %xmm0
; SSE2-SSSE3-NEXT: movdqa 32(%rdi), %xmm1
; SSE2-SSSE3-NEXT: movdqa 48(%rdi), %xmm5
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm7
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm7
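; NOTE: 2147483903 below is 2^31+255, i.e. the unsigned bound 255 rebased
; into signed range for pcmpgtd after the sign-bit flip by the pxor above.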
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483903,2147483903,2147483903,2147483903]
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm8
; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm8
; SSE2-SSSE3-NEXT: pand %xmm8, %xmm0
; SSE2-SSSE3-NEXT: pandn %xmm3, %xmm8
; SSE2-SSSE3-NEXT: por %xmm0, %xmm8
; SSE2-SSSE3-NEXT: movdqa %xmm6, %xmm7
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm7
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm6
; SSE2-SSSE3-NEXT: pandn %xmm3, %xmm0
; SSE2-SSSE3-NEXT: por %xmm6, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm8, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm6
; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm6
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm7
; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm7
; SSE2-SSSE3-NEXT: pand %xmm7, %xmm5
; SSE2-SSSE3-NEXT: pandn %xmm3, %xmm7
; SSE2-SSSE3-NEXT: por %xmm5, %xmm7
; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm4
; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pandn %xmm3, %xmm2
; SSE2-SSSE3-NEXT: por %xmm1, %xmm2
; SSE2-SSSE3-NEXT: packuswb %xmm7, %xmm2
; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v16i32_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = [255,255,255,255]
; SSE41-NEXT: movdqa 16(%rdi), %xmm2
; SSE41-NEXT: pminud %xmm1, %xmm2
; SSE41-NEXT: movdqa (%rdi), %xmm0
; SSE41-NEXT: pminud %xmm1, %xmm0
; SSE41-NEXT: packusdw %xmm2, %xmm0
; SSE41-NEXT: movdqa 48(%rdi), %xmm2
; SSE41-NEXT: pminud %xmm1, %xmm2
; SSE41-NEXT: pminud 32(%rdi), %xmm1
; SSE41-NEXT: packusdw %xmm2, %xmm1
; SSE41-NEXT: packuswb %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v16i32_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm0 = [255,255,255,255]
; AVX1-NEXT: vpminud 16(%rdi), %xmm0, %xmm1
; AVX1-NEXT: vpminud (%rdi), %xmm0, %xmm2
; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpminud 48(%rdi), %xmm0, %xmm2
; AVX1-NEXT: vpminud 32(%rdi), %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_usat_v16i32_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpminud 32(%rdi), %ymm0, %ymm1
; AVX2-NEXT: vpminud (%rdi), %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_usat_v16i32_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; SKX-LABEL: trunc_usat_v16i32_v16i8:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 (%rdi), %zmm0
; SKX-NEXT: vpmovusdb %zmm0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %a0 = load <16 x i32>, ptr %p0
  %1 = icmp ult <16 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %3 = trunc <16 x i32> %2 to <16 x i8>
  ret <16 x i8> %3
}

define void @trunc_usat_v16i32_v16i8_store(ptr %p0, ptr %p1) {
; SSE2-SSSE3-LABEL: trunc_usat_v16i32_v16i8_store:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa (%rdi), %xmm6
; SSE2-SSSE3-NEXT: movdqa 16(%rdi), %xmm5
; SSE2-SSSE3-NEXT: movdqa 32(%rdi), %xmm0
; SSE2-SSSE3-NEXT: movdqa 48(%rdi), %xmm4
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm7
; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm7
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483903,2147483903,2147483903,2147483903]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm8
; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm8
; SSE2-SSSE3-NEXT: pand %xmm8, %xmm5
; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm8
; SSE2-SSSE3-NEXT: por %xmm5, %xmm8
; SSE2-SSSE3-NEXT: movdqa %xmm6, %xmm7
; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm7
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm5
; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm6
; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm5
; SSE2-SSSE3-NEXT: por %xmm6, %xmm5
; SSE2-SSSE3-NEXT: packuswb %xmm8, %xmm5
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm6
; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm6
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm7
; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm7
; SSE2-SSSE3-NEXT: pand %xmm7, %xmm4
; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm7
; SSE2-SSSE3-NEXT: por %xmm4, %xmm7
; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm3
; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pandn %xmm2, %xmm1
; SSE2-SSSE3-NEXT: por %xmm0, %xmm1
; SSE2-SSSE3-NEXT: packuswb %xmm7, %xmm1
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm5
; SSE2-SSSE3-NEXT: movdqa %xmm5, (%rsi)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v16i32_v16i8_store:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = [255,255,255,255]
; SSE41-NEXT: movdqa 16(%rdi), %xmm1
; SSE41-NEXT: pminud %xmm0, %xmm1
; SSE41-NEXT: movdqa (%rdi), %xmm2
; SSE41-NEXT: pminud %xmm0, %xmm2
; SSE41-NEXT: packusdw %xmm1, %xmm2
; SSE41-NEXT: movdqa 48(%rdi), %xmm1
; SSE41-NEXT: pminud %xmm0, %xmm1
; SSE41-NEXT: pminud 32(%rdi), %xmm0
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packuswb %xmm0, %xmm2
; SSE41-NEXT: movdqa %xmm2, (%rsi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v16i32_v16i8_store:
; AVX1: # %bb.0:
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm0 = [255,255,255,255]
; AVX1-NEXT: vpminud 16(%rdi), %xmm0, %xmm1
; AVX1-NEXT: vpminud (%rdi), %xmm0, %xmm2
; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpminud 48(%rdi), %xmm0, %xmm2
; AVX1-NEXT: vpminud 32(%rdi), %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmovdqa %xmm0, (%rsi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_usat_v16i32_v16i8_store:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpminud 32(%rdi), %ymm0, %ymm1
; AVX2-NEXT: vpminud (%rdi), %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT: vmovdqa %xmm0, (%rsi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_usat_v16i32_v16i8_store:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT: vpmovusdb %zmm0, (%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; SKX-LABEL: trunc_usat_v16i32_v16i8_store:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 (%rdi), %zmm0
; SKX-NEXT: vpmovusdb %zmm0, (%rsi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %a0 = load <16 x i32>, ptr %p0
  %1 = icmp ult <16 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %3 = trunc <16 x i32> %2 to <16 x i8>
  store <16 x i8> %3, ptr %p1
  ret void
}

define <8 x i8> @trunc_usat_v8i16_v8i8(<8 x i16> %a0) {
; SSE2-SSSE3-LABEL: trunc_usat_v8i16_v8i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSE2-SSSE3-NEXT: psubusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-SSSE3-NEXT: psubw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v8i16_v8i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: packuswb %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc_usat_v8i16_v8i8:
; AVX: # %bb.0:
; AVX-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v8i16_v8i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v8i16_v8i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v8i16_v8i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v8i16_v8i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovuswb %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v8i16_v8i8:
; SKX: # %bb.0:
; SKX-NEXT: vpmovuswb %xmm0, %xmm0
; SKX-NEXT: retq
  %1 = icmp ult <8 x i16> %a0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %3 = trunc <8 x i16> %2 to <8 x i8>
  ret <8 x i8> %3
}

define void @trunc_usat_v8i16_v8i8_store(<8 x i16> %a0, ptr %p1) {
; SSE2-SSSE3-LABEL: trunc_usat_v8i16_v8i8_store:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSE2-SSSE3-NEXT: psubusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-SSSE3-NEXT: psubw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT: movq %xmm0, (%rdi)
; SSE2-SSSE3-NEXT: retq
;
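; NOTE: SSE2 has no pminuw, so umin(x, 255) above is formed as
; x - usubsat(x, 255): psubusw computes the saturating difference and psubw
; subtracts it back out.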
; SSE41-LABEL: trunc_usat_v8i16_v8i8_store:
; SSE41: # %bb.0:
; SSE41-NEXT: pminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: packuswb %xmm0, %xmm0
; SSE41-NEXT: movq %xmm0, (%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc_usat_v8i16_v8i8_store:
; AVX: # %bb.0:
; AVX-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, (%rdi)
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v8i16_v8i8_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v8i16_v8i8_store:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, (%rdi)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v8i16_v8i8_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v8i16_v8i8_store:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovuswb %xmm0, (%rdi)
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v8i16_v8i8_store:
; SKX: # %bb.0:
; SKX-NEXT: vpmovuswb %xmm0, (%rdi)
; SKX-NEXT: retq
  %1 = icmp ult <8 x i16> %a0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %3 = trunc <8 x i16> %2 to <8 x i8>
  store <8 x i8> %3, ptr %p1
  ret void
}

define <16 x i8> @trunc_usat_v16i16_v16i8(<16 x i16> %a0) {
; SSE2-SSSE3-LABEL: trunc_usat_v16i16_v16i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm3
; SSE2-SSSE3-NEXT: psubusw %xmm2, %xmm3
; SSE2-SSSE3-NEXT: psubw %xmm3, %xmm1
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSE2-SSSE3-NEXT: psubusw %xmm2, %xmm3
; SSE2-SSSE3-NEXT: psubw %xmm3, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v16i16_v16i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pminuw %xmm2, %xmm1
; SSE41-NEXT: pminuw %xmm2, %xmm0
; SSE41-NEXT: packuswb %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v16i16_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpminuw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpminuw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_usat_v16i16_v16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v16i16_v16i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v16i16_v16i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v16i16_v16i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v16i16_v16i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovuswb %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v16i16_v16i8:
; SKX: # %bb.0:
; SKX-NEXT: vpmovuswb %ymm0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %1 = icmp ult <16 x i16> %a0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %2 = select <16 x i1> %1, <16 x i16> %a0, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %3 = trunc <16 x i16> %2 to <16 x i8>
  ret <16 x i8> %3
}

define <32 x i8> @trunc_usat_v32i16_v32i8(ptr %p0) {
; SSE2-SSSE3-LABEL: trunc_usat_v32i16_v32i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa (%rdi), %xmm0
; SSE2-SSSE3-NEXT: movdqa 16(%rdi), %xmm2
; SSE2-SSSE3-NEXT: movdqa 32(%rdi), %xmm1
; SSE2-SSSE3-NEXT: movdqa 48(%rdi), %xmm3
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm5
; SSE2-SSSE3-NEXT: psubusw %xmm4, %xmm5
; SSE2-SSSE3-NEXT: psubw %xmm5, %xmm3
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm5
; SSE2-SSSE3-NEXT: psubusw %xmm4, %xmm5
; SSE2-SSSE3-NEXT: psubw %xmm5, %xmm1
; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm1
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm3
; SSE2-SSSE3-NEXT: psubusw %xmm4, %xmm3
; SSE2-SSSE3-NEXT: psubw %xmm3, %xmm2
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSE2-SSSE3-NEXT: psubusw %xmm4, %xmm3
; SSE2-SSSE3-NEXT: psubw %xmm3, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v32i16_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: movdqa 48(%rdi), %xmm2
; SSE41-NEXT: pminuw %xmm0, %xmm2
; SSE41-NEXT: movdqa 32(%rdi), %xmm1
; SSE41-NEXT: pminuw %xmm0, %xmm1
; SSE41-NEXT: packuswb %xmm2, %xmm1
; SSE41-NEXT: movdqa 16(%rdi), %xmm2
; SSE41-NEXT: pminuw %xmm0, %xmm2
; SSE41-NEXT: pminuw (%rdi), %xmm0
; SSE41-NEXT: packuswb %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v32i16_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm0 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpminuw 16(%rdi), %xmm0, %xmm1
; AVX1-NEXT: vpminuw (%rdi), %xmm0, %xmm2
; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpminuw 48(%rdi), %xmm0, %xmm2
; AVX1-NEXT: vpminuw 32(%rdi), %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_usat_v32i16_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpminuw 32(%rdi), %ymm0, %ymm1
; AVX2-NEXT: vpminuw (%rdi), %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v32i16_v32i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-NEXT: vpminuw 32(%rdi), %ymm0, %ymm1
; AVX512F-NEXT: vpminuw (%rdi), %ymm0, %ymm0
; AVX512F-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v32i16_v32i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512VL-NEXT: vpminuw 32(%rdi), %ymm0, %ymm1
; AVX512VL-NEXT: vpminuw (%rdi), %ymm0, %ymm0
; AVX512VL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v32i16_v32i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v32i16_v32i8:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BWVL-NEXT: vpmovuswb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v32i16_v32i8:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 (%rdi), %zmm0
; SKX-NEXT: vpmovuswb %zmm0, %ymm0
; SKX-NEXT: retq
  %a0 = load <32 x i16>, ptr %p0
  %1 = icmp ult <32 x i16> %a0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %2 = select <32 x i1> %1, <32 x i16> %a0, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %3 = trunc <32 x i16> %2 to <32 x i8>
  ret <32 x i8> %3
}

define <32 x i8> @trunc_usat_v32i32_v32i8(ptr %p0) {
; SSE2-SSSE3-LABEL: trunc_usat_v32i32_v32i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movdqa (%rdi), %xmm7
; SSE2-SSSE3-NEXT: movdqa 16(%rdi), %xmm0
; SSE2-SSSE3-NEXT: movdqa 32(%rdi), %xmm2
; SSE2-SSSE3-NEXT: movdqa 48(%rdi), %xmm5
; SSE2-SSSE3-NEXT: movdqa 96(%rdi), %xmm8
; SSE2-SSSE3-NEXT: movdqa 112(%rdi), %xmm9
; SSE2-SSSE3-NEXT: movdqa 64(%rdi), %xmm10
; SSE2-SSSE3-NEXT: movdqa 80(%rdi), %xmm1
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm11
; SSE2-SSSE3-NEXT: pxor %xmm6, %xmm11
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483903,2147483903,2147483903,2147483903]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm12
; SSE2-SSSE3-NEXT: pcmpgtd %xmm11, %xmm12
; SSE2-SSSE3-NEXT: pand %xmm12, %xmm1
; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm12
; SSE2-SSSE3-NEXT: por %xmm1, %xmm12
; SSE2-SSSE3-NEXT: movdqa %xmm10, %xmm11
; SSE2-SSSE3-NEXT: pxor %xmm6, %xmm11
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pcmpgtd %xmm11, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm10
; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm1
; SSE2-SSSE3-NEXT: por %xmm10, %xmm1
; SSE2-SSSE3-NEXT: packuswb %xmm12, %xmm1
; SSE2-SSSE3-NEXT: movdqa %xmm9, %xmm10
; SSE2-SSSE3-NEXT: pxor %xmm6, %xmm10
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm11
; SSE2-SSSE3-NEXT: pcmpgtd %xmm10, %xmm11
; SSE2-SSSE3-NEXT: pand %xmm11, %xmm9
; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm11
; SSE2-SSSE3-NEXT: por %xmm9, %xmm11
; SSE2-SSSE3-NEXT: movdqa %xmm8, %xmm9
; SSE2-SSSE3-NEXT: pxor %xmm6, %xmm9
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm10
; SSE2-SSSE3-NEXT: pcmpgtd %xmm9, %xmm10
; SSE2-SSSE3-NEXT: pand %xmm10, %xmm8
; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm10
; SSE2-SSSE3-NEXT: por %xmm8, %xmm10
; SSE2-SSSE3-NEXT: packuswb %xmm11, %xmm10
; SSE2-SSSE3-NEXT: packuswb %xmm10, %xmm1
; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm8
; SSE2-SSSE3-NEXT: pxor %xmm6, %xmm8
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm9
; SSE2-SSSE3-NEXT: pcmpgtd %xmm8, %xmm9
; SSE2-SSSE3-NEXT: pand %xmm9, %xmm0
; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm9
; SSE2-SSSE3-NEXT: por %xmm0, %xmm9
; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm8
; SSE2-SSSE3-NEXT: pxor %xmm6, %xmm8
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pcmpgtd %xmm8, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm7
; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm0
; SSE2-SSSE3-NEXT: por %xmm7, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm9, %xmm0
; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm7
; SSE2-SSSE3-NEXT: pxor %xmm6, %xmm7
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm8
; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm8
; SSE2-SSSE3-NEXT: pand %xmm8, %xmm5
; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm8
; SSE2-SSSE3-NEXT: por %xmm5, %xmm8
; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm6
; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm3
; SSE2-SSSE3-NEXT: por %xmm2, %xmm3
; SSE2-SSSE3-NEXT: packuswb %xmm8, %xmm3
; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc_usat_v32i32_v32i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = [255,255,255,255]
; SSE41-NEXT: movdqa 80(%rdi), %xmm0
; SSE41-NEXT: pminud %xmm2, %xmm0
; SSE41-NEXT: movdqa 64(%rdi), %xmm1
; SSE41-NEXT: pminud %xmm2, %xmm1
; SSE41-NEXT: packusdw %xmm0, %xmm1
; SSE41-NEXT: movdqa 112(%rdi), %xmm0
; SSE41-NEXT: pminud %xmm2, %xmm0
; SSE41-NEXT: movdqa 96(%rdi), %xmm3
; SSE41-NEXT: pminud %xmm2, %xmm3
; SSE41-NEXT: packusdw %xmm0, %xmm3
; SSE41-NEXT: packuswb %xmm3, %xmm1
; SSE41-NEXT: movdqa 16(%rdi), %xmm3
; SSE41-NEXT: pminud %xmm2, %xmm3
; SSE41-NEXT: movdqa (%rdi), %xmm0
; SSE41-NEXT: pminud %xmm2, %xmm0
; SSE41-NEXT: packusdw %xmm3, %xmm0
; SSE41-NEXT: movdqa 48(%rdi), %xmm3
; SSE41-NEXT: pminud %xmm2, %xmm3
; SSE41-NEXT: pminud 32(%rdi), %xmm2
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: packuswb %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_usat_v32i32_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm0 = [255,255,255,255]
; AVX1-NEXT: vpminud 16(%rdi), %xmm0, %xmm1
; AVX1-NEXT: vpminud (%rdi), %xmm0, %xmm2
; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpminud 48(%rdi), %xmm0, %xmm2
; AVX1-NEXT: vpminud 32(%rdi), %xmm0, %xmm3
; AVX1-NEXT: vpackusdw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpminud 80(%rdi), %xmm0, %xmm2
; AVX1-NEXT: vpminud 64(%rdi), %xmm0, %xmm3
; AVX1-NEXT: vpackusdw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpminud 112(%rdi), %xmm0, %xmm3
; AVX1-NEXT: vpminud 96(%rdi), %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_usat_v32i32_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpminud 32(%rdi), %ymm0, %ymm1
; AVX2-NEXT: vpminud (%rdi), %ymm0, %ymm2
; AVX2-NEXT: vpackusdw %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpminud 96(%rdi), %ymm0, %ymm2
; AVX2-NEXT: vpminud 64(%rdi), %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
; AVX2-NEXT: vpackuswb %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_usat_v32i32_v32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm1
; AVX512-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512-NEXT: vpmovusdb %zmm1, %xmm1
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; SKX-LABEL: trunc_usat_v32i32_v32i8:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa64 (%rdi), %zmm0
; SKX-NEXT: vmovdqa64 64(%rdi), %zmm1
; SKX-NEXT: vpmovusdb %zmm0, %xmm0
; SKX-NEXT: vpmovusdb %zmm1, %xmm1
; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq
  %a0 = load <32 x i32>, ptr %p0
  %1 = icmp ult <32 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %3 = trunc <32 x i32> %2 to <32 x i8>
  ret <32 x i8> %3
}