1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3,SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3,SSSE3 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST,AVX2-FAST-ALL 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST,AVX2-FAST-PERLANE 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F 10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL 11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL 12; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW 13; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW 14; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL 15; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL 16; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=SKX 17 18; 19; Signed saturation truncation to vXi32 20; 21 22define <2 x i32> @trunc_ssat_v2i64_v2i32(<2 x i64> %a0) { 23; SSE2-SSSE3-LABEL: trunc_ssat_v2i64_v2i32: 24; SSE2-SSSE3: # %bb.0: 25; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 26; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2 27; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm2 28; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 29; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm4 30; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4 31; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295] 32; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm3 33; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 34; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2 35; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 36; SSE2-SSSE3-NEXT: por %xmm2, %xmm3 37; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0 38; SSE2-SSSE3-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 39; SSE2-SSSE3-NEXT: por %xmm0, %xmm3 40; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm1 41; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 42; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm2 43; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm2 44; SSE2-SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 45; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 46; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 47; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 48; SSE2-SSSE3-NEXT: por %xmm0, %xmm1 49; SSE2-SSSE3-NEXT: pand %xmm1, %xmm3 50; SSE2-SSSE3-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 51; SSE2-SSSE3-NEXT: por %xmm3, %xmm1 52; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 53; SSE2-SSSE3-NEXT: retq 54; 55; SSE41-LABEL: trunc_ssat_v2i64_v2i32: 56; SSE41: # %bb.0: 57; SSE41-NEXT: movdqa %xmm0, %xmm1 58; SSE41-NEXT: movapd {{.*#+}} xmm2 = [2147483647,2147483647] 59; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = 
[2147483648,2147483648] 60; SSE41-NEXT: pxor %xmm3, %xmm0 61; SSE41-NEXT: pmovsxbd {{.*#+}} xmm4 = [4294967295,0,4294967295,0] 62; SSE41-NEXT: movdqa %xmm0, %xmm5 63; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 64; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 65; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 66; SSE41-NEXT: pand %xmm5, %xmm0 67; SSE41-NEXT: por %xmm4, %xmm0 68; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 69; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] 70; SSE41-NEXT: pxor %xmm2, %xmm3 71; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [0,4294967295,0,4294967295] 72; SSE41-NEXT: movdqa %xmm3, %xmm4 73; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 74; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 75; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 76; SSE41-NEXT: pand %xmm4, %xmm0 77; SSE41-NEXT: por %xmm3, %xmm0 78; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 79; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 80; SSE41-NEXT: retq 81; 82; AVX1-LABEL: trunc_ssat_v2i64_v2i32: 83; AVX1: # %bb.0: 84; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [2147483647,2147483647] 85; AVX1-NEXT: # xmm1 = mem[0,0] 86; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 87; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 88; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] 89; AVX1-NEXT: # xmm1 = mem[0,0] 90; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 91; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 92; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3] 93; AVX1-NEXT: retq 94; 95; AVX2-LABEL: trunc_ssat_v2i64_v2i32: 96; AVX2: # %bb.0: 97; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647] 98; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 99; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 100; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] 101; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 102; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 103; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3] 104; AVX2-NEXT: retq 105; 106; AVX512F-LABEL: 
trunc_ssat_v2i64_v2i32: 107; AVX512F: # %bb.0: 108; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 109; AVX512F-NEXT: vpmovsqd %zmm0, %ymm0 110; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 111; AVX512F-NEXT: vzeroupper 112; AVX512F-NEXT: retq 113; 114; AVX512VL-LABEL: trunc_ssat_v2i64_v2i32: 115; AVX512VL: # %bb.0: 116; AVX512VL-NEXT: vpmovsqd %xmm0, %xmm0 117; AVX512VL-NEXT: retq 118; 119; AVX512BW-LABEL: trunc_ssat_v2i64_v2i32: 120; AVX512BW: # %bb.0: 121; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 122; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0 123; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 124; AVX512BW-NEXT: vzeroupper 125; AVX512BW-NEXT: retq 126; 127; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i32: 128; AVX512BWVL: # %bb.0: 129; AVX512BWVL-NEXT: vpmovsqd %xmm0, %xmm0 130; AVX512BWVL-NEXT: retq 131; 132; SKX-LABEL: trunc_ssat_v2i64_v2i32: 133; SKX: # %bb.0: 134; SKX-NEXT: vpmovsqd %xmm0, %xmm0 135; SKX-NEXT: retq 136 %1 = icmp slt <2 x i64> %a0, <i64 2147483647, i64 2147483647> 137 %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 2147483647, i64 2147483647> 138 %3 = icmp sgt <2 x i64> %2, <i64 -2147483648, i64 -2147483648> 139 %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -2147483648, i64 -2147483648> 140 %5 = trunc <2 x i64> %4 to <2 x i32> 141 ret <2 x i32> %5 142} 143 144define void @trunc_ssat_v2i64_v2i32_store(<2 x i64> %a0, ptr %p1) { 145; SSE2-SSSE3-LABEL: trunc_ssat_v2i64_v2i32_store: 146; SSE2-SSSE3: # %bb.0: 147; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 148; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2 149; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm2 150; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 151; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm4 152; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4 153; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295] 154; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm3 155; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 156; SSE2-SSSE3-NEXT: pand %xmm4, 
%xmm2 157; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 158; SSE2-SSSE3-NEXT: por %xmm2, %xmm3 159; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0 160; SSE2-SSSE3-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 161; SSE2-SSSE3-NEXT: por %xmm0, %xmm3 162; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm1 163; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 164; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm2 165; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm2 166; SSE2-SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 167; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 168; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 169; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 170; SSE2-SSSE3-NEXT: por %xmm0, %xmm1 171; SSE2-SSSE3-NEXT: pand %xmm1, %xmm3 172; SSE2-SSSE3-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 173; SSE2-SSSE3-NEXT: por %xmm3, %xmm1 174; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 175; SSE2-SSSE3-NEXT: movq %xmm0, (%rdi) 176; SSE2-SSSE3-NEXT: retq 177; 178; SSE41-LABEL: trunc_ssat_v2i64_v2i32_store: 179; SSE41: # %bb.0: 180; SSE41-NEXT: movdqa %xmm0, %xmm1 181; SSE41-NEXT: movapd {{.*#+}} xmm2 = [2147483647,2147483647] 182; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] 183; SSE41-NEXT: pxor %xmm3, %xmm0 184; SSE41-NEXT: pmovsxbd {{.*#+}} xmm4 = [4294967295,0,4294967295,0] 185; SSE41-NEXT: movdqa %xmm0, %xmm5 186; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 187; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 188; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 189; SSE41-NEXT: pand %xmm5, %xmm0 190; SSE41-NEXT: por %xmm4, %xmm0 191; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 192; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] 193; SSE41-NEXT: pxor %xmm2, %xmm3 194; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [0,4294967295,0,4294967295] 195; SSE41-NEXT: movdqa %xmm3, %xmm4 196; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 197; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 198; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 199; SSE41-NEXT: pand %xmm4, %xmm0 200; 
SSE41-NEXT: por %xmm3, %xmm0 201; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 202; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 203; SSE41-NEXT: movq %xmm0, (%rdi) 204; SSE41-NEXT: retq 205; 206; AVX1-LABEL: trunc_ssat_v2i64_v2i32_store: 207; AVX1: # %bb.0: 208; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [2147483647,2147483647] 209; AVX1-NEXT: # xmm1 = mem[0,0] 210; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 211; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 212; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] 213; AVX1-NEXT: # xmm1 = mem[0,0] 214; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 215; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 216; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3] 217; AVX1-NEXT: vmovlpd %xmm0, (%rdi) 218; AVX1-NEXT: retq 219; 220; AVX2-LABEL: trunc_ssat_v2i64_v2i32_store: 221; AVX2: # %bb.0: 222; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2147483647,2147483647] 223; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 224; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 225; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] 226; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 227; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 228; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3] 229; AVX2-NEXT: vmovlpd %xmm0, (%rdi) 230; AVX2-NEXT: retq 231; 232; AVX512F-LABEL: trunc_ssat_v2i64_v2i32_store: 233; AVX512F: # %bb.0: 234; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 235; AVX512F-NEXT: vpmovsqd %zmm0, %ymm0 236; AVX512F-NEXT: vmovq %xmm0, (%rdi) 237; AVX512F-NEXT: vzeroupper 238; AVX512F-NEXT: retq 239; 240; AVX512VL-LABEL: trunc_ssat_v2i64_v2i32_store: 241; AVX512VL: # %bb.0: 242; AVX512VL-NEXT: vpmovsqd %xmm0, (%rdi) 243; AVX512VL-NEXT: retq 244; 245; AVX512BW-LABEL: trunc_ssat_v2i64_v2i32_store: 246; AVX512BW: # %bb.0: 247; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 248; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0 249; AVX512BW-NEXT: vmovq %xmm0, (%rdi) 250; AVX512BW-NEXT: vzeroupper 
251; AVX512BW-NEXT: retq 252; 253; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i32_store: 254; AVX512BWVL: # %bb.0: 255; AVX512BWVL-NEXT: vpmovsqd %xmm0, (%rdi) 256; AVX512BWVL-NEXT: retq 257; 258; SKX-LABEL: trunc_ssat_v2i64_v2i32_store: 259; SKX: # %bb.0: 260; SKX-NEXT: vpmovsqd %xmm0, (%rdi) 261; SKX-NEXT: retq 262 %1 = icmp slt <2 x i64> %a0, <i64 2147483647, i64 2147483647> 263 %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 2147483647, i64 2147483647> 264 %3 = icmp sgt <2 x i64> %2, <i64 -2147483648, i64 -2147483648> 265 %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -2147483648, i64 -2147483648> 266 %5 = trunc <2 x i64> %4 to <2 x i32> 267 store <2 x i32> %5, ptr %p1 268 ret void 269} 270 271define <4 x i32> @trunc_ssat_v4i64_v4i32(<4 x i64> %a0) { 272; SSE2-SSSE3-LABEL: trunc_ssat_v4i64_v4i32: 273; SSE2-SSSE3: # %bb.0: 274; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483647,2147483647] 275; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 276; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm4 277; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm4 278; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] 279; SSE2-SSSE3-NEXT: pxor %xmm6, %xmm6 280; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm5 281; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [4294967295,4294967295] 282; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm8 283; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm8 284; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm8[0,0,2,2] 285; SSE2-SSSE3-NEXT: pand %xmm5, %xmm4 286; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm8[1,1,3,3] 287; SSE2-SSSE3-NEXT: por %xmm4, %xmm5 288; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0 289; SSE2-SSSE3-NEXT: pandn %xmm3, %xmm5 290; SSE2-SSSE3-NEXT: por %xmm5, %xmm0 291; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm4 292; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm4 293; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] 294; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm5 295; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm7 296; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2] 297; SSE2-SSSE3-NEXT: pand %xmm5, 
%xmm4 298; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 299; SSE2-SSSE3-NEXT: por %xmm4, %xmm5 300; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1 301; SSE2-SSSE3-NEXT: pandn %xmm3, %xmm5 302; SSE2-SSSE3-NEXT: por %xmm1, %xmm5 303; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] 304; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm3 305; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm3 306; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 307; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm6 308; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm4 309; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [18446744069414584320,18446744069414584320] 310; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm3 311; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm3[0,0,2,2] 312; SSE2-SSSE3-NEXT: pand %xmm4, %xmm8 313; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 314; SSE2-SSSE3-NEXT: por %xmm8, %xmm3 315; SSE2-SSSE3-NEXT: pand %xmm3, %xmm5 316; SSE2-SSSE3-NEXT: pandn %xmm1, %xmm3 317; SSE2-SSSE3-NEXT: por %xmm5, %xmm3 318; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm2 319; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 320; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm4 321; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm2 322; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2] 323; SSE2-SSSE3-NEXT: pand %xmm4, %xmm5 324; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 325; SSE2-SSSE3-NEXT: por %xmm5, %xmm2 326; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 327; SSE2-SSSE3-NEXT: pandn %xmm1, %xmm2 328; SSE2-SSSE3-NEXT: por %xmm2, %xmm0 329; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2] 330; SSE2-SSSE3-NEXT: retq 331; 332; SSE41-LABEL: trunc_ssat_v4i64_v4i32: 333; SSE41: # %bb.0: 334; SSE41-NEXT: movdqa %xmm0, %xmm2 335; SSE41-NEXT: movapd {{.*#+}} xmm4 = [2147483647,2147483647] 336; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] 337; SSE41-NEXT: movdqa %xmm0, %xmm5 338; SSE41-NEXT: pxor %xmm3, %xmm5 339; SSE41-NEXT: pmovsxbd {{.*#+}} xmm6 = [4294967295,0,4294967295,0] 340; SSE41-NEXT: movdqa %xmm6, %xmm7 
341; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 342; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 343; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 344; SSE41-NEXT: pand %xmm5, %xmm0 345; SSE41-NEXT: por %xmm7, %xmm0 346; SSE41-NEXT: movapd %xmm4, %xmm5 347; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 348; SSE41-NEXT: movdqa %xmm1, %xmm0 349; SSE41-NEXT: pxor %xmm3, %xmm0 350; SSE41-NEXT: movdqa %xmm0, %xmm2 351; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 352; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 353; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 354; SSE41-NEXT: pand %xmm2, %xmm0 355; SSE41-NEXT: por %xmm6, %xmm0 356; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4 357; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] 358; SSE41-NEXT: movapd %xmm4, %xmm2 359; SSE41-NEXT: xorpd %xmm3, %xmm2 360; SSE41-NEXT: pmovsxbd {{.*#+}} xmm6 = [0,4294967295,0,4294967295] 361; SSE41-NEXT: movapd %xmm2, %xmm7 362; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 363; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 364; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 365; SSE41-NEXT: pand %xmm7, %xmm0 366; SSE41-NEXT: por %xmm2, %xmm0 367; SSE41-NEXT: movapd %xmm1, %xmm2 368; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 369; SSE41-NEXT: xorpd %xmm5, %xmm3 370; SSE41-NEXT: movapd %xmm3, %xmm4 371; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 372; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 373; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 374; SSE41-NEXT: pand %xmm4, %xmm0 375; SSE41-NEXT: por %xmm3, %xmm0 376; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 377; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] 378; SSE41-NEXT: movaps %xmm1, %xmm0 379; SSE41-NEXT: retq 380; 381; AVX1-LABEL: trunc_ssat_v4i64_v4i32: 382; AVX1: # %bb.0: 383; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [2147483647,2147483647] 384; AVX1-NEXT: # xmm1 = mem[0,0] 385; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 386; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm2 387; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 388; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3 389; AVX1-NEXT: vblendvpd 
%xmm3, %xmm0, %xmm1, %xmm0 390; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] 391; AVX1-NEXT: # xmm1 = mem[0,0] 392; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 393; AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 394; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 395; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm1, %xmm1 396; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2] 397; AVX1-NEXT: vzeroupper 398; AVX1-NEXT: retq 399; 400; AVX2-SLOW-LABEL: trunc_ssat_v4i64_v4i32: 401; AVX2-SLOW: # %bb.0: 402; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647] 403; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 404; AVX2-SLOW-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 405; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968] 406; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 407; AVX2-SLOW-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 408; AVX2-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1 409; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] 410; AVX2-SLOW-NEXT: vzeroupper 411; AVX2-SLOW-NEXT: retq 412; 413; AVX2-FAST-ALL-LABEL: trunc_ssat_v4i64_v4i32: 414; AVX2-FAST-ALL: # %bb.0: 415; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647] 416; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 417; AVX2-FAST-ALL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 418; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968] 419; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 420; AVX2-FAST-ALL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 421; AVX2-FAST-ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6] 422; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1] 423; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm1, %ymm0 424; AVX2-FAST-ALL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 425; AVX2-FAST-ALL-NEXT: vzeroupper 426; 
AVX2-FAST-ALL-NEXT: retq 427; 428; AVX2-FAST-PERLANE-LABEL: trunc_ssat_v4i64_v4i32: 429; AVX2-FAST-PERLANE: # %bb.0: 430; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2147483647,2147483647,2147483647,2147483647] 431; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 432; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 433; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968] 434; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 435; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 436; AVX2-FAST-PERLANE-NEXT: vextractf128 $1, %ymm0, %xmm1 437; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] 438; AVX2-FAST-PERLANE-NEXT: vzeroupper 439; AVX2-FAST-PERLANE-NEXT: retq 440; 441; AVX512F-LABEL: trunc_ssat_v4i64_v4i32: 442; AVX512F: # %bb.0: 443; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 444; AVX512F-NEXT: vpmovsqd %zmm0, %ymm0 445; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 446; AVX512F-NEXT: vzeroupper 447; AVX512F-NEXT: retq 448; 449; AVX512VL-LABEL: trunc_ssat_v4i64_v4i32: 450; AVX512VL: # %bb.0: 451; AVX512VL-NEXT: vpmovsqd %ymm0, %xmm0 452; AVX512VL-NEXT: vzeroupper 453; AVX512VL-NEXT: retq 454; 455; AVX512BW-LABEL: trunc_ssat_v4i64_v4i32: 456; AVX512BW: # %bb.0: 457; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 458; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0 459; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 460; AVX512BW-NEXT: vzeroupper 461; AVX512BW-NEXT: retq 462; 463; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i32: 464; AVX512BWVL: # %bb.0: 465; AVX512BWVL-NEXT: vpmovsqd %ymm0, %xmm0 466; AVX512BWVL-NEXT: vzeroupper 467; AVX512BWVL-NEXT: retq 468; 469; SKX-LABEL: trunc_ssat_v4i64_v4i32: 470; SKX: # %bb.0: 471; SKX-NEXT: vpmovsqd %ymm0, %xmm0 472; SKX-NEXT: vzeroupper 473; SKX-NEXT: retq 474 %1 = icmp slt <4 x i64> %a0, <i64 2147483647, i64 2147483647, i64 2147483647, i64 
2147483647> 475 %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647> 476 %3 = icmp sgt <4 x i64> %2, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648> 477 %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648> 478 %5 = trunc <4 x i64> %4 to <4 x i32> 479 ret <4 x i32> %5 480} 481 482 483define <8 x i32> @trunc_ssat_v8i64_v8i32(ptr %p0) "min-legal-vector-width"="256" { 484; SSE2-SSSE3-LABEL: trunc_ssat_v8i64_v8i32: 485; SSE2-SSSE3: # %bb.0: 486; SSE2-SSSE3-NEXT: movdqa (%rdi), %xmm3 487; SSE2-SSSE3-NEXT: movdqa 16(%rdi), %xmm5 488; SSE2-SSSE3-NEXT: movdqa 32(%rdi), %xmm7 489; SSE2-SSSE3-NEXT: movdqa 48(%rdi), %xmm1 490; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [2147483647,2147483647] 491; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648] 492; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2 493; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm2 494; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm2[1,1,3,3] 495; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm8 496; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm9 497; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [4294967295,4294967295] 498; SSE2-SSSE3-NEXT: movdqa %xmm6, %xmm10 499; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm10 500; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 501; SSE2-SSSE3-NEXT: pand %xmm9, %xmm11 502; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm10[1,1,3,3] 503; SSE2-SSSE3-NEXT: por %xmm11, %xmm2 504; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3 505; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm2 506; SSE2-SSSE3-NEXT: por %xmm3, %xmm2 507; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm3 508; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm3 509; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm3[1,1,3,3] 510; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm9 511; SSE2-SSSE3-NEXT: movdqa %xmm6, %xmm10 512; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm10 513; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 514; SSE2-SSSE3-NEXT: pand %xmm9, %xmm11 515; 
SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm10[1,1,3,3] 516; SSE2-SSSE3-NEXT: por %xmm11, %xmm3 517; SSE2-SSSE3-NEXT: pand %xmm3, %xmm5 518; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm3 519; SSE2-SSSE3-NEXT: por %xmm5, %xmm3 520; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm5 521; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm5 522; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm5[1,1,3,3] 523; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm9 524; SSE2-SSSE3-NEXT: movdqa %xmm6, %xmm10 525; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm10 526; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 527; SSE2-SSSE3-NEXT: pand %xmm9, %xmm11 528; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm10[1,1,3,3] 529; SSE2-SSSE3-NEXT: por %xmm11, %xmm5 530; SSE2-SSSE3-NEXT: pand %xmm5, %xmm7 531; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm5 532; SSE2-SSSE3-NEXT: por %xmm7, %xmm5 533; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm7 534; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm7 535; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm7[1,1,3,3] 536; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm9 537; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm6 538; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 539; SSE2-SSSE3-NEXT: pand %xmm9, %xmm7 540; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3] 541; SSE2-SSSE3-NEXT: por %xmm7, %xmm8 542; SSE2-SSSE3-NEXT: pand %xmm8, %xmm1 543; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm8 544; SSE2-SSSE3-NEXT: por %xmm1, %xmm8 545; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [18446744071562067968,18446744071562067968] 546; SSE2-SSSE3-NEXT: movdqa %xmm8, %xmm1 547; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm1 548; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm1[1,1,3,3] 549; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm6 550; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm9 551; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [18446744069414584320,18446744069414584320] 552; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm1 553; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm1[0,0,2,2] 554; SSE2-SSSE3-NEXT: pand %xmm9, %xmm10 555; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm1[1,1,3,3] 556; SSE2-SSSE3-NEXT: por %xmm10, %xmm9 
557; SSE2-SSSE3-NEXT: pand %xmm9, %xmm8 558; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm9 559; SSE2-SSSE3-NEXT: por %xmm8, %xmm9 560; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm1 561; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm1 562; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm1[1,1,3,3] 563; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm8 564; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm1 565; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm1[0,0,2,2] 566; SSE2-SSSE3-NEXT: pand %xmm8, %xmm10 567; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 568; SSE2-SSSE3-NEXT: por %xmm10, %xmm1 569; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5 570; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm1 571; SSE2-SSSE3-NEXT: por %xmm5, %xmm1 572; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm9[0,2] 573; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm5 574; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm5 575; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm5[1,1,3,3] 576; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm8 577; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm5 578; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2] 579; SSE2-SSSE3-NEXT: pand %xmm8, %xmm9 580; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 581; SSE2-SSSE3-NEXT: por %xmm9, %xmm5 582; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3 583; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm5 584; SSE2-SSSE3-NEXT: por %xmm3, %xmm5 585; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0 586; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 587; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm3 588; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm0 589; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm0[0,0,2,2] 590; SSE2-SSSE3-NEXT: pand %xmm3, %xmm6 591; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 592; SSE2-SSSE3-NEXT: por %xmm6, %xmm0 593; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2 594; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm0 595; SSE2-SSSE3-NEXT: por %xmm2, %xmm0 596; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2] 597; SSE2-SSSE3-NEXT: retq 598; 599; SSE41-LABEL: trunc_ssat_v8i64_v8i32: 600; SSE41: # %bb.0: 601; SSE41-NEXT: movdqa (%rdi), %xmm5 602; SSE41-NEXT: movdqa 
16(%rdi), %xmm8 603; SSE41-NEXT: movdqa 32(%rdi), %xmm7 604; SSE41-NEXT: movdqa 48(%rdi), %xmm2 605; SSE41-NEXT: movapd {{.*#+}} xmm1 = [2147483647,2147483647] 606; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] 607; SSE41-NEXT: movdqa %xmm5, %xmm4 608; SSE41-NEXT: pxor %xmm3, %xmm4 609; SSE41-NEXT: pmovsxbd {{.*#+}} xmm6 = [4294967295,0,4294967295,0] 610; SSE41-NEXT: movdqa %xmm6, %xmm9 611; SSE41-NEXT: pcmpgtd %xmm4, %xmm9 612; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 613; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] 614; SSE41-NEXT: pand %xmm4, %xmm0 615; SSE41-NEXT: por %xmm9, %xmm0 616; SSE41-NEXT: movapd %xmm1, %xmm4 617; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm4 618; SSE41-NEXT: movdqa %xmm8, %xmm5 619; SSE41-NEXT: pxor %xmm3, %xmm5 620; SSE41-NEXT: movdqa %xmm6, %xmm9 621; SSE41-NEXT: pcmpgtd %xmm5, %xmm9 622; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 623; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] 624; SSE41-NEXT: pand %xmm5, %xmm0 625; SSE41-NEXT: por %xmm9, %xmm0 626; SSE41-NEXT: movapd %xmm1, %xmm5 627; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm5 628; SSE41-NEXT: movdqa %xmm7, %xmm8 629; SSE41-NEXT: pxor %xmm3, %xmm8 630; SSE41-NEXT: movdqa %xmm6, %xmm9 631; SSE41-NEXT: pcmpgtd %xmm8, %xmm9 632; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 633; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] 634; SSE41-NEXT: pand %xmm8, %xmm0 635; SSE41-NEXT: por %xmm9, %xmm0 636; SSE41-NEXT: movapd %xmm1, %xmm8 637; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm8 638; SSE41-NEXT: movdqa %xmm2, %xmm0 639; SSE41-NEXT: pxor %xmm3, %xmm0 640; SSE41-NEXT: movdqa %xmm0, %xmm7 641; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 642; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 643; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 644; SSE41-NEXT: pand %xmm7, %xmm0 645; SSE41-NEXT: por %xmm6, %xmm0 646; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 647; SSE41-NEXT: movapd {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968] 648; SSE41-NEXT: movapd %xmm1, %xmm7 649; SSE41-NEXT: xorpd %xmm3, %xmm7 650; SSE41-NEXT: 
pmovsxbd {{.*#+}} xmm6 = [0,4294967295,0,4294967295] 651; SSE41-NEXT: movapd %xmm7, %xmm9 652; SSE41-NEXT: pcmpeqd %xmm6, %xmm9 653; SSE41-NEXT: pcmpgtd %xmm6, %xmm7 654; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 655; SSE41-NEXT: pand %xmm9, %xmm0 656; SSE41-NEXT: por %xmm7, %xmm0 657; SSE41-NEXT: movapd %xmm2, %xmm7 658; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm7 659; SSE41-NEXT: movapd %xmm8, %xmm1 660; SSE41-NEXT: xorpd %xmm3, %xmm1 661; SSE41-NEXT: movapd %xmm1, %xmm9 662; SSE41-NEXT: pcmpeqd %xmm6, %xmm9 663; SSE41-NEXT: pcmpgtd %xmm6, %xmm1 664; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 665; SSE41-NEXT: pand %xmm9, %xmm0 666; SSE41-NEXT: por %xmm1, %xmm0 667; SSE41-NEXT: movapd %xmm2, %xmm1 668; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1 669; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm7[0,2] 670; SSE41-NEXT: movapd %xmm5, %xmm7 671; SSE41-NEXT: xorpd %xmm3, %xmm7 672; SSE41-NEXT: movapd %xmm7, %xmm8 673; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 674; SSE41-NEXT: pcmpgtd %xmm6, %xmm7 675; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 676; SSE41-NEXT: pand %xmm8, %xmm0 677; SSE41-NEXT: por %xmm7, %xmm0 678; SSE41-NEXT: movapd %xmm2, %xmm7 679; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm7 680; SSE41-NEXT: xorpd %xmm4, %xmm3 681; SSE41-NEXT: movapd %xmm3, %xmm5 682; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 683; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 684; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 685; SSE41-NEXT: pand %xmm5, %xmm0 686; SSE41-NEXT: por %xmm3, %xmm0 687; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 688; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm7[0,2] 689; SSE41-NEXT: movaps %xmm2, %xmm0 690; SSE41-NEXT: retq 691; 692; AVX1-LABEL: trunc_ssat_v8i64_v8i32: 693; AVX1: # %bb.0: 694; AVX1-NEXT: vmovdqa (%rdi), %xmm0 695; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 696; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2 697; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3 698; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [2147483647,2147483647] 699; AVX1-NEXT: # xmm4 = mem[0,0] 700; AVX1-NEXT: 
vpcmpgtq %xmm1, %xmm4, %xmm5 701; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm4, %xmm1 702; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm5 703; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm4, %xmm3 704; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm5 705; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 706; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm5 707; AVX1-NEXT: vblendvpd %xmm5, %xmm2, %xmm4, %xmm2 708; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [18446744071562067968,18446744071562067968] 709; AVX1-NEXT: # xmm4 = mem[0,0] 710; AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm5 711; AVX1-NEXT: vblendvpd %xmm5, %xmm2, %xmm4, %xmm2 712; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 713; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 714; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm5 715; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm4, %xmm3 716; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm5 717; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm4, %xmm1 718; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 719; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 720; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6] 721; AVX1-NEXT: retq 722; 723; AVX2-SLOW-LABEL: trunc_ssat_v8i64_v8i32: 724; AVX2-SLOW: # %bb.0: 725; AVX2-SLOW-NEXT: vmovdqa (%rdi), %ymm0 726; AVX2-SLOW-NEXT: vmovdqa 32(%rdi), %ymm1 727; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647] 728; AVX2-SLOW-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3 729; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 730; AVX2-SLOW-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3 731; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 732; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968] 733; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3 734; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 735; AVX2-SLOW-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3 736; AVX2-SLOW-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 737; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] 738; 
AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 739; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] 740; AVX2-SLOW-NEXT: retq 741; 742; AVX2-FAST-ALL-LABEL: trunc_ssat_v8i64_v8i32: 743; AVX2-FAST-ALL: # %bb.0: 744; AVX2-FAST-ALL-NEXT: vmovdqa (%rdi), %ymm0 745; AVX2-FAST-ALL-NEXT: vmovdqa 32(%rdi), %ymm1 746; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647] 747; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3 748; AVX2-FAST-ALL-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 749; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3 750; AVX2-FAST-ALL-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 751; AVX2-FAST-ALL-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968] 752; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3 753; AVX2-FAST-ALL-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 754; AVX2-FAST-ALL-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3 755; AVX2-FAST-ALL-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 756; AVX2-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] 757; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0 758; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1 759; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 760; AVX2-FAST-ALL-NEXT: retq 761; 762; AVX2-FAST-PERLANE-LABEL: trunc_ssat_v8i64_v8i32: 763; AVX2-FAST-PERLANE: # %bb.0: 764; AVX2-FAST-PERLANE-NEXT: vmovdqa (%rdi), %ymm0 765; AVX2-FAST-PERLANE-NEXT: vmovdqa 32(%rdi), %ymm1 766; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2147483647,2147483647,2147483647,2147483647] 767; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3 768; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 769; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3 770; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 771; AVX2-FAST-PERLANE-NEXT: vpbroadcastq {{.*#+}} ymm2 = 
; NOTE(review): the SSE/AVX/AVX512/SKX CHECK lines in this region are FileCheck assertions autogenerated by utils/update_llc_test_checks.py (see file header); regenerate with that script rather than editing by hand. The IR below tests signed-saturating truncation <2 x i64> -> <2 x i16> via icmp/select clamps to [-32768, 32767] followed by trunc.
[18446744071562067968,18446744071562067968,18446744071562067968,18446744071562067968] 772; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3 773; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 774; AVX2-FAST-PERLANE-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3 775; AVX2-FAST-PERLANE-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 776; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] 777; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 778; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] 779; AVX2-FAST-PERLANE-NEXT: retq 780; 781; AVX512-LABEL: trunc_ssat_v8i64_v8i32: 782; AVX512: # %bb.0: 783; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 784; AVX512-NEXT: vpmovsqd %zmm0, %ymm0 785; AVX512-NEXT: retq 786; 787; SKX-LABEL: trunc_ssat_v8i64_v8i32: 788; SKX: # %bb.0: 789; SKX-NEXT: vmovdqa (%rdi), %ymm0 790; SKX-NEXT: vmovdqa 32(%rdi), %ymm1 791; SKX-NEXT: vpmovsqd %ymm0, %xmm0 792; SKX-NEXT: vpmovsqd %ymm1, %xmm1 793; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 794; SKX-NEXT: retq 795 %a0 = load <8 x i64>, ptr %p0 796 %1 = icmp slt <8 x i64> %a0, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647> 797 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647> 798 %3 = icmp sgt <8 x i64> %2, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648> 799 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648> 800 %5 = trunc <8 x i64> %4 to <8 x i32> 801 ret <8 x i32> %5 802} 803 804; 805; Signed saturation truncation to vXi16 806; 807 808define <2 x i16> @trunc_ssat_v2i64_v2i16(<2 x i64> %a0) { 809;
SSE2-SSSE3-LABEL: trunc_ssat_v2i64_v2i16: 810; SSE2-SSSE3: # %bb.0: 811; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 812; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2 813; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm2 814; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 815; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm4 816; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4 817; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147516415,2147516415] 818; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm3 819; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 820; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2 821; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 822; SSE2-SSSE3-NEXT: por %xmm2, %xmm3 823; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0 824; SSE2-SSSE3-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 825; SSE2-SSSE3-NEXT: por %xmm0, %xmm3 826; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm1 827; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 828; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm2 829; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm2 830; SSE2-SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 831; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 832; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 833; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 834; SSE2-SSSE3-NEXT: por %xmm0, %xmm1 835; SSE2-SSSE3-NEXT: pand %xmm1, %xmm3 836; SSE2-SSSE3-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 837; SSE2-SSSE3-NEXT: por %xmm3, %xmm1 838; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 839; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 840; SSE2-SSSE3-NEXT: retq 841; 842; SSE41-LABEL: trunc_ssat_v2i64_v2i16: 843; SSE41: # %bb.0: 844; SSE41-NEXT: movdqa %xmm0, %xmm1 845; SSE41-NEXT: movapd {{.*#+}} xmm2 = [32767,32767] 846; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] 847; SSE41-NEXT: pxor %xmm3, %xmm0 848; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = [2147516415,2147516415] 849; SSE41-NEXT: movdqa %xmm0, %xmm5 850; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 851; SSE41-NEXT: pcmpgtd %xmm0,
%xmm4 852; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 853; SSE41-NEXT: pand %xmm5, %xmm0 854; SSE41-NEXT: por %xmm4, %xmm0 855; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 856; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 857; SSE41-NEXT: pxor %xmm2, %xmm3 858; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [18446744071562035200,18446744071562035200] 859; SSE41-NEXT: movdqa %xmm3, %xmm4 860; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 861; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 862; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 863; SSE41-NEXT: pand %xmm4, %xmm0 864; SSE41-NEXT: por %xmm3, %xmm0 865; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 866; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 867; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 868; SSE41-NEXT: retq 869; 870; AVX1-LABEL: trunc_ssat_v2i64_v2i16: 871; AVX1: # %bb.0: 872; AVX1-NEXT: vpmovsxwq {{.*#+}} xmm1 = [32767,32767] 873; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 874; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 875; AVX1-NEXT: vpmovsxwq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 876; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 877; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 878; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3] 879; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 880; AVX1-NEXT: retq 881; 882; AVX2-SLOW-LABEL: trunc_ssat_v2i64_v2i16: 883; AVX2-SLOW: # %bb.0: 884; AVX2-SLOW-NEXT: vpmovsxwq {{.*#+}} xmm1 = [32767,32767] 885; AVX2-SLOW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 886; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 887; AVX2-SLOW-NEXT: vpmovsxwq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 888; AVX2-SLOW-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 889; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 890; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3] 891; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 892; AVX2-SLOW-NEXT: retq 893; 894; AVX2-FAST-LABEL: trunc_ssat_v2i64_v2i16: 895;
AVX2-FAST: # %bb.0: 896; AVX2-FAST-NEXT: vpmovsxwq {{.*#+}} xmm1 = [32767,32767] 897; AVX2-FAST-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 898; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 899; AVX2-FAST-NEXT: vpmovsxwq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 900; AVX2-FAST-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 901; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 902; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] 903; AVX2-FAST-NEXT: retq 904; 905; AVX512F-LABEL: trunc_ssat_v2i64_v2i16: 906; AVX512F: # %bb.0: 907; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 908; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0 909; AVX512F-NEXT: vzeroupper 910; AVX512F-NEXT: retq 911; 912; AVX512VL-LABEL: trunc_ssat_v2i64_v2i16: 913; AVX512VL: # %bb.0: 914; AVX512VL-NEXT: vpmovsqw %xmm0, %xmm0 915; AVX512VL-NEXT: retq 916; 917; AVX512BW-LABEL: trunc_ssat_v2i64_v2i16: 918; AVX512BW: # %bb.0: 919; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 920; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0 921; AVX512BW-NEXT: vzeroupper 922; AVX512BW-NEXT: retq 923; 924; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i16: 925; AVX512BWVL: # %bb.0: 926; AVX512BWVL-NEXT: vpmovsqw %xmm0, %xmm0 927; AVX512BWVL-NEXT: retq 928; 929; SKX-LABEL: trunc_ssat_v2i64_v2i16: 930; SKX: # %bb.0: 931; SKX-NEXT: vpmovsqw %xmm0, %xmm0 932; SKX-NEXT: retq 933 %1 = icmp slt <2 x i64> %a0, <i64 32767, i64 32767> 934 %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 32767, i64 32767> 935 %3 = icmp sgt <2 x i64> %2, <i64 -32768, i64 -32768> 936 %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -32768, i64 -32768> 937 %5 = trunc <2 x i64> %4 to <2 x i16> 938 ret <2 x i16> %5 939} 940 941define void @trunc_ssat_v2i64_v2i16_store(<2 x i64> %a0, ptr%p1) { 942; SSE2-SSSE3-LABEL: trunc_ssat_v2i64_v2i16_store: 943; SSE2-SSSE3: # %bb.0: 944; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 945; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm2 946; SSE2-SSSE3-NEXT:
; NOTE(review): autogenerated FileCheck assertions (utils/update_llc_test_checks.py, per file header) for @trunc_ssat_v2i64_v2i16_store — same clamp-to-[-32768,32767] pattern as the non-store variant, plus a store of the truncated <2 x i16> result; do not hand-edit, regenerate instead.
pxor %xmm1, %xmm2 947; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 948; SSE2-SSSE3-NEXT: pxor %xmm4, %xmm4 949; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4 950; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147516415,2147516415] 951; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm3 952; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 953; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2 954; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 955; SSE2-SSSE3-NEXT: por %xmm2, %xmm3 956; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0 957; SSE2-SSSE3-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 958; SSE2-SSSE3-NEXT: por %xmm0, %xmm3 959; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm1 960; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 961; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm2 962; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm2 963; SSE2-SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 964; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 965; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 966; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 967; SSE2-SSSE3-NEXT: por %xmm0, %xmm1 968; SSE2-SSSE3-NEXT: pand %xmm1, %xmm3 969; SSE2-SSSE3-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 970; SSE2-SSSE3-NEXT: por %xmm3, %xmm1 971; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 972; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 973; SSE2-SSSE3-NEXT: movd %xmm0, (%rdi) 974; SSE2-SSSE3-NEXT: retq 975; 976; SSE41-LABEL: trunc_ssat_v2i64_v2i16_store: 977; SSE41: # %bb.0: 978; SSE41-NEXT: movdqa %xmm0, %xmm1 979; SSE41-NEXT: movapd {{.*#+}} xmm2 = [32767,32767] 980; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] 981; SSE41-NEXT: pxor %xmm3, %xmm0 982; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = [2147516415,2147516415] 983; SSE41-NEXT: movdqa %xmm0, %xmm5 984; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 985; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 986; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 987; SSE41-NEXT: pand %xmm5, %xmm0 988; SSE41-NEXT: por %xmm4, %xmm0 989; SSE41-NEXT: blendvpd
%xmm0, %xmm1, %xmm2 990; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 991; SSE41-NEXT: pxor %xmm2, %xmm3 992; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [18446744071562035200,18446744071562035200] 993; SSE41-NEXT: movdqa %xmm3, %xmm4 994; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 995; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 996; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 997; SSE41-NEXT: pand %xmm4, %xmm0 998; SSE41-NEXT: por %xmm3, %xmm0 999; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 1000; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 1001; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1002; SSE41-NEXT: movd %xmm0, (%rdi) 1003; SSE41-NEXT: retq 1004; 1005; AVX1-LABEL: trunc_ssat_v2i64_v2i16_store: 1006; AVX1: # %bb.0: 1007; AVX1-NEXT: vpmovsxwq {{.*#+}} xmm1 = [32767,32767] 1008; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1009; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1010; AVX1-NEXT: vpmovsxwq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1011; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1012; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1013; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3] 1014; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1015; AVX1-NEXT: vmovd %xmm0, (%rdi) 1016; AVX1-NEXT: retq 1017; 1018; AVX2-SLOW-LABEL: trunc_ssat_v2i64_v2i16_store: 1019; AVX2-SLOW: # %bb.0: 1020; AVX2-SLOW-NEXT: vpmovsxwq {{.*#+}} xmm1 = [32767,32767] 1021; AVX2-SLOW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1022; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1023; AVX2-SLOW-NEXT: vpmovsxwq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1024; AVX2-SLOW-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1025; AVX2-SLOW-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1026; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3] 1027; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1028; AVX2-SLOW-NEXT: vmovd %xmm0, (%rdi) 1029; AVX2-SLOW-NEXT: retq 1030; 1031; AVX2-FAST-LABEL: trunc_ssat_v2i64_v2i16_store:
1032; AVX2-FAST: # %bb.0: 1033; AVX2-FAST-NEXT: vpmovsxwq {{.*#+}} xmm1 = [32767,32767] 1034; AVX2-FAST-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1035; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1036; AVX2-FAST-NEXT: vpmovsxwq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1037; AVX2-FAST-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 1038; AVX2-FAST-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1039; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,u,u,u,u,u,u,u,u,u,u,u,u] 1040; AVX2-FAST-NEXT: vmovd %xmm0, (%rdi) 1041; AVX2-FAST-NEXT: retq 1042; 1043; AVX512F-LABEL: trunc_ssat_v2i64_v2i16_store: 1044; AVX512F: # %bb.0: 1045; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1046; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0 1047; AVX512F-NEXT: vmovd %xmm0, (%rdi) 1048; AVX512F-NEXT: vzeroupper 1049; AVX512F-NEXT: retq 1050; 1051; AVX512VL-LABEL: trunc_ssat_v2i64_v2i16_store: 1052; AVX512VL: # %bb.0: 1053; AVX512VL-NEXT: vpmovsqw %xmm0, (%rdi) 1054; AVX512VL-NEXT: retq 1055; 1056; AVX512BW-LABEL: trunc_ssat_v2i64_v2i16_store: 1057; AVX512BW: # %bb.0: 1058; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1059; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0 1060; AVX512BW-NEXT: vmovd %xmm0, (%rdi) 1061; AVX512BW-NEXT: vzeroupper 1062; AVX512BW-NEXT: retq 1063; 1064; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i16_store: 1065; AVX512BWVL: # %bb.0: 1066; AVX512BWVL-NEXT: vpmovsqw %xmm0, (%rdi) 1067; AVX512BWVL-NEXT: retq 1068; 1069; SKX-LABEL: trunc_ssat_v2i64_v2i16_store: 1070; SKX: # %bb.0: 1071; SKX-NEXT: vpmovsqw %xmm0, (%rdi) 1072; SKX-NEXT: retq 1073 %1 = icmp slt <2 x i64> %a0, <i64 32767, i64 32767> 1074 %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 32767, i64 32767> 1075 %3 = icmp sgt <2 x i64> %2, <i64 -32768, i64 -32768> 1076 %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -32768, i64 -32768> 1077 %5 = trunc <2 x i64> %4 to <2 x i16> 1078 store <2 x i16> %5, ptr%p1 1079 ret void 1080} 1081 1082define <4 x i16> @trunc_ssat_v4i64_v4i16(<4 x i64> %a0) {
; NOTE(review): autogenerated FileCheck assertions (utils/update_llc_test_checks.py, per file header) for @trunc_ssat_v4i64_v4i16 — signed-saturating <4 x i64> -> <4 x i16> truncation via icmp/select clamps to [-32768, 32767]; regenerate with the script rather than editing by hand.
1083; SSE2-SSSE3-LABEL: trunc_ssat_v4i64_v4i16: 1084; SSE2-SSSE3: # %bb.0: 1085; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [32767,32767] 1086; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 1087; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm4 1088; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm4 1089; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] 1090; SSE2-SSSE3-NEXT: pxor %xmm6, %xmm6 1091; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm5 1092; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [2147516415,2147516415] 1093; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm8 1094; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm8 1095; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm8[0,0,2,2] 1096; SSE2-SSSE3-NEXT: pand %xmm5, %xmm4 1097; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm8[1,1,3,3] 1098; SSE2-SSSE3-NEXT: por %xmm4, %xmm5 1099; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0 1100; SSE2-SSSE3-NEXT: pandn %xmm3, %xmm5 1101; SSE2-SSSE3-NEXT: por %xmm5, %xmm0 1102; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm4 1103; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm4 1104; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] 1105; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm5 1106; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm7 1107; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2] 1108; SSE2-SSSE3-NEXT: pand %xmm5, %xmm4 1109; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1110; SSE2-SSSE3-NEXT: por %xmm4, %xmm5 1111; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1 1112; SSE2-SSSE3-NEXT: pandn %xmm3, %xmm5 1113; SSE2-SSSE3-NEXT: por %xmm1, %xmm5 1114; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1115; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm3 1116; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm3 1117; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 1118; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm6 1119; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm4 1120; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [18446744071562035200,18446744071562035200] 1121; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm3 1122; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm3[0,0,2,2] 1123;
SSE2-SSSE3-NEXT: pand %xmm4, %xmm8 1124; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1125; SSE2-SSSE3-NEXT: por %xmm8, %xmm3 1126; SSE2-SSSE3-NEXT: pand %xmm3, %xmm5 1127; SSE2-SSSE3-NEXT: pandn %xmm1, %xmm3 1128; SSE2-SSSE3-NEXT: por %xmm5, %xmm3 1129; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm2 1130; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 1131; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm4 1132; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm2 1133; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2] 1134; SSE2-SSSE3-NEXT: pand %xmm4, %xmm5 1135; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1136; SSE2-SSSE3-NEXT: por %xmm5, %xmm2 1137; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 1138; SSE2-SSSE3-NEXT: pandn %xmm1, %xmm2 1139; SSE2-SSSE3-NEXT: por %xmm2, %xmm0 1140; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm0 1141; SSE2-SSSE3-NEXT: packssdw %xmm0, %xmm0 1142; SSE2-SSSE3-NEXT: retq 1143; 1144; SSE41-LABEL: trunc_ssat_v4i64_v4i16: 1145; SSE41: # %bb.0: 1146; SSE41-NEXT: movdqa %xmm0, %xmm2 1147; SSE41-NEXT: movapd {{.*#+}} xmm4 = [32767,32767] 1148; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] 1149; SSE41-NEXT: movdqa %xmm0, %xmm5 1150; SSE41-NEXT: pxor %xmm3, %xmm5 1151; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147516415,2147516415] 1152; SSE41-NEXT: movdqa %xmm6, %xmm7 1153; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 1154; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 1155; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 1156; SSE41-NEXT: pand %xmm5, %xmm0 1157; SSE41-NEXT: por %xmm7, %xmm0 1158; SSE41-NEXT: movapd %xmm4, %xmm5 1159; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 1160; SSE41-NEXT: movdqa %xmm1, %xmm0 1161; SSE41-NEXT: pxor %xmm3, %xmm0 1162; SSE41-NEXT: movdqa %xmm0, %xmm2 1163; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 1164; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 1165; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 1166; SSE41-NEXT: pand %xmm2, %xmm0 1167; SSE41-NEXT: por %xmm6, %xmm0 1168; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4 1169; SSE41-NEXT: movapd {{.*#+}} xmm1 =
[18446744073709518848,18446744073709518848] 1170; SSE41-NEXT: movapd %xmm4, %xmm2 1171; SSE41-NEXT: xorpd %xmm3, %xmm2 1172; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562035200,18446744071562035200] 1173; SSE41-NEXT: movapd %xmm2, %xmm7 1174; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 1175; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 1176; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 1177; SSE41-NEXT: pand %xmm7, %xmm0 1178; SSE41-NEXT: por %xmm2, %xmm0 1179; SSE41-NEXT: movapd %xmm1, %xmm2 1180; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1181; SSE41-NEXT: xorpd %xmm5, %xmm3 1182; SSE41-NEXT: movapd %xmm3, %xmm4 1183; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 1184; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 1185; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 1186; SSE41-NEXT: pand %xmm4, %xmm0 1187; SSE41-NEXT: por %xmm3, %xmm0 1188; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 1189; SSE41-NEXT: packssdw %xmm2, %xmm1 1190; SSE41-NEXT: packssdw %xmm1, %xmm1 1191; SSE41-NEXT: movdqa %xmm1, %xmm0 1192; SSE41-NEXT: retq 1193; 1194; AVX1-LABEL: trunc_ssat_v4i64_v4i16: 1195; AVX1: # %bb.0: 1196; AVX1-NEXT: vpmovsxwq {{.*#+}} xmm1 = [32767,32767] 1197; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1198; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm2 1199; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1200; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3 1201; AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 1202; AVX1-NEXT: vpmovsxwq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1203; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 1204; AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 1205; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 1206; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm1, %xmm1 1207; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0 1208; AVX1-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1209; AVX1-NEXT: vzeroupper 1210; AVX1-NEXT: retq 1211; 1212; AVX2-LABEL: trunc_ssat_v4i64_v4i16: 1213; AVX2: # %bb.0: 1214; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [32767,32767,32767,32767] 1215; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 1216;
AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1217; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848] 1218; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 1219; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1220; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1221; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1222; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1223; AVX2-NEXT: vzeroupper 1224; AVX2-NEXT: retq 1225; 1226; AVX512F-LABEL: trunc_ssat_v4i64_v4i16: 1227; AVX512F: # %bb.0: 1228; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1229; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0 1230; AVX512F-NEXT: vzeroupper 1231; AVX512F-NEXT: retq 1232; 1233; AVX512VL-LABEL: trunc_ssat_v4i64_v4i16: 1234; AVX512VL: # %bb.0: 1235; AVX512VL-NEXT: vpmovsqw %ymm0, %xmm0 1236; AVX512VL-NEXT: vzeroupper 1237; AVX512VL-NEXT: retq 1238; 1239; AVX512BW-LABEL: trunc_ssat_v4i64_v4i16: 1240; AVX512BW: # %bb.0: 1241; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1242; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0 1243; AVX512BW-NEXT: vzeroupper 1244; AVX512BW-NEXT: retq 1245; 1246; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i16: 1247; AVX512BWVL: # %bb.0: 1248; AVX512BWVL-NEXT: vpmovsqw %ymm0, %xmm0 1249; AVX512BWVL-NEXT: vzeroupper 1250; AVX512BWVL-NEXT: retq 1251; 1252; SKX-LABEL: trunc_ssat_v4i64_v4i16: 1253; SKX: # %bb.0: 1254; SKX-NEXT: vpmovsqw %ymm0, %xmm0 1255; SKX-NEXT: vzeroupper 1256; SKX-NEXT: retq 1257 %1 = icmp slt <4 x i64> %a0, <i64 32767, i64 32767, i64 32767, i64 32767> 1258 %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767> 1259 %3 = icmp sgt <4 x i64> %2, <i64 -32768, i64 -32768, i64 -32768, i64 -32768> 1260 %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768> 1261 %5 = trunc <4 x i64> %4 to <4 x i16> 1262 ret <4 x i16> %5 1263} 1264 1265define void @trunc_ssat_v4i64_v4i16_store(<4 x i64> %a0, ptr%p1) { 1266; SSE2-SSSE3-LABEL:
; NOTE(review): autogenerated FileCheck assertions (utils/update_llc_test_checks.py, per file header) for @trunc_ssat_v4i64_v4i16_store — same <4 x i64> -> <4 x i16> saturating-truncate pattern as the non-store variant, with the result stored through %p1; regenerate with the script rather than editing by hand.
trunc_ssat_v4i64_v4i16_store: 1267; SSE2-SSSE3: # %bb.0: 1268; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [32767,32767] 1269; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 1270; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm3 1271; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm3 1272; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,1,3,3] 1273; SSE2-SSSE3-NEXT: pxor %xmm6, %xmm6 1274; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm5 1275; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [2147516415,2147516415] 1276; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm8 1277; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm8 1278; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2] 1279; SSE2-SSSE3-NEXT: pand %xmm5, %xmm9 1280; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm8[1,1,3,3] 1281; SSE2-SSSE3-NEXT: por %xmm9, %xmm3 1282; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0 1283; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm3 1284; SSE2-SSSE3-NEXT: por %xmm0, %xmm3 1285; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0 1286; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0 1287; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3] 1288; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm5 1289; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm7 1290; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 1291; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0 1292; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1293; SSE2-SSSE3-NEXT: por %xmm0, %xmm5 1294; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1 1295; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm5 1296; SSE2-SSSE3-NEXT: por %xmm1, %xmm5 1297; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18446744073709518848,18446744073709518848] 1298; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm1 1299; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1 1300; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3] 1301; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm6 1302; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm4 1303; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [18446744071562035200,18446744071562035200] 1304; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm1 1305; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm1[0,0,2,2] 1306; SSE2-SSSE3-NEXT: pand
%xmm4, %xmm8 1307; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1308; SSE2-SSSE3-NEXT: por %xmm8, %xmm1 1309; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5 1310; SSE2-SSSE3-NEXT: pandn %xmm0, %xmm1 1311; SSE2-SSSE3-NEXT: por %xmm5, %xmm1 1312; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm2 1313; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 1314; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm4 1315; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm2 1316; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2] 1317; SSE2-SSSE3-NEXT: pand %xmm4, %xmm5 1318; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1319; SSE2-SSSE3-NEXT: por %xmm5, %xmm2 1320; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3 1321; SSE2-SSSE3-NEXT: pandn %xmm0, %xmm2 1322; SSE2-SSSE3-NEXT: por %xmm3, %xmm2 1323; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm2 1324; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm2 1325; SSE2-SSSE3-NEXT: movq %xmm2, (%rdi) 1326; SSE2-SSSE3-NEXT: retq 1327; 1328; SSE41-LABEL: trunc_ssat_v4i64_v4i16_store: 1329; SSE41: # %bb.0: 1330; SSE41-NEXT: movdqa %xmm0, %xmm2 1331; SSE41-NEXT: movapd {{.*#+}} xmm4 = [32767,32767] 1332; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] 1333; SSE41-NEXT: movdqa %xmm0, %xmm5 1334; SSE41-NEXT: pxor %xmm3, %xmm5 1335; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147516415,2147516415] 1336; SSE41-NEXT: movdqa %xmm6, %xmm7 1337; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 1338; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 1339; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 1340; SSE41-NEXT: pand %xmm5, %xmm0 1341; SSE41-NEXT: por %xmm7, %xmm0 1342; SSE41-NEXT: movapd %xmm4, %xmm5 1343; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 1344; SSE41-NEXT: movdqa %xmm1, %xmm0 1345; SSE41-NEXT: pxor %xmm3, %xmm0 1346; SSE41-NEXT: movdqa %xmm0, %xmm2 1347; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 1348; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 1349; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 1350; SSE41-NEXT: pand %xmm2, %xmm0 1351; SSE41-NEXT: por %xmm6, %xmm0 1352; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4 1353; SSE41-NEXT:
movapd {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1354; SSE41-NEXT: movapd %xmm4, %xmm2 1355; SSE41-NEXT: xorpd %xmm3, %xmm2 1356; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562035200,18446744071562035200] 1357; SSE41-NEXT: movapd %xmm2, %xmm7 1358; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 1359; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 1360; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 1361; SSE41-NEXT: pand %xmm7, %xmm0 1362; SSE41-NEXT: por %xmm2, %xmm0 1363; SSE41-NEXT: movapd %xmm1, %xmm2 1364; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1365; SSE41-NEXT: xorpd %xmm5, %xmm3 1366; SSE41-NEXT: movapd %xmm3, %xmm4 1367; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 1368; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 1369; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 1370; SSE41-NEXT: pand %xmm4, %xmm0 1371; SSE41-NEXT: por %xmm3, %xmm0 1372; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 1373; SSE41-NEXT: packssdw %xmm2, %xmm1 1374; SSE41-NEXT: packssdw %xmm1, %xmm1 1375; SSE41-NEXT: movq %xmm1, (%rdi) 1376; SSE41-NEXT: retq 1377; 1378; AVX1-LABEL: trunc_ssat_v4i64_v4i16_store: 1379; AVX1: # %bb.0: 1380; AVX1-NEXT: vpmovsxwq {{.*#+}} xmm1 = [32767,32767] 1381; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1382; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm2 1383; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1384; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3 1385; AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 1386; AVX1-NEXT: vpmovsxwq {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] 1387; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 1388; AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 1389; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 1390; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm1, %xmm1 1391; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0 1392; AVX1-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1393; AVX1-NEXT: vmovq %xmm0, (%rdi) 1394; AVX1-NEXT: vzeroupper 1395; AVX1-NEXT: retq 1396; 1397; AVX2-LABEL: trunc_ssat_v4i64_v4i16_store: 1398; AVX2: # %bb.0: 1399; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 =
[32767,32767,32767,32767] 1400; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 1401; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1402; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848] 1403; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 1404; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1405; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1406; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1407; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1408; AVX2-NEXT: vmovq %xmm0, (%rdi) 1409; AVX2-NEXT: vzeroupper 1410; AVX2-NEXT: retq 1411; 1412; AVX512F-LABEL: trunc_ssat_v4i64_v4i16_store: 1413; AVX512F: # %bb.0: 1414; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1415; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0 1416; AVX512F-NEXT: vmovq %xmm0, (%rdi) 1417; AVX512F-NEXT: vzeroupper 1418; AVX512F-NEXT: retq 1419; 1420; AVX512VL-LABEL: trunc_ssat_v4i64_v4i16_store: 1421; AVX512VL: # %bb.0: 1422; AVX512VL-NEXT: vpmovsqw %ymm0, (%rdi) 1423; AVX512VL-NEXT: vzeroupper 1424; AVX512VL-NEXT: retq 1425; 1426; AVX512BW-LABEL: trunc_ssat_v4i64_v4i16_store: 1427; AVX512BW: # %bb.0: 1428; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1429; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0 1430; AVX512BW-NEXT: vmovq %xmm0, (%rdi) 1431; AVX512BW-NEXT: vzeroupper 1432; AVX512BW-NEXT: retq 1433; 1434; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i16_store: 1435; AVX512BWVL: # %bb.0: 1436; AVX512BWVL-NEXT: vpmovsqw %ymm0, (%rdi) 1437; AVX512BWVL-NEXT: vzeroupper 1438; AVX512BWVL-NEXT: retq 1439; 1440; SKX-LABEL: trunc_ssat_v4i64_v4i16_store: 1441; SKX: # %bb.0: 1442; SKX-NEXT: vpmovsqw %ymm0, (%rdi) 1443; SKX-NEXT: vzeroupper 1444; SKX-NEXT: retq 1445 %1 = icmp slt <4 x i64> %a0, <i64 32767, i64 32767, i64 32767, i64 32767> 1446 %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 32767, i64 32767, i64 32767, i64 32767> 1447 %3 = icmp sgt <4 x i64> %2, <i64 -32768, i64 -32768, i64 -32768, i64 -32768> 1448 %4 = select <4 x i1> %3, <4 x i64> %2,
<4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768> 1449 %5 = trunc <4 x i64> %4 to <4 x i16> 1450 store <4 x i16> %5, ptr%p1 1451 ret void 1452} 1453 1454define <8 x i16> @trunc_ssat_v8i64_v8i16(ptr %p0) "min-legal-vector-width"="256" { 1455; SSE2-SSSE3-LABEL: trunc_ssat_v8i64_v8i16: 1456; SSE2-SSSE3: # %bb.0: 1457; SSE2-SSSE3-NEXT: movdqa (%rdi), %xmm6 1458; SSE2-SSSE3-NEXT: movdqa 16(%rdi), %xmm0 1459; SSE2-SSSE3-NEXT: movdqa 32(%rdi), %xmm3 1460; SSE2-SSSE3-NEXT: movdqa 48(%rdi), %xmm5 1461; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [32767,32767] 1462; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 1463; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2 1464; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm2 1465; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm2[1,1,3,3] 1466; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm8 1467; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm9 1468; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [2147516415,2147516415] 1469; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm10 1470; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm10 1471; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 1472; SSE2-SSSE3-NEXT: pand %xmm9, %xmm11 1473; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm10[1,1,3,3] 1474; SSE2-SSSE3-NEXT: por %xmm11, %xmm2 1475; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3 1476; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm2 1477; SSE2-SSSE3-NEXT: por %xmm3, %xmm2 1478; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm3 1479; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm3 1480; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm3[1,1,3,3] 1481; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm9 1482; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm10 1483; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm10 1484; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 1485; SSE2-SSSE3-NEXT: pand %xmm9, %xmm11 1486; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm10[1,1,3,3] 1487; SSE2-SSSE3-NEXT: por %xmm11, %xmm3 1488; SSE2-SSSE3-NEXT: pand %xmm3, %xmm5 1489; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm3 1490; SSE2-SSSE3-NEXT: por %xmm5, %xmm3 1491; SSE2-SSSE3-NEXT: movdqa %xmm6, %xmm5
1492; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm5 1493; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm5[1,1,3,3] 1494; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm9 1495; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm10 1496; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm10 1497; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 1498; SSE2-SSSE3-NEXT: pand %xmm9, %xmm11 1499; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm10[1,1,3,3] 1500; SSE2-SSSE3-NEXT: por %xmm11, %xmm5 1501; SSE2-SSSE3-NEXT: pand %xmm5, %xmm6 1502; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm5 1503; SSE2-SSSE3-NEXT: por %xmm6, %xmm5 1504; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm6 1505; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm6 1506; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm6[1,1,3,3] 1507; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm9 1508; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm7 1509; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2] 1510; SSE2-SSSE3-NEXT: pand %xmm9, %xmm6 1511; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm7[1,1,3,3] 1512; SSE2-SSSE3-NEXT: por %xmm6, %xmm8 1513; SSE2-SSSE3-NEXT: pand %xmm8, %xmm0 1514; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm8 1515; SSE2-SSSE3-NEXT: por %xmm0, %xmm8 1516; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709518848,18446744073709518848] 1517; SSE2-SSSE3-NEXT: movdqa %xmm8, %xmm0 1518; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm0 1519; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm0[1,1,3,3] 1520; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm6 1521; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm9 1522; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [18446744071562035200,18446744071562035200] 1523; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm0 1524; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2] 1525; SSE2-SSSE3-NEXT: pand %xmm9, %xmm10 1526; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm0[1,1,3,3] 1527; SSE2-SSSE3-NEXT: por %xmm10, %xmm9 1528; SSE2-SSSE3-NEXT: pand %xmm9, %xmm8 1529; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm9 1530; SSE2-SSSE3-NEXT: por %xmm8, %xmm9 1531; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm0 1532; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm0 1533; 
SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm0[1,1,3,3] 1534; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm8 1535; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm0 1536; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2] 1537; SSE2-SSSE3-NEXT: pand %xmm8, %xmm10 1538; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1539; SSE2-SSSE3-NEXT: por %xmm10, %xmm0 1540; SSE2-SSSE3-NEXT: pand %xmm0, %xmm5 1541; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm0 1542; SSE2-SSSE3-NEXT: por %xmm5, %xmm0 1543; SSE2-SSSE3-NEXT: packssdw %xmm9, %xmm0 1544; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm5 1545; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm5 1546; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm5[1,1,3,3] 1547; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm8 1548; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm5 1549; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2] 1550; SSE2-SSSE3-NEXT: pand %xmm8, %xmm9 1551; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 1552; SSE2-SSSE3-NEXT: por %xmm9, %xmm5 1553; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3 1554; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm5 1555; SSE2-SSSE3-NEXT: por %xmm3, %xmm5 1556; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1 1557; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] 1558; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm3 1559; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm1 1560; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2] 1561; SSE2-SSSE3-NEXT: pand %xmm3, %xmm6 1562; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1563; SSE2-SSSE3-NEXT: por %xmm6, %xmm1 1564; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2 1565; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm1 1566; SSE2-SSSE3-NEXT: por %xmm2, %xmm1 1567; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm1 1568; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0 1569; SSE2-SSSE3-NEXT: retq 1570; 1571; SSE41-LABEL: trunc_ssat_v8i64_v8i16: 1572; SSE41: # %bb.0: 1573; SSE41-NEXT: movdqa (%rdi), %xmm7 1574; SSE41-NEXT: movdqa 16(%rdi), %xmm5 1575; SSE41-NEXT: movdqa 32(%rdi), %xmm4 1576; SSE41-NEXT: movdqa 48(%rdi), %xmm8 1577; SSE41-NEXT: movapd {{.*#+}} xmm1 = [32767,32767] 1578; 
SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648] 1579; SSE41-NEXT: movdqa %xmm4, %xmm3 1580; SSE41-NEXT: pxor %xmm2, %xmm3 1581; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147516415,2147516415] 1582; SSE41-NEXT: movdqa %xmm6, %xmm9 1583; SSE41-NEXT: pcmpgtd %xmm3, %xmm9 1584; SSE41-NEXT: pcmpeqd %xmm6, %xmm3 1585; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] 1586; SSE41-NEXT: pand %xmm3, %xmm0 1587; SSE41-NEXT: por %xmm9, %xmm0 1588; SSE41-NEXT: movapd %xmm1, %xmm3 1589; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3 1590; SSE41-NEXT: movdqa %xmm8, %xmm4 1591; SSE41-NEXT: pxor %xmm2, %xmm4 1592; SSE41-NEXT: movdqa %xmm6, %xmm9 1593; SSE41-NEXT: pcmpgtd %xmm4, %xmm9 1594; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 1595; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] 1596; SSE41-NEXT: pand %xmm4, %xmm0 1597; SSE41-NEXT: por %xmm9, %xmm0 1598; SSE41-NEXT: movapd %xmm1, %xmm4 1599; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm4 1600; SSE41-NEXT: movdqa %xmm7, %xmm8 1601; SSE41-NEXT: pxor %xmm2, %xmm8 1602; SSE41-NEXT: movdqa %xmm6, %xmm9 1603; SSE41-NEXT: pcmpgtd %xmm8, %xmm9 1604; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 1605; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] 1606; SSE41-NEXT: pand %xmm8, %xmm0 1607; SSE41-NEXT: por %xmm9, %xmm0 1608; SSE41-NEXT: movapd %xmm1, %xmm8 1609; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm8 1610; SSE41-NEXT: movdqa %xmm5, %xmm0 1611; SSE41-NEXT: pxor %xmm2, %xmm0 1612; SSE41-NEXT: movdqa %xmm0, %xmm7 1613; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 1614; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 1615; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 1616; SSE41-NEXT: pand %xmm7, %xmm0 1617; SSE41-NEXT: por %xmm6, %xmm0 1618; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 1619; SSE41-NEXT: movapd {{.*#+}} xmm5 = [18446744073709518848,18446744073709518848] 1620; SSE41-NEXT: movapd %xmm1, %xmm7 1621; SSE41-NEXT: xorpd %xmm2, %xmm7 1622; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562035200,18446744071562035200] 1623; SSE41-NEXT: movapd %xmm7, %xmm9 1624; SSE41-NEXT: 
pcmpeqd %xmm6, %xmm9 1625; SSE41-NEXT: pcmpgtd %xmm6, %xmm7 1626; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 1627; SSE41-NEXT: pand %xmm9, %xmm0 1628; SSE41-NEXT: por %xmm7, %xmm0 1629; SSE41-NEXT: movapd %xmm5, %xmm7 1630; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm7 1631; SSE41-NEXT: movapd %xmm8, %xmm1 1632; SSE41-NEXT: xorpd %xmm2, %xmm1 1633; SSE41-NEXT: movapd %xmm1, %xmm9 1634; SSE41-NEXT: pcmpeqd %xmm6, %xmm9 1635; SSE41-NEXT: pcmpgtd %xmm6, %xmm1 1636; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 1637; SSE41-NEXT: pand %xmm9, %xmm0 1638; SSE41-NEXT: por %xmm1, %xmm0 1639; SSE41-NEXT: movapd %xmm5, %xmm1 1640; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1 1641; SSE41-NEXT: packssdw %xmm7, %xmm1 1642; SSE41-NEXT: movapd %xmm4, %xmm7 1643; SSE41-NEXT: xorpd %xmm2, %xmm7 1644; SSE41-NEXT: movapd %xmm7, %xmm8 1645; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 1646; SSE41-NEXT: pcmpgtd %xmm6, %xmm7 1647; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 1648; SSE41-NEXT: pand %xmm8, %xmm0 1649; SSE41-NEXT: por %xmm7, %xmm0 1650; SSE41-NEXT: movapd %xmm5, %xmm7 1651; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm7 1652; SSE41-NEXT: xorpd %xmm3, %xmm2 1653; SSE41-NEXT: movapd %xmm2, %xmm4 1654; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 1655; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 1656; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 1657; SSE41-NEXT: pand %xmm4, %xmm0 1658; SSE41-NEXT: por %xmm2, %xmm0 1659; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm5 1660; SSE41-NEXT: packssdw %xmm7, %xmm5 1661; SSE41-NEXT: packssdw %xmm5, %xmm1 1662; SSE41-NEXT: movdqa %xmm1, %xmm0 1663; SSE41-NEXT: retq 1664; 1665; AVX1-LABEL: trunc_ssat_v8i64_v8i16: 1666; AVX1: # %bb.0: 1667; AVX1-NEXT: vmovdqa (%rdi), %xmm0 1668; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 1669; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2 1670; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3 1671; AVX1-NEXT: vpmovsxwq {{.*#+}} xmm4 = [32767,32767] 1672; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm5 1673; AVX1-NEXT: vblendvpd %xmm5, %xmm2, %xmm4, %xmm2 1674; AVX1-NEXT: vpcmpgtq 
%xmm3, %xmm4, %xmm5 1675; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm4, %xmm3 1676; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm5 1677; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 1678; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm5 1679; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm4, %xmm1 1680; AVX1-NEXT: vpmovsxwq {{.*#+}} xmm4 = [18446744073709518848,18446744073709518848] 1681; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm5 1682; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm4, %xmm1 1683; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 1684; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 1685; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1686; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm1 1687; AVX1-NEXT: vblendvpd %xmm1, %xmm3, %xmm4, %xmm1 1688; AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm3 1689; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm4, %xmm2 1690; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1 1691; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1692; AVX1-NEXT: retq 1693; 1694; AVX2-LABEL: trunc_ssat_v8i64_v8i16: 1695; AVX2: # %bb.0: 1696; AVX2-NEXT: vmovdqa (%rdi), %ymm0 1697; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1 1698; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [32767,32767,32767,32767] 1699; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3 1700; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 1701; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3 1702; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 1703; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709518848,18446744073709518848,18446744073709518848,18446744073709518848] 1704; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3 1705; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 1706; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3 1707; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 1708; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 1709; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1710; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1711; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 1712; AVX2-NEXT: vzeroupper 1713; AVX2-NEXT: retq 1714; 1715; AVX512-LABEL: trunc_ssat_v8i64_v8i16: 1716; AVX512: # %bb.0: 
1717; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 1718; AVX512-NEXT: vpmovsqw %zmm0, %xmm0 1719; AVX512-NEXT: vzeroupper 1720; AVX512-NEXT: retq 1721; 1722; SKX-LABEL: trunc_ssat_v8i64_v8i16: 1723; SKX: # %bb.0: 1724; SKX-NEXT: vmovdqa (%rdi), %ymm0 1725; SKX-NEXT: vmovdqa 32(%rdi), %ymm1 1726; SKX-NEXT: vpmovsqw %ymm1, %xmm1 1727; SKX-NEXT: vpmovsqw %ymm0, %xmm0 1728; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1729; SKX-NEXT: vzeroupper 1730; SKX-NEXT: retq 1731 %a0 = load <8 x i64>, ptr %p0 1732 %1 = icmp slt <8 x i64> %a0, <i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767> 1733 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767, i64 32767> 1734 %3 = icmp sgt <8 x i64> %2, <i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768> 1735 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768> 1736 %5 = trunc <8 x i64> %4 to <8 x i16> 1737 ret <8 x i16> %5 1738} 1739 1740define <4 x i16> @trunc_ssat_v4i32_v4i16(<4 x i32> %a0) { 1741; SSE-LABEL: trunc_ssat_v4i32_v4i16: 1742; SSE: # %bb.0: 1743; SSE-NEXT: packssdw %xmm0, %xmm0 1744; SSE-NEXT: retq 1745; 1746; AVX-LABEL: trunc_ssat_v4i32_v4i16: 1747; AVX: # %bb.0: 1748; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1749; AVX-NEXT: retq 1750; 1751; AVX512-LABEL: trunc_ssat_v4i32_v4i16: 1752; AVX512: # %bb.0: 1753; AVX512-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1754; AVX512-NEXT: retq 1755; 1756; SKX-LABEL: trunc_ssat_v4i32_v4i16: 1757; SKX: # %bb.0: 1758; SKX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1759; SKX-NEXT: retq 1760 %1 = icmp slt <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767> 1761 %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767> 1762 %3 = icmp sgt <4 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768> 1763 %4 = select 
<4 x i1> %3, <4 x i32> %2, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768> 1764 %5 = trunc <4 x i32> %4 to <4 x i16> 1765 ret <4 x i16> %5 1766} 1767 1768define void @trunc_ssat_v4i32_v4i16_store(<4 x i32> %a0, ptr%p1) { 1769; SSE-LABEL: trunc_ssat_v4i32_v4i16_store: 1770; SSE: # %bb.0: 1771; SSE-NEXT: packssdw %xmm0, %xmm0 1772; SSE-NEXT: movq %xmm0, (%rdi) 1773; SSE-NEXT: retq 1774; 1775; AVX-LABEL: trunc_ssat_v4i32_v4i16_store: 1776; AVX: # %bb.0: 1777; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1778; AVX-NEXT: vmovq %xmm0, (%rdi) 1779; AVX-NEXT: retq 1780; 1781; AVX512F-LABEL: trunc_ssat_v4i32_v4i16_store: 1782; AVX512F: # %bb.0: 1783; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1784; AVX512F-NEXT: vmovq %xmm0, (%rdi) 1785; AVX512F-NEXT: retq 1786; 1787; AVX512VL-LABEL: trunc_ssat_v4i32_v4i16_store: 1788; AVX512VL: # %bb.0: 1789; AVX512VL-NEXT: vpmovsdw %xmm0, (%rdi) 1790; AVX512VL-NEXT: retq 1791; 1792; AVX512BW-LABEL: trunc_ssat_v4i32_v4i16_store: 1793; AVX512BW: # %bb.0: 1794; AVX512BW-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1795; AVX512BW-NEXT: vmovq %xmm0, (%rdi) 1796; AVX512BW-NEXT: retq 1797; 1798; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i16_store: 1799; AVX512BWVL: # %bb.0: 1800; AVX512BWVL-NEXT: vpmovsdw %xmm0, (%rdi) 1801; AVX512BWVL-NEXT: retq 1802; 1803; SKX-LABEL: trunc_ssat_v4i32_v4i16_store: 1804; SKX: # %bb.0: 1805; SKX-NEXT: vpmovsdw %xmm0, (%rdi) 1806; SKX-NEXT: retq 1807 %1 = icmp slt <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767> 1808 %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767> 1809 %3 = icmp sgt <4 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768> 1810 %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768> 1811 %5 = trunc <4 x i32> %4 to <4 x i16> 1812 store <4 x i16> %5, ptr%p1 1813 ret void 1814} 1815 1816define <8 x i16> @trunc_ssat_v8i32_v8i16(<8 x i32> %a0) { 1817; SSE-LABEL: trunc_ssat_v8i32_v8i16: 1818; SSE: # %bb.0: 
1819; SSE-NEXT: packssdw %xmm1, %xmm0 1820; SSE-NEXT: retq 1821; 1822; AVX1-LABEL: trunc_ssat_v8i32_v8i16: 1823; AVX1: # %bb.0: 1824; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1825; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1826; AVX1-NEXT: vzeroupper 1827; AVX1-NEXT: retq 1828; 1829; AVX2-LABEL: trunc_ssat_v8i32_v8i16: 1830; AVX2: # %bb.0: 1831; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1832; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1833; AVX2-NEXT: vzeroupper 1834; AVX2-NEXT: retq 1835; 1836; AVX512F-LABEL: trunc_ssat_v8i32_v8i16: 1837; AVX512F: # %bb.0: 1838; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 1839; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1840; AVX512F-NEXT: vzeroupper 1841; AVX512F-NEXT: retq 1842; 1843; AVX512VL-LABEL: trunc_ssat_v8i32_v8i16: 1844; AVX512VL: # %bb.0: 1845; AVX512VL-NEXT: vpmovsdw %ymm0, %xmm0 1846; AVX512VL-NEXT: vzeroupper 1847; AVX512VL-NEXT: retq 1848; 1849; AVX512BW-LABEL: trunc_ssat_v8i32_v8i16: 1850; AVX512BW: # %bb.0: 1851; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 1852; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 1853; AVX512BW-NEXT: vzeroupper 1854; AVX512BW-NEXT: retq 1855; 1856; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i16: 1857; AVX512BWVL: # %bb.0: 1858; AVX512BWVL-NEXT: vpmovsdw %ymm0, %xmm0 1859; AVX512BWVL-NEXT: vzeroupper 1860; AVX512BWVL-NEXT: retq 1861; 1862; SKX-LABEL: trunc_ssat_v8i32_v8i16: 1863; SKX: # %bb.0: 1864; SKX-NEXT: vpmovsdw %ymm0, %xmm0 1865; SKX-NEXT: vzeroupper 1866; SKX-NEXT: retq 1867 %1 = icmp slt <8 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767> 1868 %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767> 1869 %3 = icmp sgt <8 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768> 1870 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 
-32768, i32 -32768> 1871 %5 = trunc <8 x i32> %4 to <8 x i16> 1872 ret <8 x i16> %5 1873} 1874 1875define <16 x i16> @trunc_ssat_v16i32_v16i16(ptr %p0) "min-legal-vector-width"="256" { 1876; SSE-LABEL: trunc_ssat_v16i32_v16i16: 1877; SSE: # %bb.0: 1878; SSE-NEXT: movdqa (%rdi), %xmm0 1879; SSE-NEXT: movdqa 32(%rdi), %xmm1 1880; SSE-NEXT: packssdw 16(%rdi), %xmm0 1881; SSE-NEXT: packssdw 48(%rdi), %xmm1 1882; SSE-NEXT: retq 1883; 1884; AVX1-LABEL: trunc_ssat_v16i32_v16i16: 1885; AVX1: # %bb.0: 1886; AVX1-NEXT: vmovdqa (%rdi), %xmm0 1887; AVX1-NEXT: vmovdqa 32(%rdi), %xmm1 1888; AVX1-NEXT: vpackssdw 48(%rdi), %xmm1, %xmm1 1889; AVX1-NEXT: vpackssdw 16(%rdi), %xmm0, %xmm0 1890; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1891; AVX1-NEXT: retq 1892; 1893; AVX2-LABEL: trunc_ssat_v16i32_v16i16: 1894; AVX2: # %bb.0: 1895; AVX2-NEXT: vmovdqa (%rdi), %ymm0 1896; AVX2-NEXT: vpackssdw 32(%rdi), %ymm0, %ymm0 1897; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 1898; AVX2-NEXT: retq 1899; 1900; AVX512-LABEL: trunc_ssat_v16i32_v16i16: 1901; AVX512: # %bb.0: 1902; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 1903; AVX512-NEXT: vpmovsdw %zmm0, %ymm0 1904; AVX512-NEXT: retq 1905; 1906; SKX-LABEL: trunc_ssat_v16i32_v16i16: 1907; SKX: # %bb.0: 1908; SKX-NEXT: vmovdqa (%rdi), %ymm0 1909; SKX-NEXT: vpackssdw 32(%rdi), %ymm0, %ymm0 1910; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 1911; SKX-NEXT: retq 1912 %a0 = load <16 x i32>, ptr %p0 1913 %1 = icmp slt <16 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767> 1914 %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767> 1915 %3 = icmp sgt <16 x i32> %2, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 
-32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768> 1916 %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768> 1917 %5 = trunc <16 x i32> %4 to <16 x i16> 1918 ret <16 x i16> %5 1919} 1920 1921; 1922; Signed saturation truncation to vXi8 1923; 1924 1925define <2 x i8> @trunc_ssat_v2i64_v2i8(<2 x i64> %a0) { 1926; SSE2-LABEL: trunc_ssat_v2i64_v2i8: 1927; SSE2: # %bb.0: 1928; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 1929; SSE2-NEXT: movdqa %xmm0, %xmm2 1930; SSE2-NEXT: pxor %xmm1, %xmm2 1931; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 1932; SSE2-NEXT: pxor %xmm4, %xmm4 1933; SSE2-NEXT: pcmpeqd %xmm3, %xmm4 1934; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483775,2147483775] 1935; SSE2-NEXT: pcmpgtd %xmm2, %xmm3 1936; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 1937; SSE2-NEXT: pand %xmm4, %xmm2 1938; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1939; SSE2-NEXT: por %xmm2, %xmm3 1940; SSE2-NEXT: pand %xmm3, %xmm0 1941; SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 1942; SSE2-NEXT: por %xmm3, %xmm0 1943; SSE2-NEXT: pxor %xmm0, %xmm1 1944; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1945; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 1946; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 1947; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1948; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2] 1949; SSE2-NEXT: pand %xmm3, %xmm2 1950; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1951; SSE2-NEXT: por %xmm2, %xmm1 1952; SSE2-NEXT: pand %xmm1, %xmm0 1953; SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1954; SSE2-NEXT: por %xmm1, %xmm0 1955; SSE2-NEXT: packssdw %xmm0, %xmm0 1956; SSE2-NEXT: packssdw %xmm0, %xmm0 1957; SSE2-NEXT: packsswb %xmm0, %xmm0 1958; SSE2-NEXT: retq 1959; 1960; SSSE3-LABEL: 
trunc_ssat_v2i64_v2i8: 1961; SSSE3: # %bb.0: 1962; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 1963; SSSE3-NEXT: movdqa %xmm0, %xmm2 1964; SSSE3-NEXT: pxor %xmm1, %xmm2 1965; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 1966; SSSE3-NEXT: pxor %xmm4, %xmm4 1967; SSSE3-NEXT: pcmpeqd %xmm3, %xmm4 1968; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483775,2147483775] 1969; SSSE3-NEXT: pcmpgtd %xmm2, %xmm3 1970; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 1971; SSSE3-NEXT: pand %xmm4, %xmm2 1972; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1973; SSSE3-NEXT: por %xmm2, %xmm3 1974; SSSE3-NEXT: pand %xmm3, %xmm0 1975; SSSE3-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 1976; SSSE3-NEXT: por %xmm3, %xmm0 1977; SSSE3-NEXT: pxor %xmm0, %xmm1 1978; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1979; SSSE3-NEXT: pcmpeqd %xmm3, %xmm3 1980; SSSE3-NEXT: pcmpeqd %xmm2, %xmm3 1981; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1982; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,2,2] 1983; SSSE3-NEXT: pand %xmm3, %xmm2 1984; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1985; SSSE3-NEXT: por %xmm2, %xmm1 1986; SSSE3-NEXT: pand %xmm1, %xmm0 1987; SSSE3-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1988; SSSE3-NEXT: por %xmm1, %xmm0 1989; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 1990; SSSE3-NEXT: retq 1991; 1992; SSE41-LABEL: trunc_ssat_v2i64_v2i8: 1993; SSE41: # %bb.0: 1994; SSE41-NEXT: movdqa %xmm0, %xmm1 1995; SSE41-NEXT: movapd {{.*#+}} xmm2 = [127,127] 1996; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] 1997; SSE41-NEXT: pxor %xmm3, %xmm0 1998; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = [2147483775,2147483775] 1999; SSE41-NEXT: movdqa %xmm0, %xmm5 2000; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 2001; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 2002; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 2003; SSE41-NEXT: pand %xmm5, %xmm0 2004; SSE41-NEXT: por %xmm4, %xmm0 2005; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 
2006; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] 2007; SSE41-NEXT: pxor %xmm2, %xmm3 2008; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [18446744071562067840,18446744071562067840] 2009; SSE41-NEXT: movdqa %xmm3, %xmm4 2010; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 2011; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 2012; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 2013; SSE41-NEXT: pand %xmm4, %xmm0 2014; SSE41-NEXT: por %xmm3, %xmm0 2015; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 2016; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2017; SSE41-NEXT: movdqa %xmm1, %xmm0 2018; SSE41-NEXT: retq 2019; 2020; AVX-LABEL: trunc_ssat_v2i64_v2i8: 2021; AVX: # %bb.0: 2022; AVX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [127,127] 2023; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 2024; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 2025; AVX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] 2026; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 2027; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 2028; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2029; AVX-NEXT: retq 2030; 2031; AVX512F-LABEL: trunc_ssat_v2i64_v2i8: 2032; AVX512F: # %bb.0: 2033; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2034; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0 2035; AVX512F-NEXT: vzeroupper 2036; AVX512F-NEXT: retq 2037; 2038; AVX512VL-LABEL: trunc_ssat_v2i64_v2i8: 2039; AVX512VL: # %bb.0: 2040; AVX512VL-NEXT: vpmovsqb %xmm0, %xmm0 2041; AVX512VL-NEXT: retq 2042; 2043; AVX512BW-LABEL: trunc_ssat_v2i64_v2i8: 2044; AVX512BW: # %bb.0: 2045; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2046; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0 2047; AVX512BW-NEXT: vzeroupper 2048; AVX512BW-NEXT: retq 2049; 2050; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i8: 2051; AVX512BWVL: # %bb.0: 2052; AVX512BWVL-NEXT: vpmovsqb %xmm0, %xmm0 2053; AVX512BWVL-NEXT: retq 2054; 2055; SKX-LABEL: trunc_ssat_v2i64_v2i8: 2056; SKX: # %bb.0: 2057; SKX-NEXT: vpmovsqb 
%xmm0, %xmm0 2058; SKX-NEXT: retq 2059 %1 = icmp slt <2 x i64> %a0, <i64 127, i64 127> 2060 %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 127, i64 127> 2061 %3 = icmp sgt <2 x i64> %2, <i64 -128, i64 -128> 2062 %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -128, i64 -128> 2063 %5 = trunc <2 x i64> %4 to <2 x i8> 2064 ret <2 x i8> %5 2065} 2066 2067define void @trunc_ssat_v2i64_v2i8_store(<2 x i64> %a0, ptr%p1) { 2068; SSE2-LABEL: trunc_ssat_v2i64_v2i8_store: 2069; SSE2: # %bb.0: 2070; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 2071; SSE2-NEXT: movdqa %xmm0, %xmm2 2072; SSE2-NEXT: pxor %xmm1, %xmm2 2073; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 2074; SSE2-NEXT: pxor %xmm4, %xmm4 2075; SSE2-NEXT: pcmpeqd %xmm3, %xmm4 2076; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483775,2147483775] 2077; SSE2-NEXT: pcmpgtd %xmm2, %xmm3 2078; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 2079; SSE2-NEXT: pand %xmm4, %xmm2 2080; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 2081; SSE2-NEXT: por %xmm2, %xmm3 2082; SSE2-NEXT: pand %xmm3, %xmm0 2083; SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 2084; SSE2-NEXT: por %xmm0, %xmm3 2085; SSE2-NEXT: pxor %xmm3, %xmm1 2086; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 2087; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 2088; SSE2-NEXT: pcmpeqd %xmm0, %xmm2 2089; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 2090; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 2091; SSE2-NEXT: pand %xmm2, %xmm0 2092; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2093; SSE2-NEXT: por %xmm0, %xmm1 2094; SSE2-NEXT: pand %xmm1, %xmm3 2095; SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 2096; SSE2-NEXT: por %xmm3, %xmm1 2097; SSE2-NEXT: packssdw %xmm1, %xmm1 2098; SSE2-NEXT: packssdw %xmm1, %xmm1 2099; SSE2-NEXT: packsswb %xmm1, %xmm1 2100; SSE2-NEXT: movd %xmm1, %eax 2101; SSE2-NEXT: movw %ax, (%rdi) 2102; SSE2-NEXT: retq 2103; 2104; SSSE3-LABEL: trunc_ssat_v2i64_v2i8_store: 2105; SSSE3: # %bb.0: 2106; 
SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 2107; SSSE3-NEXT: movdqa %xmm0, %xmm2 2108; SSSE3-NEXT: pxor %xmm1, %xmm2 2109; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 2110; SSSE3-NEXT: pxor %xmm4, %xmm4 2111; SSSE3-NEXT: pcmpeqd %xmm3, %xmm4 2112; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483775,2147483775] 2113; SSSE3-NEXT: pcmpgtd %xmm2, %xmm3 2114; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 2115; SSSE3-NEXT: pand %xmm4, %xmm2 2116; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 2117; SSSE3-NEXT: por %xmm2, %xmm3 2118; SSSE3-NEXT: pand %xmm3, %xmm0 2119; SSSE3-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 2120; SSSE3-NEXT: por %xmm0, %xmm3 2121; SSSE3-NEXT: pxor %xmm3, %xmm1 2122; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 2123; SSSE3-NEXT: pcmpeqd %xmm2, %xmm2 2124; SSSE3-NEXT: pcmpeqd %xmm0, %xmm2 2125; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 2126; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 2127; SSSE3-NEXT: pand %xmm2, %xmm0 2128; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2129; SSSE3-NEXT: por %xmm0, %xmm1 2130; SSSE3-NEXT: pand %xmm1, %xmm3 2131; SSSE3-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 2132; SSSE3-NEXT: por %xmm3, %xmm1 2133; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2134; SSSE3-NEXT: movd %xmm1, %eax 2135; SSSE3-NEXT: movw %ax, (%rdi) 2136; SSSE3-NEXT: retq 2137; 2138; SSE41-LABEL: trunc_ssat_v2i64_v2i8_store: 2139; SSE41: # %bb.0: 2140; SSE41-NEXT: movdqa %xmm0, %xmm1 2141; SSE41-NEXT: movapd {{.*#+}} xmm2 = [127,127] 2142; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] 2143; SSE41-NEXT: pxor %xmm3, %xmm0 2144; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = [2147483775,2147483775] 2145; SSE41-NEXT: movdqa %xmm0, %xmm5 2146; SSE41-NEXT: pcmpeqd %xmm4, %xmm5 2147; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 2148; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 2149; SSE41-NEXT: pand %xmm5, %xmm0 2150; SSE41-NEXT: por %xmm4, %xmm0 2151; SSE41-NEXT: 
blendvpd %xmm0, %xmm1, %xmm2 2152; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] 2153; SSE41-NEXT: pxor %xmm2, %xmm3 2154; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [18446744071562067840,18446744071562067840] 2155; SSE41-NEXT: movdqa %xmm3, %xmm4 2156; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 2157; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 2158; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 2159; SSE41-NEXT: pand %xmm4, %xmm0 2160; SSE41-NEXT: por %xmm3, %xmm0 2161; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 2162; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2163; SSE41-NEXT: pextrw $0, %xmm1, (%rdi) 2164; SSE41-NEXT: retq 2165; 2166; AVX-LABEL: trunc_ssat_v2i64_v2i8_store: 2167; AVX: # %bb.0: 2168; AVX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [127,127] 2169; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 2170; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 2171; AVX-NEXT: vpmovsxbq {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] 2172; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 2173; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 2174; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] 2175; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) 2176; AVX-NEXT: retq 2177; 2178; AVX512F-LABEL: trunc_ssat_v2i64_v2i8_store: 2179; AVX512F: # %bb.0: 2180; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2181; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0 2182; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) 2183; AVX512F-NEXT: vzeroupper 2184; AVX512F-NEXT: retq 2185; 2186; AVX512VL-LABEL: trunc_ssat_v2i64_v2i8_store: 2187; AVX512VL: # %bb.0: 2188; AVX512VL-NEXT: vpmovsqb %xmm0, (%rdi) 2189; AVX512VL-NEXT: retq 2190; 2191; AVX512BW-LABEL: trunc_ssat_v2i64_v2i8_store: 2192; AVX512BW: # %bb.0: 2193; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2194; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0 2195; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) 2196; AVX512BW-NEXT: vzeroupper 2197; AVX512BW-NEXT: retq 2198; 2199; AVX512BWVL-LABEL: 
trunc_ssat_v2i64_v2i8_store: 2200; AVX512BWVL: # %bb.0: 2201; AVX512BWVL-NEXT: vpmovsqb %xmm0, (%rdi) 2202; AVX512BWVL-NEXT: retq 2203; 2204; SKX-LABEL: trunc_ssat_v2i64_v2i8_store: 2205; SKX: # %bb.0: 2206; SKX-NEXT: vpmovsqb %xmm0, (%rdi) 2207; SKX-NEXT: retq 2208 %1 = icmp slt <2 x i64> %a0, <i64 127, i64 127> 2209 %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 127, i64 127> 2210 %3 = icmp sgt <2 x i64> %2, <i64 -128, i64 -128> 2211 %4 = select <2 x i1> %3, <2 x i64> %2, <2 x i64> <i64 -128, i64 -128> 2212 %5 = trunc <2 x i64> %4 to <2 x i8> 2213 store <2 x i8> %5, ptr%p1 2214 ret void 2215} 2216 2217define <4 x i8> @trunc_ssat_v4i64_v4i8(<4 x i64> %a0) { 2218; SSE2-SSSE3-LABEL: trunc_ssat_v4i64_v4i8: 2219; SSE2-SSSE3: # %bb.0: 2220; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [127,127] 2221; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 2222; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm4 2223; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm4 2224; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] 2225; SSE2-SSSE3-NEXT: pxor %xmm6, %xmm6 2226; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm5 2227; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [2147483775,2147483775] 2228; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm8 2229; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm8 2230; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm8[0,0,2,2] 2231; SSE2-SSSE3-NEXT: pand %xmm5, %xmm4 2232; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm8[1,1,3,3] 2233; SSE2-SSSE3-NEXT: por %xmm4, %xmm5 2234; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0 2235; SSE2-SSSE3-NEXT: pandn %xmm3, %xmm5 2236; SSE2-SSSE3-NEXT: por %xmm5, %xmm0 2237; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm4 2238; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm4 2239; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] 2240; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm5 2241; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm7 2242; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2] 2243; SSE2-SSSE3-NEXT: pand %xmm5, %xmm4 2244; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 2245; 
SSE2-SSSE3-NEXT: por %xmm4, %xmm5 2246; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1 2247; SSE2-SSSE3-NEXT: pandn %xmm3, %xmm5 2248; SSE2-SSSE3-NEXT: por %xmm1, %xmm5 2249; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] 2250; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm3 2251; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm3 2252; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 2253; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm6 2254; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm4 2255; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [18446744071562067840,18446744071562067840] 2256; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm3 2257; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm3[0,0,2,2] 2258; SSE2-SSSE3-NEXT: pand %xmm4, %xmm8 2259; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 2260; SSE2-SSSE3-NEXT: por %xmm8, %xmm3 2261; SSE2-SSSE3-NEXT: pand %xmm3, %xmm5 2262; SSE2-SSSE3-NEXT: pandn %xmm1, %xmm3 2263; SSE2-SSSE3-NEXT: por %xmm5, %xmm3 2264; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm2 2265; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 2266; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm4 2267; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm2 2268; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2] 2269; SSE2-SSSE3-NEXT: pand %xmm4, %xmm5 2270; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2271; SSE2-SSSE3-NEXT: por %xmm5, %xmm2 2272; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 2273; SSE2-SSSE3-NEXT: pandn %xmm1, %xmm2 2274; SSE2-SSSE3-NEXT: por %xmm2, %xmm0 2275; SSE2-SSSE3-NEXT: packssdw %xmm3, %xmm0 2276; SSE2-SSSE3-NEXT: packssdw %xmm0, %xmm0 2277; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0 2278; SSE2-SSSE3-NEXT: retq 2279; 2280; SSE41-LABEL: trunc_ssat_v4i64_v4i8: 2281; SSE41: # %bb.0: 2282; SSE41-NEXT: movdqa %xmm0, %xmm2 2283; SSE41-NEXT: movapd {{.*#+}} xmm4 = [127,127] 2284; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] 2285; SSE41-NEXT: movdqa %xmm0, %xmm5 2286; SSE41-NEXT: pxor %xmm3, %xmm5 2287; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147483775,2147483775] 2288; SSE41-NEXT: 
movdqa %xmm6, %xmm7 2289; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 2290; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 2291; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 2292; SSE41-NEXT: pand %xmm5, %xmm0 2293; SSE41-NEXT: por %xmm7, %xmm0 2294; SSE41-NEXT: movapd %xmm4, %xmm5 2295; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 2296; SSE41-NEXT: movdqa %xmm1, %xmm0 2297; SSE41-NEXT: pxor %xmm3, %xmm0 2298; SSE41-NEXT: movdqa %xmm0, %xmm2 2299; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 2300; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 2301; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 2302; SSE41-NEXT: pand %xmm2, %xmm0 2303; SSE41-NEXT: por %xmm6, %xmm0 2304; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4 2305; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] 2306; SSE41-NEXT: movapd %xmm4, %xmm2 2307; SSE41-NEXT: xorpd %xmm3, %xmm2 2308; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840] 2309; SSE41-NEXT: movapd %xmm2, %xmm7 2310; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 2311; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 2312; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 2313; SSE41-NEXT: pand %xmm7, %xmm0 2314; SSE41-NEXT: por %xmm2, %xmm0 2315; SSE41-NEXT: movapd %xmm1, %xmm2 2316; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 2317; SSE41-NEXT: xorpd %xmm5, %xmm3 2318; SSE41-NEXT: movapd %xmm3, %xmm4 2319; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 2320; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 2321; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 2322; SSE41-NEXT: pand %xmm4, %xmm0 2323; SSE41-NEXT: por %xmm3, %xmm0 2324; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 2325; SSE41-NEXT: packssdw %xmm2, %xmm1 2326; SSE41-NEXT: packssdw %xmm1, %xmm1 2327; SSE41-NEXT: packsswb %xmm1, %xmm1 2328; SSE41-NEXT: movdqa %xmm1, %xmm0 2329; SSE41-NEXT: retq 2330; 2331; AVX1-LABEL: trunc_ssat_v4i64_v4i8: 2332; AVX1: # %bb.0: 2333; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [127,127] 2334; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 2335; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm2 2336; AVX1-NEXT: 
vextractf128 $1, %ymm0, %xmm0 2337; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3 2338; AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 2339; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] 2340; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 2341; AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 2342; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 2343; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm1, %xmm1 2344; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0 2345; AVX1-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2346; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2347; AVX1-NEXT: vzeroupper 2348; AVX1-NEXT: retq 2349; 2350; AVX2-LABEL: trunc_ssat_v4i64_v4i8: 2351; AVX2: # %bb.0: 2352; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [127,127,127,127] 2353; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 2354; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 2355; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] 2356; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 2357; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 2358; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 2359; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 2360; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2361; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2362; AVX2-NEXT: vzeroupper 2363; AVX2-NEXT: retq 2364; 2365; AVX512F-LABEL: trunc_ssat_v4i64_v4i8: 2366; AVX512F: # %bb.0: 2367; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2368; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0 2369; AVX512F-NEXT: vzeroupper 2370; AVX512F-NEXT: retq 2371; 2372; AVX512VL-LABEL: trunc_ssat_v4i64_v4i8: 2373; AVX512VL: # %bb.0: 2374; AVX512VL-NEXT: vpmovsqb %ymm0, %xmm0 2375; AVX512VL-NEXT: vzeroupper 2376; AVX512VL-NEXT: retq 2377; 2378; AVX512BW-LABEL: trunc_ssat_v4i64_v4i8: 2379; AVX512BW: # %bb.0: 2380; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2381; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0 2382; AVX512BW-NEXT: vzeroupper 2383; AVX512BW-NEXT: retq 2384; 2385; AVX512BWVL-LABEL: 
trunc_ssat_v4i64_v4i8: 2386; AVX512BWVL: # %bb.0: 2387; AVX512BWVL-NEXT: vpmovsqb %ymm0, %xmm0 2388; AVX512BWVL-NEXT: vzeroupper 2389; AVX512BWVL-NEXT: retq 2390; 2391; SKX-LABEL: trunc_ssat_v4i64_v4i8: 2392; SKX: # %bb.0: 2393; SKX-NEXT: vpmovsqb %ymm0, %xmm0 2394; SKX-NEXT: vzeroupper 2395; SKX-NEXT: retq 2396 %1 = icmp slt <4 x i64> %a0, <i64 127, i64 127, i64 127, i64 127> 2397 %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 127, i64 127, i64 127, i64 127> 2398 %3 = icmp sgt <4 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128> 2399 %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128> 2400 %5 = trunc <4 x i64> %4 to <4 x i8> 2401 ret <4 x i8> %5 2402} 2403 2404define void @trunc_ssat_v4i64_v4i8_store(<4 x i64> %a0, ptr%p1) { 2405; SSE2-SSSE3-LABEL: trunc_ssat_v4i64_v4i8_store: 2406; SSE2-SSSE3: # %bb.0: 2407; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [127,127] 2408; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 2409; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm3 2410; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm3 2411; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,1,3,3] 2412; SSE2-SSSE3-NEXT: pxor %xmm6, %xmm6 2413; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm5 2414; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [2147483775,2147483775] 2415; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm8 2416; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm8 2417; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2] 2418; SSE2-SSSE3-NEXT: pand %xmm5, %xmm9 2419; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm8[1,1,3,3] 2420; SSE2-SSSE3-NEXT: por %xmm9, %xmm3 2421; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0 2422; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm3 2423; SSE2-SSSE3-NEXT: por %xmm0, %xmm3 2424; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0 2425; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm0 2426; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3] 2427; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm5 2428; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm7 2429; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 2430; 
SSE2-SSSE3-NEXT: pand %xmm5, %xmm0 2431; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 2432; SSE2-SSSE3-NEXT: por %xmm0, %xmm5 2433; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1 2434; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm5 2435; SSE2-SSSE3-NEXT: por %xmm1, %xmm5 2436; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [18446744073709551488,18446744073709551488] 2437; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm1 2438; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1 2439; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3] 2440; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm6 2441; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm4 2442; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [18446744071562067840,18446744071562067840] 2443; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm1 2444; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm1[0,0,2,2] 2445; SSE2-SSSE3-NEXT: pand %xmm4, %xmm8 2446; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2447; SSE2-SSSE3-NEXT: por %xmm8, %xmm1 2448; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5 2449; SSE2-SSSE3-NEXT: pandn %xmm0, %xmm1 2450; SSE2-SSSE3-NEXT: por %xmm5, %xmm1 2451; SSE2-SSSE3-NEXT: pxor %xmm3, %xmm2 2452; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 2453; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm4 2454; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm2 2455; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm2[0,0,2,2] 2456; SSE2-SSSE3-NEXT: pand %xmm4, %xmm5 2457; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2458; SSE2-SSSE3-NEXT: por %xmm5, %xmm2 2459; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3 2460; SSE2-SSSE3-NEXT: pandn %xmm0, %xmm2 2461; SSE2-SSSE3-NEXT: por %xmm3, %xmm2 2462; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm2 2463; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm2 2464; SSE2-SSSE3-NEXT: packsswb %xmm2, %xmm2 2465; SSE2-SSSE3-NEXT: movd %xmm2, (%rdi) 2466; SSE2-SSSE3-NEXT: retq 2467; 2468; SSE41-LABEL: trunc_ssat_v4i64_v4i8_store: 2469; SSE41: # %bb.0: 2470; SSE41-NEXT: movdqa %xmm0, %xmm2 2471; SSE41-NEXT: movapd {{.*#+}} xmm4 = [127,127] 2472; SSE41-NEXT: pmovzxdq {{.*#+}} xmm3 = [2147483648,2147483648] 2473; 
SSE41-NEXT: movdqa %xmm0, %xmm5 2474; SSE41-NEXT: pxor %xmm3, %xmm5 2475; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147483775,2147483775] 2476; SSE41-NEXT: movdqa %xmm6, %xmm7 2477; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 2478; SSE41-NEXT: pcmpeqd %xmm6, %xmm5 2479; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 2480; SSE41-NEXT: pand %xmm5, %xmm0 2481; SSE41-NEXT: por %xmm7, %xmm0 2482; SSE41-NEXT: movapd %xmm4, %xmm5 2483; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 2484; SSE41-NEXT: movdqa %xmm1, %xmm0 2485; SSE41-NEXT: pxor %xmm3, %xmm0 2486; SSE41-NEXT: movdqa %xmm0, %xmm2 2487; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 2488; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 2489; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 2490; SSE41-NEXT: pand %xmm2, %xmm0 2491; SSE41-NEXT: por %xmm6, %xmm0 2492; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4 2493; SSE41-NEXT: movapd {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] 2494; SSE41-NEXT: movapd %xmm4, %xmm2 2495; SSE41-NEXT: xorpd %xmm3, %xmm2 2496; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840] 2497; SSE41-NEXT: movapd %xmm2, %xmm7 2498; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 2499; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 2500; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 2501; SSE41-NEXT: pand %xmm7, %xmm0 2502; SSE41-NEXT: por %xmm2, %xmm0 2503; SSE41-NEXT: movapd %xmm1, %xmm2 2504; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 2505; SSE41-NEXT: xorpd %xmm5, %xmm3 2506; SSE41-NEXT: movapd %xmm3, %xmm4 2507; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 2508; SSE41-NEXT: pcmpgtd %xmm6, %xmm3 2509; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 2510; SSE41-NEXT: pand %xmm4, %xmm0 2511; SSE41-NEXT: por %xmm3, %xmm0 2512; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 2513; SSE41-NEXT: packssdw %xmm2, %xmm1 2514; SSE41-NEXT: packssdw %xmm1, %xmm1 2515; SSE41-NEXT: packsswb %xmm1, %xmm1 2516; SSE41-NEXT: movd %xmm1, (%rdi) 2517; SSE41-NEXT: retq 2518; 2519; AVX1-LABEL: trunc_ssat_v4i64_v4i8_store: 2520; AVX1: # %bb.0: 2521; AVX1-NEXT: 
vpmovsxbq {{.*#+}} xmm1 = [127,127] 2522; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 2523; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm2 2524; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2525; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3 2526; AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 2527; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] 2528; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 2529; AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 2530; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 2531; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm1, %xmm1 2532; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0 2533; AVX1-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2534; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2535; AVX1-NEXT: vmovd %xmm0, (%rdi) 2536; AVX1-NEXT: vzeroupper 2537; AVX1-NEXT: retq 2538; 2539; AVX2-LABEL: trunc_ssat_v4i64_v4i8_store: 2540; AVX2: # %bb.0: 2541; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [127,127,127,127] 2542; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 2543; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 2544; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] 2545; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 2546; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 2547; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 2548; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 2549; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2550; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2551; AVX2-NEXT: vmovd %xmm0, (%rdi) 2552; AVX2-NEXT: vzeroupper 2553; AVX2-NEXT: retq 2554; 2555; AVX512F-LABEL: trunc_ssat_v4i64_v4i8_store: 2556; AVX512F: # %bb.0: 2557; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2558; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0 2559; AVX512F-NEXT: vmovd %xmm0, (%rdi) 2560; AVX512F-NEXT: vzeroupper 2561; AVX512F-NEXT: retq 2562; 2563; AVX512VL-LABEL: trunc_ssat_v4i64_v4i8_store: 2564; AVX512VL: # %bb.0: 2565; AVX512VL-NEXT: vpmovsqb %ymm0, (%rdi) 2566; AVX512VL-NEXT: vzeroupper 2567; 
AVX512VL-NEXT: retq 2568; 2569; AVX512BW-LABEL: trunc_ssat_v4i64_v4i8_store: 2570; AVX512BW: # %bb.0: 2571; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2572; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0 2573; AVX512BW-NEXT: vmovd %xmm0, (%rdi) 2574; AVX512BW-NEXT: vzeroupper 2575; AVX512BW-NEXT: retq 2576; 2577; AVX512BWVL-LABEL: trunc_ssat_v4i64_v4i8_store: 2578; AVX512BWVL: # %bb.0: 2579; AVX512BWVL-NEXT: vpmovsqb %ymm0, (%rdi) 2580; AVX512BWVL-NEXT: vzeroupper 2581; AVX512BWVL-NEXT: retq 2582; 2583; SKX-LABEL: trunc_ssat_v4i64_v4i8_store: 2584; SKX: # %bb.0: 2585; SKX-NEXT: vpmovsqb %ymm0, (%rdi) 2586; SKX-NEXT: vzeroupper 2587; SKX-NEXT: retq 2588 %1 = icmp slt <4 x i64> %a0, <i64 127, i64 127, i64 127, i64 127> 2589 %2 = select <4 x i1> %1, <4 x i64> %a0, <4 x i64> <i64 127, i64 127, i64 127, i64 127> 2590 %3 = icmp sgt <4 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128> 2591 %4 = select <4 x i1> %3, <4 x i64> %2, <4 x i64> <i64 -128, i64 -128, i64 -128, i64 -128> 2592 %5 = trunc <4 x i64> %4 to <4 x i8> 2593 store <4 x i8> %5, ptr%p1 2594 ret void 2595} 2596 2597define <8 x i8> @trunc_ssat_v8i64_v8i8(ptr %p0) "min-legal-vector-width"="256" { 2598; SSE2-SSSE3-LABEL: trunc_ssat_v8i64_v8i8: 2599; SSE2-SSSE3: # %bb.0: 2600; SSE2-SSSE3-NEXT: movdqa (%rdi), %xmm6 2601; SSE2-SSSE3-NEXT: movdqa 16(%rdi), %xmm0 2602; SSE2-SSSE3-NEXT: movdqa 32(%rdi), %xmm3 2603; SSE2-SSSE3-NEXT: movdqa 48(%rdi), %xmm5 2604; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [127,127] 2605; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 2606; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2 2607; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm2 2608; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm2[1,1,3,3] 2609; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm8 2610; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm9 2611; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [2147483775,2147483775] 2612; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm10 2613; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm10 2614; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = 
xmm10[0,0,2,2] 2615; SSE2-SSSE3-NEXT: pand %xmm9, %xmm11 2616; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm10[1,1,3,3] 2617; SSE2-SSSE3-NEXT: por %xmm11, %xmm2 2618; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3 2619; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm2 2620; SSE2-SSSE3-NEXT: por %xmm3, %xmm2 2621; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm3 2622; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm3 2623; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm3[1,1,3,3] 2624; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm9 2625; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm10 2626; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm10 2627; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 2628; SSE2-SSSE3-NEXT: pand %xmm9, %xmm11 2629; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm10[1,1,3,3] 2630; SSE2-SSSE3-NEXT: por %xmm11, %xmm3 2631; SSE2-SSSE3-NEXT: pand %xmm3, %xmm5 2632; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm3 2633; SSE2-SSSE3-NEXT: por %xmm5, %xmm3 2634; SSE2-SSSE3-NEXT: movdqa %xmm6, %xmm5 2635; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm5 2636; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm5[1,1,3,3] 2637; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm9 2638; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm10 2639; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm10 2640; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 2641; SSE2-SSSE3-NEXT: pand %xmm9, %xmm11 2642; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm10[1,1,3,3] 2643; SSE2-SSSE3-NEXT: por %xmm11, %xmm5 2644; SSE2-SSSE3-NEXT: pand %xmm5, %xmm6 2645; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm5 2646; SSE2-SSSE3-NEXT: por %xmm6, %xmm5 2647; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm6 2648; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm6 2649; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm6[1,1,3,3] 2650; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm9 2651; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm7 2652; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2] 2653; SSE2-SSSE3-NEXT: pand %xmm9, %xmm6 2654; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm7[1,1,3,3] 2655; SSE2-SSSE3-NEXT: por %xmm6, %xmm8 2656; SSE2-SSSE3-NEXT: pand %xmm8, %xmm0 2657; SSE2-SSSE3-NEXT: pandn 
%xmm4, %xmm8 2658; SSE2-SSSE3-NEXT: por %xmm0, %xmm8 2659; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551488,18446744073709551488] 2660; SSE2-SSSE3-NEXT: movdqa %xmm8, %xmm0 2661; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm0 2662; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm0[1,1,3,3] 2663; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm6 2664; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm9 2665; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [18446744071562067840,18446744071562067840] 2666; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm0 2667; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2] 2668; SSE2-SSSE3-NEXT: pand %xmm9, %xmm10 2669; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm0[1,1,3,3] 2670; SSE2-SSSE3-NEXT: por %xmm10, %xmm9 2671; SSE2-SSSE3-NEXT: pand %xmm9, %xmm8 2672; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm9 2673; SSE2-SSSE3-NEXT: por %xmm8, %xmm9 2674; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm0 2675; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm0 2676; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm0[1,1,3,3] 2677; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm8 2678; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm0 2679; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm0[0,0,2,2] 2680; SSE2-SSSE3-NEXT: pand %xmm8, %xmm10 2681; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 2682; SSE2-SSSE3-NEXT: por %xmm10, %xmm0 2683; SSE2-SSSE3-NEXT: pand %xmm0, %xmm5 2684; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm0 2685; SSE2-SSSE3-NEXT: por %xmm5, %xmm0 2686; SSE2-SSSE3-NEXT: packssdw %xmm9, %xmm0 2687; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm5 2688; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm5 2689; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm5[1,1,3,3] 2690; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm8 2691; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm5 2692; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2] 2693; SSE2-SSSE3-NEXT: pand %xmm8, %xmm9 2694; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 2695; SSE2-SSSE3-NEXT: por %xmm9, %xmm5 2696; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3 2697; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm5 2698; SSE2-SSSE3-NEXT: por %xmm3, %xmm5 
2699; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1 2700; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] 2701; SSE2-SSSE3-NEXT: pcmpeqd %xmm6, %xmm3 2702; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm1 2703; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,0,2,2] 2704; SSE2-SSSE3-NEXT: pand %xmm3, %xmm6 2705; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2706; SSE2-SSSE3-NEXT: por %xmm6, %xmm1 2707; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2 2708; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm1 2709; SSE2-SSSE3-NEXT: por %xmm2, %xmm1 2710; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm1 2711; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0 2712; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0 2713; SSE2-SSSE3-NEXT: retq 2714; 2715; SSE41-LABEL: trunc_ssat_v8i64_v8i8: 2716; SSE41: # %bb.0: 2717; SSE41-NEXT: movdqa (%rdi), %xmm7 2718; SSE41-NEXT: movdqa 16(%rdi), %xmm5 2719; SSE41-NEXT: movdqa 32(%rdi), %xmm4 2720; SSE41-NEXT: movdqa 48(%rdi), %xmm8 2721; SSE41-NEXT: movapd {{.*#+}} xmm1 = [127,127] 2722; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648] 2723; SSE41-NEXT: movdqa %xmm4, %xmm3 2724; SSE41-NEXT: pxor %xmm2, %xmm3 2725; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147483775,2147483775] 2726; SSE41-NEXT: movdqa %xmm6, %xmm9 2727; SSE41-NEXT: pcmpgtd %xmm3, %xmm9 2728; SSE41-NEXT: pcmpeqd %xmm6, %xmm3 2729; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] 2730; SSE41-NEXT: pand %xmm3, %xmm0 2731; SSE41-NEXT: por %xmm9, %xmm0 2732; SSE41-NEXT: movapd %xmm1, %xmm3 2733; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3 2734; SSE41-NEXT: movdqa %xmm8, %xmm4 2735; SSE41-NEXT: pxor %xmm2, %xmm4 2736; SSE41-NEXT: movdqa %xmm6, %xmm9 2737; SSE41-NEXT: pcmpgtd %xmm4, %xmm9 2738; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 2739; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] 2740; SSE41-NEXT: pand %xmm4, %xmm0 2741; SSE41-NEXT: por %xmm9, %xmm0 2742; SSE41-NEXT: movapd %xmm1, %xmm4 2743; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm4 2744; SSE41-NEXT: movdqa %xmm7, %xmm8 2745; SSE41-NEXT: pxor %xmm2, %xmm8 2746; SSE41-NEXT: movdqa 
%xmm6, %xmm9 2747; SSE41-NEXT: pcmpgtd %xmm8, %xmm9 2748; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 2749; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] 2750; SSE41-NEXT: pand %xmm8, %xmm0 2751; SSE41-NEXT: por %xmm9, %xmm0 2752; SSE41-NEXT: movapd %xmm1, %xmm8 2753; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm8 2754; SSE41-NEXT: movdqa %xmm5, %xmm0 2755; SSE41-NEXT: pxor %xmm2, %xmm0 2756; SSE41-NEXT: movdqa %xmm0, %xmm7 2757; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 2758; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 2759; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 2760; SSE41-NEXT: pand %xmm7, %xmm0 2761; SSE41-NEXT: por %xmm6, %xmm0 2762; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm1 2763; SSE41-NEXT: movapd {{.*#+}} xmm5 = [18446744073709551488,18446744073709551488] 2764; SSE41-NEXT: movapd %xmm1, %xmm7 2765; SSE41-NEXT: xorpd %xmm2, %xmm7 2766; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840] 2767; SSE41-NEXT: movapd %xmm7, %xmm9 2768; SSE41-NEXT: pcmpeqd %xmm6, %xmm9 2769; SSE41-NEXT: pcmpgtd %xmm6, %xmm7 2770; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 2771; SSE41-NEXT: pand %xmm9, %xmm0 2772; SSE41-NEXT: por %xmm7, %xmm0 2773; SSE41-NEXT: movapd %xmm5, %xmm7 2774; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm7 2775; SSE41-NEXT: movapd %xmm8, %xmm1 2776; SSE41-NEXT: xorpd %xmm2, %xmm1 2777; SSE41-NEXT: movapd %xmm1, %xmm9 2778; SSE41-NEXT: pcmpeqd %xmm6, %xmm9 2779; SSE41-NEXT: pcmpgtd %xmm6, %xmm1 2780; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 2781; SSE41-NEXT: pand %xmm9, %xmm0 2782; SSE41-NEXT: por %xmm1, %xmm0 2783; SSE41-NEXT: movapd %xmm5, %xmm1 2784; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1 2785; SSE41-NEXT: packssdw %xmm7, %xmm1 2786; SSE41-NEXT: movapd %xmm4, %xmm7 2787; SSE41-NEXT: xorpd %xmm2, %xmm7 2788; SSE41-NEXT: movapd %xmm7, %xmm8 2789; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 2790; SSE41-NEXT: pcmpgtd %xmm6, %xmm7 2791; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 2792; SSE41-NEXT: pand %xmm8, %xmm0 2793; SSE41-NEXT: por %xmm7, %xmm0 
2794; SSE41-NEXT: movapd %xmm5, %xmm7 2795; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm7 2796; SSE41-NEXT: xorpd %xmm3, %xmm2 2797; SSE41-NEXT: movapd %xmm2, %xmm4 2798; SSE41-NEXT: pcmpeqd %xmm6, %xmm4 2799; SSE41-NEXT: pcmpgtd %xmm6, %xmm2 2800; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 2801; SSE41-NEXT: pand %xmm4, %xmm0 2802; SSE41-NEXT: por %xmm2, %xmm0 2803; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm5 2804; SSE41-NEXT: packssdw %xmm7, %xmm5 2805; SSE41-NEXT: packssdw %xmm5, %xmm1 2806; SSE41-NEXT: packsswb %xmm1, %xmm1 2807; SSE41-NEXT: movdqa %xmm1, %xmm0 2808; SSE41-NEXT: retq 2809; 2810; AVX1-LABEL: trunc_ssat_v8i64_v8i8: 2811; AVX1: # %bb.0: 2812; AVX1-NEXT: vmovdqa (%rdi), %xmm0 2813; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 2814; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2 2815; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3 2816; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm4 = [127,127] 2817; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm5 2818; AVX1-NEXT: vblendvpd %xmm5, %xmm2, %xmm4, %xmm2 2819; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm5 2820; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm4, %xmm3 2821; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm5 2822; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 2823; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm5 2824; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm4, %xmm1 2825; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm4 = [18446744073709551488,18446744073709551488] 2826; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm5 2827; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm4, %xmm1 2828; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 2829; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 2830; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 2831; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm1 2832; AVX1-NEXT: vblendvpd %xmm1, %xmm3, %xmm4, %xmm1 2833; AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm3 2834; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm4, %xmm2 2835; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1 2836; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 2837; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2838; AVX1-NEXT: retq 2839; 2840; AVX2-LABEL: 
trunc_ssat_v8i64_v8i8: 2841; AVX2: # %bb.0: 2842; AVX2-NEXT: vmovdqa (%rdi), %ymm0 2843; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1 2844; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [127,127,127,127] 2845; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3 2846; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 2847; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3 2848; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 2849; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] 2850; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3 2851; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 2852; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3 2853; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 2854; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 2855; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2856; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 2857; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 2858; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2859; AVX2-NEXT: vzeroupper 2860; AVX2-NEXT: retq 2861; 2862; AVX512-LABEL: trunc_ssat_v8i64_v8i8: 2863; AVX512: # %bb.0: 2864; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 2865; AVX512-NEXT: vpmovsqb %zmm0, %xmm0 2866; AVX512-NEXT: vzeroupper 2867; AVX512-NEXT: retq 2868; 2869; SKX-LABEL: trunc_ssat_v8i64_v8i8: 2870; SKX: # %bb.0: 2871; SKX-NEXT: vmovdqa (%rdi), %ymm0 2872; SKX-NEXT: vmovdqa 32(%rdi), %ymm1 2873; SKX-NEXT: vpmovsqb %ymm1, %xmm1 2874; SKX-NEXT: vpmovsqb %ymm0, %xmm0 2875; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2876; SKX-NEXT: vzeroupper 2877; SKX-NEXT: retq 2878 %a0 = load <8 x i64>, ptr %p0 2879 %1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127> 2880 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127> 2881 %3 = icmp sgt <8 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128> 2882 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x 
i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128> 2883 %5 = trunc <8 x i64> %4 to <8 x i8> 2884 ret <8 x i8> %5 2885} 2886 2887; TODO: The AVX1 codegen shows a missed opportunity to narrow blendv+logic to 128-bit. 2888 2889define void @trunc_ssat_v8i64_v8i8_store(ptr %p0, ptr%p1) "min-legal-vector-width"="256" { 2890; SSE2-SSSE3-LABEL: trunc_ssat_v8i64_v8i8_store: 2891; SSE2-SSSE3: # %bb.0: 2892; SSE2-SSSE3-NEXT: movdqa (%rdi), %xmm6 2893; SSE2-SSSE3-NEXT: movdqa 16(%rdi), %xmm3 2894; SSE2-SSSE3-NEXT: movdqa 32(%rdi), %xmm2 2895; SSE2-SSSE3-NEXT: movdqa 48(%rdi), %xmm5 2896; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [127,127] 2897; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648] 2898; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1 2899; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm1 2900; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm1[1,1,3,3] 2901; SSE2-SSSE3-NEXT: pxor %xmm8, %xmm8 2902; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm9 2903; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [2147483775,2147483775] 2904; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm10 2905; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm10 2906; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 2907; SSE2-SSSE3-NEXT: pand %xmm9, %xmm11 2908; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm10[1,1,3,3] 2909; SSE2-SSSE3-NEXT: por %xmm11, %xmm1 2910; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2 2911; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm1 2912; SSE2-SSSE3-NEXT: por %xmm2, %xmm1 2913; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm2 2914; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm2 2915; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm2[1,1,3,3] 2916; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm9 2917; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm10 2918; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm10 2919; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 2920; SSE2-SSSE3-NEXT: pand %xmm9, %xmm11 2921; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm10[1,1,3,3] 2922; SSE2-SSSE3-NEXT: por %xmm11, %xmm2 2923; SSE2-SSSE3-NEXT: pand %xmm2, %xmm5 2924; 
SSE2-SSSE3-NEXT: pandn %xmm4, %xmm2 2925; SSE2-SSSE3-NEXT: por %xmm5, %xmm2 2926; SSE2-SSSE3-NEXT: movdqa %xmm6, %xmm5 2927; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm5 2928; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm5[1,1,3,3] 2929; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm9 2930; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm10 2931; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm10 2932; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 2933; SSE2-SSSE3-NEXT: pand %xmm9, %xmm11 2934; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm10[1,1,3,3] 2935; SSE2-SSSE3-NEXT: por %xmm11, %xmm5 2936; SSE2-SSSE3-NEXT: pand %xmm5, %xmm6 2937; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm5 2938; SSE2-SSSE3-NEXT: por %xmm6, %xmm5 2939; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm6 2940; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm6 2941; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm6[1,1,3,3] 2942; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm9 2943; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm7 2944; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2] 2945; SSE2-SSSE3-NEXT: pand %xmm9, %xmm6 2946; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3] 2947; SSE2-SSSE3-NEXT: por %xmm6, %xmm7 2948; SSE2-SSSE3-NEXT: pand %xmm7, %xmm3 2949; SSE2-SSSE3-NEXT: pandn %xmm4, %xmm7 2950; SSE2-SSSE3-NEXT: por %xmm3, %xmm7 2951; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551488,18446744073709551488] 2952; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm8 2953; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm8 2954; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm8[1,1,3,3] 2955; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm4 2956; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm9 2957; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840] 2958; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm8 2959; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm8[0,0,2,2] 2960; SSE2-SSSE3-NEXT: pand %xmm9, %xmm10 2961; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm8[1,1,3,3] 2962; SSE2-SSSE3-NEXT: por %xmm10, %xmm8 2963; SSE2-SSSE3-NEXT: pand %xmm8, %xmm7 2964; SSE2-SSSE3-NEXT: pandn %xmm3, %xmm8 2965; 
SSE2-SSSE3-NEXT: por %xmm7, %xmm8 2966; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm7 2967; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm7 2968; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm7[1,1,3,3] 2969; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm9 2970; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm7 2971; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2] 2972; SSE2-SSSE3-NEXT: pand %xmm9, %xmm10 2973; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3] 2974; SSE2-SSSE3-NEXT: por %xmm10, %xmm7 2975; SSE2-SSSE3-NEXT: pand %xmm7, %xmm5 2976; SSE2-SSSE3-NEXT: pandn %xmm3, %xmm7 2977; SSE2-SSSE3-NEXT: por %xmm5, %xmm7 2978; SSE2-SSSE3-NEXT: packssdw %xmm8, %xmm7 2979; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm5 2980; SSE2-SSSE3-NEXT: pxor %xmm0, %xmm5 2981; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm5[1,1,3,3] 2982; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm8 2983; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm5 2984; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm5[0,0,2,2] 2985; SSE2-SSSE3-NEXT: pand %xmm8, %xmm9 2986; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 2987; SSE2-SSSE3-NEXT: por %xmm9, %xmm5 2988; SSE2-SSSE3-NEXT: pand %xmm5, %xmm2 2989; SSE2-SSSE3-NEXT: pandn %xmm3, %xmm5 2990; SSE2-SSSE3-NEXT: por %xmm2, %xmm5 2991; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm0 2992; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 2993; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm2 2994; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm0 2995; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2] 2996; SSE2-SSSE3-NEXT: pand %xmm2, %xmm4 2997; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 2998; SSE2-SSSE3-NEXT: por %xmm4, %xmm0 2999; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1 3000; SSE2-SSSE3-NEXT: pandn %xmm3, %xmm0 3001; SSE2-SSSE3-NEXT: por %xmm1, %xmm0 3002; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm0 3003; SSE2-SSSE3-NEXT: packssdw %xmm0, %xmm7 3004; SSE2-SSSE3-NEXT: packsswb %xmm7, %xmm7 3005; SSE2-SSSE3-NEXT: movq %xmm7, (%rsi) 3006; SSE2-SSSE3-NEXT: retq 3007; 3008; SSE41-LABEL: trunc_ssat_v8i64_v8i8_store: 3009; SSE41: # %bb.0: 3010; 
SSE41-NEXT: movdqa (%rdi), %xmm7 3011; SSE41-NEXT: movdqa 16(%rdi), %xmm5 3012; SSE41-NEXT: movdqa 32(%rdi), %xmm3 3013; SSE41-NEXT: movdqa 48(%rdi), %xmm8 3014; SSE41-NEXT: movapd {{.*#+}} xmm4 = [127,127] 3015; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = [2147483648,2147483648] 3016; SSE41-NEXT: movdqa %xmm3, %xmm2 3017; SSE41-NEXT: pxor %xmm1, %xmm2 3018; SSE41-NEXT: pmovzxdq {{.*#+}} xmm6 = [2147483775,2147483775] 3019; SSE41-NEXT: movdqa %xmm6, %xmm9 3020; SSE41-NEXT: pcmpgtd %xmm2, %xmm9 3021; SSE41-NEXT: pcmpeqd %xmm6, %xmm2 3022; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] 3023; SSE41-NEXT: pand %xmm2, %xmm0 3024; SSE41-NEXT: por %xmm9, %xmm0 3025; SSE41-NEXT: movapd %xmm4, %xmm2 3026; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 3027; SSE41-NEXT: movdqa %xmm8, %xmm3 3028; SSE41-NEXT: pxor %xmm1, %xmm3 3029; SSE41-NEXT: movdqa %xmm6, %xmm9 3030; SSE41-NEXT: pcmpgtd %xmm3, %xmm9 3031; SSE41-NEXT: pcmpeqd %xmm6, %xmm3 3032; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] 3033; SSE41-NEXT: pand %xmm3, %xmm0 3034; SSE41-NEXT: por %xmm9, %xmm0 3035; SSE41-NEXT: movapd %xmm4, %xmm3 3036; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm3 3037; SSE41-NEXT: movdqa %xmm7, %xmm8 3038; SSE41-NEXT: pxor %xmm1, %xmm8 3039; SSE41-NEXT: movdqa %xmm6, %xmm9 3040; SSE41-NEXT: pcmpgtd %xmm8, %xmm9 3041; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 3042; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] 3043; SSE41-NEXT: pand %xmm8, %xmm0 3044; SSE41-NEXT: por %xmm9, %xmm0 3045; SSE41-NEXT: movapd %xmm4, %xmm8 3046; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm8 3047; SSE41-NEXT: movdqa %xmm5, %xmm0 3048; SSE41-NEXT: pxor %xmm1, %xmm0 3049; SSE41-NEXT: movdqa %xmm0, %xmm7 3050; SSE41-NEXT: pcmpeqd %xmm6, %xmm7 3051; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 3052; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 3053; SSE41-NEXT: pand %xmm7, %xmm0 3054; SSE41-NEXT: por %xmm6, %xmm0 3055; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm4 3056; SSE41-NEXT: movapd {{.*#+}} xmm5 = [18446744073709551488,18446744073709551488] 3057; 
SSE41-NEXT: movapd %xmm4, %xmm7 3058; SSE41-NEXT: xorpd %xmm1, %xmm7 3059; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [18446744071562067840,18446744071562067840] 3060; SSE41-NEXT: movapd %xmm7, %xmm9 3061; SSE41-NEXT: pcmpeqd %xmm6, %xmm9 3062; SSE41-NEXT: pcmpgtd %xmm6, %xmm7 3063; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 3064; SSE41-NEXT: pand %xmm9, %xmm0 3065; SSE41-NEXT: por %xmm7, %xmm0 3066; SSE41-NEXT: movapd %xmm5, %xmm7 3067; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm7 3068; SSE41-NEXT: movapd %xmm8, %xmm4 3069; SSE41-NEXT: xorpd %xmm1, %xmm4 3070; SSE41-NEXT: movapd %xmm4, %xmm9 3071; SSE41-NEXT: pcmpeqd %xmm6, %xmm9 3072; SSE41-NEXT: pcmpgtd %xmm6, %xmm4 3073; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,0,2,2] 3074; SSE41-NEXT: pand %xmm9, %xmm0 3075; SSE41-NEXT: por %xmm4, %xmm0 3076; SSE41-NEXT: movapd %xmm5, %xmm4 3077; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm4 3078; SSE41-NEXT: packssdw %xmm7, %xmm4 3079; SSE41-NEXT: movapd %xmm3, %xmm7 3080; SSE41-NEXT: xorpd %xmm1, %xmm7 3081; SSE41-NEXT: movapd %xmm7, %xmm8 3082; SSE41-NEXT: pcmpeqd %xmm6, %xmm8 3083; SSE41-NEXT: pcmpgtd %xmm6, %xmm7 3084; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,0,2,2] 3085; SSE41-NEXT: pand %xmm8, %xmm0 3086; SSE41-NEXT: por %xmm7, %xmm0 3087; SSE41-NEXT: movapd %xmm5, %xmm7 3088; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm7 3089; SSE41-NEXT: xorpd %xmm2, %xmm1 3090; SSE41-NEXT: movapd %xmm1, %xmm3 3091; SSE41-NEXT: pcmpeqd %xmm6, %xmm3 3092; SSE41-NEXT: pcmpgtd %xmm6, %xmm1 3093; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 3094; SSE41-NEXT: pand %xmm3, %xmm0 3095; SSE41-NEXT: por %xmm1, %xmm0 3096; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm5 3097; SSE41-NEXT: packssdw %xmm7, %xmm5 3098; SSE41-NEXT: packssdw %xmm5, %xmm4 3099; SSE41-NEXT: packsswb %xmm4, %xmm4 3100; SSE41-NEXT: movq %xmm4, (%rsi) 3101; SSE41-NEXT: retq 3102; 3103; AVX1-LABEL: trunc_ssat_v8i64_v8i8_store: 3104; AVX1: # %bb.0: 3105; AVX1-NEXT: vmovdqa (%rdi), %xmm0 3106; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 3107; 
AVX1-NEXT: vmovdqa 32(%rdi), %xmm2 3108; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3 3109; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm4 = [127,127] 3110; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm5 3111; AVX1-NEXT: vblendvpd %xmm5, %xmm2, %xmm4, %xmm2 3112; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm5 3113; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm4, %xmm3 3114; AVX1-NEXT: vpcmpgtq %xmm0, %xmm4, %xmm5 3115; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 3116; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm5 3117; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm4, %xmm1 3118; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm4 = [18446744073709551488,18446744073709551488] 3119; AVX1-NEXT: vpcmpgtq %xmm4, %xmm1, %xmm5 3120; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm4, %xmm1 3121; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm5 3122; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm4, %xmm0 3123; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 3124; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm1 3125; AVX1-NEXT: vblendvpd %xmm1, %xmm3, %xmm4, %xmm1 3126; AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm3 3127; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm4, %xmm2 3128; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1 3129; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 3130; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 3131; AVX1-NEXT: vmovq %xmm0, (%rsi) 3132; AVX1-NEXT: retq 3133; 3134; AVX2-LABEL: trunc_ssat_v8i64_v8i8_store: 3135; AVX2: # %bb.0: 3136; AVX2-NEXT: vmovdqa (%rdi), %ymm0 3137; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1 3138; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [127,127,127,127] 3139; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm3 3140; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 3141; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm3 3142; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 3143; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] 3144; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm3 3145; AVX2-NEXT: vblendvpd %ymm3, %ymm1, %ymm2, %ymm1 3146; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3 3147; AVX2-NEXT: vblendvpd %ymm3, %ymm0, 
%ymm2, %ymm0 3148; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 3149; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3150; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 3151; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 3152; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 3153; AVX2-NEXT: vmovq %xmm0, (%rsi) 3154; AVX2-NEXT: vzeroupper 3155; AVX2-NEXT: retq 3156; 3157; AVX512-LABEL: trunc_ssat_v8i64_v8i8_store: 3158; AVX512: # %bb.0: 3159; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 3160; AVX512-NEXT: vpmovsqb %zmm0, (%rsi) 3161; AVX512-NEXT: vzeroupper 3162; AVX512-NEXT: retq 3163; 3164; SKX-LABEL: trunc_ssat_v8i64_v8i8_store: 3165; SKX: # %bb.0: 3166; SKX-NEXT: vmovdqa (%rdi), %ymm0 3167; SKX-NEXT: vmovdqa 32(%rdi), %ymm1 3168; SKX-NEXT: vpmovsqb %ymm1, %xmm1 3169; SKX-NEXT: vpmovsqb %ymm0, %xmm0 3170; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3171; SKX-NEXT: vmovq %xmm0, (%rsi) 3172; SKX-NEXT: vzeroupper 3173; SKX-NEXT: retq 3174 %a0 = load <8 x i64>, ptr %p0 3175 %1 = icmp slt <8 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127> 3176 %2 = select <8 x i1> %1, <8 x i64> %a0, <8 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127> 3177 %3 = icmp sgt <8 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128> 3178 %4 = select <8 x i1> %3, <8 x i64> %2, <8 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128> 3179 %5 = trunc <8 x i64> %4 to <8 x i8> 3180 store <8 x i8> %5, ptr%p1 3181 ret void 3182} 3183 3184define <16 x i8> @trunc_ssat_v16i64_v16i8(ptr %p0) "min-legal-vector-width"="256" { 3185; SSE2-SSSE3-LABEL: trunc_ssat_v16i64_v16i8: 3186; SSE2-SSSE3: # %bb.0: 3187; SSE2-SSSE3-NEXT: movdqa (%rdi), %xmm8 3188; SSE2-SSSE3-NEXT: movdqa 16(%rdi), %xmm0 3189; SSE2-SSSE3-NEXT: movdqa 32(%rdi), %xmm12 3190; SSE2-SSSE3-NEXT: movdqa 48(%rdi), %xmm11 3191; SSE2-SSSE3-NEXT: movdqa 80(%rdi), %xmm7 3192; SSE2-SSSE3-NEXT: movdqa 64(%rdi), 
%xmm5 3193; SSE2-SSSE3-NEXT: movdqa 112(%rdi), %xmm4 3194; SSE2-SSSE3-NEXT: movdqa 96(%rdi), %xmm3 3195; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [127,127] 3196; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648] 3197; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2 3198; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm2 3199; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm2[1,1,3,3] 3200; SSE2-SSSE3-NEXT: pxor %xmm10, %xmm10 3201; SSE2-SSSE3-NEXT: pcmpeqd %xmm10, %xmm13 3202; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483775,2147483775] 3203; SSE2-SSSE3-NEXT: movdqa %xmm9, %xmm14 3204; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm14 3205; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2] 3206; SSE2-SSSE3-NEXT: pand %xmm13, %xmm15 3207; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm14[1,1,3,3] 3208; SSE2-SSSE3-NEXT: por %xmm15, %xmm2 3209; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3 3210; SSE2-SSSE3-NEXT: pandn %xmm6, %xmm2 3211; SSE2-SSSE3-NEXT: por %xmm3, %xmm2 3212; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3 3213; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm3 3214; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm3[1,1,3,3] 3215; SSE2-SSSE3-NEXT: pcmpeqd %xmm10, %xmm13 3216; SSE2-SSSE3-NEXT: movdqa %xmm9, %xmm14 3217; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm14 3218; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2] 3219; SSE2-SSSE3-NEXT: pand %xmm13, %xmm15 3220; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm14[1,1,3,3] 3221; SSE2-SSSE3-NEXT: por %xmm15, %xmm3 3222; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4 3223; SSE2-SSSE3-NEXT: pandn %xmm6, %xmm3 3224; SSE2-SSSE3-NEXT: por %xmm4, %xmm3 3225; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm4 3226; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm4 3227; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm4[1,1,3,3] 3228; SSE2-SSSE3-NEXT: pcmpeqd %xmm10, %xmm13 3229; SSE2-SSSE3-NEXT: movdqa %xmm9, %xmm14 3230; SSE2-SSSE3-NEXT: pcmpgtd %xmm4, %xmm14 3231; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2] 3232; SSE2-SSSE3-NEXT: pand %xmm13, %xmm15 3233; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = 
xmm14[1,1,3,3] 3234; SSE2-SSSE3-NEXT: por %xmm15, %xmm4 3235; SSE2-SSSE3-NEXT: pand %xmm4, %xmm5 3236; SSE2-SSSE3-NEXT: pandn %xmm6, %xmm4 3237; SSE2-SSSE3-NEXT: por %xmm5, %xmm4 3238; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm5 3239; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm5 3240; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm5[1,1,3,3] 3241; SSE2-SSSE3-NEXT: pcmpeqd %xmm10, %xmm13 3242; SSE2-SSSE3-NEXT: movdqa %xmm9, %xmm14 3243; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm14 3244; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2] 3245; SSE2-SSSE3-NEXT: pand %xmm13, %xmm15 3246; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm14[1,1,3,3] 3247; SSE2-SSSE3-NEXT: por %xmm15, %xmm5 3248; SSE2-SSSE3-NEXT: pand %xmm5, %xmm7 3249; SSE2-SSSE3-NEXT: pandn %xmm6, %xmm5 3250; SSE2-SSSE3-NEXT: por %xmm7, %xmm5 3251; SSE2-SSSE3-NEXT: movdqa %xmm12, %xmm7 3252; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm7 3253; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm7[1,1,3,3] 3254; SSE2-SSSE3-NEXT: pcmpeqd %xmm10, %xmm13 3255; SSE2-SSSE3-NEXT: movdqa %xmm9, %xmm14 3256; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm14 3257; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2] 3258; SSE2-SSSE3-NEXT: pand %xmm13, %xmm15 3259; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm14[1,1,3,3] 3260; SSE2-SSSE3-NEXT: por %xmm15, %xmm7 3261; SSE2-SSSE3-NEXT: pand %xmm7, %xmm12 3262; SSE2-SSSE3-NEXT: pandn %xmm6, %xmm7 3263; SSE2-SSSE3-NEXT: por %xmm12, %xmm7 3264; SSE2-SSSE3-NEXT: movdqa %xmm11, %xmm12 3265; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm12 3266; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm12[1,1,3,3] 3267; SSE2-SSSE3-NEXT: pcmpeqd %xmm10, %xmm13 3268; SSE2-SSSE3-NEXT: movdqa %xmm9, %xmm14 3269; SSE2-SSSE3-NEXT: pcmpgtd %xmm12, %xmm14 3270; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2] 3271; SSE2-SSSE3-NEXT: pand %xmm13, %xmm15 3272; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm12 = xmm14[1,1,3,3] 3273; SSE2-SSSE3-NEXT: por %xmm15, %xmm12 3274; SSE2-SSSE3-NEXT: pand %xmm12, %xmm11 3275; SSE2-SSSE3-NEXT: pandn %xmm6, %xmm12 3276; 
SSE2-SSSE3-NEXT: por %xmm11, %xmm12 3277; SSE2-SSSE3-NEXT: movdqa %xmm8, %xmm11 3278; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm11 3279; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm11[1,1,3,3] 3280; SSE2-SSSE3-NEXT: pcmpeqd %xmm10, %xmm13 3281; SSE2-SSSE3-NEXT: movdqa %xmm9, %xmm14 3282; SSE2-SSSE3-NEXT: pcmpgtd %xmm11, %xmm14 3283; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2] 3284; SSE2-SSSE3-NEXT: pand %xmm13, %xmm15 3285; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm14[1,1,3,3] 3286; SSE2-SSSE3-NEXT: por %xmm15, %xmm11 3287; SSE2-SSSE3-NEXT: pand %xmm11, %xmm8 3288; SSE2-SSSE3-NEXT: pandn %xmm6, %xmm11 3289; SSE2-SSSE3-NEXT: por %xmm8, %xmm11 3290; SSE2-SSSE3-NEXT: movdqa %xmm0, %xmm8 3291; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm8 3292; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm8[1,1,3,3] 3293; SSE2-SSSE3-NEXT: pcmpeqd %xmm10, %xmm13 3294; SSE2-SSSE3-NEXT: pcmpgtd %xmm8, %xmm9 3295; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm8 = xmm9[0,0,2,2] 3296; SSE2-SSSE3-NEXT: pand %xmm13, %xmm8 3297; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm9[1,1,3,3] 3298; SSE2-SSSE3-NEXT: por %xmm8, %xmm10 3299; SSE2-SSSE3-NEXT: pand %xmm10, %xmm0 3300; SSE2-SSSE3-NEXT: pandn %xmm6, %xmm10 3301; SSE2-SSSE3-NEXT: por %xmm0, %xmm10 3302; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [18446744073709551488,18446744073709551488] 3303; SSE2-SSSE3-NEXT: movdqa %xmm10, %xmm0 3304; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm0 3305; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm0[1,1,3,3] 3306; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm8 3307; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm13 3308; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [18446744071562067840,18446744071562067840] 3309; SSE2-SSSE3-NEXT: pcmpgtd %xmm9, %xmm0 3310; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm14 = xmm0[0,0,2,2] 3311; SSE2-SSSE3-NEXT: pand %xmm13, %xmm14 3312; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm0[1,1,3,3] 3313; SSE2-SSSE3-NEXT: por %xmm14, %xmm13 3314; SSE2-SSSE3-NEXT: pand %xmm13, %xmm10 3315; SSE2-SSSE3-NEXT: pandn %xmm6, %xmm13 3316; SSE2-SSSE3-NEXT: por 
%xmm10, %xmm13 3317; SSE2-SSSE3-NEXT: movdqa %xmm11, %xmm0 3318; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm0 3319; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm0[1,1,3,3] 3320; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm10 3321; SSE2-SSSE3-NEXT: pcmpgtd %xmm9, %xmm0 3322; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm14 = xmm0[0,0,2,2] 3323; SSE2-SSSE3-NEXT: pand %xmm10, %xmm14 3324; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 3325; SSE2-SSSE3-NEXT: por %xmm14, %xmm0 3326; SSE2-SSSE3-NEXT: pand %xmm0, %xmm11 3327; SSE2-SSSE3-NEXT: pandn %xmm6, %xmm0 3328; SSE2-SSSE3-NEXT: por %xmm11, %xmm0 3329; SSE2-SSSE3-NEXT: packssdw %xmm13, %xmm0 3330; SSE2-SSSE3-NEXT: movdqa %xmm12, %xmm10 3331; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm10 3332; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm10[1,1,3,3] 3333; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm11 3334; SSE2-SSSE3-NEXT: pcmpgtd %xmm9, %xmm10 3335; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm10[0,0,2,2] 3336; SSE2-SSSE3-NEXT: pand %xmm11, %xmm13 3337; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3] 3338; SSE2-SSSE3-NEXT: por %xmm13, %xmm10 3339; SSE2-SSSE3-NEXT: pand %xmm10, %xmm12 3340; SSE2-SSSE3-NEXT: pandn %xmm6, %xmm10 3341; SSE2-SSSE3-NEXT: por %xmm12, %xmm10 3342; SSE2-SSSE3-NEXT: movdqa %xmm7, %xmm11 3343; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm11 3344; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm12 = xmm11[1,1,3,3] 3345; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm12 3346; SSE2-SSSE3-NEXT: pcmpgtd %xmm9, %xmm11 3347; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm11[0,0,2,2] 3348; SSE2-SSSE3-NEXT: pand %xmm12, %xmm13 3349; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm11[1,1,3,3] 3350; SSE2-SSSE3-NEXT: por %xmm13, %xmm11 3351; SSE2-SSSE3-NEXT: pand %xmm11, %xmm7 3352; SSE2-SSSE3-NEXT: pandn %xmm6, %xmm11 3353; SSE2-SSSE3-NEXT: por %xmm7, %xmm11 3354; SSE2-SSSE3-NEXT: packssdw %xmm10, %xmm11 3355; SSE2-SSSE3-NEXT: packssdw %xmm11, %xmm0 3356; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm7 3357; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm7 3358; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm10 = 
xmm7[1,1,3,3] 3359; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm10 3360; SSE2-SSSE3-NEXT: pcmpgtd %xmm9, %xmm7 3361; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm7[0,0,2,2] 3362; SSE2-SSSE3-NEXT: pand %xmm10, %xmm11 3363; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3] 3364; SSE2-SSSE3-NEXT: por %xmm11, %xmm7 3365; SSE2-SSSE3-NEXT: pand %xmm7, %xmm5 3366; SSE2-SSSE3-NEXT: pandn %xmm6, %xmm7 3367; SSE2-SSSE3-NEXT: por %xmm5, %xmm7 3368; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm5 3369; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm5 3370; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm5[1,1,3,3] 3371; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm10 3372; SSE2-SSSE3-NEXT: pcmpgtd %xmm9, %xmm5 3373; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm11 = xmm5[0,0,2,2] 3374; SSE2-SSSE3-NEXT: pand %xmm10, %xmm11 3375; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 3376; SSE2-SSSE3-NEXT: por %xmm11, %xmm5 3377; SSE2-SSSE3-NEXT: pand %xmm5, %xmm4 3378; SSE2-SSSE3-NEXT: pandn %xmm6, %xmm5 3379; SSE2-SSSE3-NEXT: por %xmm4, %xmm5 3380; SSE2-SSSE3-NEXT: packssdw %xmm7, %xmm5 3381; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm4 3382; SSE2-SSSE3-NEXT: pxor %xmm1, %xmm4 3383; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm4[1,1,3,3] 3384; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm7 3385; SSE2-SSSE3-NEXT: pcmpgtd %xmm9, %xmm4 3386; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm4[0,0,2,2] 3387; SSE2-SSSE3-NEXT: pand %xmm7, %xmm10 3388; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 3389; SSE2-SSSE3-NEXT: por %xmm10, %xmm4 3390; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 3391; SSE2-SSSE3-NEXT: pandn %xmm6, %xmm4 3392; SSE2-SSSE3-NEXT: por %xmm3, %xmm4 3393; SSE2-SSSE3-NEXT: pxor %xmm2, %xmm1 3394; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] 3395; SSE2-SSSE3-NEXT: pcmpeqd %xmm8, %xmm3 3396; SSE2-SSSE3-NEXT: pcmpgtd %xmm9, %xmm1 3397; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm1[0,0,2,2] 3398; SSE2-SSSE3-NEXT: pand %xmm3, %xmm7 3399; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 3400; SSE2-SSSE3-NEXT: por %xmm7, %xmm1 3401; 
SSE2-SSSE3-NEXT: pand %xmm1, %xmm2 3402; SSE2-SSSE3-NEXT: pandn %xmm6, %xmm1 3403; SSE2-SSSE3-NEXT: por %xmm2, %xmm1 3404; SSE2-SSSE3-NEXT: packssdw %xmm4, %xmm1 3405; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm5 3406; SSE2-SSSE3-NEXT: packsswb %xmm5, %xmm0 3407; SSE2-SSSE3-NEXT: retq 3408; 3409; SSE41-LABEL: trunc_ssat_v16i64_v16i8: 3410; SSE41: # %bb.0: 3411; SSE41-NEXT: movdqa (%rdi), %xmm8 3412; SSE41-NEXT: movdqa 16(%rdi), %xmm7 3413; SSE41-NEXT: movdqa 32(%rdi), %xmm12 3414; SSE41-NEXT: movdqa 48(%rdi), %xmm11 3415; SSE41-NEXT: movdqa 80(%rdi), %xmm10 3416; SSE41-NEXT: movdqa 64(%rdi), %xmm6 3417; SSE41-NEXT: movdqa 112(%rdi), %xmm5 3418; SSE41-NEXT: movdqa 96(%rdi), %xmm4 3419; SSE41-NEXT: movapd {{.*#+}} xmm1 = [127,127] 3420; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648] 3421; SSE41-NEXT: movdqa %xmm4, %xmm3 3422; SSE41-NEXT: pxor %xmm2, %xmm3 3423; SSE41-NEXT: pmovzxdq {{.*#+}} xmm9 = [2147483775,2147483775] 3424; SSE41-NEXT: movdqa %xmm9, %xmm13 3425; SSE41-NEXT: pcmpgtd %xmm3, %xmm13 3426; SSE41-NEXT: pcmpeqd %xmm9, %xmm3 3427; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] 3428; SSE41-NEXT: pand %xmm3, %xmm0 3429; SSE41-NEXT: por %xmm13, %xmm0 3430; SSE41-NEXT: movapd %xmm1, %xmm3 3431; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm3 3432; SSE41-NEXT: movdqa %xmm5, %xmm4 3433; SSE41-NEXT: pxor %xmm2, %xmm4 3434; SSE41-NEXT: movdqa %xmm9, %xmm13 3435; SSE41-NEXT: pcmpgtd %xmm4, %xmm13 3436; SSE41-NEXT: pcmpeqd %xmm9, %xmm4 3437; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] 3438; SSE41-NEXT: pand %xmm4, %xmm0 3439; SSE41-NEXT: por %xmm13, %xmm0 3440; SSE41-NEXT: movapd %xmm1, %xmm4 3441; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm4 3442; SSE41-NEXT: movdqa %xmm6, %xmm5 3443; SSE41-NEXT: pxor %xmm2, %xmm5 3444; SSE41-NEXT: movdqa %xmm9, %xmm13 3445; SSE41-NEXT: pcmpgtd %xmm5, %xmm13 3446; SSE41-NEXT: pcmpeqd %xmm9, %xmm5 3447; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] 3448; SSE41-NEXT: pand %xmm5, %xmm0 3449; SSE41-NEXT: por %xmm13, %xmm0 
3450; SSE41-NEXT: movapd %xmm1, %xmm5 3451; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm5 3452; SSE41-NEXT: movdqa %xmm10, %xmm6 3453; SSE41-NEXT: pxor %xmm2, %xmm6 3454; SSE41-NEXT: movdqa %xmm9, %xmm13 3455; SSE41-NEXT: pcmpgtd %xmm6, %xmm13 3456; SSE41-NEXT: pcmpeqd %xmm9, %xmm6 3457; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] 3458; SSE41-NEXT: pand %xmm6, %xmm0 3459; SSE41-NEXT: por %xmm13, %xmm0 3460; SSE41-NEXT: movapd %xmm1, %xmm6 3461; SSE41-NEXT: blendvpd %xmm0, %xmm10, %xmm6 3462; SSE41-NEXT: movdqa %xmm12, %xmm10 3463; SSE41-NEXT: pxor %xmm2, %xmm10 3464; SSE41-NEXT: movdqa %xmm9, %xmm13 3465; SSE41-NEXT: pcmpgtd %xmm10, %xmm13 3466; SSE41-NEXT: pcmpeqd %xmm9, %xmm10 3467; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] 3468; SSE41-NEXT: pand %xmm10, %xmm0 3469; SSE41-NEXT: por %xmm13, %xmm0 3470; SSE41-NEXT: movapd %xmm1, %xmm10 3471; SSE41-NEXT: blendvpd %xmm0, %xmm12, %xmm10 3472; SSE41-NEXT: movdqa %xmm11, %xmm12 3473; SSE41-NEXT: pxor %xmm2, %xmm12 3474; SSE41-NEXT: movdqa %xmm9, %xmm13 3475; SSE41-NEXT: pcmpgtd %xmm12, %xmm13 3476; SSE41-NEXT: pcmpeqd %xmm9, %xmm12 3477; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] 3478; SSE41-NEXT: pand %xmm12, %xmm0 3479; SSE41-NEXT: por %xmm13, %xmm0 3480; SSE41-NEXT: movapd %xmm1, %xmm12 3481; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm12 3482; SSE41-NEXT: movdqa %xmm8, %xmm11 3483; SSE41-NEXT: pxor %xmm2, %xmm11 3484; SSE41-NEXT: movdqa %xmm9, %xmm13 3485; SSE41-NEXT: pcmpgtd %xmm11, %xmm13 3486; SSE41-NEXT: pcmpeqd %xmm9, %xmm11 3487; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm13[0,0,2,2] 3488; SSE41-NEXT: pand %xmm11, %xmm0 3489; SSE41-NEXT: por %xmm13, %xmm0 3490; SSE41-NEXT: movapd %xmm1, %xmm11 3491; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm11 3492; SSE41-NEXT: movdqa %xmm7, %xmm0 3493; SSE41-NEXT: pxor %xmm2, %xmm0 3494; SSE41-NEXT: movdqa %xmm0, %xmm8 3495; SSE41-NEXT: pcmpeqd %xmm9, %xmm8 3496; SSE41-NEXT: pcmpgtd %xmm0, %xmm9 3497; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] 3498; SSE41-NEXT: pand 
%xmm8, %xmm0 3499; SSE41-NEXT: por %xmm9, %xmm0 3500; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm1 3501; SSE41-NEXT: movapd {{.*#+}} xmm7 = [18446744073709551488,18446744073709551488] 3502; SSE41-NEXT: movapd %xmm1, %xmm9 3503; SSE41-NEXT: xorpd %xmm2, %xmm9 3504; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [18446744071562067840,18446744071562067840] 3505; SSE41-NEXT: movapd %xmm9, %xmm13 3506; SSE41-NEXT: pcmpeqd %xmm8, %xmm13 3507; SSE41-NEXT: pcmpgtd %xmm8, %xmm9 3508; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] 3509; SSE41-NEXT: pand %xmm13, %xmm0 3510; SSE41-NEXT: por %xmm9, %xmm0 3511; SSE41-NEXT: movapd %xmm7, %xmm9 3512; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm9 3513; SSE41-NEXT: movapd %xmm11, %xmm1 3514; SSE41-NEXT: xorpd %xmm2, %xmm1 3515; SSE41-NEXT: movapd %xmm1, %xmm13 3516; SSE41-NEXT: pcmpeqd %xmm8, %xmm13 3517; SSE41-NEXT: pcmpgtd %xmm8, %xmm1 3518; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,2,2] 3519; SSE41-NEXT: pand %xmm13, %xmm0 3520; SSE41-NEXT: por %xmm1, %xmm0 3521; SSE41-NEXT: movapd %xmm7, %xmm1 3522; SSE41-NEXT: blendvpd %xmm0, %xmm11, %xmm1 3523; SSE41-NEXT: packssdw %xmm9, %xmm1 3524; SSE41-NEXT: movapd %xmm12, %xmm9 3525; SSE41-NEXT: xorpd %xmm2, %xmm9 3526; SSE41-NEXT: movapd %xmm9, %xmm11 3527; SSE41-NEXT: pcmpeqd %xmm8, %xmm11 3528; SSE41-NEXT: pcmpgtd %xmm8, %xmm9 3529; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] 3530; SSE41-NEXT: pand %xmm11, %xmm0 3531; SSE41-NEXT: por %xmm9, %xmm0 3532; SSE41-NEXT: movapd %xmm7, %xmm9 3533; SSE41-NEXT: blendvpd %xmm0, %xmm12, %xmm9 3534; SSE41-NEXT: movapd %xmm10, %xmm11 3535; SSE41-NEXT: xorpd %xmm2, %xmm11 3536; SSE41-NEXT: movapd %xmm11, %xmm12 3537; SSE41-NEXT: pcmpeqd %xmm8, %xmm12 3538; SSE41-NEXT: pcmpgtd %xmm8, %xmm11 3539; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm11[0,0,2,2] 3540; SSE41-NEXT: pand %xmm12, %xmm0 3541; SSE41-NEXT: por %xmm11, %xmm0 3542; SSE41-NEXT: movapd %xmm7, %xmm11 3543; SSE41-NEXT: blendvpd %xmm0, %xmm10, %xmm11 3544; SSE41-NEXT: packssdw %xmm9, %xmm11 3545; SSE41-NEXT: 
packssdw %xmm11, %xmm1 3546; SSE41-NEXT: movapd %xmm6, %xmm9 3547; SSE41-NEXT: xorpd %xmm2, %xmm9 3548; SSE41-NEXT: movapd %xmm9, %xmm10 3549; SSE41-NEXT: pcmpeqd %xmm8, %xmm10 3550; SSE41-NEXT: pcmpgtd %xmm8, %xmm9 3551; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm9[0,0,2,2] 3552; SSE41-NEXT: pand %xmm10, %xmm0 3553; SSE41-NEXT: por %xmm9, %xmm0 3554; SSE41-NEXT: movapd %xmm7, %xmm9 3555; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm9 3556; SSE41-NEXT: movapd %xmm5, %xmm6 3557; SSE41-NEXT: xorpd %xmm2, %xmm6 3558; SSE41-NEXT: movapd %xmm6, %xmm10 3559; SSE41-NEXT: pcmpeqd %xmm8, %xmm10 3560; SSE41-NEXT: pcmpgtd %xmm8, %xmm6 3561; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 3562; SSE41-NEXT: pand %xmm10, %xmm0 3563; SSE41-NEXT: por %xmm6, %xmm0 3564; SSE41-NEXT: movapd %xmm7, %xmm6 3565; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm6 3566; SSE41-NEXT: packssdw %xmm9, %xmm6 3567; SSE41-NEXT: movapd %xmm4, %xmm5 3568; SSE41-NEXT: xorpd %xmm2, %xmm5 3569; SSE41-NEXT: movapd %xmm5, %xmm9 3570; SSE41-NEXT: pcmpeqd %xmm8, %xmm9 3571; SSE41-NEXT: pcmpgtd %xmm8, %xmm5 3572; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 3573; SSE41-NEXT: pand %xmm9, %xmm0 3574; SSE41-NEXT: por %xmm5, %xmm0 3575; SSE41-NEXT: movapd %xmm7, %xmm5 3576; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm5 3577; SSE41-NEXT: xorpd %xmm3, %xmm2 3578; SSE41-NEXT: movapd %xmm2, %xmm4 3579; SSE41-NEXT: pcmpeqd %xmm8, %xmm4 3580; SSE41-NEXT: pcmpgtd %xmm8, %xmm2 3581; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,2,2] 3582; SSE41-NEXT: pand %xmm4, %xmm0 3583; SSE41-NEXT: por %xmm2, %xmm0 3584; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm7 3585; SSE41-NEXT: packssdw %xmm5, %xmm7 3586; SSE41-NEXT: packssdw %xmm7, %xmm6 3587; SSE41-NEXT: packsswb %xmm6, %xmm1 3588; SSE41-NEXT: movdqa %xmm1, %xmm0 3589; SSE41-NEXT: retq 3590; 3591; AVX1-LABEL: trunc_ssat_v16i64_v16i8: 3592; AVX1: # %bb.0: 3593; AVX1-NEXT: vmovdqa 96(%rdi), %xmm0 3594; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm2 = [127,127] 3595; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm1 3596; 
AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0 3597; AVX1-NEXT: vmovdqa 112(%rdi), %xmm1 3598; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm3 3599; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm2, %xmm1 3600; AVX1-NEXT: vmovdqa 64(%rdi), %xmm3 3601; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm4 3602; AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm2, %xmm3 3603; AVX1-NEXT: vmovdqa 80(%rdi), %xmm4 3604; AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm5 3605; AVX1-NEXT: vblendvpd %xmm5, %xmm4, %xmm2, %xmm4 3606; AVX1-NEXT: vmovdqa (%rdi), %xmm5 3607; AVX1-NEXT: vmovdqa 16(%rdi), %xmm6 3608; AVX1-NEXT: vmovdqa 32(%rdi), %xmm7 3609; AVX1-NEXT: vmovdqa 48(%rdi), %xmm8 3610; AVX1-NEXT: vpcmpgtq %xmm7, %xmm2, %xmm9 3611; AVX1-NEXT: vblendvpd %xmm9, %xmm7, %xmm2, %xmm7 3612; AVX1-NEXT: vpcmpgtq %xmm8, %xmm2, %xmm9 3613; AVX1-NEXT: vblendvpd %xmm9, %xmm8, %xmm2, %xmm8 3614; AVX1-NEXT: vpcmpgtq %xmm5, %xmm2, %xmm9 3615; AVX1-NEXT: vblendvpd %xmm9, %xmm5, %xmm2, %xmm5 3616; AVX1-NEXT: vpcmpgtq %xmm6, %xmm2, %xmm9 3617; AVX1-NEXT: vblendvpd %xmm9, %xmm6, %xmm2, %xmm2 3618; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm6 = [18446744073709551488,18446744073709551488] 3619; AVX1-NEXT: vpcmpgtq %xmm6, %xmm2, %xmm9 3620; AVX1-NEXT: vblendvpd %xmm9, %xmm2, %xmm6, %xmm2 3621; AVX1-NEXT: vpcmpgtq %xmm6, %xmm5, %xmm9 3622; AVX1-NEXT: vblendvpd %xmm9, %xmm5, %xmm6, %xmm5 3623; AVX1-NEXT: vpackssdw %xmm2, %xmm5, %xmm2 3624; AVX1-NEXT: vpcmpgtq %xmm6, %xmm8, %xmm5 3625; AVX1-NEXT: vblendvpd %xmm5, %xmm8, %xmm6, %xmm5 3626; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm8 3627; AVX1-NEXT: vblendvpd %xmm8, %xmm7, %xmm6, %xmm7 3628; AVX1-NEXT: vpackssdw %xmm5, %xmm7, %xmm5 3629; AVX1-NEXT: vpackssdw %xmm5, %xmm2, %xmm2 3630; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm5 3631; AVX1-NEXT: vblendvpd %xmm5, %xmm4, %xmm6, %xmm4 3632; AVX1-NEXT: vpcmpgtq %xmm6, %xmm3, %xmm5 3633; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm6, %xmm3 3634; AVX1-NEXT: vpackssdw %xmm4, %xmm3, %xmm3 3635; AVX1-NEXT: vpcmpgtq %xmm6, %xmm1, %xmm4 3636; AVX1-NEXT: vblendvpd %xmm4, %xmm1, 
%xmm6, %xmm1 3637; AVX1-NEXT: vpcmpgtq %xmm6, %xmm0, %xmm4 3638; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm6, %xmm0 3639; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 3640; AVX1-NEXT: vpackssdw %xmm0, %xmm3, %xmm0 3641; AVX1-NEXT: vpacksswb %xmm0, %xmm2, %xmm0 3642; AVX1-NEXT: retq 3643; 3644; AVX2-LABEL: trunc_ssat_v16i64_v16i8: 3645; AVX2: # %bb.0: 3646; AVX2-NEXT: vmovdqa (%rdi), %ymm0 3647; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1 3648; AVX2-NEXT: vmovdqa 64(%rdi), %ymm2 3649; AVX2-NEXT: vmovdqa 96(%rdi), %ymm3 3650; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [127,127,127,127] 3651; AVX2-NEXT: vpcmpgtq %ymm2, %ymm4, %ymm5 3652; AVX2-NEXT: vblendvpd %ymm5, %ymm2, %ymm4, %ymm2 3653; AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm5 3654; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm4, %ymm3 3655; AVX2-NEXT: vpcmpgtq %ymm0, %ymm4, %ymm5 3656; AVX2-NEXT: vblendvpd %ymm5, %ymm0, %ymm4, %ymm0 3657; AVX2-NEXT: vpcmpgtq %ymm1, %ymm4, %ymm5 3658; AVX2-NEXT: vblendvpd %ymm5, %ymm1, %ymm4, %ymm1 3659; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [18446744073709551488,18446744073709551488,18446744073709551488,18446744073709551488] 3660; AVX2-NEXT: vpcmpgtq %ymm4, %ymm1, %ymm5 3661; AVX2-NEXT: vblendvpd %ymm5, %ymm1, %ymm4, %ymm1 3662; AVX2-NEXT: vpcmpgtq %ymm4, %ymm0, %ymm5 3663; AVX2-NEXT: vblendvpd %ymm5, %ymm0, %ymm4, %ymm0 3664; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 3665; AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm1 3666; AVX2-NEXT: vblendvpd %ymm1, %ymm3, %ymm4, %ymm1 3667; AVX2-NEXT: vpcmpgtq %ymm4, %ymm2, %ymm3 3668; AVX2-NEXT: vblendvpd %ymm3, %ymm2, %ymm4, %ymm2 3669; AVX2-NEXT: vpackssdw %ymm1, %ymm2, %ymm1 3670; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3] 3671; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 3672; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 3673; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3674; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 3675; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 3676; AVX2-NEXT: vzeroupper 3677; AVX2-NEXT: retq 3678; 3679; AVX512-LABEL: 
trunc_ssat_v16i64_v16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm1
; AVX512-NEXT: vpmovsqb %zmm1, %xmm1
; AVX512-NEXT: vpmovsqb %zmm0, %xmm0
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
; SKX-LABEL: trunc_ssat_v16i64_v16i8:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa (%rdi), %ymm0
; SKX-NEXT: vmovdqa 32(%rdi), %ymm1
; SKX-NEXT: vmovdqa 64(%rdi), %ymm2
; SKX-NEXT: vmovdqa 96(%rdi), %ymm3
; SKX-NEXT: vpmovsqb %ymm3, %xmm3
; SKX-NEXT: vpmovsqb %ymm2, %xmm2
; SKX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SKX-NEXT: vpmovsqb %ymm1, %xmm1
; SKX-NEXT: vpmovsqb %ymm0, %xmm0
; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
; Clamp each i64 lane to [-128, 127] with icmp/select, then truncate to i8.
  %a0 = load <16 x i64>, ptr %p0
  %1 = icmp slt <16 x i64> %a0, <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
  %2 = select <16 x i1> %1, <16 x i64> %a0, <16 x i64> <i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127, i64 127>
  %3 = icmp sgt <16 x i64> %2, <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
  %4 = select <16 x i1> %3, <16 x i64> %2, <16 x i64> <i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128, i64 -128>
  %5 = trunc <16 x i64> %4 to <16 x i8>
  ret <16 x i8> %5
}

; Signed-saturating truncate <4 x i32> -> <4 x i8>: the i32 lanes are clamped
; to [-128, 127] via icmp/select and then truncated. All tested targets can
; lower this to saturating packs (assertions autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing by hand).
define <4 x i8> @trunc_ssat_v4i32_v4i8(<4 x i32> %a0) {
; SSE-LABEL: trunc_ssat_v4i32_v4i8:
; SSE: # %bb.0:
; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: packsswb %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: trunc_ssat_v4i32_v4i8:
; AVX: # %bb.0:
; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: trunc_ssat_v4i32_v4i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512-NEXT: retq
;
; SKX-LABEL: trunc_ssat_v4i32_v4i8:
; SKX: # %bb.0:
; SKX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; SKX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; SKX-NEXT: retq
; Clamp each i32 lane to [-128, 127], then truncate to i8.
  %1 = icmp slt <4 x i32> %a0, <i32 127, i32 127, i32 127, i32 127>
  %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 127, i32 127, i32 127, i32 127>
  %3 = icmp sgt <4 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128>
  %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>
  %5 = trunc <4 x i32> %4 to <4 x i8>
  ret <4 x i8> %5
}

; Store form of the <4 x i32> -> <4 x i8> saturating truncate: same clamp via
; icmp/select, but the result is stored rather than returned. Targets with
; AVX512VL fold the whole pattern into a single truncating store (vpmovsdb to
; memory); others pack in registers and store with (v)movd.
define void @trunc_ssat_v4i32_v4i8_store(<4 x i32> %a0, ptr%p1) {
; SSE-LABEL: trunc_ssat_v4i32_v4i8_store:
; SSE: # %bb.0:
; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: packsswb %xmm0, %xmm0
; SSE-NEXT: movd %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX: # %bb.0:
; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, (%rdi)
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, (%rdi)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovsdb %xmm0, (%rdi)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vpmovsdb %xmm0, (%rdi)
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_ssat_v4i32_v4i8_store:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsdb %xmm0, (%rdi)
; SKX-NEXT: retq
; Clamp each i32 lane to [-128, 127], truncate to i8, store to %p1.
  %1 = icmp slt <4 x i32> %a0, <i32 127, i32 127, i32 127, i32 127>
  %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 127, i32 127, i32 127, i32 127>
  %3 = icmp sgt <4 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128>
  %4 = select <4 x i1> %3, <4 x i32> %2, <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>
  %5 = trunc <4 x i32> %4 to <4 x i8>
  store <4 x i8> %5, ptr%p1
  ret void
}

; Signed-saturating truncate <8 x i32> -> <8 x i8>. SSE packs the two input
; halves directly (packssdw %xmm1, %xmm0); AVX/AVX2/AVX512F extract the high
; 128-bit lane first, then pack. (Assertions autogenerated by
; utils/update_llc_test_checks.py.)
define <8 x i8> @trunc_ssat_v8i32_v8i8(<8 x i32> %a0) {
; SSE-LABEL: trunc_ssat_v8i32_v8i8:
; SSE: # %bb.0:
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: packsswb %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_ssat_v8i32_v8i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_ssat_v8i32_v8i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_ssat_v8i32_v8i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_ssat_v8i32_v8i8:
; AVX512VL: # 
%bb.0: 3830; AVX512VL-NEXT: vpmovsdb %ymm0, %xmm0 3831; AVX512VL-NEXT: vzeroupper 3832; AVX512VL-NEXT: retq 3833; 3834; AVX512BW-LABEL: trunc_ssat_v8i32_v8i8: 3835; AVX512BW: # %bb.0: 3836; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 3837; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 3838; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 3839; AVX512BW-NEXT: vzeroupper 3840; AVX512BW-NEXT: retq 3841; 3842; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i8: 3843; AVX512BWVL: # %bb.0: 3844; AVX512BWVL-NEXT: vpmovsdb %ymm0, %xmm0 3845; AVX512BWVL-NEXT: vzeroupper 3846; AVX512BWVL-NEXT: retq 3847; 3848; SKX-LABEL: trunc_ssat_v8i32_v8i8: 3849; SKX: # %bb.0: 3850; SKX-NEXT: vpmovsdb %ymm0, %xmm0 3851; SKX-NEXT: vzeroupper 3852; SKX-NEXT: retq 3853 %1 = icmp slt <8 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 3854 %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 3855 %3 = icmp sgt <8 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 3856 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 3857 %5 = trunc <8 x i32> %4 to <8 x i8> 3858 ret <8 x i8> %5 3859} 3860 3861define void @trunc_ssat_v8i32_v8i8_store(<8 x i32> %a0, ptr%p1) { 3862; SSE-LABEL: trunc_ssat_v8i32_v8i8_store: 3863; SSE: # %bb.0: 3864; SSE-NEXT: packssdw %xmm1, %xmm0 3865; SSE-NEXT: packsswb %xmm0, %xmm0 3866; SSE-NEXT: movq %xmm0, (%rdi) 3867; SSE-NEXT: retq 3868; 3869; AVX1-LABEL: trunc_ssat_v8i32_v8i8_store: 3870; AVX1: # %bb.0: 3871; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3872; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 3873; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 3874; AVX1-NEXT: vmovq %xmm0, (%rdi) 3875; AVX1-NEXT: vzeroupper 3876; AVX1-NEXT: retq 3877; 3878; AVX2-LABEL: trunc_ssat_v8i32_v8i8_store: 3879; AVX2: # %bb.0: 3880; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3881; AVX2-NEXT: 
vpackssdw %xmm1, %xmm0, %xmm0 3882; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 3883; AVX2-NEXT: vmovq %xmm0, (%rdi) 3884; AVX2-NEXT: vzeroupper 3885; AVX2-NEXT: retq 3886; 3887; AVX512F-LABEL: trunc_ssat_v8i32_v8i8_store: 3888; AVX512F: # %bb.0: 3889; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 3890; AVX512F-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 3891; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 3892; AVX512F-NEXT: vmovq %xmm0, (%rdi) 3893; AVX512F-NEXT: vzeroupper 3894; AVX512F-NEXT: retq 3895; 3896; AVX512VL-LABEL: trunc_ssat_v8i32_v8i8_store: 3897; AVX512VL: # %bb.0: 3898; AVX512VL-NEXT: vpmovsdb %ymm0, (%rdi) 3899; AVX512VL-NEXT: vzeroupper 3900; AVX512VL-NEXT: retq 3901; 3902; AVX512BW-LABEL: trunc_ssat_v8i32_v8i8_store: 3903; AVX512BW: # %bb.0: 3904; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 3905; AVX512BW-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 3906; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 3907; AVX512BW-NEXT: vmovq %xmm0, (%rdi) 3908; AVX512BW-NEXT: vzeroupper 3909; AVX512BW-NEXT: retq 3910; 3911; AVX512BWVL-LABEL: trunc_ssat_v8i32_v8i8_store: 3912; AVX512BWVL: # %bb.0: 3913; AVX512BWVL-NEXT: vpmovsdb %ymm0, (%rdi) 3914; AVX512BWVL-NEXT: vzeroupper 3915; AVX512BWVL-NEXT: retq 3916; 3917; SKX-LABEL: trunc_ssat_v8i32_v8i8_store: 3918; SKX: # %bb.0: 3919; SKX-NEXT: vpmovsdb %ymm0, (%rdi) 3920; SKX-NEXT: vzeroupper 3921; SKX-NEXT: retq 3922 %1 = icmp slt <8 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 3923 %2 = select <8 x i1> %1, <8 x i32> %a0, <8 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 3924 %3 = icmp sgt <8 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 3925 %4 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 3926 %5 = trunc <8 x i32> %4 to <8 x i8> 3927 store <8 x i8> %5, ptr%p1 3928 ret void 3929} 3930 3931define <16 x i8> @trunc_ssat_v16i32_v16i8(ptr %p0) 
"min-legal-vector-width"="256" { 3932; SSE-LABEL: trunc_ssat_v16i32_v16i8: 3933; SSE: # %bb.0: 3934; SSE-NEXT: movdqa (%rdi), %xmm0 3935; SSE-NEXT: movdqa 32(%rdi), %xmm1 3936; SSE-NEXT: packssdw 48(%rdi), %xmm1 3937; SSE-NEXT: packssdw 16(%rdi), %xmm0 3938; SSE-NEXT: packsswb %xmm1, %xmm0 3939; SSE-NEXT: retq 3940; 3941; AVX1-LABEL: trunc_ssat_v16i32_v16i8: 3942; AVX1: # %bb.0: 3943; AVX1-NEXT: vmovdqa (%rdi), %xmm0 3944; AVX1-NEXT: vmovdqa 32(%rdi), %xmm1 3945; AVX1-NEXT: vpackssdw 48(%rdi), %xmm1, %xmm1 3946; AVX1-NEXT: vpackssdw 16(%rdi), %xmm0, %xmm0 3947; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 3948; AVX1-NEXT: retq 3949; 3950; AVX2-LABEL: trunc_ssat_v16i32_v16i8: 3951; AVX2: # %bb.0: 3952; AVX2-NEXT: vmovdqa (%rdi), %ymm0 3953; AVX2-NEXT: vpackssdw 32(%rdi), %ymm0, %ymm0 3954; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 3955; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 3956; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 3957; AVX2-NEXT: vzeroupper 3958; AVX2-NEXT: retq 3959; 3960; AVX512-LABEL: trunc_ssat_v16i32_v16i8: 3961; AVX512: # %bb.0: 3962; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 3963; AVX512-NEXT: vpmovsdb %zmm0, %xmm0 3964; AVX512-NEXT: vzeroupper 3965; AVX512-NEXT: retq 3966; 3967; SKX-LABEL: trunc_ssat_v16i32_v16i8: 3968; SKX: # %bb.0: 3969; SKX-NEXT: vmovdqa (%rdi), %ymm0 3970; SKX-NEXT: vmovdqa 32(%rdi), %ymm1 3971; SKX-NEXT: vpmovsdb %ymm1, %xmm1 3972; SKX-NEXT: vpmovsdb %ymm0, %xmm0 3973; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 3974; SKX-NEXT: vzeroupper 3975; SKX-NEXT: retq 3976 %a0 = load <16 x i32>, ptr %p0 3977 %1 = icmp slt <16 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 3978 %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 3979 %3 = icmp sgt <16 x i32> %2, <i32 -128, i32 
-128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 3980 %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 3981 %5 = trunc <16 x i32> %4 to <16 x i8> 3982 ret <16 x i8> %5 3983} 3984 3985define void @trunc_ssat_v16i32_v16i8_store(ptr %p0, ptr %p1) "min-legal-vector-width"="256" { 3986; SSE-LABEL: trunc_ssat_v16i32_v16i8_store: 3987; SSE: # %bb.0: 3988; SSE-NEXT: movdqa (%rdi), %xmm0 3989; SSE-NEXT: movdqa 32(%rdi), %xmm1 3990; SSE-NEXT: packssdw 48(%rdi), %xmm1 3991; SSE-NEXT: packssdw 16(%rdi), %xmm0 3992; SSE-NEXT: packsswb %xmm1, %xmm0 3993; SSE-NEXT: movdqa %xmm0, (%rsi) 3994; SSE-NEXT: retq 3995; 3996; AVX1-LABEL: trunc_ssat_v16i32_v16i8_store: 3997; AVX1: # %bb.0: 3998; AVX1-NEXT: vmovdqa (%rdi), %xmm0 3999; AVX1-NEXT: vmovdqa 32(%rdi), %xmm1 4000; AVX1-NEXT: vpackssdw 48(%rdi), %xmm1, %xmm1 4001; AVX1-NEXT: vpackssdw 16(%rdi), %xmm0, %xmm0 4002; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 4003; AVX1-NEXT: vmovdqa %xmm0, (%rsi) 4004; AVX1-NEXT: retq 4005; 4006; AVX2-LABEL: trunc_ssat_v16i32_v16i8_store: 4007; AVX2: # %bb.0: 4008; AVX2-NEXT: vmovdqa (%rdi), %ymm0 4009; AVX2-NEXT: vpackssdw 32(%rdi), %ymm0, %ymm0 4010; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 4011; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 4012; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] 4013; AVX2-NEXT: vmovdqa %xmm0, (%rsi) 4014; AVX2-NEXT: vzeroupper 4015; AVX2-NEXT: retq 4016; 4017; AVX512-LABEL: trunc_ssat_v16i32_v16i8_store: 4018; AVX512: # %bb.0: 4019; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 4020; AVX512-NEXT: vpmovsdb %zmm0, (%rsi) 4021; AVX512-NEXT: vzeroupper 4022; AVX512-NEXT: retq 4023; 4024; SKX-LABEL: trunc_ssat_v16i32_v16i8_store: 4025; SKX: # %bb.0: 4026; SKX-NEXT: vmovdqa (%rdi), %ymm0 4027; SKX-NEXT: vmovdqa 32(%rdi), %ymm1 
4028; SKX-NEXT: vpmovsdb %ymm1, %xmm1 4029; SKX-NEXT: vpmovsdb %ymm0, %xmm0 4030; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4031; SKX-NEXT: vmovdqa %xmm0, (%rsi) 4032; SKX-NEXT: vzeroupper 4033; SKX-NEXT: retq 4034 %a0 = load <16 x i32>, ptr %p0 4035 %1 = icmp slt <16 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 4036 %2 = select <16 x i1> %1, <16 x i32> %a0, <16 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 4037 %3 = icmp sgt <16 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 4038 %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 4039 %5 = trunc <16 x i32> %4 to <16 x i8> 4040 store <16 x i8> %5, ptr %p1 4041 ret void 4042} 4043 4044define <8 x i8> @trunc_ssat_v8i16_v8i8(<8 x i16> %a0) { 4045; SSE-LABEL: trunc_ssat_v8i16_v8i8: 4046; SSE: # %bb.0: 4047; SSE-NEXT: packsswb %xmm0, %xmm0 4048; SSE-NEXT: retq 4049; 4050; AVX-LABEL: trunc_ssat_v8i16_v8i8: 4051; AVX: # %bb.0: 4052; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 4053; AVX-NEXT: retq 4054; 4055; AVX512-LABEL: trunc_ssat_v8i16_v8i8: 4056; AVX512: # %bb.0: 4057; AVX512-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 4058; AVX512-NEXT: retq 4059; 4060; SKX-LABEL: trunc_ssat_v8i16_v8i8: 4061; SKX: # %bb.0: 4062; SKX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 4063; SKX-NEXT: retq 4064 %1 = icmp slt <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> 4065 %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> 4066 %3 = icmp sgt 
<8 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> 4067 %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> 4068 %5 = trunc <8 x i16> %4 to <8 x i8> 4069 ret <8 x i8> %5 4070} 4071 4072define void @trunc_ssat_v8i16_v8i8_store(<8 x i16> %a0, ptr%p1) { 4073; SSE-LABEL: trunc_ssat_v8i16_v8i8_store: 4074; SSE: # %bb.0: 4075; SSE-NEXT: packsswb %xmm0, %xmm0 4076; SSE-NEXT: movq %xmm0, (%rdi) 4077; SSE-NEXT: retq 4078; 4079; AVX-LABEL: trunc_ssat_v8i16_v8i8_store: 4080; AVX: # %bb.0: 4081; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 4082; AVX-NEXT: vmovq %xmm0, (%rdi) 4083; AVX-NEXT: retq 4084; 4085; AVX512F-LABEL: trunc_ssat_v8i16_v8i8_store: 4086; AVX512F: # %bb.0: 4087; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 4088; AVX512F-NEXT: vmovq %xmm0, (%rdi) 4089; AVX512F-NEXT: retq 4090; 4091; AVX512VL-LABEL: trunc_ssat_v8i16_v8i8_store: 4092; AVX512VL: # %bb.0: 4093; AVX512VL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 4094; AVX512VL-NEXT: vmovq %xmm0, (%rdi) 4095; AVX512VL-NEXT: retq 4096; 4097; AVX512BW-LABEL: trunc_ssat_v8i16_v8i8_store: 4098; AVX512BW: # %bb.0: 4099; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 4100; AVX512BW-NEXT: vmovq %xmm0, (%rdi) 4101; AVX512BW-NEXT: retq 4102; 4103; AVX512BWVL-LABEL: trunc_ssat_v8i16_v8i8_store: 4104; AVX512BWVL: # %bb.0: 4105; AVX512BWVL-NEXT: vpmovswb %xmm0, (%rdi) 4106; AVX512BWVL-NEXT: retq 4107; 4108; SKX-LABEL: trunc_ssat_v8i16_v8i8_store: 4109; SKX: # %bb.0: 4110; SKX-NEXT: vpmovswb %xmm0, (%rdi) 4111; SKX-NEXT: retq 4112 %1 = icmp slt <8 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> 4113 %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> 4114 %3 = icmp sgt <8 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> 4115 %4 = select <8 x i1> %3, <8 x i16> %2, <8 x i16> 
<i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> 4116 %5 = trunc <8 x i16> %4 to <8 x i8> 4117 store <8 x i8> %5, ptr%p1 4118 ret void 4119} 4120 4121define <16 x i8> @trunc_ssat_v16i16_v16i8(<16 x i16> %a0) { 4122; SSE-LABEL: trunc_ssat_v16i16_v16i8: 4123; SSE: # %bb.0: 4124; SSE-NEXT: packsswb %xmm1, %xmm0 4125; SSE-NEXT: retq 4126; 4127; AVX1-LABEL: trunc_ssat_v16i16_v16i8: 4128; AVX1: # %bb.0: 4129; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 4130; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 4131; AVX1-NEXT: vzeroupper 4132; AVX1-NEXT: retq 4133; 4134; AVX2-LABEL: trunc_ssat_v16i16_v16i8: 4135; AVX2: # %bb.0: 4136; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 4137; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 4138; AVX2-NEXT: vzeroupper 4139; AVX2-NEXT: retq 4140; 4141; AVX512F-LABEL: trunc_ssat_v16i16_v16i8: 4142; AVX512F: # %bb.0: 4143; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 4144; AVX512F-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 4145; AVX512F-NEXT: vzeroupper 4146; AVX512F-NEXT: retq 4147; 4148; AVX512VL-LABEL: trunc_ssat_v16i16_v16i8: 4149; AVX512VL: # %bb.0: 4150; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1 4151; AVX512VL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 4152; AVX512VL-NEXT: vzeroupper 4153; AVX512VL-NEXT: retq 4154; 4155; AVX512BW-LABEL: trunc_ssat_v16i16_v16i8: 4156; AVX512BW: # %bb.0: 4157; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 4158; AVX512BW-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 4159; AVX512BW-NEXT: vzeroupper 4160; AVX512BW-NEXT: retq 4161; 4162; AVX512BWVL-LABEL: trunc_ssat_v16i16_v16i8: 4163; AVX512BWVL: # %bb.0: 4164; AVX512BWVL-NEXT: vpmovswb %ymm0, %xmm0 4165; AVX512BWVL-NEXT: vzeroupper 4166; AVX512BWVL-NEXT: retq 4167; 4168; SKX-LABEL: trunc_ssat_v16i16_v16i8: 4169; SKX: # %bb.0: 4170; SKX-NEXT: vpmovswb %ymm0, %xmm0 4171; SKX-NEXT: vzeroupper 4172; SKX-NEXT: retq 4173 %1 = icmp slt <16 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 
127, i16 127, i16 127> 4174 %2 = select <16 x i1> %1, <16 x i16> %a0, <16 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> 4175 %3 = icmp sgt <16 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> 4176 %4 = select <16 x i1> %3, <16 x i16> %2, <16 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> 4177 %5 = trunc <16 x i16> %4 to <16 x i8> 4178 ret <16 x i8> %5 4179} 4180 4181define <32 x i8> @trunc_ssat_v32i16_v32i8(ptr %p0) "min-legal-vector-width"="256" { 4182; SSE-LABEL: trunc_ssat_v32i16_v32i8: 4183; SSE: # %bb.0: 4184; SSE-NEXT: movdqa (%rdi), %xmm0 4185; SSE-NEXT: movdqa 32(%rdi), %xmm1 4186; SSE-NEXT: packsswb 16(%rdi), %xmm0 4187; SSE-NEXT: packsswb 48(%rdi), %xmm1 4188; SSE-NEXT: retq 4189; 4190; AVX1-LABEL: trunc_ssat_v32i16_v32i8: 4191; AVX1: # %bb.0: 4192; AVX1-NEXT: vmovdqa (%rdi), %xmm0 4193; AVX1-NEXT: vmovdqa 32(%rdi), %xmm1 4194; AVX1-NEXT: vpacksswb 48(%rdi), %xmm1, %xmm1 4195; AVX1-NEXT: vpacksswb 16(%rdi), %xmm0, %xmm0 4196; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4197; AVX1-NEXT: retq 4198; 4199; AVX2-LABEL: trunc_ssat_v32i16_v32i8: 4200; AVX2: # %bb.0: 4201; AVX2-NEXT: vmovdqa (%rdi), %ymm0 4202; AVX2-NEXT: vpacksswb 32(%rdi), %ymm0, %ymm0 4203; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 4204; AVX2-NEXT: retq 4205; 4206; AVX512F-LABEL: trunc_ssat_v32i16_v32i8: 4207; AVX512F: # %bb.0: 4208; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 4209; AVX512F-NEXT: vpacksswb 32(%rdi), %ymm0, %ymm0 4210; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 4211; AVX512F-NEXT: retq 4212; 4213; AVX512VL-LABEL: trunc_ssat_v32i16_v32i8: 4214; AVX512VL: # %bb.0: 4215; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 4216; AVX512VL-NEXT: 
vpacksswb 32(%rdi), %ymm0, %ymm0 4217; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 4218; AVX512VL-NEXT: retq 4219; 4220; AVX512BW-LABEL: trunc_ssat_v32i16_v32i8: 4221; AVX512BW: # %bb.0: 4222; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0 4223; AVX512BW-NEXT: vpmovswb %zmm0, %ymm0 4224; AVX512BW-NEXT: retq 4225; 4226; AVX512BWVL-LABEL: trunc_ssat_v32i16_v32i8: 4227; AVX512BWVL: # %bb.0: 4228; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0 4229; AVX512BWVL-NEXT: vpmovswb %zmm0, %ymm0 4230; AVX512BWVL-NEXT: retq 4231; 4232; SKX-LABEL: trunc_ssat_v32i16_v32i8: 4233; SKX: # %bb.0: 4234; SKX-NEXT: vmovdqa (%rdi), %ymm0 4235; SKX-NEXT: vpacksswb 32(%rdi), %ymm0, %ymm0 4236; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 4237; SKX-NEXT: retq 4238 %a0 = load <32 x i16>, ptr %p0 4239 %1 = icmp slt <32 x i16> %a0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> 4240 %2 = select <32 x i1> %1, <32 x i16> %a0, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127> 4241 %3 = icmp sgt <32 x i16> %2, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> 4242 %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, 
i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128> 4243 %5 = trunc <32 x i16> %4 to <32 x i8> 4244 ret <32 x i8> %5 4245} 4246 4247define <32 x i8> @trunc_ssat_v32i32_v32i8(ptr %p0) "min-legal-vector-width"="256" { 4248; SSE-LABEL: trunc_ssat_v32i32_v32i8: 4249; SSE: # %bb.0: 4250; SSE-NEXT: movdqa (%rdi), %xmm0 4251; SSE-NEXT: movdqa 32(%rdi), %xmm2 4252; SSE-NEXT: movdqa 64(%rdi), %xmm1 4253; SSE-NEXT: movdqa 96(%rdi), %xmm3 4254; SSE-NEXT: packssdw 48(%rdi), %xmm2 4255; SSE-NEXT: packssdw 16(%rdi), %xmm0 4256; SSE-NEXT: packsswb %xmm2, %xmm0 4257; SSE-NEXT: packssdw 112(%rdi), %xmm3 4258; SSE-NEXT: packssdw 80(%rdi), %xmm1 4259; SSE-NEXT: packsswb %xmm3, %xmm1 4260; SSE-NEXT: retq 4261; 4262; AVX1-LABEL: trunc_ssat_v32i32_v32i8: 4263; AVX1: # %bb.0: 4264; AVX1-NEXT: vmovdqa (%rdi), %xmm0 4265; AVX1-NEXT: vmovdqa 32(%rdi), %xmm1 4266; AVX1-NEXT: vmovdqa 64(%rdi), %xmm2 4267; AVX1-NEXT: vmovdqa 96(%rdi), %xmm3 4268; AVX1-NEXT: vpackssdw 112(%rdi), %xmm3, %xmm3 4269; AVX1-NEXT: vpackssdw 80(%rdi), %xmm2, %xmm2 4270; AVX1-NEXT: vpacksswb %xmm3, %xmm2, %xmm2 4271; AVX1-NEXT: vpackssdw 48(%rdi), %xmm1, %xmm1 4272; AVX1-NEXT: vpackssdw 16(%rdi), %xmm0, %xmm0 4273; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 4274; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 4275; AVX1-NEXT: retq 4276; 4277; AVX2-LABEL: trunc_ssat_v32i32_v32i8: 4278; AVX2: # %bb.0: 4279; AVX2-NEXT: vmovdqa (%rdi), %ymm0 4280; AVX2-NEXT: vmovdqa 64(%rdi), %ymm1 4281; AVX2-NEXT: vpackssdw 96(%rdi), %ymm1, %ymm1 4282; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3] 4283; AVX2-NEXT: vpackssdw 32(%rdi), %ymm0, %ymm0 4284; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 4285; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 4286; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 4287; AVX2-NEXT: retq 4288; 4289; AVX512-LABEL: trunc_ssat_v32i32_v32i8: 4290; AVX512: # %bb.0: 4291; AVX512-NEXT: 
vmovdqa64 (%rdi), %zmm0 4292; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm1 4293; AVX512-NEXT: vpmovsdb %zmm0, %xmm0 4294; AVX512-NEXT: vpmovsdb %zmm1, %xmm1 4295; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 4296; AVX512-NEXT: retq 4297; 4298; SKX-LABEL: trunc_ssat_v32i32_v32i8: 4299; SKX: # %bb.0: 4300; SKX-NEXT: vmovdqa (%rdi), %ymm0 4301; SKX-NEXT: vmovdqa 64(%rdi), %ymm1 4302; SKX-NEXT: vpackssdw 96(%rdi), %ymm1, %ymm1 4303; SKX-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3] 4304; SKX-NEXT: vpackssdw 32(%rdi), %ymm0, %ymm0 4305; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 4306; SKX-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 4307; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] 4308; SKX-NEXT: retq 4309 %a0 = load <32 x i32>, ptr %p0 4310 %1 = icmp slt <32 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 4311 %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127> 4312 %3 = icmp sgt <32 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 4313 %4 = select <32 x i1> %3, <32 x i32> %2, <32 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, 
i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128> 4314 %5 = trunc <32 x i32> %4 to <32 x i8> 4315 ret <32 x i8> %5 4316} 4317 4318; This used to crash with avx512 due because we were missing a check for 4319; unsupported element types like i24. 4320define void @trunc_ssat_v16i32_v16i24(<16 x i32> %x, ptr %y) nounwind { 4321; SSE2-SSSE3-LABEL: trunc_ssat_v16i32_v16i24: 4322; SSE2-SSSE3: # %bb.0: 4323; SSE2-SSSE3-NEXT: pushq %rbp 4324; SSE2-SSSE3-NEXT: pushq %r15 4325; SSE2-SSSE3-NEXT: pushq %r14 4326; SSE2-SSSE3-NEXT: pushq %r12 4327; SSE2-SSSE3-NEXT: pushq %rbx 4328; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [8388607,8388607,8388607,8388607] 4329; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm4 4330; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm4 4331; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 4332; SSE2-SSSE3-NEXT: pandn %xmm5, %xmm4 4333; SSE2-SSSE3-NEXT: por %xmm3, %xmm4 4334; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm3 4335; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm3 4336; SSE2-SSSE3-NEXT: pand %xmm3, %xmm2 4337; SSE2-SSSE3-NEXT: pandn %xmm5, %xmm3 4338; SSE2-SSSE3-NEXT: por %xmm2, %xmm3 4339; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm2 4340; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 4341; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1 4342; SSE2-SSSE3-NEXT: pandn %xmm5, %xmm2 4343; SSE2-SSSE3-NEXT: por %xmm1, %xmm2 4344; SSE2-SSSE3-NEXT: movdqa %xmm5, %xmm1 4345; SSE2-SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 4346; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0 4347; SSE2-SSSE3-NEXT: pandn %xmm5, %xmm1 4348; SSE2-SSSE3-NEXT: por %xmm0, %xmm1 4349; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [4286578688,4286578688,4286578688,4286578688] 4350; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0 4351; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm0 4352; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1 4353; SSE2-SSSE3-NEXT: pandn %xmm5, %xmm0 4354; SSE2-SSSE3-NEXT: por %xmm1, %xmm0 4355; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1 4356; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm1 4357; SSE2-SSSE3-NEXT: pand 
%xmm1, %xmm2 4358; SSE2-SSSE3-NEXT: pandn %xmm5, %xmm1 4359; SSE2-SSSE3-NEXT: por %xmm2, %xmm1 4360; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2 4361; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm2 4362; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3 4363; SSE2-SSSE3-NEXT: pandn %xmm5, %xmm2 4364; SSE2-SSSE3-NEXT: por %xmm3, %xmm2 4365; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3 4366; SSE2-SSSE3-NEXT: pcmpgtd %xmm5, %xmm3 4367; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4 4368; SSE2-SSSE3-NEXT: pandn %xmm5, %xmm3 4369; SSE2-SSSE3-NEXT: por %xmm4, %xmm3 4370; SSE2-SSSE3-NEXT: movd %xmm3, %r8d 4371; SSE2-SSSE3-NEXT: movw %r8w, 36(%rdi) 4372; SSE2-SSSE3-NEXT: movd %xmm2, %r11d 4373; SSE2-SSSE3-NEXT: movw %r11w, 24(%rdi) 4374; SSE2-SSSE3-NEXT: movd %xmm1, %r14d 4375; SSE2-SSSE3-NEXT: movw %r14w, 12(%rdi) 4376; SSE2-SSSE3-NEXT: movd %xmm0, %eax 4377; SSE2-SSSE3-NEXT: movw %ax, (%rdi) 4378; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[3,3,3,3] 4379; SSE2-SSSE3-NEXT: movd %xmm4, %ecx 4380; SSE2-SSSE3-NEXT: movw %cx, 45(%rdi) 4381; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,2,3] 4382; SSE2-SSSE3-NEXT: movd %xmm4, %edx 4383; SSE2-SSSE3-NEXT: movw %dx, 42(%rdi) 4384; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,1,1] 4385; SSE2-SSSE3-NEXT: movd %xmm3, %esi 4386; SSE2-SSSE3-NEXT: movw %si, 39(%rdi) 4387; SSE2-SSSE3-NEXT: shrl $16, %r8d 4388; SSE2-SSSE3-NEXT: movb %r8b, 38(%rdi) 4389; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[3,3,3,3] 4390; SSE2-SSSE3-NEXT: movd %xmm3, %r8d 4391; SSE2-SSSE3-NEXT: movw %r8w, 33(%rdi) 4392; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] 4393; SSE2-SSSE3-NEXT: movd %xmm3, %r9d 4394; SSE2-SSSE3-NEXT: movw %r9w, 30(%rdi) 4395; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,1,1] 4396; SSE2-SSSE3-NEXT: movd %xmm2, %r10d 4397; SSE2-SSSE3-NEXT: movw %r10w, 27(%rdi) 4398; SSE2-SSSE3-NEXT: shrl $16, %r11d 4399; SSE2-SSSE3-NEXT: movb %r11b, 26(%rdi) 4400; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] 4401; SSE2-SSSE3-NEXT: movd %xmm2, %r11d 4402; SSE2-SSSE3-NEXT: movw 
%r11w, 21(%rdi) 4403; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] 4404; SSE2-SSSE3-NEXT: movd %xmm2, %ebx 4405; SSE2-SSSE3-NEXT: movw %bx, 18(%rdi) 4406; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1] 4407; SSE2-SSSE3-NEXT: movd %xmm1, %ebp 4408; SSE2-SSSE3-NEXT: movw %bp, 15(%rdi) 4409; SSE2-SSSE3-NEXT: shrl $16, %r14d 4410; SSE2-SSSE3-NEXT: movb %r14b, 14(%rdi) 4411; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] 4412; SSE2-SSSE3-NEXT: movd %xmm1, %r14d 4413; SSE2-SSSE3-NEXT: movw %r14w, 9(%rdi) 4414; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 4415; SSE2-SSSE3-NEXT: movd %xmm1, %r15d 4416; SSE2-SSSE3-NEXT: movw %r15w, 6(%rdi) 4417; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 4418; SSE2-SSSE3-NEXT: movd %xmm0, %r12d 4419; SSE2-SSSE3-NEXT: movw %r12w, 3(%rdi) 4420; SSE2-SSSE3-NEXT: shrl $16, %eax 4421; SSE2-SSSE3-NEXT: movb %al, 2(%rdi) 4422; SSE2-SSSE3-NEXT: shrl $16, %ecx 4423; SSE2-SSSE3-NEXT: movb %cl, 47(%rdi) 4424; SSE2-SSSE3-NEXT: shrl $16, %edx 4425; SSE2-SSSE3-NEXT: movb %dl, 44(%rdi) 4426; SSE2-SSSE3-NEXT: shrl $16, %esi 4427; SSE2-SSSE3-NEXT: movb %sil, 41(%rdi) 4428; SSE2-SSSE3-NEXT: shrl $16, %r8d 4429; SSE2-SSSE3-NEXT: movb %r8b, 35(%rdi) 4430; SSE2-SSSE3-NEXT: shrl $16, %r9d 4431; SSE2-SSSE3-NEXT: movb %r9b, 32(%rdi) 4432; SSE2-SSSE3-NEXT: shrl $16, %r10d 4433; SSE2-SSSE3-NEXT: movb %r10b, 29(%rdi) 4434; SSE2-SSSE3-NEXT: shrl $16, %r11d 4435; SSE2-SSSE3-NEXT: movb %r11b, 23(%rdi) 4436; SSE2-SSSE3-NEXT: shrl $16, %ebx 4437; SSE2-SSSE3-NEXT: movb %bl, 20(%rdi) 4438; SSE2-SSSE3-NEXT: shrl $16, %ebp 4439; SSE2-SSSE3-NEXT: movb %bpl, 17(%rdi) 4440; SSE2-SSSE3-NEXT: shrl $16, %r14d 4441; SSE2-SSSE3-NEXT: movb %r14b, 11(%rdi) 4442; SSE2-SSSE3-NEXT: shrl $16, %r15d 4443; SSE2-SSSE3-NEXT: movb %r15b, 8(%rdi) 4444; SSE2-SSSE3-NEXT: shrl $16, %r12d 4445; SSE2-SSSE3-NEXT: movb %r12b, 5(%rdi) 4446; SSE2-SSSE3-NEXT: popq %rbx 4447; SSE2-SSSE3-NEXT: popq %r12 4448; SSE2-SSSE3-NEXT: popq %r14 4449; SSE2-SSSE3-NEXT: popq 
%r15 4450; SSE2-SSSE3-NEXT: popq %rbp 4451; SSE2-SSSE3-NEXT: retq 4452; 4453; SSE41-LABEL: trunc_ssat_v16i32_v16i24: 4454; SSE41: # %bb.0: 4455; SSE41-NEXT: pmovsxbw {{.*#+}} xmm4 = [65535,127,65535,127,65535,127,65535,127] 4456; SSE41-NEXT: pminsd %xmm4, %xmm3 4457; SSE41-NEXT: pminsd %xmm4, %xmm2 4458; SSE41-NEXT: pminsd %xmm4, %xmm1 4459; SSE41-NEXT: pminsd %xmm4, %xmm0 4460; SSE41-NEXT: pmovsxbw {{.*#+}} xmm4 = [0,65408,0,65408,0,65408,0,65408] 4461; SSE41-NEXT: pmaxsd %xmm4, %xmm0 4462; SSE41-NEXT: pmaxsd %xmm4, %xmm1 4463; SSE41-NEXT: pmaxsd %xmm4, %xmm2 4464; SSE41-NEXT: pmaxsd %xmm4, %xmm3 4465; SSE41-NEXT: pextrd $3, %xmm3, %eax 4466; SSE41-NEXT: movw %ax, 45(%rdi) 4467; SSE41-NEXT: shrl $16, %eax 4468; SSE41-NEXT: movb %al, 47(%rdi) 4469; SSE41-NEXT: pextrd $2, %xmm3, %eax 4470; SSE41-NEXT: movw %ax, 42(%rdi) 4471; SSE41-NEXT: shrl $16, %eax 4472; SSE41-NEXT: movb %al, 44(%rdi) 4473; SSE41-NEXT: pextrd $1, %xmm3, %eax 4474; SSE41-NEXT: movw %ax, 39(%rdi) 4475; SSE41-NEXT: shrl $16, %eax 4476; SSE41-NEXT: movb %al, 41(%rdi) 4477; SSE41-NEXT: movd %xmm3, %eax 4478; SSE41-NEXT: movw %ax, 36(%rdi) 4479; SSE41-NEXT: shrl $16, %eax 4480; SSE41-NEXT: movb %al, 38(%rdi) 4481; SSE41-NEXT: pextrd $3, %xmm2, %eax 4482; SSE41-NEXT: movw %ax, 33(%rdi) 4483; SSE41-NEXT: shrl $16, %eax 4484; SSE41-NEXT: movb %al, 35(%rdi) 4485; SSE41-NEXT: pextrd $2, %xmm2, %eax 4486; SSE41-NEXT: movw %ax, 30(%rdi) 4487; SSE41-NEXT: shrl $16, %eax 4488; SSE41-NEXT: movb %al, 32(%rdi) 4489; SSE41-NEXT: pextrd $1, %xmm2, %eax 4490; SSE41-NEXT: movw %ax, 27(%rdi) 4491; SSE41-NEXT: shrl $16, %eax 4492; SSE41-NEXT: movb %al, 29(%rdi) 4493; SSE41-NEXT: movd %xmm2, %eax 4494; SSE41-NEXT: movw %ax, 24(%rdi) 4495; SSE41-NEXT: shrl $16, %eax 4496; SSE41-NEXT: movb %al, 26(%rdi) 4497; SSE41-NEXT: pextrd $3, %xmm1, %eax 4498; SSE41-NEXT: movw %ax, 21(%rdi) 4499; SSE41-NEXT: shrl $16, %eax 4500; SSE41-NEXT: movb %al, 23(%rdi) 4501; SSE41-NEXT: pextrd $2, %xmm1, %eax 4502; SSE41-NEXT: movw %ax, 
18(%rdi) 4503; SSE41-NEXT: shrl $16, %eax 4504; SSE41-NEXT: movb %al, 20(%rdi) 4505; SSE41-NEXT: pextrd $1, %xmm1, %eax 4506; SSE41-NEXT: movw %ax, 15(%rdi) 4507; SSE41-NEXT: shrl $16, %eax 4508; SSE41-NEXT: movb %al, 17(%rdi) 4509; SSE41-NEXT: movd %xmm1, %eax 4510; SSE41-NEXT: movw %ax, 12(%rdi) 4511; SSE41-NEXT: shrl $16, %eax 4512; SSE41-NEXT: movb %al, 14(%rdi) 4513; SSE41-NEXT: pextrd $3, %xmm0, %eax 4514; SSE41-NEXT: movw %ax, 9(%rdi) 4515; SSE41-NEXT: shrl $16, %eax 4516; SSE41-NEXT: movb %al, 11(%rdi) 4517; SSE41-NEXT: pextrd $2, %xmm0, %eax 4518; SSE41-NEXT: movw %ax, 6(%rdi) 4519; SSE41-NEXT: shrl $16, %eax 4520; SSE41-NEXT: movb %al, 8(%rdi) 4521; SSE41-NEXT: pextrd $1, %xmm0, %eax 4522; SSE41-NEXT: movw %ax, 3(%rdi) 4523; SSE41-NEXT: shrl $16, %eax 4524; SSE41-NEXT: movb %al, 5(%rdi) 4525; SSE41-NEXT: movd %xmm0, %eax 4526; SSE41-NEXT: movw %ax, (%rdi) 4527; SSE41-NEXT: shrl $16, %eax 4528; SSE41-NEXT: movb %al, 2(%rdi) 4529; SSE41-NEXT: retq 4530; 4531; AVX1-LABEL: trunc_ssat_v16i32_v16i24: 4532; AVX1: # %bb.0: 4533; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 4534; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [8388607,8388607,8388607,8388607] 4535; AVX1-NEXT: vpminsd %xmm3, %xmm2, %xmm4 4536; AVX1-NEXT: vpminsd %xmm3, %xmm1, %xmm2 4537; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 4538; AVX1-NEXT: vpminsd %xmm3, %xmm1, %xmm1 4539; AVX1-NEXT: vpminsd %xmm3, %xmm0, %xmm0 4540; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [4286578688,4286578688,4286578688,4286578688] 4541; AVX1-NEXT: vpmaxsd %xmm3, %xmm0, %xmm0 4542; AVX1-NEXT: vpmaxsd %xmm3, %xmm1, %xmm1 4543; AVX1-NEXT: vpmaxsd %xmm3, %xmm2, %xmm2 4544; AVX1-NEXT: vpmaxsd %xmm3, %xmm4, %xmm3 4545; AVX1-NEXT: vpextrd $3, %xmm3, %eax 4546; AVX1-NEXT: movw %ax, 45(%rdi) 4547; AVX1-NEXT: shrl $16, %eax 4548; AVX1-NEXT: movb %al, 47(%rdi) 4549; AVX1-NEXT: vpextrd $2, %xmm3, %eax 4550; AVX1-NEXT: movw %ax, 42(%rdi) 4551; AVX1-NEXT: shrl $16, %eax 4552; AVX1-NEXT: movb %al, 44(%rdi) 4553; AVX1-NEXT: vpextrd $1, %xmm3, %eax 
4554; AVX1-NEXT: movw %ax, 39(%rdi) 4555; AVX1-NEXT: shrl $16, %eax 4556; AVX1-NEXT: movb %al, 41(%rdi) 4557; AVX1-NEXT: vmovd %xmm3, %eax 4558; AVX1-NEXT: movw %ax, 36(%rdi) 4559; AVX1-NEXT: shrl $16, %eax 4560; AVX1-NEXT: movb %al, 38(%rdi) 4561; AVX1-NEXT: vpextrd $3, %xmm2, %eax 4562; AVX1-NEXT: movw %ax, 33(%rdi) 4563; AVX1-NEXT: shrl $16, %eax 4564; AVX1-NEXT: movb %al, 35(%rdi) 4565; AVX1-NEXT: vpextrd $2, %xmm2, %eax 4566; AVX1-NEXT: movw %ax, 30(%rdi) 4567; AVX1-NEXT: shrl $16, %eax 4568; AVX1-NEXT: movb %al, 32(%rdi) 4569; AVX1-NEXT: vpextrd $1, %xmm2, %eax 4570; AVX1-NEXT: movw %ax, 27(%rdi) 4571; AVX1-NEXT: shrl $16, %eax 4572; AVX1-NEXT: movb %al, 29(%rdi) 4573; AVX1-NEXT: vmovd %xmm2, %eax 4574; AVX1-NEXT: movw %ax, 24(%rdi) 4575; AVX1-NEXT: shrl $16, %eax 4576; AVX1-NEXT: movb %al, 26(%rdi) 4577; AVX1-NEXT: vpextrd $3, %xmm1, %eax 4578; AVX1-NEXT: movw %ax, 21(%rdi) 4579; AVX1-NEXT: shrl $16, %eax 4580; AVX1-NEXT: movb %al, 23(%rdi) 4581; AVX1-NEXT: vpextrd $2, %xmm1, %eax 4582; AVX1-NEXT: movw %ax, 18(%rdi) 4583; AVX1-NEXT: shrl $16, %eax 4584; AVX1-NEXT: movb %al, 20(%rdi) 4585; AVX1-NEXT: vpextrd $1, %xmm1, %eax 4586; AVX1-NEXT: movw %ax, 15(%rdi) 4587; AVX1-NEXT: shrl $16, %eax 4588; AVX1-NEXT: movb %al, 17(%rdi) 4589; AVX1-NEXT: vmovd %xmm1, %eax 4590; AVX1-NEXT: movw %ax, 12(%rdi) 4591; AVX1-NEXT: shrl $16, %eax 4592; AVX1-NEXT: movb %al, 14(%rdi) 4593; AVX1-NEXT: vpextrd $3, %xmm0, %eax 4594; AVX1-NEXT: movw %ax, 9(%rdi) 4595; AVX1-NEXT: shrl $16, %eax 4596; AVX1-NEXT: movb %al, 11(%rdi) 4597; AVX1-NEXT: vpextrd $2, %xmm0, %eax 4598; AVX1-NEXT: movw %ax, 6(%rdi) 4599; AVX1-NEXT: shrl $16, %eax 4600; AVX1-NEXT: movb %al, 8(%rdi) 4601; AVX1-NEXT: vpextrd $1, %xmm0, %eax 4602; AVX1-NEXT: movw %ax, 3(%rdi) 4603; AVX1-NEXT: shrl $16, %eax 4604; AVX1-NEXT: movb %al, 5(%rdi) 4605; AVX1-NEXT: vmovd %xmm0, %eax 4606; AVX1-NEXT: movw %ax, (%rdi) 4607; AVX1-NEXT: shrl $16, %eax 4608; AVX1-NEXT: movb %al, 2(%rdi) 4609; AVX1-NEXT: vzeroupper 4610; 
AVX1-NEXT: retq 4611; 4612; AVX2-LABEL: trunc_ssat_v16i32_v16i24: 4613; AVX2: # %bb.0: 4614; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8388607,8388607,8388607,8388607,8388607,8388607,8388607,8388607] 4615; AVX2-NEXT: vpminsd %ymm2, %ymm1, %ymm1 4616; AVX2-NEXT: vpminsd %ymm2, %ymm0, %ymm0 4617; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [4286578688,4286578688,4286578688,4286578688,4286578688,4286578688,4286578688,4286578688] 4618; AVX2-NEXT: vpmaxsd %ymm2, %ymm0, %ymm0 4619; AVX2-NEXT: vpmaxsd %ymm2, %ymm1, %ymm1 4620; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 4621; AVX2-NEXT: vpextrd $3, %xmm2, %eax 4622; AVX2-NEXT: movw %ax, 45(%rdi) 4623; AVX2-NEXT: shrl $16, %eax 4624; AVX2-NEXT: movb %al, 47(%rdi) 4625; AVX2-NEXT: vpextrd $2, %xmm2, %eax 4626; AVX2-NEXT: movw %ax, 42(%rdi) 4627; AVX2-NEXT: shrl $16, %eax 4628; AVX2-NEXT: movb %al, 44(%rdi) 4629; AVX2-NEXT: vpextrd $1, %xmm2, %eax 4630; AVX2-NEXT: movw %ax, 39(%rdi) 4631; AVX2-NEXT: shrl $16, %eax 4632; AVX2-NEXT: movb %al, 41(%rdi) 4633; AVX2-NEXT: vmovd %xmm2, %eax 4634; AVX2-NEXT: movw %ax, 36(%rdi) 4635; AVX2-NEXT: shrl $16, %eax 4636; AVX2-NEXT: movb %al, 38(%rdi) 4637; AVX2-NEXT: vpextrd $3, %xmm1, %eax 4638; AVX2-NEXT: movw %ax, 33(%rdi) 4639; AVX2-NEXT: shrl $16, %eax 4640; AVX2-NEXT: movb %al, 35(%rdi) 4641; AVX2-NEXT: vpextrd $2, %xmm1, %eax 4642; AVX2-NEXT: movw %ax, 30(%rdi) 4643; AVX2-NEXT: shrl $16, %eax 4644; AVX2-NEXT: movb %al, 32(%rdi) 4645; AVX2-NEXT: vpextrd $1, %xmm1, %eax 4646; AVX2-NEXT: movw %ax, 27(%rdi) 4647; AVX2-NEXT: shrl $16, %eax 4648; AVX2-NEXT: movb %al, 29(%rdi) 4649; AVX2-NEXT: vmovd %xmm1, %eax 4650; AVX2-NEXT: movw %ax, 24(%rdi) 4651; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 4652; AVX2-NEXT: shrl $16, %eax 4653; AVX2-NEXT: movb %al, 26(%rdi) 4654; AVX2-NEXT: vpextrd $3, %xmm1, %eax 4655; AVX2-NEXT: movw %ax, 21(%rdi) 4656; AVX2-NEXT: shrl $16, %eax 4657; AVX2-NEXT: movb %al, 23(%rdi) 4658; AVX2-NEXT: vpextrd $2, %xmm1, %eax 4659; AVX2-NEXT: movw %ax, 18(%rdi) 4660; AVX2-NEXT: 
shrl $16, %eax 4661; AVX2-NEXT: movb %al, 20(%rdi) 4662; AVX2-NEXT: vpextrd $1, %xmm1, %eax 4663; AVX2-NEXT: movw %ax, 15(%rdi) 4664; AVX2-NEXT: shrl $16, %eax 4665; AVX2-NEXT: movb %al, 17(%rdi) 4666; AVX2-NEXT: vmovd %xmm1, %eax 4667; AVX2-NEXT: movw %ax, 12(%rdi) 4668; AVX2-NEXT: shrl $16, %eax 4669; AVX2-NEXT: movb %al, 14(%rdi) 4670; AVX2-NEXT: vpextrd $3, %xmm0, %eax 4671; AVX2-NEXT: movw %ax, 9(%rdi) 4672; AVX2-NEXT: shrl $16, %eax 4673; AVX2-NEXT: movb %al, 11(%rdi) 4674; AVX2-NEXT: vpextrd $2, %xmm0, %eax 4675; AVX2-NEXT: movw %ax, 6(%rdi) 4676; AVX2-NEXT: shrl $16, %eax 4677; AVX2-NEXT: movb %al, 8(%rdi) 4678; AVX2-NEXT: vpextrd $1, %xmm0, %eax 4679; AVX2-NEXT: movw %ax, 3(%rdi) 4680; AVX2-NEXT: shrl $16, %eax 4681; AVX2-NEXT: movb %al, 5(%rdi) 4682; AVX2-NEXT: vmovd %xmm0, %eax 4683; AVX2-NEXT: movw %ax, (%rdi) 4684; AVX2-NEXT: shrl $16, %eax 4685; AVX2-NEXT: movb %al, 2(%rdi) 4686; AVX2-NEXT: vzeroupper 4687; AVX2-NEXT: retq 4688; 4689; AVX512-LABEL: trunc_ssat_v16i32_v16i24: 4690; AVX512: # %bb.0: 4691; AVX512-NEXT: pushq %rbp 4692; AVX512-NEXT: pushq %r15 4693; AVX512-NEXT: pushq %r14 4694; AVX512-NEXT: pushq %rbx 4695; AVX512-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 4696; AVX512-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 4697; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm1 4698; AVX512-NEXT: vpextrd $3, %xmm1, %r15d 4699; AVX512-NEXT: movw %r15w, 45(%rdi) 4700; AVX512-NEXT: vpextrd $2, %xmm1, %r14d 4701; AVX512-NEXT: movw %r14w, 42(%rdi) 4702; AVX512-NEXT: vpextrd $1, %xmm1, %ebp 4703; AVX512-NEXT: movw %bp, 39(%rdi) 4704; AVX512-NEXT: vmovd %xmm1, %r11d 4705; AVX512-NEXT: movw %r11w, 36(%rdi) 4706; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm1 4707; AVX512-NEXT: vpextrd $3, %xmm1, %ebx 4708; AVX512-NEXT: movw %bx, 33(%rdi) 4709; AVX512-NEXT: vpextrd $2, %xmm1, %r10d 4710; AVX512-NEXT: movw %r10w, 30(%rdi) 4711; AVX512-NEXT: vpextrd $1, %xmm1, %r9d 4712; AVX512-NEXT: movw %r9w, 27(%rdi) 4713; AVX512-NEXT: 
vmovd %xmm1, %r8d 4714; AVX512-NEXT: movw %r8w, 24(%rdi) 4715; AVX512-NEXT: vpextrd $3, %xmm0, %esi 4716; AVX512-NEXT: movw %si, 9(%rdi) 4717; AVX512-NEXT: vpextrd $2, %xmm0, %edx 4718; AVX512-NEXT: movw %dx, 6(%rdi) 4719; AVX512-NEXT: vpextrd $1, %xmm0, %ecx 4720; AVX512-NEXT: movw %cx, 3(%rdi) 4721; AVX512-NEXT: vmovd %xmm0, %eax 4722; AVX512-NEXT: movw %ax, (%rdi) 4723; AVX512-NEXT: shrl $16, %r15d 4724; AVX512-NEXT: movb %r15b, 47(%rdi) 4725; AVX512-NEXT: shrl $16, %r14d 4726; AVX512-NEXT: movb %r14b, 44(%rdi) 4727; AVX512-NEXT: shrl $16, %ebp 4728; AVX512-NEXT: movb %bpl, 41(%rdi) 4729; AVX512-NEXT: shrl $16, %r11d 4730; AVX512-NEXT: movb %r11b, 38(%rdi) 4731; AVX512-NEXT: shrl $16, %ebx 4732; AVX512-NEXT: movb %bl, 35(%rdi) 4733; AVX512-NEXT: shrl $16, %r10d 4734; AVX512-NEXT: movb %r10b, 32(%rdi) 4735; AVX512-NEXT: shrl $16, %r9d 4736; AVX512-NEXT: movb %r9b, 29(%rdi) 4737; AVX512-NEXT: shrl $16, %r8d 4738; AVX512-NEXT: movb %r8b, 26(%rdi) 4739; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 4740; AVX512-NEXT: vpextrd $3, %xmm0, %r11d 4741; AVX512-NEXT: movw %r11w, 21(%rdi) 4742; AVX512-NEXT: vpextrd $2, %xmm0, %r10d 4743; AVX512-NEXT: movw %r10w, 18(%rdi) 4744; AVX512-NEXT: vpextrd $1, %xmm0, %r9d 4745; AVX512-NEXT: movw %r9w, 15(%rdi) 4746; AVX512-NEXT: vmovd %xmm0, %r8d 4747; AVX512-NEXT: movw %r8w, 12(%rdi) 4748; AVX512-NEXT: shrl $16, %esi 4749; AVX512-NEXT: movb %sil, 11(%rdi) 4750; AVX512-NEXT: shrl $16, %edx 4751; AVX512-NEXT: movb %dl, 8(%rdi) 4752; AVX512-NEXT: shrl $16, %ecx 4753; AVX512-NEXT: movb %cl, 5(%rdi) 4754; AVX512-NEXT: shrl $16, %eax 4755; AVX512-NEXT: movb %al, 2(%rdi) 4756; AVX512-NEXT: shrl $16, %r11d 4757; AVX512-NEXT: movb %r11b, 23(%rdi) 4758; AVX512-NEXT: shrl $16, %r10d 4759; AVX512-NEXT: movb %r10b, 20(%rdi) 4760; AVX512-NEXT: shrl $16, %r9d 4761; AVX512-NEXT: movb %r9b, 17(%rdi) 4762; AVX512-NEXT: shrl $16, %r8d 4763; AVX512-NEXT: movb %r8b, 14(%rdi) 4764; AVX512-NEXT: popq %rbx 4765; AVX512-NEXT: popq %r14 4766; AVX512-NEXT: popq 
%r15 4767; AVX512-NEXT: popq %rbp 4768; AVX512-NEXT: vzeroupper 4769; AVX512-NEXT: retq 4770; 4771; SKX-LABEL: trunc_ssat_v16i32_v16i24: 4772; SKX: # %bb.0: 4773; SKX-NEXT: pushq %rbp 4774; SKX-NEXT: pushq %r15 4775; SKX-NEXT: pushq %r14 4776; SKX-NEXT: pushq %rbx 4777; SKX-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 4778; SKX-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 4779; SKX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 4780; SKX-NEXT: vpextrd $3, %xmm1, %r15d 4781; SKX-NEXT: movw %r15w, 45(%rdi) 4782; SKX-NEXT: vpextrd $2, %xmm1, %r14d 4783; SKX-NEXT: movw %r14w, 42(%rdi) 4784; SKX-NEXT: vpextrd $1, %xmm1, %ebp 4785; SKX-NEXT: movw %bp, 39(%rdi) 4786; SKX-NEXT: vmovd %xmm1, %r11d 4787; SKX-NEXT: movw %r11w, 36(%rdi) 4788; SKX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 4789; SKX-NEXT: vpextrd $3, %xmm1, %ebx 4790; SKX-NEXT: movw %bx, 33(%rdi) 4791; SKX-NEXT: vpextrd $2, %xmm1, %r10d 4792; SKX-NEXT: movw %r10w, 30(%rdi) 4793; SKX-NEXT: vpextrd $1, %xmm1, %r9d 4794; SKX-NEXT: movw %r9w, 27(%rdi) 4795; SKX-NEXT: vmovd %xmm1, %r8d 4796; SKX-NEXT: vpextrd $3, %xmm0, %edx 4797; SKX-NEXT: movw %r8w, 24(%rdi) 4798; SKX-NEXT: movw %dx, 9(%rdi) 4799; SKX-NEXT: vpextrd $2, %xmm0, %esi 4800; SKX-NEXT: vpextrd $1, %xmm0, %eax 4801; SKX-NEXT: movw %si, 6(%rdi) 4802; SKX-NEXT: movw %ax, 3(%rdi) 4803; SKX-NEXT: vmovd %xmm0, %ecx 4804; SKX-NEXT: movw %cx, (%rdi) 4805; SKX-NEXT: shrl $16, %r15d 4806; SKX-NEXT: movb %r15b, 47(%rdi) 4807; SKX-NEXT: shrl $16, %r14d 4808; SKX-NEXT: movb %r14b, 44(%rdi) 4809; SKX-NEXT: shrl $16, %ebp 4810; SKX-NEXT: movb %bpl, 41(%rdi) 4811; SKX-NEXT: shrl $16, %r11d 4812; SKX-NEXT: movb %r11b, 38(%rdi) 4813; SKX-NEXT: shrl $16, %ebx 4814; SKX-NEXT: movb %bl, 35(%rdi) 4815; SKX-NEXT: shrl $16, %r10d 4816; SKX-NEXT: movb %r10b, 32(%rdi) 4817; SKX-NEXT: shrl $16, %r9d 4818; SKX-NEXT: movb %r9b, 29(%rdi) 4819; SKX-NEXT: shrl $16, %r8d 4820; SKX-NEXT: movb %r8b, 26(%rdi) 4821; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 4822; 
SKX-NEXT: vpextrd $3, %xmm0, %r11d 4823; SKX-NEXT: movw %r11w, 21(%rdi) 4824; SKX-NEXT: vpextrd $2, %xmm0, %r10d 4825; SKX-NEXT: movw %r10w, 18(%rdi) 4826; SKX-NEXT: vpextrd $1, %xmm0, %r9d 4827; SKX-NEXT: movw %r9w, 15(%rdi) 4828; SKX-NEXT: vmovd %xmm0, %r8d 4829; SKX-NEXT: movw %r8w, 12(%rdi) 4830; SKX-NEXT: shrl $16, %edx 4831; SKX-NEXT: movb %dl, 11(%rdi) 4832; SKX-NEXT: shrl $16, %esi 4833; SKX-NEXT: movb %sil, 8(%rdi) 4834; SKX-NEXT: shrl $16, %eax 4835; SKX-NEXT: movb %al, 5(%rdi) 4836; SKX-NEXT: shrl $16, %ecx 4837; SKX-NEXT: movb %cl, 2(%rdi) 4838; SKX-NEXT: shrl $16, %r11d 4839; SKX-NEXT: movb %r11b, 23(%rdi) 4840; SKX-NEXT: shrl $16, %r10d 4841; SKX-NEXT: movb %r10b, 20(%rdi) 4842; SKX-NEXT: shrl $16, %r9d 4843; SKX-NEXT: movb %r9b, 17(%rdi) 4844; SKX-NEXT: shrl $16, %r8d 4845; SKX-NEXT: movb %r8b, 14(%rdi) 4846; SKX-NEXT: popq %rbx 4847; SKX-NEXT: popq %r14 4848; SKX-NEXT: popq %r15 4849; SKX-NEXT: popq %rbp 4850; SKX-NEXT: vzeroupper 4851; SKX-NEXT: retq 4852 %a = call <16 x i32> @llvm.smin.v16i32(<16 x i32> %x, <16 x i32> <i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607, i32 8388607>) 4853 %b = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %a, <16 x i32> <i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608>) 4854 %c = trunc <16 x i32> %b to <16 x i24> 4855 store <16 x i24> %c, ptr %y 4856 ret void 4857} 4858declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>) 4859declare <16 x i32> @llvm.smin.v16i32(<16 x i32>, <16 x i32>) 4860