; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST-ALL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST-PERLANE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL

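; Vector truncations carrying the nsw/nuw no-wrap flags. 'trunc nsw' promises
; the source value fits in the signed range of the destination element type,
; so signed saturating packs (packss*) are a lossless lowering; 'trunc nuw'
; promises it fits in the unsigned range, so unsigned saturating packs
; (packus*) apply. AVX512 subtargets can use native vpmov* truncations.
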
define <8 x i32> @trunc8i64_8i32_nsw(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i32_nsw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT: movaps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc8i64_8i32_nsw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc8i64_8i32_nsw:
; AVX2-SLOW: # %bb.0: # %entry
; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-ALL-LABEL: trunc8i64_8i32_nsw:
; AVX2-FAST-ALL: # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: trunc8i64_8i32_nsw:
; AVX2-FAST-PERLANE: # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX512-LABEL: trunc8i64_8i32_nsw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: retq
entry:
  %0 = trunc nsw <8 x i64> %a to <8 x i32>
  ret <8 x i32> %0
}

define <8 x i32> @trunc8i64_8i32_nuw(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i32_nuw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT: movaps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc8i64_8i32_nuw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc8i64_8i32_nuw:
; AVX2-SLOW: # %bb.0: # %entry
; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-ALL-LABEL: trunc8i64_8i32_nuw:
; AVX2-FAST-ALL: # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: trunc8i64_8i32_nuw:
; AVX2-FAST-PERLANE: # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX512-LABEL: trunc8i64_8i32_nuw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: retq
entry:
  %0 = trunc nuw <8 x i64> %a to <8 x i32>
  ret <8 x i32> %0
}

define <8 x i16> @trunc8i64_8i16_nsw(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i16_nsw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packssdw %xmm3, %xmm2
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: packssdw %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc8i64_8i16_nsw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i64_8i16_nsw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc8i64_8i16_nsw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
  %0 = trunc nsw <8 x i64> %a to <8 x i16>
  ret <8 x i16> %0
}

define <8 x i16> @trunc8i64_8i16_nuw(<8 x i64> %a) {
; SSE2-SSSE3-LABEL: trunc8i64_8i16_nuw:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT: pslld $16, %xmm2
; SSE2-SSSE3-NEXT: psrad $16, %xmm2
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-SSSE3-NEXT: pslld $16, %xmm0
; SSE2-SSSE3-NEXT: psrad $16, %xmm0
; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i64_8i16_nuw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packusdw %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc8i64_8i16_nuw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i64_8i16_nuw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc8i64_8i16_nuw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
  %0 = trunc nuw <8 x i64> %a to <8 x i16>
  ret <8 x i16> %0
}

define void @trunc8i64_8i8_nsw(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i8_nsw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packssdw %xmm3, %xmm2
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: packssdw %xmm2, %xmm0
; SSE-NEXT: packsswb %xmm0, %xmm0
; SSE-NEXT: movq %xmm0, (%rax)
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc8i64_8i8_nsw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i64_8i8_nsw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc8i64_8i8_nsw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovqb %zmm0, (%rax)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
  %0 = trunc nsw <8 x i64> %a to <8 x i8>
  store <8 x i8> %0, ptr undef, align 4
  ret void
}

define void @trunc8i64_8i8_nuw(<8 x i64> %a) {
; SSE2-SSSE3-LABEL: trunc8i64_8i8_nuw:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT: movq %xmm0, (%rax)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i64_8i8_nuw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packusdw %xmm2, %xmm0
; SSE41-NEXT: packuswb %xmm0, %xmm0
; SSE41-NEXT: movq %xmm0, (%rax)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc8i64_8i8_nuw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i64_8i8_nuw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc8i64_8i8_nuw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovqb %zmm0, (%rax)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
  %0 = trunc nuw <8 x i64> %a to <8 x i8>
  store <8 x i8> %0, ptr undef, align 4
  ret void
}

define <8 x i16> @trunc8i32_8i16_nsw(<8 x i32> %a) {
; SSE-LABEL: trunc8i32_8i16_nsw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc8i32_8i16_nsw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i32_8i16_nsw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc8i32_8i16_nsw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc8i32_8i16_nsw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc8i32_8i16_nsw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc8i32_8i16_nsw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nsw <8 x i32> %a to <8 x i16>
  ret <8 x i16> %0
}

define <8 x i16> @trunc8i32_8i16_nuw(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i16_nuw:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc8i32_8i16_nuw:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i32_8i16_nuw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc8i32_8i16_nuw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i32_8i16_nuw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc8i32_8i16_nuw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc8i32_8i16_nuw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc8i32_8i16_nuw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc8i32_8i16_nuw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nuw <8 x i32> %a to <8 x i16>
  ret <8 x i16> %0
}

define void @trunc8i32_8i8_nsw(<8 x i32> %a) {
; SSE-LABEL: trunc8i32_8i8_nsw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: packsswb %xmm0, %xmm0
; SSE-NEXT: movq %xmm0, (%rax)
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc8i32_8i8_nsw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i32_8i8_nsw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc8i32_8i8_nsw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, (%rax)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc8i32_8i8_nsw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpmovdb %ymm0, (%rax)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc8i32_8i8_nsw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, (%rax)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc8i32_8i8_nsw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rax)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nsw <8 x i32> %a to <8 x i8>
  store <8 x i8> %0, ptr undef, align 4
  ret void
}

define void @trunc8i32_8i8_nuw(<8 x i32> %a) {
; SSE2-SSSE3-LABEL: trunc8i32_8i8_nuw:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT: movq %xmm0, (%rax)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i32_8i8_nuw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packuswb %xmm0, %xmm0
; SSE41-NEXT: movq %xmm0, (%rax)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc8i32_8i8_nuw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i32_8i8_nuw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc8i32_8i8_nuw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, (%rax)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc8i32_8i8_nuw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpmovdb %ymm0, (%rax)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc8i32_8i8_nuw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, (%rax)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc8i32_8i8_nuw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rax)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nuw <8 x i32> %a to <8 x i8>
  store <8 x i8> %0, ptr undef, align 4
  ret void
}

define void @trunc16i32_16i16_nsw(<16 x i32> %a) {
; SSE-LABEL: trunc16i32_16i16_nsw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: packssdw %xmm3, %xmm2
; SSE-NEXT: movdqu %xmm2, (%rax)
; SSE-NEXT: movdqu %xmm0, (%rax)
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc16i32_16i16_nsw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vmovdqu %xmm1, (%rax)
; AVX1-NEXT: vmovdqu %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc16i32_16i16_nsw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc16i32_16i16_nsw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovdw %zmm0, (%rax)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
  %0 = trunc nsw <16 x i32> %a to <16 x i16>
  store <16 x i16> %0, ptr undef, align 4
  ret void
}

define void @trunc16i32_16i16_nuw(<16 x i32> %a) {
; SSE2-LABEL: trunc16i32_16i16_nuw:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: pslld $16, %xmm3
; SSE2-NEXT: psrad $16, %xmm3
; SSE2-NEXT: pslld $16, %xmm2
; SSE2-NEXT: psrad $16, %xmm2
; SSE2-NEXT: packssdw %xmm3, %xmm2
; SSE2-NEXT: movdqu %xmm2, (%rax)
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc16i32_16i16_nuw:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: pshufb %xmm4, %xmm1
; SSSE3-NEXT: pshufb %xmm4, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: pshufb %xmm4, %xmm3
; SSSE3-NEXT: pshufb %xmm4, %xmm2
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSSE3-NEXT: movdqu %xmm2, (%rax)
; SSSE3-NEXT: movdqu %xmm0, (%rax)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc16i32_16i16_nuw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: movdqu %xmm2, (%rax)
; SSE41-NEXT: movdqu %xmm0, (%rax)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc16i32_16i16_nuw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vmovdqu %xmm1, (%rax)
; AVX1-NEXT: vmovdqu %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc16i32_16i16_nuw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc16i32_16i16_nuw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovdw %zmm0, (%rax)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
  %0 = trunc nuw <16 x i32> %a to <16 x i16>
  store <16 x i16> %0, ptr undef, align 4
  ret void
}

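; FIXME: Despite the _nsw suffix, the IR below performs a 'trunc nuw' (and is
; lowered with unsigned packs, matching trunc16i32_16i8_nuw below).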
define void @trunc16i32_16i8_nsw(<16 x i32> %a) {
; SSE2-SSSE3-LABEL: trunc16i32_16i8_nsw:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc16i32_16i8_nsw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packuswb %xmm2, %xmm0
; SSE41-NEXT: movdqu %xmm0, (%rax)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc16i32_16i8_nsw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqu %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc16i32_16i8_nsw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT: vmovdqu %xmm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc16i32_16i8_nsw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovdb %zmm0, (%rax)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
  %0 = trunc nuw <16 x i32> %a to <16 x i8>
  store <16 x i8> %0, ptr undef, align 4
  ret void
}

define void @trunc16i32_16i8_nuw(<16 x i32> %a) {
; SSE2-SSSE3-LABEL: trunc16i32_16i8_nuw:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc16i32_16i8_nuw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packuswb %xmm2, %xmm0
; SSE41-NEXT: movdqu %xmm0, (%rax)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc16i32_16i8_nuw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqu %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc16i32_16i8_nuw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT: vmovdqu %xmm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc16i32_16i8_nuw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpmovdb %zmm0, (%rax)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
  %0 = trunc nuw <16 x i32> %a to <16 x i8>
  store <16 x i8> %0, ptr undef, align 4
  ret void
}

define void @trunc16i16_16i8_nsw(<16 x i16> %a) {
; SSE-LABEL: trunc16i16_16i8_nsw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: movdqu %xmm0, (%rax)
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc16i16_16i8_nsw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqu %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc16i16_16i8_nsw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqu %xmm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc16i16_16i8_nsw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, (%rax)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc16i16_16i8_nsw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpmovdb %zmm0, (%rax)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc16i16_16i8_nsw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: vmovdqu %xmm0, (%rax)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc16i16_16i8_nsw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rax)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nsw <16 x i16> %a to <16 x i8>
  store <16 x i8> %0, ptr undef, align 4
  ret void
}

define void @trunc16i16_16i8_nuw(<16 x i16> %a) {
; SSE-LABEL: trunc16i16_16i8_nuw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: movdqu %xmm0, (%rax)
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc16i16_16i8_nuw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqu %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc16i16_16i8_nuw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovdqu %xmm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc16i16_16i8_nuw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, (%rax)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc16i16_16i8_nuw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpmovdb %zmm0, (%rax)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc16i16_16i8_nuw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: vmovdqu %xmm0, (%rax)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc16i16_16i8_nuw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rax)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nuw <16 x i16> %a to <16 x i8>
  store <16 x i8> %0, ptr undef, align 4
  ret void
}

define void @trunc32i16_32i8_nsw(<32 x i16> %a) {
; SSE-LABEL: trunc32i16_32i8_nsw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: packsswb %xmm3, %xmm2
; SSE-NEXT: movdqu %xmm2, (%rax)
; SSE-NEXT: movdqu %xmm0, (%rax)
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc32i16_32i8_nsw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vmovdqu %xmm1, (%rax)
; AVX1-NEXT: vmovdqu %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc32i16_32i8_nsw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc32i16_32i8_nsw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpmovdb %zmm1, (%rax)
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, (%rax)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc32i16_32i8_nsw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vpmovdb %zmm1, (%rax)
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpmovdb %zmm0, (%rax)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc32i16_32i8_nsw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vpmovwb %zmm0, (%rax)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc32i16_32i8_nsw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rax)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nsw <32 x i16> %a to <32 x i8>
  store <32 x i8> %0, ptr undef, align 4
  ret void
}

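; FIXME: Despite the _nuw suffix, the IR below performs a 'trunc nsw' (and is
; lowered with signed packs, matching trunc32i16_32i8_nsw above).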
define void @trunc32i16_32i8_nuw(<32 x i16> %a) {
; SSE-LABEL: trunc32i16_32i8_nuw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: packsswb %xmm3, %xmm2
; SSE-NEXT: movdqu %xmm2, (%rax)
; SSE-NEXT: movdqu %xmm0, (%rax)
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc32i16_32i8_nuw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vmovdqu %xmm1, (%rax)
; AVX1-NEXT: vmovdqu %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc32i16_32i8_nuw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc32i16_32i8_nuw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpmovdb %zmm1, (%rax)
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, (%rax)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc32i16_32i8_nuw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vpmovdb %zmm1, (%rax)
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpmovdb %zmm0, (%rax)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc32i16_32i8_nuw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vpmovwb %zmm0, (%rax)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc32i16_32i8_nuw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rax)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nsw <32 x i16> %a to <32 x i8>
  store <32 x i8> %0, ptr undef, align 4
  ret void
}

define <8 x i32> @trunc2x4i64_8i32_nsw(<4 x i64> %a, <4 x i64> %b) {
; SSE-LABEL: trunc2x4i64_8i32_nsw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT: movaps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc2x4i64_8i32_nsw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc2x4i64_8i32_nsw:
; AVX2-SLOW: # %bb.0: # %entry
; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-ALL-LABEL: trunc2x4i64_8i32_nsw:
; AVX2-FAST-ALL: # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: trunc2x4i64_8i32_nsw:
; AVX2-FAST-PERLANE: # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX512-LABEL: trunc2x4i64_8i32_nsw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: retq
entry:
  %0 = trunc nsw <4 x i64> %a to <4 x i32>
  %1 = trunc nsw <4 x i64> %b to <4 x i32>
  %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}

define <8 x i32> @trunc2x4i64_8i32_nuw(<4 x i64> %a, <4 x i64> %b) {
; SSE-LABEL: trunc2x4i64_8i32_nuw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT: movaps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc2x4i64_8i32_nuw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: trunc2x4i64_8i32_nuw:
; AVX2-SLOW: # %bb.0: # %entry
; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-ALL-LABEL: trunc2x4i64_8i32_nuw:
; AVX2-FAST-ALL: # %bb.0: # %entry
; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: trunc2x4i64_8i32_nuw:
; AVX2-FAST-PERLANE: # %bb.0: # %entry
; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
; AVX2-FAST-PERLANE-NEXT: retq
;
; AVX512-LABEL: trunc2x4i64_8i32_nuw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: retq
entry:
  %0 = trunc nuw <4 x i64> %a to <4 x i32>
  %1 = trunc nuw <4 x i64> %b to <4 x i32>
  %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}

define <8 x i16> @trunc2x4i64_8i16_nsw(<4 x i64> %a, <4 x i64> %b) {
; SSE-LABEL: trunc2x4i64_8i16_nsw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: packssdw %xmm3, %xmm2
; SSE-NEXT: packssdw %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc2x4i64_8i16_nsw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x4i64_8i16_nsw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc2x4i64_8i16_nsw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: vpmovqw %zmm1, %xmm1
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x4i64_8i16_nsw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0
; AVX512VL-NEXT: vpmovqw %ymm1, %xmm1
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x4i64_8i16_nsw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT: vpmovqw %zmm1, %xmm1
; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x4i64_8i16_nsw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0
; AVX512BWVL-NEXT: vpmovqw %ymm1, %xmm1
; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nsw <4 x i64> %a to <4 x i16>
  %1 = trunc nsw <4 x i64> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

define <8 x i16> @trunc2x4i64_8i16_nuw(<4 x i64> %a, <4 x i64> %b) {
; SSE2-SSSE3-LABEL: trunc2x4i64_8i16_nuw:
; SSE2-SSSE3: # %bb.0: # %entry
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-SSSE3-NEXT: pslld $16, %xmm0
; SSE2-SSSE3-NEXT: psrad $16, %xmm0
; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE2-SSSE3-NEXT: pslld $16, %xmm2
; SSE2-SSSE3-NEXT: psrad $16, %xmm2
; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x4i64_8i16_nuw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: packusdw %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc2x4i64_8i16_nuw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x4i64_8i16_nuw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc2x4i64_8i16_nuw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: vpmovqw %zmm1, %xmm1
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x4i64_8i16_nuw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0
; AVX512VL-NEXT: vpmovqw %ymm1, %xmm1
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x4i64_8i16_nuw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT: vpmovqw %zmm1, %xmm1
; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x4i64_8i16_nuw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0
; AVX512BWVL-NEXT: vpmovqw %ymm1, %xmm1
; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nuw <4 x i64> %a to <4 x i16>
  %1 = trunc nuw <4 x i64> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

define <4 x i32> @trunc2x2i64_4i32_nsw(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: trunc2x2i64_4i32_nsw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: retq
;
; AVX-LABEL: trunc2x2i64_4i32_nsw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc2x2i64_4i32_nsw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x2i64_4i32_nsw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x2i64_4i32_nsw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x2i64_4i32_nsw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nsw <2 x i64> %a to <2 x i32>
  %1 = trunc nsw <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @trunc2x2i64_4i32_nuw(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: trunc2x2i64_4i32_nuw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: retq
;
; AVX-LABEL: trunc2x2i64_4i32_nuw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc2x2i64_4i32_nuw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x2i64_4i32_nuw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x2i64_4i32_nuw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x2i64_4i32_nuw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nuw <2 x i64> %a to <2 x i32>
  %1 = trunc nuw <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}

define <8 x i16> @trunc2x4i32_8i16_nsw(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: trunc2x4i32_8i16_nsw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: trunc2x4i32_8i16_nsw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: trunc2x4i32_8i16_nsw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
  %0 = trunc nsw <4 x i32> %a to <4 x i16>
  %1 = trunc nsw <4 x i32> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

define <8 x i16> @trunc2x4i32_8i16_nuw(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: trunc2x4i32_8i16_nuw:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x4i32_8i16_nuw:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x4i32_8i16_nuw:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc2x4i32_8i16_nuw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: trunc2x4i32_8i16_nuw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
  %0 = trunc nuw <4 x i32> %a to <4 x i16>
  %1 = trunc nuw <4 x i32> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

define <32 x i8> @trunc2x16i16_32i8_nsw(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: trunc2x16i16_32i8_nsw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: packsswb %xmm3, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc2x16i16_32i8_nsw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x16i16_32i8_nsw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc2x16i16_32i8_nsw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x16i16_32i8_nsw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x16i16_32i8_nsw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x16i16_32i8_nsw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BWVL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nsw <16 x i16> %a to <16 x i8>
  %1 = trunc nsw <16 x i16> %b to <16 x i8>
  %2 = shufflevector <16 x i8> %0, <16 x i8> %1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i8> %2
}

define <32 x i8> @trunc2x16i16_32i8_nuw(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: trunc2x16i16_32i8_nuw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc2x16i16_32i8_nuw:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x16i16_32i8_nuw:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc2x16i16_32i8_nuw:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x16i16_32i8_nuw:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x16i16_32i8_nuw:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x16i16_32i8_nuw:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BWVL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc nuw <16 x i16> %a to <16 x i8>
  %1 = trunc nuw <16 x i16> %b to <16 x i8>
  %2 = shufflevector <16 x i8> %0, <16 x i8> %1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <32 x i8> %2
}

define <16 x i8> @trunc2x8i16_16i8_nsw(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: trunc2x8i16_16i8_nsw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: trunc2x8i16_16i8_nsw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: trunc2x8i16_16i8_nsw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
  %0 = trunc nsw <8 x i16> %a to <8 x i8>
  %1 = trunc nsw <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}

define <16 x i8> @trunc2x8i16_16i8_nuw(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: trunc2x8i16_16i8_nuw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: trunc2x8i16_16i8_nuw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: trunc2x8i16_16i8_nuw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
entry:
  %0 = trunc nuw <8 x i16> %a to <8 x i8>
  %1 = trunc nuw <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}

define i64 @trunc8i16_i64_nsw(<8 x i16> %inval) {
; SSE-LABEL: trunc8i16_i64_nsw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packsswb %xmm0, %xmm0
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: trunc8i16_i64_nsw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
;
; AVX512-LABEL: trunc8i16_i64_nsw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: retq
entry:
  %0 = trunc nsw <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}

define i64 @trunc8i16_i64_nuw(<8 x i16> %inval) {
; SSE-LABEL: trunc8i16_i64_nuw:
; SSE: # %bb.0: # %entry
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: trunc8i16_i64_nuw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
;
; AVX512-LABEL: trunc8i16_i64_nuw:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: retq
entry:
  %0 = trunc nuw <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}