; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX,AVX512

; These tests check that a truncating narrow of two concatenated 128-bit
; vectors is lowered to a single x86 PACK instruction.  In every test the
; inputs are first put into a known range: an ashr/lshr on the first operand
; and an and-mask on the second, so the backend can prove the lanes fit the
; narrower (signed or unsigned) element type and a pack is value-preserving.

; trunc(concat(x,y)) -> pack

; ashr by 17 leaves each i32 lane in [-16384,16383] and "and 15" leaves
; [0,15]; both fit a signed i16, so the truncate lowers to packssdw.
define <8 x i16> @trunc_concat_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; SSE-LABEL: trunc_concat_packssdw_128:
; SSE: # %bb.0:
; SSE-NEXT: psrad $17, %xmm0
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_concat_packssdw_128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrad $17, %xmm0, %xmm0
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_concat_packssdw_128:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrad $17, %xmm0, %xmm0
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_concat_packssdw_128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrad $17, %xmm0, %xmm0
; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = ashr <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
  %2 = and <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15>
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = trunc <8 x i32> %3 to <8 x i16>
  ret <8 x i16> %4
}

; lshr by 17 and "and 15" both leave lanes in [0,32767], so an unsigned
; packusdw is legal.  PACKUSDW only exists from SSE4.1 onward; on plain SSE2
; the same lanes also fit the signed i16 range, so packssdw is used instead.
define <8 x i16> @trunc_concat_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; SSE2-LABEL: trunc_concat_packusdw_128:
; SSE2: # %bb.0:
; SSE2-NEXT: psrld $17, %xmm0
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE4-LABEL: trunc_concat_packusdw_128:
; SSE4: # %bb.0:
; SSE4-NEXT: psrld $17, %xmm0
; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE4-NEXT: packusdw %xmm1, %xmm0
; SSE4-NEXT: retq
;
; AVX1-LABEL: trunc_concat_packusdw_128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrld $17, %xmm0, %xmm0
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_concat_packusdw_128:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrld $17, %xmm0, %xmm0
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_concat_packusdw_128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrld $17, %xmm0, %xmm0
; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = lshr <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
  %2 = and <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15>
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = trunc <8 x i32> %3 to <8 x i16>
  ret <8 x i16> %4
}

; ashr by 15 yields all-zeros/all-ones lanes and "and 1" yields 0/1; both fit
; a signed i8, so the i16->i8 truncate lowers to packsswb.
define <16 x i8> @trunc_concat_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) nounwind {
; SSE-LABEL: trunc_concat_packsswb_128:
; SSE: # %bb.0:
; SSE-NEXT: psraw $15, %xmm0
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_concat_packsswb_128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_concat_packsswb_128:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_concat_packsswb_128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <8 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = trunc <16 x i16> %3 to <16 x i8>
  ret <16 x i8> %4
}

; lshr by 15 and "and 1" both yield lanes that are 0 or 1, so the unsigned
; packuswb (available since SSE2) is legal on all subtargets.
define <16 x i8> @trunc_concat_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) nounwind {
; SSE-LABEL: trunc_concat_packuswb_128:
; SSE: # %bb.0:
; SSE-NEXT: psrlw $15, %xmm0
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_concat_packuswb_128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_concat_packuswb_128:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_concat_packuswb_128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = lshr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <8 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = trunc <16 x i16> %3 to <16 x i8>
  ret <16 x i8> %4
}

; concat(trunc(x),trunc(y)) -> pack
; Same value ranges as above, but the IR truncates each half first and then
; concatenates the narrow halves; the backend should still form one pack.

define <8 x i16> @concat_trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; On plain SSE2 this form is not combined into a single pack: each half is
; narrowed on its own (packssdw / packuswb against itself) and the results
; are merged with punpcklqdq.
; SSE2-LABEL: concat_trunc_packssdw_128:
; SSE2: # %bb.0:
; SSE2-NEXT: psrad $17, %xmm0
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: packssdw %xmm0, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm1
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE4-LABEL: concat_trunc_packssdw_128:
; SSE4: # %bb.0:
; SSE4-NEXT: psrad $17, %xmm0
; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE4-NEXT: packssdw %xmm1, %xmm0
; SSE4-NEXT: retq
;
; AVX1-LABEL: concat_trunc_packssdw_128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrad $17, %xmm0, %xmm0
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: concat_trunc_packssdw_128:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrad $17, %xmm0, %xmm0
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: concat_trunc_packssdw_128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrad $17, %xmm0, %xmm0
; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = ashr <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
  %2 = and <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15>
  %3 = trunc <4 x i32> %1 to <4 x i16>
  %4 = trunc <4 x i32> %2 to <4 x i16>
  %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %5
}

define <8 x i16> @concat_trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; As above, plain SSE2 (no packusdw) narrows each half separately and merges
; with punpcklqdq instead of forming one pack.
; SSE2-LABEL: concat_trunc_packusdw_128:
; SSE2: # %bb.0:
; SSE2-NEXT: psrld $17, %xmm0
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: packssdw %xmm0, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm1
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE4-LABEL: concat_trunc_packusdw_128:
; SSE4: # %bb.0:
; SSE4-NEXT: psrld $17, %xmm0
; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE4-NEXT: packusdw %xmm1, %xmm0
; SSE4-NEXT: retq
;
; AVX1-LABEL: concat_trunc_packusdw_128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrld $17, %xmm0, %xmm0
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: concat_trunc_packusdw_128:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrld $17, %xmm0, %xmm0
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: concat_trunc_packusdw_128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrld $17, %xmm0, %xmm0
; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = lshr <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
  %2 = and <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15>
  %3 = trunc <4 x i32> %1 to <4 x i16>
  %4 = trunc <4 x i32> %2 to <4 x i16>
  %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %5
}

define <16 x i8> @concat_trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) nounwind {
; SSE-LABEL: concat_trunc_packsswb_128:
; SSE: # %bb.0:
; SSE-NEXT: psraw $15, %xmm0
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: concat_trunc_packsswb_128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: concat_trunc_packsswb_128:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: concat_trunc_packsswb_128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <8 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = trunc <8 x i16> %1 to <8 x i8>
  %4 = trunc <8 x i16> %2 to <8 x i8>
  %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %5
}

define <16 x i8> @concat_trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) nounwind {
; SSE-LABEL: concat_trunc_packuswb_128:
; SSE: # %bb.0:
; SSE-NEXT: psrlw $15, %xmm0
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: concat_trunc_packuswb_128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: concat_trunc_packuswb_128:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: concat_trunc_packuswb_128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = lshr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <8 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = trunc <8 x i16> %1 to <8 x i8>
  %4 = trunc <8 x i16> %2 to <8 x i8>
  %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %5
}

; Fuzz test - don't pack a v1i32 comparison result.
; Reduced from a fuzzer case: the compare result is a 1-element vector, so no
; pack node may be formed for it; the infinite loop in %CF and the store to a
; null pointer are intentional artifacts of the reduction.
define void @autogen_SD10339(<1 x i32> %I49) {
; CHECK-LABEL: autogen_SD10339:
; CHECK: # %bb.0: # %BB
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB8_1: # %CF
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movw $1, 0
; CHECK-NEXT: jmp .LBB8_1
BB:
  %Cmp53 = icmp uge <1 x i32> %I49, zeroinitializer
  br label %CF

CF: ; preds = %CF, %BB
  %ZE166 = zext <1 x i1> %Cmp53 to <1 x i16>
  store <1 x i16> %ZE166, ptr null, align 2
  br label %CF
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; AVX: {{.*}}