; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s

; InstCombine tests for the x86 pack intrinsics (packssdw, packusdw, packsswb,
; packuswb) at 128-bit, 256-bit and 512-bit vector widths.

;
; UNDEF Elts
;
; Both operands are undef; each call is expected to fold to an undef result.
;

define <8 x i16> @undef_packssdw_128() {
; CHECK-LABEL: @undef_packssdw_128(
; CHECK-NEXT:    ret <8 x i16> undef
;
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> undef, <4 x i32> undef)
  ret <8 x i16> %1
}

define <8 x i16> @undef_packusdw_128() {
; CHECK-LABEL: @undef_packusdw_128(
; CHECK-NEXT:    ret <8 x i16> undef
;
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> undef, <4 x i32> undef)
  ret <8 x i16> %1
}

define <16 x i8> @undef_packsswb_128() {
; CHECK-LABEL: @undef_packsswb_128(
; CHECK-NEXT:    ret <16 x i8> undef
;
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> undef, <8 x i16> undef)
  ret <16 x i8> %1
}

define <16 x i8> @undef_packuswb_128() {
; CHECK-LABEL: @undef_packuswb_128(
; CHECK-NEXT:    ret <16 x i8> undef
;
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> undef, <8 x i16> undef)
  ret <16 x i8> %1
}

define <16 x i16> @undef_packssdw_256() {
; CHECK-LABEL: @undef_packssdw_256(
; CHECK-NEXT:    ret <16 x i16> undef
;
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> undef, <8 x i32> undef)
  ret <16 x i16> %1
}

define <16 x i16> @undef_packusdw_256() {
; CHECK-LABEL: @undef_packusdw_256(
; CHECK-NEXT:    ret <16 x i16> undef
;
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> undef, <8 x i32> undef)
  ret <16 x i16> %1
}

define <32 x i8> @undef_packsswb_256() {
; CHECK-LABEL: @undef_packsswb_256(
; CHECK-NEXT:    ret <32 x i8> undef
;
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> undef, <16 x i16> undef)
  ret <32 x i8> %1
}

define <32 x i8> @undef_packuswb_256() {
; CHECK-LABEL: @undef_packuswb_256(
; CHECK-NEXT:    ret <32 x i8> undef
;
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> undef, <16 x i16> undef)
  ret <32 x i8> %1
}

define <32 x i16> @undef_packssdw_512() {
; CHECK-LABEL: @undef_packssdw_512(
; CHECK-NEXT:    ret <32 x i16> undef
;
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> undef, <16 x i32> undef)
  ret <32 x i16> %1
}

define <32 x i16> @undef_packusdw_512() {
; CHECK-LABEL: @undef_packusdw_512(
; CHECK-NEXT:    ret <32 x i16> undef
;
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> undef, <16 x i32> undef)
  ret <32 x i16> %1
}

define <64 x i8> @undef_packsswb_512() {
; CHECK-LABEL: @undef_packsswb_512(
; CHECK-NEXT:    ret <64 x i8> undef
;
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> undef)
  ret <64 x i8> %1
}

define <64 x i8> @undef_packuswb_512() {
; CHECK-LABEL: @undef_packuswb_512(
; CHECK-NEXT:    ret <64 x i8> undef
;
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> undef, <32 x i16> undef)
  ret <64 x i8> %1
}

;
; Constant Folding
;
; Operands are constants, zeroinitializer or undef; each call is expected to
; fold to a constant vector (undef operands give undef result elements).
;

define <8 x i16> @fold_packssdw_128() {
; CHECK-LABEL: @fold_packssdw_128(
; CHECK-NEXT:    ret <8 x i16> <i16 0, i16 -1, i16 32767, i16 -32768, i16 0, i16 0, i16 0, i16 0>
;
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> <i32 0, i32 -1, i32 65536, i32 -131072>, <4 x i32> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @fold_packusdw_128() {
; CHECK-LABEL: @fold_packusdw_128(
; CHECK-NEXT:    ret <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0, i16 -32768, i16 -1>
;
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> undef, <4 x i32> <i32 0, i32 -1, i32 32768, i32 65537>)
  ret <8 x i16> %1
}

define <16 x i8> @fold_packsswb_128() {
; CHECK-LABEL: @fold_packsswb_128(
; CHECK-NEXT:    ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>
;
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> undef)
  ret <16 x i8> %1
}

define <16 x i8> @fold_packuswb_128() {
; CHECK-LABEL: @fold_packuswb_128(
; CHECK-NEXT:    ret <16 x i8> <i8 0, i8 1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 15, i8 0, i8 127, i8 0, i8 1, i8 0, i8 1, i8 0, i8 0>
;
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 1, i16 -1, i16 255, i16 65535, i16 -32768, i16 -127, i16 15>, <8 x i16> <i16 -15, i16 127, i16 32768, i16 -65535, i16 -255, i16 1, i16 -1, i16 0>)
  ret <16 x i8> %1
}

define <16 x i16> @fold_packssdw_256() {
; CHECK-LABEL: @fold_packssdw_256(
; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 256, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef>
;
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> <i32 0, i32 256, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>, <8 x i32> undef)
  ret <16 x i16> %1
}

define <16 x i16> @fold_packusdw_256() {
; CHECK-LABEL: @fold_packusdw_256(
; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 0, i16 0, i16 -1, i16 0, i16 256, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767>
;
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> <i32 0, i32 -256, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767>, <8 x i32> <i32 0, i32 256, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>)
  ret <16 x i16> %1
}

define <32 x i8> @fold_packsswb_256() {
; CHECK-LABEL: @fold_packsswb_256(
; CHECK-NEXT:    ret <32 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
;
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> undef, <16 x i16> zeroinitializer)
  ret <32 x i8> %1
}

define <32 x i8> @fold_packuswb_256() {
; CHECK-LABEL: @fold_packuswb_256(
; CHECK-NEXT:    ret <32 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64>
;
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> zeroinitializer, <16 x i16> <i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 256, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64>)
  ret <32 x i8> %1
}

define <32 x i16> @fold_packssdw_512() {
; CHECK-LABEL: @fold_packssdw_512(
; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 512, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 512, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef>
;
  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> <i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767, i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>, <16 x i32> undef)
  ret <32 x i16> %1
}

define <32 x i16> @fold_packusdw_512() {
; CHECK-LABEL: @fold_packusdw_512(
; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 0, i16 0, i16 -1, i16 0, i16 512, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767, i16 0, i16 0, i16 0, i16 -1, i16 0, i16 512, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767>
;
  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> <i32 0, i32 -512, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767, i32 0, i32 -512, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767>, <16 x i32> <i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767, i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>)
  ret <32 x i16> %1
}

define <64 x i8> @fold_packsswb_512() {
; CHECK-LABEL: @fold_packsswb_512(
; CHECK-NEXT:    ret <64 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
;
  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> zeroinitializer)
  ret <64 x i8> %1
}

define <64 x i8> @fold_packuswb_512() {
; CHECK-LABEL: @fold_packuswb_512(
; CHECK-NEXT:    ret <64 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64>
;
  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> zeroinitializer, <32 x i16> <i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 512, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 512, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64>)
  ret <64 x i8> %1
}

;
; Demanded Elts
;
; A trailing shufflevector uses only some elements of the pack result, so the
; assertions expect the operands feeding the unused elements to be simplified.
;

define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @elts_packssdw_128(
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> poison)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 undef, i32 undef>
  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>
  %3 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 7, i32 7, i32 7, i32 7>
  ret <8 x i16> %4
}

define <8 x i16> @elts_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @elts_packusdw_128(
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 poison>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = insertelement <4 x i32> %a0, i32 0, i32 0
  %2 = insertelement <4 x i32> %a1, i32 0, i32 3
  %3 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef>
  ret <8 x i16> %4
}

define <16 x i8> @elts_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @elts_packsswb_128(
; CHECK-NEXT:    ret <16 x i8> zeroinitializer
;
  %1 = insertelement <8 x i16> %a0, i16 0, i32 0
  %2 = insertelement <8 x i16> %a1, i16 0, i32 0
  %3 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2)
  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ret <16 x i8> %4
}

define <16 x i8> @elts_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @elts_packuswb_128(
; CHECK-NEXT:    ret <16 x i8> undef
;
  %1 = insertelement <8 x i16> undef, i16 0, i32 0
  %2 = insertelement <8 x i16> undef, i16 0, i32 0
  %3 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2)
  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  ret <16 x i8> %4
}

define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @elts_packssdw_256(
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> poison)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 8, i32 poison, i32 poison, i32 11, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    ret <16 x i16> [[TMP2]]
;
  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 undef, i32 6, i32 5, i32 undef>
  %3 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %2)
  %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 11, i32 12, i32 undef, i32 undef, i32 15>
  ret <16 x i16> %4
}

define <16 x i16> @elts_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @elts_packusdw_256(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> poison, <8 x i32> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
;
  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  %3 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %2)
  %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i16> %4
}

define <32 x i8> @elts_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @elts_packsswb_256(
; CHECK-NEXT:    ret <32 x i8> zeroinitializer
;
  %1 = insertelement <16 x i16> %a0, i16 0, i32 0
  %2 = insertelement <16 x i16> %a1, i16 0, i32 8
  %3 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2)
  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
  ret <32 x i8> %4
}

define <32 x i8> @elts_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @elts_packuswb_256(
; CHECK-NEXT:    ret <32 x i8> undef
;
  %1 = insertelement <16 x i16> undef, i16 0, i32 1
  %2 = insertelement <16 x i16> undef, i16 0, i32 0
  %3 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2)
  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> zeroinitializer
  ret <32 x i8> %4
}

define <32 x i16> @elts_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @elts_packssdw_512(
; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A0:%.*]], <16 x i32> poison)
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> poison, <32 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 8, i32 poison, i32 poison, i32 11, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison, i32 24, i32 poison, i32 poison, i32 27, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    ret <32 x i16> [[TMP2]]
;
  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 undef, i32 6, i32 5, i32 undef, i32 undef, i32 10, i32 9, i32 undef, i32 undef, i32 14, i32 13, i32 undef>
  %3 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %1, <16 x i32> %2)
  %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 11, i32 12, i32 undef, i32 undef, i32 15, i32 undef, i32 undef, i32 18, i32 19, i32 20, i32 undef, i32 undef, i32 23, i32 24, i32 undef, i32 undef, i32 27, i32 28, i32 undef, i32 undef, i32 31>
  ret <32 x i16> %4
}

define <32 x i16> @elts_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @elts_packusdw_512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
; CHECK-NEXT:    [[TMP2:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> poison, <16 x i32> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> poison, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 20, i32 21, i32 22, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 28, i32 29, i32 30, i32 31, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
;
  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
  %3 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %1, <16 x i32> %2)
  %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i16> %4
}

define <64 x i8> @elts_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @elts_packsswb_512(
; CHECK-NEXT:    ret <64 x i8> zeroinitializer
;
  %1 = insertelement <32 x i16> %a0, i16 0, i32 0
  %2 = insertelement <32 x i16> %a1, i16 0, i32 8
  %3 = insertelement <32 x i16> %1, i16 0, i32 16
  %4 = insertelement <32 x i16> %2, i16 0, i32 24
  %5 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %3, <32 x i16> %4)
  %6 = shufflevector <64 x i8> %5, <64 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
  ret <64 x i8> %6
}

define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @elts_packuswb_512(
; CHECK-NEXT:    ret <64 x i8> undef
;
  %1 = insertelement <32 x i16> undef, i16 0, i32 1
  %2 = insertelement <32 x i16> undef, i16 0, i32 0
  %3 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %1, <32 x i16> %2)
  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> zeroinitializer
  ret <64 x i8> %4
}

;
; Truncation (without Saturation)
;
; Operands are shifted or masked before the pack; the assertions expect the
; pack call itself to be left in place.
;

define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @trunc_packssdw_128(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]], splat (i32 17)
; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], splat (i32 15)
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %1 = ashr <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
  %2 = and <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15>
  %3 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %2)
  ret <8 x i16> %3
}

; NOTE(review): this test used to call the signed packssdw intrinsic despite
; its name. The call and its assertion were switched to packusdw by hand;
; regenerate the assertions with utils/update_test_checks.py to confirm.
define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @trunc_packusdw_128(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], splat (i32 17)
; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], splat (i32 15)
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %1 = lshr <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
  %2 = and <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15>
  %3 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %1, <4 x i32> %2)
  ret <8 x i16> %3
}

define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @trunc_packsswb_128(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], splat (i16 15)
; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], splat (i16 1)
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
;
  %1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <8 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2)
  ret <16 x i8> %3
}

define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @trunc_packuswb_128(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]], splat (i16 15)
; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], splat (i16 1)
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
;
  %1 = lshr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <8 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2)
  ret <16 x i8> %3
}

define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @trunc_packssdw_256(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]], splat (i32 17)
; CHECK-NEXT:    [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]], splat (i32 23)
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
;
  %1 = ashr <8 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
  %2 = ashr <8 x i32> %a1, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
  %3 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %2)
  ret <16 x i16> %3
}

; NOTE(review): this test used to call the signed packssdw intrinsic despite
; its name. The call and its assertion were switched to packusdw by hand;
; regenerate the assertions with utils/update_test_checks.py to confirm.
define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @trunc_packusdw_256(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]], splat (i32 17)
; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]], splat (i32 15)
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
;
  %1 = lshr <8 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
  %2 = and <8 x i32> %a1, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  %3 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %2)
  ret <16 x i16> %3
}

define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @trunc_packsswb_256(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], splat (i16 15)
; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], splat (i16 1)
; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
;
  %1 = ashr <16 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <16 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2)
  ret <32 x i8> %3
}

define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @trunc_packuswb_256(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]], splat (i16 15)
; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], splat (i16 1)
; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
;
  %1 = lshr <16 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <16 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2)
  ret <32 x i8> %3
}

define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @trunc_packssdw_512(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]], splat (i32 17)
; CHECK-NEXT:    [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]], splat (i32 23)
; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
;
  %1 = ashr <16 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
  %2 = ashr <16 x i32> %a1, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
  %3 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %1, <16 x i32> %2)
  ret <32 x i16> %3
}

; NOTE(review): this test used to call the signed packssdw intrinsic despite
; its name. The call and its assertion were switched to packusdw by hand;
; regenerate the assertions with utils/update_test_checks.py to confirm.
define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @trunc_packusdw_512(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]], splat (i32 17)
; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]], splat (i32 15)
; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
;
  %1 = lshr <16 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
  %2 = and <16 x i32> %a1, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  %3 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %1, <16 x i32> %2)
  ret <32 x i16> %3
}

define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @trunc_packsswb_512(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], splat (i16 15)
; CHECK-NEXT:    [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], splat (i16 1)
; CHECK-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
;
  %1 = ashr <32 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <32 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %1, <32 x i16> %2)
  ret <64 x i8> %3
}

define <64 x i8> @trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @trunc_packuswb_512(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]], splat (i16 15)
; CHECK-NEXT:    [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], splat (i16 1)
; CHECK-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
;
  %1 = lshr <32 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <32 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %1, <32 x i16> %2)
  ret <64 x i8> %3
}

;
; Signed Pack Comparison Results
;
; Operands are sign-extended icmp results; the assertions expect the
; instruction sequence to be left unchanged.
;

define <8 x i16> @cmp_packssdw_128(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
; CHECK-LABEL: @cmp_packssdw_128(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i32> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT:    [[TMP3:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]])
; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
;
  %1 = icmp eq <4 x i32> %a0, %a1
  %2 = icmp eq <4 x i32> %a2, %a3
  %3 = sext <4 x i1> %1 to <4 x i32>
  %4 = sext <4 x i1> %2 to <4 x i32>
  %5 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %3, <4 x i32> %4)
  ret <8 x i16> %5
}

define <16 x i8> @cmp_packsswb_128(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; CHECK-LABEL: @cmp_packsswb_128(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <8 x i16> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT:    [[TMP3:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i16>
; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP3]], <8 x i16> [[TMP4]])
; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
;
  %1 = icmp eq <8 x i16> %a0, %a1
  %2 = icmp eq <8 x i16> %a2, %a3
  %3 = sext <8 x i1> %1 to <8 x i16>
  %4 = sext <8 x i1> %2 to <8 x i16>
  %5 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %3, <8 x i16> %4)
  ret <16 x i8> %5
}

define <16 x i16> @cmp_packssdw_256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
; CHECK-LABEL: @cmp_packssdw_256(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i32> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <8 x i32> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT:    [[TMP3:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP3]], <8 x i32> [[TMP4]])
; CHECK-NEXT:    ret <16 x i16> [[TMP5]]
;
  %1 = icmp eq <8 x i32> %a0, %a1
  %2 = icmp eq <8 x i32> %a2, %a3
  %3 = sext <8 x i1> %1 to <8 x i32>
  %4 = sext <8 x i1> %2 to <8 x i32>
  %5 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %3, <8 x i32> %4)
  ret <16 x i16> %5
}

define <32 x i8> @cmp_packsswb_256(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> %a2, <16 x i16> %a3) {
; CHECK-LABEL: @cmp_packsswb_256(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <16 x i16> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <16 x i16> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT:    [[TMP3:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = sext <16 x i1> [[TMP2]] to <16 x i16>
; CHECK-NEXT:    [[TMP5:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP3]], <16 x i16> [[TMP4]])
; CHECK-NEXT:    ret <32 x i8> [[TMP5]]
;
  %1 = icmp eq <16 x i16> %a0, %a1
  %2 = icmp eq <16 x i16> %a2, %a3
  %3 = sext <16 x i1> %1 to <16 x i16>
  %4 = sext <16 x i1> %2 to <16 x i16>
  %5 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %3, <16 x i16> %4)
  ret <32 x i8> %5
}

define <32 x i16> @cmp_packssdw_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, <16 x i32> %a3) {
; CHECK-LABEL: @cmp_packssdw_512(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <16 x i32> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <16 x i32> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT:    [[TMP3:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = sext <16 x i1> [[TMP2]] to <16 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP3]], <16 x i32> [[TMP4]])
; CHECK-NEXT:    ret <32 x i16> [[TMP5]]
;
  %1 = icmp eq <16 x i32> %a0, %a1
  %2 = icmp eq <16 x i32> %a2, %a3
  %3 = sext <16 x i1> %1 to <16 x i32>
  %4 = sext <16 x i1> %2 to <16 x i32>
  %5 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %3, <16 x i32> %4)
  ret <32 x i16> %5
}

define <64 x i8> @cmp_packsswb_512(<32 x i16> %a0, <32 x i16> %a1, <32 x i16> %a2, <32 x i16> %a3) {
; CHECK-LABEL: @cmp_packsswb_512(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <32 x i16> [[A0:%.*]], [[A1:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <32 x i16> [[A2:%.*]], [[A3:%.*]]
; CHECK-NEXT:    [[TMP3:%.*]] = sext <32 x i1> [[TMP1]] to <32 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = sext <32 x i1> [[TMP2]] to <32 x i16>
; CHECK-NEXT:    [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP3]], <32 x i16> [[TMP4]])
; CHECK-NEXT:    ret <64 x i8> [[TMP5]]
;
  %1 = icmp eq <32 x i16> %a0, %a1
  %2 = icmp eq <32 x i16> %a2, %a3
  %3 = sext <32 x i1> %1 to <32 x i16>
  %4 = sext <32 x i1> %2 to <32 x i16>
  %5 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %3, <32 x i16> %4)
  ret <64 x i8> %5
620} 621 622declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone 623declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone 624declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone 625declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone 626 627declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone 628declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone 629declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone 630declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone 631 632declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>) nounwind readnone 633declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>) nounwind readnone 634declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>) nounwind readnone 635declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>) nounwind readnone 636