; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

;
; ASHR - Immediate
;
; Arithmetic right shift by an i32 immediate count. Every intrinsic is
; exercised with counts 0, 15 and 64, and the checks expect:
;   0  -> the call folds away and %v is returned unchanged
;   15 -> a generic 'ashr' by a splat of 15
;   64 -> out of range for every element width tested here; the count is
;         clamped to (element bits - 1), i.e. 15 / 31 / 63
;

define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrai_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrai_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrai_w_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrai_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrai_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrai_d_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrai_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrai_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrai_w_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrai_d_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrai_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrai_d_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

define <2 x i64> @avx512_psrai_q_128_0(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_128_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}

define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_128_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}

define <2 x i64> @avx512_psrai_q_128_64(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_128_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63)
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}

define <4 x i64> @avx512_psrai_q_256_0(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_256_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}

define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_256_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}

define <4 x i64> @avx512_psrai_q_256_64(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_256_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63)
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}

define <32 x i16> @avx512_psrai_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrai_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 0)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrai_w_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 15)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrai_w_512_64(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrai_w_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 64)
  ret <32 x i16> %1
}

define <16 x i32> @avx512_psrai_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrai_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 0)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrai_d_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 15)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrai_d_512_64(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrai_d_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31)
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 64)
  ret <16 x i32> %1
}

define <8 x i64> @avx512_psrai_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 0)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 15)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrai_q_512_64(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrai_q_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63)
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 64)
  ret <8 x i64> %1
}

;
; LSHR - Immediate
;
; Logical right shift by an i32 immediate count. The checks expect:
;   0  -> the call folds away and %v is returned unchanged
;   15 -> a generic 'lshr' by a splat of 15
;   64 -> out of range for every element width tested here; the result is
;         zeroinitializer
;

define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_64(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_64(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_64(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_64(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_64(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_64(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}

define <32 x i16> @avx512_psrli_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrli_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 0)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrli_w_512_15(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrli_w_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 15)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psrli_w_512_64(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrli_w_512_64(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 64)
  ret <32 x i16> %1
}

define <16 x i32> @avx512_psrli_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrli_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 0)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrli_d_512_15(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrli_d_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 15)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psrli_d_512_64(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrli_d_512_64(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 64)
  ret <16 x i32> %1
}

define <8 x i64> @avx512_psrli_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrli_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 0)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrli_q_512_15(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrli_q_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 15)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psrli_q_512_64(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrli_q_512_64(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 64)
  ret <8 x i64> %1
}

;
; SHL - Immediate
;
; Left shift by an i32 immediate count. The checks expect:
;   0  -> the call folds away and %v is returned unchanged
;   15 -> a generic 'shl' by a splat of 15
;   64 -> out of range for every element width tested here; the result is
;         zeroinitializer
;

define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_64(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_64(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_64(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_64(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_64(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_64(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}

define <32 x i16> @avx512_pslli_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_pslli_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 0)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_pslli_w_512_15(<32 x i16> %v) {
; CHECK-LABEL: @avx512_pslli_w_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 15)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_pslli_w_512_64(<32 x i16> %v) {
; CHECK-LABEL: @avx512_pslli_w_512_64(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 64)
  ret <32 x i16> %1
}

define <16 x i32> @avx512_pslli_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_pslli_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 0)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_pslli_d_512_15(<16 x i32> %v) {
; CHECK-LABEL: @avx512_pslli_d_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 15)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_pslli_d_512_64(<16 x i32> %v) {
; CHECK-LABEL: @avx512_pslli_d_512_64(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 64)
  ret <16 x i32> %1
}

define <8 x i64> @avx512_pslli_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_pslli_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 0)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_pslli_q_512_15(<8 x i64> %v) {
; CHECK-LABEL: @avx512_pslli_q_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 15)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_pslli_q_512_64(<8 x i64> %v) {
; CHECK-LABEL: @avx512_pslli_q_512_64(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 64)
  ret <8 x i64> %1
}

;
; ASHR - Constant Vector
;
; Arithmetic right shift where the count operand is a constant vector.
; The expectations below are consistent with the count being read as a single
; integer from the low 64 bits of that vector:
;   - elements above the low 64 bits (the 9999 values) do not affect the result
;   - the *_15_splat cases fill the low 64 bits with several 15s, which reads
;     as an out-of-range count and is clamped to (element bits - 1): still 15
;     for i16 elements, but 31 for i32 elements
;   - a count of 64 is clamped the same way (15 / 31 / 63)
;

define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <2 x i64> @avx512_psra_q_128_0(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_128_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}

define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_128_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %1
}

define <2 x i64> @avx512_psra_q_128_64(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_128_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63)
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %1
}

define <4 x i64> @avx512_psra_q_256_0(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_256_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> zeroinitializer)
  ret <4 x i64> %1
}

define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_256_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <4 x i64> %1
}

define <4 x i64> @avx512_psra_q_256_64(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_256_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63)
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <4 x i64> %1
}

define <32 x i16> @avx512_psra_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psra_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psra_w_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psra_w_512_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <32 x i16> %1
}

define <32 x i16> @avx512_psra_w_512_64(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psra_w_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <32 x i16> %1
}

define <16 x i32> @avx512_psra_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psra_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psra_d_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psra_d_512_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31)
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <16 x i32> %1
}

define <16 x i32> @avx512_psra_d_512_64(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psra_d_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31)
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <16 x i32> %1
}

define <8 x i64> @avx512_psra_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
  ret <8 x i64> %1
}

define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <8 x i64> %1
}

; psra.q.512 with count element 0 equal to 64 (not a valid i64 shift amount):
; the expected fold is an ashr by 63.
define <8 x i64> @avx512_psra_q_512_64(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psra_q_512_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63)
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %res = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <8 x i64> %res
}

;
; LSHR - Constant Vector
;
; Each test below passes a constant count vector to a psrl intrinsic and pins
; the IR that instcombine is expected to leave behind.
; NOTE(review): the expectations are consistent with the count being read as a
; single 64-bit quantity taken from the low elements of the count vector
; (upper elements hold 9999 and never influence the result) - confirm against
; the intrinsic definition.
;

; Zero count: the call is expected to fold away and return %v unchanged.
define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %res = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %res
}

; Count element 0 is 15, elements 1-3 are zero: expected to become lshr by 15.
define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %res = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %res
}

; Every count element is 15: expected to fold to zero rather than lshr by 15.
define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_15_splat(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %res = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %res
}

; Count element 0 is 64, larger than the 16-bit element: expected to fold to zero.
define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_64(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %res = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %res
}

; Zero count: the call is expected to fold away and return %v unchanged.
define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %res = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %res
}

; Count element 0 is 15, element 1 is zero: expected to become lshr by 15.
define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %res = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %res
}

; Every count element is 15: expected to fold to zero rather than lshr by 15.
define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_15_splat(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %res = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %res
}

; Count element 0 is 64, larger than the 32-bit element: expected to fold to zero.
define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_64(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %res = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %res
}

; Zero count: the call is expected to fold away and return %v unchanged.
define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %res = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %res
}

; Count element 0 is 15: expected to become lshr by 15.
define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %res = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %res
}

; Count element 0 is 64, the full 64-bit element width: expected to fold to zero.
define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_64(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %res = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %res
}

; 256-bit word form, zero count: expected to return %v unchanged.
define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %res = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %res
}

; 256-bit word form, count element 0 is 15: expected to become lshr by 15.
define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %res = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %res
}

; 256-bit word form, every count element is 15: expected to fold to zero.
define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_15_splat(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %res = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %res
}

; 256-bit word form, count element 0 is 64: expected to fold to zero.
define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_64(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %res = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %res
}

; 256-bit dword form, zero count: expected to return %v unchanged.
define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %res = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %res
}

; 256-bit dword form, count element 0 is 15: expected to become lshr by 15.
define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %res = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %res
}

; 256-bit dword form, every count element is 15: expected to fold to zero.
define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_15_splat(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %res = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %res
}

; 256-bit dword form, count element 0 is 64: expected to fold to zero.
define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_64(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %res = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %res
}

; 256-bit qword form, zero count: expected to return %v unchanged.
define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrl_q_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %res = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
  ret <4 x i64> %res
}

; 256-bit qword form, count element 0 is 15: expected to become lshr by 15.
define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrl_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %res = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <4 x i64> %res
}

; 256-bit qword form, count element 0 is 64: expected to fold to zero.
define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrl_q_64(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %res = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <4 x i64> %res
}

; 512-bit word form, zero count: expected to return %v unchanged.
define <32 x i16> @avx512_psrl_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrl_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %res = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
  ret <32 x i16> %res
}

; 512-bit word form, count element 0 is 15: expected to become lshr by 15.
define <32 x i16> @avx512_psrl_w_512_15(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrl_w_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %res = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <32 x i16> %res
}

; 512-bit word form, every count element is 15: expected to fold to zero.
define <32 x i16> @avx512_psrl_w_512_15_splat(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrl_w_512_15_splat(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %res = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <32 x i16> %res
}

; 512-bit word form, count element 0 is 64: expected to fold to zero.
define <32 x i16> @avx512_psrl_w_512_64(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrl_w_512_64(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %res = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <32 x i16> %res
}

; 512-bit dword form, zero count: expected to return %v unchanged.
define <16 x i32> @avx512_psrl_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrl_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %res = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
  ret <16 x i32> %res
}

; 512-bit dword form, count element 0 is 15: expected to become lshr by 15.
define <16 x i32> @avx512_psrl_d_512_15(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrl_d_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %res = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <16 x i32> %res
}

; 512-bit dword form, every count element is 15: expected to fold to zero.
define <16 x i32> @avx512_psrl_d_512_15_splat(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrl_d_512_15_splat(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %res = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <16 x i32> %res
}

; 512-bit dword form, count element 0 is 64: expected to fold to zero.
define <16 x i32> @avx512_psrl_d_512_64(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrl_d_512_64(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %res = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <16 x i32> %res
}

; 512-bit qword form, zero count: expected to return %v unchanged.
define <8 x i64> @avx512_psrl_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrl_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %res = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
  ret <8 x i64> %res
}

; 512-bit qword form, count element 0 is 15: expected to become lshr by 15.
define <8 x i64> @avx512_psrl_q_512_15(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrl_q_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %res = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <8 x i64> %res
}

; 512-bit qword form, count element 0 is 64: expected to fold to zero.
define <8 x i64> @avx512_psrl_q_512_64(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrl_q_512_64(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %res = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <8 x i64> %res
}

;
; SHL - Constant Vector
;
; Same input patterns as the logical-right-shift tests, applied to the psll
; intrinsics; in-range counts are expected to become a plain shl.
;

; Zero count: the call is expected to fold away and return %v unchanged.
define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %res = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %res
}

; Count element 0 is 15, elements 1-3 are zero: expected to become shl by 15.
define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %res = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %res
}

; Every count element is 15: expected to fold to zero rather than shl by 15.
define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_15_splat(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %res = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %res
}

; Count element 0 is 64, larger than the 16-bit element: expected to fold to zero.
define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_64(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %res = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %res
}

; Zero count: the call is expected to fold away and return %v unchanged.
define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %res = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %res
}

; Count element 0 is 15, element 1 is zero: expected to become shl by 15.
define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %res = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %res
}

; Every count element is 15: expected to fold to zero rather than shl by 15.
define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_15_splat(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %res = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %res
}

; Count element 0 is 64, larger than the 32-bit element: expected to fold to zero.
define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_64(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %res = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %res
}

; Zero count: the call is expected to fold away and return %v unchanged.
define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psll_q_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %res = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %res
}

; Count element 0 is 15: expected to become shl by 15.
define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psll_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %res = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %res
}

; psll.q with count element 0 equal to 64, the full 64-bit element width:
; expected to fold to zero.
define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psll_q_64(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %res = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %res
}

; 256-bit word form, zero count: expected to return %v unchanged.
define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psll_w_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %res = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %res
}

; 256-bit word form, count element 0 is 15: expected to become shl by 15.
define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psll_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %res = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %res
}

; 256-bit word form, every count element is 15: expected to fold to zero.
define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psll_w_15_splat(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %res = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %res
}

; 256-bit word form, count element 0 is 64: expected to fold to zero.
define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psll_w_64(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %res = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %res
}

; 256-bit dword form, zero count: expected to return %v unchanged.
define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psll_d_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %res = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %res
}

; 256-bit dword form, count element 0 is 15: expected to become shl by 15.
define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psll_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %res = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %res
}

; 256-bit dword form, every count element is 15: expected to fold to zero.
define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psll_d_15_splat(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %res = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %res
}

; 256-bit dword form, count element 0 is 64: expected to fold to zero.
define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psll_d_64(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %res = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %res
}

; 256-bit qword form, zero count: expected to return %v unchanged.
define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psll_q_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %res = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
  ret <4 x i64> %res
}

; 256-bit qword form, count element 0 is 15: expected to become shl by 15.
define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psll_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %res = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <4 x i64> %res
}

; 256-bit qword form, count element 0 is 64: expected to fold to zero.
define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psll_q_64(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %res = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <4 x i64> %res
}

; 512-bit word form, zero count: expected to return %v unchanged.
define <32 x i16> @avx512_psll_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psll_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %res = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
  ret <32 x i16> %res
}

; 512-bit word form, count element 0 is 15: expected to become shl by 15.
define <32 x i16> @avx512_psll_w_512_15(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psll_w_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15)
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %res = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <32 x i16> %res
}

; 512-bit word form, every count element is 15: expected to fold to zero.
; NOTE(review): the name orders its parts differently from the sibling tests
; (..._512_15_splat); kept as-is so the test label stays stable.
define <32 x i16> @avx512_psll_w_15_512_splat(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psll_w_15_512_splat(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %res = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <32 x i16> %res
}

; 512-bit word form, count element 0 is 64: expected to fold to zero.
define <32 x i16> @avx512_psll_w_512_64(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psll_w_512_64(
; CHECK-NEXT:    ret <32 x i16> zeroinitializer
;
  %res = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <32 x i16> %res
}

; 512-bit dword form, zero count: expected to return %v unchanged.
define <16 x i32> @avx512_psll_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psll_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %res = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
  ret <16 x i32> %res
}

; 512-bit dword form, count element 0 is 15: expected to become shl by 15.
define <16 x i32> @avx512_psll_d_512_15(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psll_d_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15)
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %res = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <16 x i32> %res
}

; 512-bit dword form, every count element is 15: expected to fold to zero.
define <16 x i32> @avx512_psll_d_512_15_splat(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psll_d_512_15_splat(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %res = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <16 x i32> %res
}

; 512-bit dword form, count element 0 is 64: expected to fold to zero.
define <16 x i32> @avx512_psll_d_512_64(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psll_d_512_64(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %res = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <16 x i32> %res
}

; 512-bit qword form, zero count: expected to return %v unchanged.
define <8 x i64> @avx512_psll_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psll_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %res = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
  ret <8 x i64> %res
}

; 512-bit qword form, count element 0 is 15: expected to become shl by 15.
define <8 x i64> @avx512_psll_q_512_15(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psll_q_512_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15)
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %res = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <8 x i64> %res
}

; 512-bit qword form, count element 0 is 64: expected to fold to zero.
define <8 x i64> @avx512_psll_q_512_64(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psll_q_512_64(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %res = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <8 x i64> %res
}

;
; ASHR - Constant Per-Element Vector
;
; The psrav intrinsics take one shift amount per element. The expectations
; below show constant amounts turning into a plain ashr, with any amount that
; is not a valid shift for the element type replaced by (element width - 1)
; and undef lanes left as undef.
;

; 128-bit dword form, all-zero amounts: expected to return %v unchanged.
define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrav_d_128_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %res = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %res
}

; 256-bit dword form, all-zero amounts: expected to return %v unchanged.
define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrav_d_256_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %res = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
  ret <8 x i32> %res
}

; 512-bit dword form, all-zero amounts: expected to return %v unchanged.
define <16 x i32> @avx512_psrav_d_512_0(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrav_d_512_0(
; CHECK-NEXT:    ret <16 x i32> [[V:%.*]]
;
  %res = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
  ret <16 x i32> %res
}

; Mixed amounts; the last lane (64) is out of range and is expected to become 31.
define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrav_d_128_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %res = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
  ret <4 x i32> %res
}

; Mixed amounts; the lane holding 32 is out of range and is expected to become 31.
define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrav_d_256_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %res = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
  ret <8 x i32> %res
}

; Mixed amounts; both lanes holding 32 are expected to become 31.
define <16 x i32> @avx512_psrav_d_512_var(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrav_d_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %res = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
  ret <16 x i32> %res
}

; Every defined amount is out of range (negative value included): each is
; expected to become 31 while the undef lane stays undef.
define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrav_d_128_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 undef>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %res = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
  ret <4 x i32> %res
}

; Every defined amount is out of range: expected 31 per lane, undef preserved.
define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrav_d_256_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %res = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
  ret <8 x i32> %res
}

; Every defined amount is out of range: expected 31 per lane, undef preserved.
define <16 x i32> @avx512_psrav_d_512_allbig(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrav_d_512_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %res = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
  ret <16 x i32> %res
}

; Amount vector built by inserting undef into lane 0: the undef lane is
; expected to survive and the out-of-range lane (64) to become 31.
define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrav_d_128_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 undef, i32 8, i32 16, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %amt = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 64>, i32 undef, i32 0
  %res = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %amt)
  ret <4 x i32> %res
}

; Amount vector built by inserting undef into lane 1: the undef lane is
; expected to survive and the out-of-range lane (32) to become 31.
define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrav_d_256_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %amt = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
  %res = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %amt)
  ret <8 x i32> %res
}

; Amount vector built by inserting undef into lane 1: the undef lane is
; expected to survive and both out-of-range lanes (32) to become 31.
define <16 x i32> @avx512_psrav_d_512_undef(<16 x i32> %v) {
; CHECK-LABEL: @avx512_psrav_d_512_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
;
  %amt = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
  %res = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> %amt)
  ret <16 x i32> %res
}

; 128-bit qword form, all-zero amounts: expected to return %v unchanged.
define <2 x i64> @avx512_psrav_q_128_0(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_128_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %res = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %res
}

; 256-bit qword form, all-zero amounts: expected to return %v unchanged.
define <4 x i64> @avx512_psrav_q_256_0(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_256_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %res = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
  ret <4 x i64> %res
}

; All amounts in range: expected to become an ashr by the same constant vector.
define <2 x i64> @avx512_psrav_q_128_var(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_128_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 0, i64 8>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %res = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
  ret <2 x i64> %res
}

; All amounts in range: expected to become an ashr by the same constant vector.
define <4 x i64> @avx512_psrav_q_256_var(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_256_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %res = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
  ret <4 x i64> %res
}

; The defined amount (64) is out of range: expected 63, undef lane preserved.
define <2 x i64> @avx512_psrav_q_128_allbig(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_128_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 63, i64 undef>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %res = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 undef>)
  ret <2 x i64> %res
}

; Every defined amount is out of range: expected 63 per lane, undef preserved.
define <4 x i64> @avx512_psrav_q_256_allbig(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_256_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 63, i64 undef, i64 63, i64 63>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %res = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
  ret <4 x i64> %res
}

; Undef inserted into lane 0 of in-range amounts: expected ashr with that lane undef.
define <2 x i64> @avx512_psrav_q_128_undef(<2 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_128_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 undef, i64 8>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %amt = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 0
  %res = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> %amt)
  ret <2 x i64> %res
}

; Undef inserted into lane 0 of in-range amounts: expected ashr with that lane undef.
define <4 x i64> @avx512_psrav_q_256_undef(<4 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_256_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %amt = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
  %res = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> %amt)
  ret <4 x i64> %res
}

; 512-bit qword form, all-zero amounts: expected to return %v unchanged.
define <8 x i64> @avx512_psrav_q_512_0(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_512_0(
; CHECK-NEXT:    ret <8 x i64> [[V:%.*]]
;
  %res = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
  ret <8 x i64> %res
}

; All amounts in range: expected to become an ashr by the same constant vector.
define <8 x i64> @avx512_psrav_q_512_var(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %res = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
  ret <8 x i64> %res
}

; Every defined amount is out of range: expected 63 per lane, undef preserved.
define <8 x i64> @avx512_psrav_q_512_allbig(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_512_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 63, i64 undef, i64 63, i64 63, i64 63, i64 undef, i64 63, i64 63>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %res = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
  ret <8 x i64> %res
}

; Undef inserted into lane 0 of in-range amounts: expected ashr with that lane undef.
define <8 x i64> @avx512_psrav_q_512_undef(<8 x i64> %v) {
; CHECK-LABEL: @avx512_psrav_q_512_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %amt = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
  %res = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> %amt)
  ret <8 x i64> %res
}

; 128-bit word form, all-zero amounts: expected to return %v unchanged.
define <8 x i16> @avx512_psrav_w_128_0(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_128_0(
; CHECK-NEXT:    ret <8 x i16> [[V:%.*]]
;
  %res = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %res
}

; All amounts in range: expected to become an ashr by the same constant vector.
define <8 x i16> @avx512_psrav_w_128_var(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_128_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %res = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
  ret <8 x i16> %res
}

; Every defined amount is out of range: expected 15 per lane, undef preserved.
define <8 x i16> @avx512_psrav_w_128_allbig(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_128_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %res = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
  ret <8 x i16> %res
}

; Undef inserted into lane 0 of in-range amounts: expected ashr with that lane undef.
define <8 x i16> @avx512_psrav_w_128_undef(<8 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_128_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %amt = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
  %res = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> %amt)
  ret <8 x i16> %res
}

; 256-bit word form, all-zero amounts: expected to return %v unchanged.
define <16 x i16> @avx512_psrav_w_256_0(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_256_0(
; CHECK-NEXT:    ret <16 x i16> [[V:%.*]]
;
  %res = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
  ret <16 x i16> %res
}

; All amounts in range: expected to become an ashr by the same constant vector.
define <16 x i16> @avx512_psrav_w_256_var(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_256_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %res = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
  ret <16 x i16> %res
}

; Every shift amount is either undef or, read as an unsigned i16, at least 16.
; The expected output is an ashr whose defined lanes are all 15, with the undef
; lane left undef.
define <16 x i16> @avx512_psrav_w_256_allbig(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_256_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
  ret <16 x i16> %1
}

; Lane 0 of an otherwise in-range shift-amount vector is overwritten with undef.
; The expected output is a plain ashr that keeps the undef lane.
define <16 x i16> @avx512_psrav_w_256_undef(<16 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_256_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
  %2 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> %1)
  ret <16 x i16> %2
}

; All-zero shift amounts: the call is expected to fold away to the input value.
define <32 x i16> @avx512_psrav_w_512_0(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_512_0(
; CHECK-NEXT:    ret <32 x i16> [[V:%.*]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
  ret <32 x i16> %1
}

; Every lane's shift amount is in the range 0..15. The expected output is an
; ashr with the same constant vector.
define <32 x i16> @avx512_psrav_w_512_var(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_512_var(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
  ret <32 x i16> %1
}

; Every shift amount is either undef (lanes 8, 18, 26, 29) or, read as an
; unsigned i16, at least 16. The expected output is an ashr whose defined lanes
; are all 15, with the undef lanes left undef.
define <32 x i16> @avx512_psrav_w_512_allbig(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_512_allbig(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 undef, i16 15, i16 15>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
  ret <32 x i16> %1
}

; Lane 0 of an otherwise in-range shift-amount vector is overwritten with undef.
; The expected output is a plain ashr that keeps the undef lane.
define <32 x i16> @avx512_psrav_w_512_undef(<32 x i16> %v) {
; CHECK-LABEL: @avx512_psrav_w_512_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
;
  %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
  %2 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> %1)
  ret <32 x i16> %2
}

;
; LSHR - Constant Per-Element Vector
;

; All-zero shift amounts: the call is expected to fold away to the input value.
define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_128_0(
; CHECK-NEXT:    ret <4 x i32> [[V:%.*]]
;
  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

; All-zero shift amounts: the call is expected to fold away to the input value.
define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_256_0(
; CHECK-NEXT:    ret <8 x i32> [[V:%.*]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
  ret <8 x i32> %1
}

; Every lane's shift amount is below 32. The expected output is an lshr with
; the same constant vector.
define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_128_var(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
  ret <4 x i32> %1
}

; Every lane's shift amount is below 32. The expected output is an lshr with
; the same constant vector.
define <8 x i32> @avx2_psrlv_d_256_var(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_256_var(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
  ret <8 x i32> %1
}

; One lane (64) is at least the i32 bit width while the others are in range.
; The expected output leaves the intrinsic call unchanged.
define <4 x i32> @avx2_psrlv_d_128_big(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_128_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[V:%.*]], <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
  ret <4 x i32> %1
}
; One lane (64) is at least the i32 bit width while the others are in range.
; The expected output leaves the intrinsic call unchanged.
define <8 x i32> @avx2_psrlv_d_256_big(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_256_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[V:%.*]], <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
  ret <8 x i32> %1
}

; Every shift amount is either undef or, read as an unsigned i32, at least 32.
; The expected output is a constant: 0 in each defined lane, undef where the
; amount was undef.
define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_128_allbig(
; CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
;
  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
  ret <4 x i32> %1
}

; Every shift amount is either undef or, read as an unsigned i32, at least 32.
; The expected output is a constant: 0 in each defined lane, undef where the
; amount was undef.
define <8 x i32> @avx2_psrlv_d_256_allbig(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_256_allbig(
; CHECK-NEXT:    ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
  ret <8 x i32> %1
}

; Lane 0 of an otherwise in-range shift-amount vector is overwritten with undef.
; The expected output is a plain lshr that keeps the undef lane.
define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_128_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 undef, i32 8, i32 16, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
  %2 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}

; Lane 1 of an otherwise in-range shift-amount vector is overwritten with undef.
; The expected output is a plain lshr that keeps the undef lane.
define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrlv_d_256_undef(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
  %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1)
  ret <8 x i32> %2
}

; All-zero shift amounts: the call is expected to fold away to the input value.
define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_128_0(
; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}

; All-zero shift amounts: the call is expected to fold away to the input value.
define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_256_0(
; CHECK-NEXT:    ret <4 x i64> [[V:%.*]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
  ret <4 x i64> %1
}

; Every lane's shift amount is below 64. The expected output is an lshr with
; the same constant vector.
define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_128_var(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], <i64 0, i64 8>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
  ret <2 x i64> %1
}

; Every lane's shift amount is below 64. The expected output is an lshr with
; the same constant vector.
define <4 x i64> @avx2_psrlv_q_256_var(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_256_var(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
  ret <4 x i64> %1
}

; One lane (128) is at least the i64 bit width while the other is in range.
; The expected output leaves the intrinsic call unchanged.
define <2 x i64> @avx2_psrlv_q_128_big(<2 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_128_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[V:%.*]], <2 x i64> <i64 0, i64 128>)
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
  ret <2 x i64> %1
}

; One lane (64) is at least the i64 bit width while the others are in range.
; The expected output leaves the intrinsic call unchanged.
define <4 x i64> @avx2_psrlv_q_256_big(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_256_big(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[V:%.*]], <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
  ret <4 x i64> %1
}

; Both shift amounts, read as unsigned i64, are at least 64. The expected
; output is the all-zero vector.
define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_128_allbig(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
  ret <2 x i64> %1
}

; Every shift amount is either undef or, read as an unsigned i64, at least 64.
; The expected output is a constant: 0 in each defined lane, undef where the
; amount was undef.
define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrlv_q_256_allbig(
; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
  ret <4 x i64> %1
}

; The shift amount is 0 (the poison lane could be 0), so we return the unshifted input.
2036 2037define <2 x i64> @avx2_psrlv_q_128_poison(<2 x i64> %v) { 2038; CHECK-LABEL: @avx2_psrlv_q_128_poison( 2039; CHECK-NEXT: ret <2 x i64> [[V:%.*]] 2040; 2041 %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 poison, i64 1 2042 %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1) 2043 ret <2 x i64> %2 2044} 2045 2046define <4 x i64> @avx2_psrlv_q_256_poison(<4 x i64> %v) { 2047; CHECK-LABEL: @avx2_psrlv_q_256_poison( 2048; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 poison, i64 8, i64 16, i64 31> 2049; CHECK-NEXT: ret <4 x i64> [[TMP1]] 2050; 2051 %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 poison, i64 0 2052 %2 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> %1) 2053 ret <4 x i64> %2 2054} 2055 2056define <16 x i32> @avx2_psrlv_d_512_0(<16 x i32> %v) { 2057; CHECK-LABEL: @avx2_psrlv_d_512_0( 2058; CHECK-NEXT: ret <16 x i32> [[V:%.*]] 2059; 2060 %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer) 2061 ret <16 x i32> %1 2062} 2063 2064define <16 x i32> @avx512_psrlv_d_512_var(<16 x i32> %v) { 2065; CHECK-LABEL: @avx512_psrlv_d_512_var( 2066; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0> 2067; CHECK-NEXT: ret <16 x i32> [[TMP1]] 2068; 2069 %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>) 2070 ret <16 x i32> %1 2071} 2072 2073define <16 x i32> @avx512_psrlv_d_512_big(<16 x i32> %v) { 2074; CHECK-LABEL: @avx512_psrlv_d_512_big( 2075; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[V:%.*]], <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 
0>) 2076; CHECK-NEXT: ret <16 x i32> [[TMP1]] 2077; 2078 %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>) 2079 ret <16 x i32> %1 2080} 2081 2082define <16 x i32> @avx512_psrlv_d_512_allbig(<16 x i32> %v) { 2083; CHECK-LABEL: @avx512_psrlv_d_512_allbig( 2084; CHECK-NEXT: ret <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2085; 2086 %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>) 2087 ret <16 x i32> %1 2088} 2089 2090define <16 x i32> @avx512_psrlv_d_512_undef(<16 x i32> %v) { 2091; CHECK-LABEL: @avx512_psrlv_d_512_undef( 2092; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0> 2093; CHECK-NEXT: ret <16 x i32> [[TMP1]] 2094; 2095 %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1 2096 %2 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> %1) 2097 ret <16 x i32> %2 2098} 2099 2100define <8 x i64> @avx512_psrlv_q_512_0(<8 x i64> %v) { 2101; CHECK-LABEL: @avx512_psrlv_q_512_0( 2102; CHECK-NEXT: ret <8 x i64> [[V:%.*]] 2103; 2104 %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer) 2105 ret <8 x i64> %1 2106} 2107 2108define <8 x i64> @avx512_psrlv_q_512_var(<8 x i64> %v) { 2109; CHECK-LABEL: @avx512_psrlv_q_512_var( 2110; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31> 
2111; CHECK-NEXT: ret <8 x i64> [[TMP1]] 2112; 2113 %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>) 2114 ret <8 x i64> %1 2115} 2116 2117define <8 x i64> @avx512_psrlv_q_512_big(<8 x i64> %v) { 2118; CHECK-LABEL: @avx512_psrlv_q_512_big( 2119; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[V:%.*]], <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>) 2120; CHECK-NEXT: ret <8 x i64> [[TMP1]] 2121; 2122 %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>) 2123 ret <8 x i64> %1 2124} 2125 2126define <8 x i64> @avx512_psrlv_q_512_allbig(<8 x i64> %v) { 2127; CHECK-LABEL: @avx512_psrlv_q_512_allbig( 2128; CHECK-NEXT: ret <8 x i64> <i64 0, i64 undef, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 0> 2129; 2130 %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>) 2131 ret <8 x i64> %1 2132} 2133 2134define <8 x i64> @avx512_psrlv_q_512_undef(<8 x i64> %v) { 2135; CHECK-LABEL: @avx512_psrlv_q_512_undef( 2136; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31> 2137; CHECK-NEXT: ret <8 x i64> [[TMP1]] 2138; 2139 %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0 2140 %2 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> %1) 2141 ret <8 x i64> %2 2142} 2143 2144define <8 x i16> @avx512_psrlv_w_128_0(<8 x i16> %v) { 2145; CHECK-LABEL: @avx512_psrlv_w_128_0( 2146; CHECK-NEXT: ret <8 x i16> [[V:%.*]] 2147; 2148 %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer) 2149 ret <8 x i16> %1 2150} 2151 2152define <8 x i16> @avx512_psrlv_w_128_var(<8 x i16> %v) { 2153; 
CHECK-LABEL: @avx512_psrlv_w_128_var( 2154; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7> 2155; CHECK-NEXT: ret <8 x i16> [[TMP1]] 2156; 2157 %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>) 2158 ret <8 x i16> %1 2159} 2160 2161define <8 x i16> @avx512_psrlv_w_128_big(<8 x i16> %v) { 2162; CHECK-LABEL: @avx512_psrlv_w_128_big( 2163; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> [[V:%.*]], <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>) 2164; CHECK-NEXT: ret <8 x i16> [[TMP1]] 2165; 2166 %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>) 2167 ret <8 x i16> %1 2168} 2169 2170define <8 x i16> @avx512_psrlv_w_128_allbig(<8 x i16> %v) { 2171; CHECK-LABEL: @avx512_psrlv_w_128_allbig( 2172; CHECK-NEXT: ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef> 2173; 2174 %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>) 2175 ret <8 x i16> %1 2176} 2177 2178define <8 x i16> @avx512_psrlv_w_128_undef(<8 x i16> %v) { 2179; CHECK-LABEL: @avx512_psrlv_w_128_undef( 2180; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7> 2181; CHECK-NEXT: ret <8 x i16> [[TMP1]] 2182; 2183 %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0 2184 %2 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> %1) 2185 ret <8 x i16> %2 2186} 2187 2188define <16 x i16> @avx512_psrlv_w_256_0(<16 x i16> %v) { 2189; CHECK-LABEL: @avx512_psrlv_w_256_0( 2190; CHECK-NEXT: ret <16 x i16> [[V:%.*]] 2191; 2192 %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> 
%v, <16 x i16> zeroinitializer) 2193 ret <16 x i16> %1 2194} 2195 2196define <16 x i16> @avx512_psrlv_w_256_var(<16 x i16> %v) { 2197; CHECK-LABEL: @avx512_psrlv_w_256_var( 2198; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15> 2199; CHECK-NEXT: ret <16 x i16> [[TMP1]] 2200; 2201 %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>) 2202 ret <16 x i16> %1 2203} 2204 2205define <16 x i16> @avx512_psrlv_w_256_big(<16 x i16> %v) { 2206; CHECK-LABEL: @avx512_psrlv_w_256_big( 2207; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> [[V:%.*]], <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>) 2208; CHECK-NEXT: ret <16 x i16> [[TMP1]] 2209; 2210 %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>) 2211 ret <16 x i16> %1 2212} 2213 2214define <16 x i16> @avx512_psrlv_w_256_allbig(<16 x i16> %v) { 2215; CHECK-LABEL: @avx512_psrlv_w_256_allbig( 2216; CHECK-NEXT: ret <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0> 2217; 2218 %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>) 2219 ret <16 x i16> %1 2220} 2221 2222define <16 x i16> @avx512_psrlv_w_256_undef(<16 x i16> %v) { 2223; CHECK-LABEL: @avx512_psrlv_w_256_undef( 2224; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, 
i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15> 2225; CHECK-NEXT: ret <16 x i16> [[TMP1]] 2226; 2227 %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0 2228 %2 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> %1) 2229 ret <16 x i16> %2 2230} 2231 2232define <32 x i16> @avx512_psrlv_w_512_0(<32 x i16> %v) { 2233; CHECK-LABEL: @avx512_psrlv_w_512_0( 2234; CHECK-NEXT: ret <32 x i16> [[V:%.*]] 2235; 2236 %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer) 2237 ret <32 x i16> %1 2238} 2239 2240define <32 x i16> @avx512_psrlv_w_512_var(<32 x i16> %v) { 2241; CHECK-LABEL: @avx512_psrlv_w_512_var( 2242; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0> 2243; CHECK-NEXT: ret <32 x i16> [[TMP1]] 2244; 2245 %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>) 2246 ret <32 x i16> %1 2247} 2248 2249define <32 x i16> @avx512_psrlv_w_512_big(<32 x i16> %v) { 2250; CHECK-LABEL: @avx512_psrlv_w_512_big( 2251; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> [[V:%.*]], <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 
0>) 2252; CHECK-NEXT: ret <32 x i16> [[TMP1]] 2253; 2254 %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>) 2255 ret <32 x i16> %1 2256} 2257 2258define <32 x i16> @avx512_psrlv_w_512_allbig(<32 x i16> %v) { 2259; CHECK-LABEL: @avx512_psrlv_w_512_allbig( 2260; CHECK-NEXT: ret <32 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 0, i16 0> 2261; 2262 %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>) 2263 ret <32 x i16> %1 2264} 2265 2266define <32 x i16> @avx512_psrlv_w_512_undef(<32 x i16> %v) { 2267; CHECK-LABEL: @avx512_psrlv_w_512_undef( 2268; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0> 2269; CHECK-NEXT: ret <32 x i16> [[TMP1]] 2270; 2271 %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0 2272 %2 = tail call <32 x 
i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> %1) 2273 ret <32 x i16> %2 2274} 2275 2276; 2277; SHL - Constant Per-Element Vector 2278; 2279 2280define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) { 2281; CHECK-LABEL: @avx2_psllv_d_128_0( 2282; CHECK-NEXT: ret <4 x i32> [[V:%.*]] 2283; 2284 %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer) 2285 ret <4 x i32> %1 2286} 2287 2288define <8 x i32> @avx2_psllv_d_256_0(<8 x i32> %v) { 2289; CHECK-LABEL: @avx2_psllv_d_256_0( 2290; CHECK-NEXT: ret <8 x i32> [[V:%.*]] 2291; 2292 %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer) 2293 ret <8 x i32> %1 2294} 2295 2296define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) { 2297; CHECK-LABEL: @avx2_psllv_d_128_var( 2298; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 31> 2299; CHECK-NEXT: ret <4 x i32> [[TMP1]] 2300; 2301 %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>) 2302 ret <4 x i32> %1 2303} 2304 2305define <8 x i32> @avx2_psllv_d_256_var(<8 x i32> %v) { 2306; CHECK-LABEL: @avx2_psllv_d_256_var( 2307; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0> 2308; CHECK-NEXT: ret <8 x i32> [[TMP1]] 2309; 2310 %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>) 2311 ret <8 x i32> %1 2312} 2313 2314define <4 x i32> @avx2_psllv_d_128_big(<4 x i32> %v) { 2315; CHECK-LABEL: @avx2_psllv_d_128_big( 2316; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[V:%.*]], <4 x i32> <i32 0, i32 8, i32 16, i32 64>) 2317; CHECK-NEXT: ret <4 x i32> [[TMP1]] 2318; 2319 %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>) 2320 ret <4 x i32> %1 2321} 2322 2323define <8 x i32> @avx2_psllv_d_256_big(<8 
x i32> %v) { 2324; CHECK-LABEL: @avx2_psllv_d_256_big( 2325; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[V:%.*]], <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>) 2326; CHECK-NEXT: ret <8 x i32> [[TMP1]] 2327; 2328 %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>) 2329 ret <8 x i32> %1 2330} 2331 2332define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) { 2333; CHECK-LABEL: @avx2_psllv_d_128_allbig( 2334; CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef> 2335; 2336 %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>) 2337 ret <4 x i32> %1 2338} 2339 2340define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) { 2341; CHECK-LABEL: @avx2_psllv_d_256_allbig( 2342; CHECK-NEXT: ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2343; 2344 %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>) 2345 ret <8 x i32> %1 2346} 2347 2348define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) { 2349; CHECK-LABEL: @avx2_psllv_d_128_undef( 2350; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 undef, i32 8, i32 16, i32 31> 2351; CHECK-NEXT: ret <4 x i32> [[TMP1]] 2352; 2353 %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0 2354 %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1) 2355 ret <4 x i32> %2 2356} 2357 2358define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) { 2359; CHECK-LABEL: @avx2_psllv_d_256_undef( 2360; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0> 2361; CHECK-NEXT: ret <8 x i32> [[TMP1]] 2362; 2363 %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 
undef, i32 1 2364 %2 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> %1) 2365 ret <8 x i32> %2 2366} 2367 2368define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) { 2369; CHECK-LABEL: @avx2_psllv_q_128_0( 2370; CHECK-NEXT: ret <2 x i64> [[V:%.*]] 2371; 2372 %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer) 2373 ret <2 x i64> %1 2374} 2375 2376define <4 x i64> @avx2_psllv_q_256_0(<4 x i64> %v) { 2377; CHECK-LABEL: @avx2_psllv_q_256_0( 2378; CHECK-NEXT: ret <4 x i64> [[V:%.*]] 2379; 2380 %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer) 2381 ret <4 x i64> %1 2382} 2383 2384define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) { 2385; CHECK-LABEL: @avx2_psllv_q_128_var( 2386; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], <i64 0, i64 8> 2387; CHECK-NEXT: ret <2 x i64> [[TMP1]] 2388; 2389 %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>) 2390 ret <2 x i64> %1 2391} 2392 2393define <4 x i64> @avx2_psllv_q_256_var(<4 x i64> %v) { 2394; CHECK-LABEL: @avx2_psllv_q_256_var( 2395; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31> 2396; CHECK-NEXT: ret <4 x i64> [[TMP1]] 2397; 2398 %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>) 2399 ret <4 x i64> %1 2400} 2401 2402define <2 x i64> @avx2_psllv_q_128_big(<2 x i64> %v) { 2403; CHECK-LABEL: @avx2_psllv_q_128_big( 2404; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[V:%.*]], <2 x i64> <i64 0, i64 128>) 2405; CHECK-NEXT: ret <2 x i64> [[TMP1]] 2406; 2407 %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>) 2408 ret <2 x i64> %1 2409} 2410 2411define <4 x i64> @avx2_psllv_q_256_big(<4 x i64> %v) { 2412; CHECK-LABEL: @avx2_psllv_q_256_big( 2413; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x 
i64> [[V:%.*]], <4 x i64> <i64 0, i64 8, i64 16, i64 64>) 2414; CHECK-NEXT: ret <4 x i64> [[TMP1]] 2415; 2416 %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>) 2417 ret <4 x i64> %1 2418} 2419 2420define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) { 2421; CHECK-LABEL: @avx2_psllv_q_128_allbig( 2422; CHECK-NEXT: ret <2 x i64> zeroinitializer 2423; 2424 %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>) 2425 ret <2 x i64> %1 2426} 2427 2428define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) { 2429; CHECK-LABEL: @avx2_psllv_q_256_allbig( 2430; CHECK-NEXT: ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0> 2431; 2432 %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>) 2433 ret <4 x i64> %1 2434} 2435 2436; The shift amount is 0 (the poison lane could be 0), so we return the unshifted input. 2437 2438define <2 x i64> @avx2_psllv_q_128_poison(<2 x i64> %v) { 2439; CHECK-LABEL: @avx2_psllv_q_128_poison( 2440; CHECK-NEXT: ret <2 x i64> [[V:%.*]] 2441; 2442 %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 poison, i64 1 2443 %2 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> %1) 2444 ret <2 x i64> %2 2445} 2446 2447define <4 x i64> @avx2_psllv_q_256_poison(<4 x i64> %v) { 2448; CHECK-LABEL: @avx2_psllv_q_256_poison( 2449; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 poison, i64 8, i64 16, i64 31> 2450; CHECK-NEXT: ret <4 x i64> [[TMP1]] 2451; 2452 %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 poison, i64 0 2453 %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1) 2454 ret <4 x i64> %2 2455} 2456 2457define <16 x i32> @avx512_psllv_d_512_0(<16 x i32> %v) { 2458; CHECK-LABEL: @avx512_psllv_d_512_0( 2459; CHECK-NEXT: ret <16 x i32> [[V:%.*]] 2460; 2461 %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> 
zeroinitializer) 2462 ret <16 x i32> %1 2463} 2464 2465define <16 x i32> @avx512_psllv_d_512_var(<16 x i32> %v) { 2466; CHECK-LABEL: @avx512_psllv_d_512_var( 2467; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0> 2468; CHECK-NEXT: ret <16 x i32> [[TMP1]] 2469; 2470 %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>) 2471 ret <16 x i32> %1 2472} 2473 2474define <16 x i32> @avx512_psllv_d_512_big(<16 x i32> %v) { 2475; CHECK-LABEL: @avx512_psllv_d_512_big( 2476; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[V:%.*]], <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>) 2477; CHECK-NEXT: ret <16 x i32> [[TMP1]] 2478; 2479 %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>) 2480 ret <16 x i32> %1 2481} 2482 2483define <16 x i32> @avx512_psllv_d_512_allbig(<16 x i32> %v) { 2484; CHECK-LABEL: @avx512_psllv_d_512_allbig( 2485; CHECK-NEXT: ret <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2486; 2487 %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>) 2488 ret <16 x i32> %1 2489} 2490 2491define <16 x i32> @avx512_psllv_d_512_undef(<16 x i32> %v) { 2492; CHECK-LABEL: @avx512_psllv_d_512_undef( 2493; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], <i32 0, 
i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0> 2494; CHECK-NEXT: ret <16 x i32> [[TMP1]] 2495; 2496 %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1 2497 %2 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> %1) 2498 ret <16 x i32> %2 2499} 2500 2501define <8 x i64> @avx512_psllv_q_512_0(<8 x i64> %v) { 2502; CHECK-LABEL: @avx512_psllv_q_512_0( 2503; CHECK-NEXT: ret <8 x i64> [[V:%.*]] 2504; 2505 %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer) 2506 ret <8 x i64> %1 2507} 2508 2509define <8 x i64> @avx512_psllv_q_512_var(<8 x i64> %v) { 2510; CHECK-LABEL: @avx512_psllv_q_512_var( 2511; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31> 2512; CHECK-NEXT: ret <8 x i64> [[TMP1]] 2513; 2514 %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>) 2515 ret <8 x i64> %1 2516} 2517 2518define <8 x i64> @avx512_psllv_q_512_big(<8 x i64> %v) { 2519; CHECK-LABEL: @avx512_psllv_q_512_big( 2520; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[V:%.*]], <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>) 2521; CHECK-NEXT: ret <8 x i64> [[TMP1]] 2522; 2523 %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>) 2524 ret <8 x i64> %1 2525} 2526 2527define <8 x i64> @avx512_psllv_q_512_allbig(<8 x i64> %v) { 2528; CHECK-LABEL: @avx512_psllv_q_512_allbig( 2529; CHECK-NEXT: ret <8 x i64> <i64 0, i64 undef, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 0> 2530; 2531 %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> 
<i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>) 2532 ret <8 x i64> %1 2533} 2534 2535define <8 x i64> @avx512_psllv_q_512_undef(<8 x i64> %v) { 2536; CHECK-LABEL: @avx512_psllv_q_512_undef( 2537; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31> 2538; CHECK-NEXT: ret <8 x i64> [[TMP1]] 2539; 2540 %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0 2541 %2 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> %1) 2542 ret <8 x i64> %2 2543} 2544 2545define <8 x i16> @avx512_psllv_w_128_0(<8 x i16> %v) { 2546; CHECK-LABEL: @avx512_psllv_w_128_0( 2547; CHECK-NEXT: ret <8 x i16> [[V:%.*]] 2548; 2549 %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer) 2550 ret <8 x i16> %1 2551} 2552 2553define <8 x i16> @avx512_psllv_w_128_var(<8 x i16> %v) { 2554; CHECK-LABEL: @avx512_psllv_w_128_var( 2555; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7> 2556; CHECK-NEXT: ret <8 x i16> [[TMP1]] 2557; 2558 %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>) 2559 ret <8 x i16> %1 2560} 2561 2562define <8 x i16> @avx512_psllv_w_128_big(<8 x i16> %v) { 2563; CHECK-LABEL: @avx512_psllv_w_128_big( 2564; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> [[V:%.*]], <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>) 2565; CHECK-NEXT: ret <8 x i16> [[TMP1]] 2566; 2567 %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>) 2568 ret <8 x i16> %1 2569} 2570 2571define <8 x i16> @avx512_psllv_w_128_allbig(<8 x i16> %v) { 2572; CHECK-LABEL: @avx512_psllv_w_128_allbig( 2573; CHECK-NEXT: ret <8 x i16> <i16 0, i16 
0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef> 2574; 2575 %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>) 2576 ret <8 x i16> %1 2577} 2578 2579define <8 x i16> @avx512_psllv_w_128_undef(<8 x i16> %v) { 2580; CHECK-LABEL: @avx512_psllv_w_128_undef( 2581; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7> 2582; CHECK-NEXT: ret <8 x i16> [[TMP1]] 2583; 2584 %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0 2585 %2 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> %1) 2586 ret <8 x i16> %2 2587} 2588 2589define <16 x i16> @avx512_psllv_w_256_0(<16 x i16> %v) { 2590; CHECK-LABEL: @avx512_psllv_w_256_0( 2591; CHECK-NEXT: ret <16 x i16> [[V:%.*]] 2592; 2593 %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer) 2594 ret <16 x i16> %1 2595} 2596 2597define <16 x i16> @avx512_psllv_w_256_var(<16 x i16> %v) { 2598; CHECK-LABEL: @avx512_psllv_w_256_var( 2599; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15> 2600; CHECK-NEXT: ret <16 x i16> [[TMP1]] 2601; 2602 %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>) 2603 ret <16 x i16> %1 2604} 2605 2606define <16 x i16> @avx512_psllv_w_256_big(<16 x i16> %v) { 2607; CHECK-LABEL: @avx512_psllv_w_256_big( 2608; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> [[V:%.*]], <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>) 2609; CHECK-NEXT: ret <16 x i16> [[TMP1]] 2610; 
2611 %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>) 2612 ret <16 x i16> %1 2613} 2614 2615define <16 x i16> @avx512_psllv_w_256_allbig(<16 x i16> %v) { 2616; CHECK-LABEL: @avx512_psllv_w_256_allbig( 2617; CHECK-NEXT: ret <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0> 2618; 2619 %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>) 2620 ret <16 x i16> %1 2621} 2622 2623define <16 x i16> @avx512_psllv_w_256_undef(<16 x i16> %v) { 2624; CHECK-LABEL: @avx512_psllv_w_256_undef( 2625; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15> 2626; CHECK-NEXT: ret <16 x i16> [[TMP1]] 2627; 2628 %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0 2629 %2 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> %1) 2630 ret <16 x i16> %2 2631} 2632 2633define <32 x i16> @avx512_psllv_w_512_0(<32 x i16> %v) { 2634; CHECK-LABEL: @avx512_psllv_w_512_0( 2635; CHECK-NEXT: ret <32 x i16> [[V:%.*]] 2636; 2637 %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer) 2638 ret <32 x i16> %1 2639} 2640 2641define <32 x i16> @avx512_psllv_w_512_var(<32 x i16> %v) { 2642; CHECK-LABEL: @avx512_psllv_w_512_var( 2643; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, 
i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0> 2644; CHECK-NEXT: ret <32 x i16> [[TMP1]] 2645; 2646 %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>) 2647 ret <32 x i16> %1 2648} 2649 2650define <32 x i16> @avx512_psllv_w_512_big(<32 x i16> %v) { 2651; CHECK-LABEL: @avx512_psllv_w_512_big( 2652; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> [[V:%.*]], <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>) 2653; CHECK-NEXT: ret <32 x i16> [[TMP1]] 2654; 2655 %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>) 2656 ret <32 x i16> %1 2657} 2658 2659define <32 x i16> @avx512_psllv_w_512_allbig(<32 x i16> %v) { 2660; CHECK-LABEL: @avx512_psllv_w_512_allbig( 2661; CHECK-NEXT: ret <32 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 0, i16 0> 2662; 2663 %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 
56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>) 2664 ret <32 x i16> %1 2665} 2666 2667define <32 x i16> @avx512_psllv_w_512_undef(<32 x i16> %v) { 2668; CHECK-LABEL: @avx512_psllv_w_512_undef( 2669; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0> 2670; CHECK-NEXT: ret <32 x i16> [[TMP1]] 2671; 2672 %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0 2673 %2 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> %1) 2674 ret <32 x i16> %2 2675} 2676 2677; 2678; Vector Masked Shift Amounts 2679; 2680 2681define <8 x i16> @sse2_psra_w_128_masked(<8 x i16> %v, <8 x i16> %a) { 2682; CHECK-LABEL: @sse2_psra_w_128_masked( 2683; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison> 2684; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> zeroinitializer 2685; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[V:%.*]], [[TMP2]] 2686; CHECK-NEXT: ret <8 x i16> [[TMP3]] 2687; 2688 %1 = and <8 x i16> %a, <i16 15, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef> 2689 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1) 2690 ret <8 x i16> %2 2691} 2692 2693define <8 x i32> @avx2_psra_d_256_masked(<8 x i32> %v, <4 x i32> %a) { 2694; CHECK-LABEL: @avx2_psra_d_256_masked( 2695; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> 
[[A:%.*]], <i32 31, i32 poison, i32 poison, i32 poison> 2696; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> zeroinitializer 2697; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i32> [[V:%.*]], [[TMP2]] 2698; CHECK-NEXT: ret <8 x i32> [[TMP3]] 2699; 2700 %1 = and <4 x i32> %a, <i32 31, i32 0, i32 undef, i32 undef> 2701 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1) 2702 ret <8 x i32> %2 2703} 2704 2705define <8 x i64> @avx512_psra_q_512_masked(<8 x i64> %v, <2 x i64> %a) { 2706; CHECK-LABEL: @avx512_psra_q_512_masked( 2707; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 poison> 2708; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <8 x i32> zeroinitializer 2709; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i64> [[V:%.*]], [[TMP2]] 2710; CHECK-NEXT: ret <8 x i64> [[TMP3]] 2711; 2712 %1 = and <2 x i64> %a, <i64 63, i64 undef> 2713 %2 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %1) 2714 ret <8 x i64> %2 2715} 2716 2717define <4 x i32> @sse2_psrl_d_128_masked(<4 x i32> %v, <4 x i32> %a) { 2718; CHECK-LABEL: @sse2_psrl_d_128_masked( 2719; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 poison, i32 poison, i32 poison> 2720; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer 2721; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[V:%.*]], [[TMP2]] 2722; CHECK-NEXT: ret <4 x i32> [[TMP3]] 2723; 2724 %1 = and <4 x i32> %a, <i32 31, i32 0, i32 undef, i32 undef> 2725 %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1) 2726 ret <4 x i32> %2 2727} 2728 2729define <4 x i64> @avx2_psrl_q_256_masked(<4 x i64> %v, <2 x i64> %a) { 2730; CHECK-LABEL: @avx2_psrl_q_256_masked( 2731; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 poison> 2732; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> zeroinitializer 2733; 
CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[V:%.*]], [[TMP2]] 2734; CHECK-NEXT: ret <4 x i64> [[TMP3]] 2735; 2736 %1 = and <2 x i64> %a, <i64 63, i64 undef> 2737 %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1) 2738 ret <4 x i64> %2 2739} 2740 2741define <32 x i16> @avx512_psrl_w_512_masked(<32 x i16> %v, <8 x i16> %a) { 2742; CHECK-LABEL: @avx512_psrl_w_512_masked( 2743; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison> 2744; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <32 x i32> zeroinitializer 2745; CHECK-NEXT: [[TMP3:%.*]] = lshr <32 x i16> [[V:%.*]], [[TMP2]] 2746; CHECK-NEXT: ret <32 x i16> [[TMP3]] 2747; 2748 %1 = and <8 x i16> %a, <i16 15, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef> 2749 %2 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %1) 2750 ret <32 x i16> %2 2751} 2752 2753define <2 x i64> @sse2_psll_q_128_masked(<2 x i64> %v, <2 x i64> %a) { 2754; CHECK-LABEL: @sse2_psll_q_128_masked( 2755; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 poison> 2756; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer 2757; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[V:%.*]], [[TMP2]] 2758; CHECK-NEXT: ret <2 x i64> [[TMP3]] 2759; 2760 %1 = and <2 x i64> %a, <i64 63, i64 undef> 2761 %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1) 2762 ret <2 x i64> %2 2763} 2764 2765; The shift amount is in range (masked with 31 and high 32-bits are zero), 2766; so convert to standard IR - https://llvm.org/PR50123 2767 2768define <2 x i64> @sse2_psll_q_128_masked_bitcast(<2 x i64> %v, <2 x i64> %a) { 2769; CHECK-LABEL: @sse2_psll_q_128_masked_bitcast( 2770; CHECK-NEXT: [[B:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32> 2771; CHECK-NEXT: [[M:%.*]] = and <4 x i32> [[B]], <i32 31, i32 
poison, i32 poison, i32 poison> 2772; CHECK-NEXT: [[I:%.*]] = insertelement <4 x i32> [[M]], i32 0, i64 1 2773; CHECK-NEXT: [[SHAMT:%.*]] = bitcast <4 x i32> [[I]] to <2 x i64> 2774; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[SHAMT]], <2 x i64> poison, <2 x i32> zeroinitializer 2775; CHECK-NEXT: [[R:%.*]] = shl <2 x i64> [[V:%.*]], [[TMP1]] 2776; CHECK-NEXT: ret <2 x i64> [[R]] 2777; 2778 %b = bitcast <2 x i64> %a to <4 x i32> 2779 %m = and <4 x i32> %b, <i32 31, i32 poison, i32 poison, i32 poison> 2780 %i = insertelement <4 x i32> %m, i32 0, i32 1 2781 %shamt = bitcast <4 x i32> %i to <2 x i64> 2782 %r = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %shamt) #2 2783 ret <2 x i64> %r 2784} 2785 2786; TODO: This could be recognized as an over-shift. 2787 2788define <2 x i64> @sse2_psll_q_128_masked_bitcast_overshift(<2 x i64> %v, <2 x i64> %a) { 2789; CHECK-LABEL: @sse2_psll_q_128_masked_bitcast_overshift( 2790; CHECK-NEXT: [[B:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32> 2791; CHECK-NEXT: [[M:%.*]] = and <4 x i32> [[B]], <i32 31, i32 poison, i32 poison, i32 poison> 2792; CHECK-NEXT: [[I:%.*]] = insertelement <4 x i32> [[M]], i32 1, i64 1 2793; CHECK-NEXT: [[SHAMT:%.*]] = bitcast <4 x i32> [[I]] to <2 x i64> 2794; CHECK-NEXT: [[R:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V:%.*]], <2 x i64> [[SHAMT]]) 2795; CHECK-NEXT: ret <2 x i64> [[R]] 2796; 2797 %b = bitcast <2 x i64> %a to <4 x i32> 2798 %m = and <4 x i32> %b, <i32 31, i32 poison, i32 poison, i32 poison> 2799 %i = insertelement <4 x i32> %m, i32 1, i32 1 2800 %shamt = bitcast <4 x i32> %i to <2 x i64> 2801 %r = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %shamt) #2 2802 ret <2 x i64> %r 2803} 2804 2805define <16 x i16> @avx2_psll_w_256_masked(<16 x i16> %v, <8 x i16> %a) { 2806; CHECK-LABEL: @avx2_psll_w_256_masked( 2807; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 poison, i16 poison, i16 poison, i16 poison, i16 
poison, i16 poison, i16 poison> 2808; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <16 x i32> zeroinitializer 2809; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i16> [[V:%.*]], [[TMP2]] 2810; CHECK-NEXT: ret <16 x i16> [[TMP3]] 2811; 2812 %1 = and <8 x i16> %a, <i16 15, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef> 2813 %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1) 2814 ret <16 x i16> %2 2815} 2816 2817define <16 x i32> @avx512_psll_d_512_masked(<16 x i32> %v, <4 x i32> %a) { 2818; CHECK-LABEL: @avx512_psll_d_512_masked( 2819; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 poison, i32 poison, i32 poison> 2820; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <16 x i32> zeroinitializer 2821; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i32> [[V:%.*]], [[TMP2]] 2822; CHECK-NEXT: ret <16 x i32> [[TMP3]] 2823; 2824 %1 = and <4 x i32> %a, <i32 31, i32 0, i32 undef, i32 undef> 2825 %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %1) 2826 ret <16 x i32> %2 2827} 2828 2829define <8 x i16> @sse2_psrai_w_128_masked(<8 x i16> %v, i32 %a) { 2830; CHECK-LABEL: @sse2_psrai_w_128_masked( 2831; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16 2832; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15 2833; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0 2834; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer 2835; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[V:%.*]], [[DOTSPLAT]] 2836; CHECK-NEXT: ret <8 x i16> [[TMP3]] 2837; 2838 %1 = and i32 %a, 15 2839 %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 %1) 2840 ret <8 x i16> %2 2841} 2842 2843define <8 x i32> @avx2_psrai_d_256_masked(<8 x i32> %v, i32 %a) { 2844; CHECK-LABEL: @avx2_psrai_d_256_masked( 2845; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31 2846; 
CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i64 0 2847; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer 2848; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[V:%.*]], [[DOTSPLAT]] 2849; CHECK-NEXT: ret <8 x i32> [[TMP2]] 2850; 2851 %1 = and i32 %a, 31 2852 %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 %1) 2853 ret <8 x i32> %2 2854} 2855 2856define <8 x i64> @avx512_psrai_q_512_masked(<8 x i64> %v, i32 %a) { 2857; CHECK-LABEL: @avx512_psrai_q_512_masked( 2858; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63 2859; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64 2860; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP2]], i64 0 2861; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer 2862; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i64> [[V:%.*]], [[DOTSPLAT]] 2863; CHECK-NEXT: ret <8 x i64> [[TMP3]] 2864; 2865 %1 = and i32 %a, 63 2866 %2 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 %1) 2867 ret <8 x i64> %2 2868} 2869 2870define <4 x i32> @sse2_psrli_d_128_masked(<4 x i32> %v, i32 %a) { 2871; CHECK-LABEL: @sse2_psrli_d_128_masked( 2872; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31 2873; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0 2874; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 2875; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[V:%.*]], [[DOTSPLAT]] 2876; CHECK-NEXT: ret <4 x i32> [[TMP2]] 2877; 2878 %1 = and i32 %a, 31 2879 %2 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 %1) 2880 ret <4 x i32> %2 2881} 2882 2883define <4 x i64> @avx2_psrli_q_256_masked(<4 x i64> %v, i32 %a) { 2884; CHECK-LABEL: @avx2_psrli_q_256_masked( 2885; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63 2886; 
CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64 2887; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i64 0 2888; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer 2889; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[V:%.*]], [[DOTSPLAT]] 2890; CHECK-NEXT: ret <4 x i64> [[TMP3]] 2891; 2892 %1 = and i32 %a, 63 2893 %2 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 %1) 2894 ret <4 x i64> %2 2895} 2896 2897define <32 x i16> @avx512_psrli_w_512_masked(<32 x i16> %v, i32 %a) { 2898; CHECK-LABEL: @avx512_psrli_w_512_masked( 2899; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16 2900; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15 2901; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[TMP2]], i64 0 2902; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer 2903; CHECK-NEXT: [[TMP3:%.*]] = lshr <32 x i16> [[V:%.*]], [[DOTSPLAT]] 2904; CHECK-NEXT: ret <32 x i16> [[TMP3]] 2905; 2906 %1 = and i32 %a, 15 2907 %2 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 %1) 2908 ret <32 x i16> %2 2909} 2910 2911define <2 x i64> @sse2_pslli_q_128_masked(<2 x i64> %v, i32 %a) { 2912; CHECK-LABEL: @sse2_pslli_q_128_masked( 2913; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63 2914; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64 2915; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0 2916; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer 2917; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[V:%.*]], [[DOTSPLAT]] 2918; CHECK-NEXT: ret <2 x i64> [[TMP3]] 2919; 2920 %1 = and i32 %a, 63 2921 %2 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 %1) 2922 ret <2 x i64> %2 2923} 2924 2925define <16 x i16> 
@avx2_pslli_w_256_masked(<16 x i16> %v, i32 %a) { 2926; CHECK-LABEL: @avx2_pslli_w_256_masked( 2927; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16 2928; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15 2929; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP2]], i64 0 2930; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer 2931; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i16> [[V:%.*]], [[DOTSPLAT]] 2932; CHECK-NEXT: ret <16 x i16> [[TMP3]] 2933; 2934 %1 = and i32 %a, 15 2935 %2 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 %1) 2936 ret <16 x i16> %2 2937} 2938 2939define <16 x i32> @avx512_pslli_d_512_masked(<16 x i32> %v, i32 %a) { 2940; CHECK-LABEL: @avx512_pslli_d_512_masked( 2941; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31 2942; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP1]], i64 0 2943; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer 2944; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i32> [[V:%.*]], [[DOTSPLAT]] 2945; CHECK-NEXT: ret <16 x i32> [[TMP2]] 2946; 2947 %1 = and i32 %a, 31 2948 %2 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 %1) 2949 ret <16 x i32> %2 2950} 2951 2952define <4 x i32> @avx2_psrav_d_128_masked(<4 x i32> %v, <4 x i32> %a) { 2953; CHECK-LABEL: @avx2_psrav_d_128_masked( 2954; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 31) 2955; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP1]] 2956; CHECK-NEXT: ret <4 x i32> [[TMP2]] 2957; 2958 %1 = and <4 x i32> %a, <i32 31, i32 31, i32 31, i32 31> 2959 %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %1) 2960 ret <4 x i32> %2 2961} 2962 2963define <4 x i32> @avx2_psrav_d_128_masked_shuffle(<4 x i32> %v, <4 x i32> %a) { 2964; CHECK-LABEL: @avx2_psrav_d_128_masked_shuffle( 2965; CHECK-NEXT: 
[[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 poison, i32 poison, i32 15, i32 31> 2966; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3> 2967; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP2]] 2968; CHECK-NEXT: ret <4 x i32> [[TMP3]] 2969; 2970 %1 = and <4 x i32> %a, <i32 undef, i32 undef, i32 15, i32 31> 2971 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3> 2972 %3 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %2) 2973 ret <4 x i32> %3 2974} 2975 2976define <8 x i32> @avx2_psrav_d_256_masked(<8 x i32> %v, <8 x i32> %a) { 2977; CHECK-LABEL: @avx2_psrav_d_256_masked( 2978; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i32> [[A:%.*]], <i32 0, i32 1, i32 7, i32 15, i32 16, i32 30, i32 31, i32 31> 2979; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[V:%.*]], [[TMP1]] 2980; CHECK-NEXT: ret <8 x i32> [[TMP2]] 2981; 2982 %1 = and <8 x i32> %a, <i32 0, i32 1, i32 7, i32 15, i32 16, i32 30, i32 31, i32 31> 2983 %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %1) 2984 ret <8 x i32> %2 2985} 2986 2987define <32 x i16> @avx512_psrav_w_512_masked(<32 x i16> %v, <32 x i16> %a) { 2988; CHECK-LABEL: @avx512_psrav_w_512_masked( 2989; CHECK-NEXT: [[TMP1:%.*]] = and <32 x i16> [[A:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7> 2990; CHECK-NEXT: [[TMP2:%.*]] = ashr <32 x i16> [[V:%.*]], [[TMP1]] 2991; CHECK-NEXT: ret <32 x i16> [[TMP2]] 2992; 2993 %1 = and <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7> 2994 %2 = tail call <32 x i16> 
@llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> %1) 2995 ret <32 x i16> %2 2996} 2997 2998define <2 x i64> @avx2_psrlv_q_128_masked(<2 x i64> %v, <2 x i64> %a) { 2999; CHECK-LABEL: @avx2_psrlv_q_128_masked( 3000; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 32, i64 63> 3001; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[V:%.*]], [[TMP1]] 3002; CHECK-NEXT: ret <2 x i64> [[TMP2]] 3003; 3004 %1 = and <2 x i64> %a, <i64 32, i64 63> 3005 %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1) 3006 ret <2 x i64> %2 3007} 3008 3009define <8 x i32> @avx2_psrlv_d_256_masked(<8 x i32> %v, <8 x i32> %a) { 3010; CHECK-LABEL: @avx2_psrlv_d_256_masked( 3011; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i32> [[A:%.*]], <i32 0, i32 1, i32 7, i32 15, i32 16, i32 30, i32 31, i32 31> 3012; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i32> [[V:%.*]], [[TMP1]] 3013; CHECK-NEXT: ret <8 x i32> [[TMP2]] 3014; 3015 %1 = and <8 x i32> %a, <i32 0, i32 1, i32 7, i32 15, i32 16, i32 30, i32 31, i32 31> 3016 %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1) 3017 ret <8 x i32> %2 3018} 3019 3020define <8 x i64> @avx512_psrlv_q_512_masked(<8 x i64> %v, <8 x i64> %a) { 3021; CHECK-LABEL: @avx512_psrlv_q_512_masked( 3022; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i64> [[A:%.*]], <i64 0, i64 1, i64 4, i64 16, i64 32, i64 47, i64 62, i64 63> 3023; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i64> [[V:%.*]], [[TMP1]] 3024; CHECK-NEXT: ret <8 x i64> [[TMP2]] 3025; 3026 %1 = and <8 x i64> %a, <i64 0, i64 1, i64 4, i64 16, i64 32, i64 47, i64 62, i64 63> 3027 %2 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> %1) 3028 ret <8 x i64> %2 3029} 3030 3031define <4 x i32> @avx2_psllv_d_128_masked(<4 x i32> %v, <4 x i32> %a) { 3032; CHECK-LABEL: @avx2_psllv_d_128_masked( 3033; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 0, i32 15, i32 16, i32 31> 3034; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[V:%.*]], [[TMP1]] 3035; CHECK-NEXT: ret <4 
x i32> [[TMP2]] 3036; 3037 %1 = and <4 x i32> %a, <i32 0, i32 15, i32 16, i32 31> 3038 %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1) 3039 ret <4 x i32> %2 3040} 3041 3042define <4 x i64> @avx2_psllv_q_256_masked(<4 x i64> %v, <4 x i64> %a) { 3043; CHECK-LABEL: @avx2_psllv_q_256_masked( 3044; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i64> [[A:%.*]], <i64 0, i64 16, i64 32, i64 63> 3045; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[V:%.*]], [[TMP1]] 3046; CHECK-NEXT: ret <4 x i64> [[TMP2]] 3047; 3048 %1 = and <4 x i64> %a, <i64 0, i64 16, i64 32, i64 63> 3049 %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1) 3050 ret <4 x i64> %2 3051} 3052 3053define <32 x i16> @avx512_psllv_w_512_masked(<32 x i16> %v, <32 x i16> %a) { 3054; CHECK-LABEL: @avx512_psllv_w_512_masked( 3055; CHECK-NEXT: [[TMP1:%.*]] = and <32 x i16> [[A:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7> 3056; CHECK-NEXT: [[TMP2:%.*]] = shl <32 x i16> [[V:%.*]], [[TMP1]] 3057; CHECK-NEXT: ret <32 x i16> [[TMP2]] 3058; 3059 %1 = and <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7> 3060 %2 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> %1) 3061 ret <32 x i16> %2 3062} 3063 3064; 3065; Vector Demanded Bits 3066; 3067 3068define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) { 3069; CHECK-LABEL: @sse2_psra_w_var( 3070; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) 3071; CHECK-NEXT: ret <8 x i16> [[TMP1]] 3072; 3073 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, 
i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 3074 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1) 3075 ret <8 x i16> %2 3076} 3077 3078define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) { 3079; CHECK-LABEL: @sse2_psra_w_var_bc( 3080; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x i16> 3081; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) 3082; CHECK-NEXT: ret <8 x i16> [[TMP2]] 3083; 3084 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3085 %2 = bitcast <2 x i64> %1 to <8 x i16> 3086 %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2) 3087 ret <8 x i16> %3 3088} 3089 3090define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) { 3091; CHECK-LABEL: @sse2_psra_d_var( 3092; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) 3093; CHECK-NEXT: ret <4 x i32> [[TMP1]] 3094; 3095 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 3096 %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1) 3097 ret <4 x i32> %2 3098} 3099 3100define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) { 3101; CHECK-LABEL: @sse2_psra_d_var_bc( 3102; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[A:%.*]] to <4 x i32> 3103; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) 3104; CHECK-NEXT: ret <4 x i32> [[TMP2]] 3105; 3106 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 3107 %2 = bitcast <8 x i16> %1 to <4 x i32> 3108 %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2) 3109 ret <4 x i32> %3 3110} 3111 3112define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) { 3113; CHECK-LABEL: @avx2_psra_w_var( 3114; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x 
i16> @llvm.x86.avx2.psra.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) 3115; CHECK-NEXT: ret <16 x i16> [[TMP1]] 3116; 3117 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 3118 %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1) 3119 ret <16 x i16> %2 3120} 3121 3122define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) { 3123; CHECK-LABEL: @avx2_psra_d_var( 3124; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) 3125; CHECK-NEXT: ret <8 x i32> [[TMP1]] 3126; 3127 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 3128 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1) 3129 ret <8 x i32> %2 3130} 3131 3132define <2 x i64> @avx512_psra_q_128_var(<2 x i64> %v, <2 x i64> %a) { 3133; CHECK-LABEL: @avx512_psra_q_128_var( 3134; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) 3135; CHECK-NEXT: ret <2 x i64> [[TMP1]] 3136; 3137 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3138 %2 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> %1) 3139 ret <2 x i64> %2 3140} 3141 3142define <4 x i64> @avx512_psra_q_256_var(<4 x i64> %v, <2 x i64> %a) { 3143; CHECK-LABEL: @avx512_psra_q_256_var( 3144; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) 3145; CHECK-NEXT: ret <4 x i64> [[TMP1]] 3146; 3147 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3148 %2 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> %1) 3149 ret <4 x i64> %2 3150} 3151 3152define <32 x i16> @avx512_psra_w_512_var(<32 x i16> %v, <8 x i16> %a) { 3153; CHECK-LABEL: @avx512_psra_w_512_var( 3154; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> 
@llvm.x86.avx512.psra.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) 3155; CHECK-NEXT: ret <32 x i16> [[TMP1]] 3156; 3157 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 3158 %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> %1) 3159 ret <32 x i16> %2 3160} 3161 3162define <16 x i32> @avx512_psra_d_512_var(<16 x i32> %v, <4 x i32> %a) { 3163; CHECK-LABEL: @avx512_psra_d_512_var( 3164; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) 3165; CHECK-NEXT: ret <16 x i32> [[TMP1]] 3166; 3167 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 3168 %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> %1) 3169 ret <16 x i32> %2 3170} 3171 3172define <8 x i64> @avx512_psra_q_512_var(<8 x i64> %v, <2 x i64> %a) { 3173; CHECK-LABEL: @avx512_psra_q_512_var( 3174; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) 3175; CHECK-NEXT: ret <8 x i64> [[TMP1]] 3176; 3177 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3178 %2 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %1) 3179 ret <8 x i64> %2 3180} 3181 3182define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) { 3183; CHECK-LABEL: @sse2_psrl_w_var( 3184; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) 3185; CHECK-NEXT: ret <8 x i16> [[TMP1]] 3186; 3187 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 3188 %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1) 3189 ret <8 x i16> %2 3190} 3191 3192define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) { 3193; CHECK-LABEL: @sse2_psrl_d_var( 3194; CHECK-NEXT: [[TMP1:%.*]] = 
tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) 3195; CHECK-NEXT: ret <4 x i32> [[TMP1]] 3196; 3197 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 3198 %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1) 3199 ret <4 x i32> %2 3200} 3201 3202define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) { 3203; CHECK-LABEL: @sse2_psrl_q_var( 3204; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) 3205; CHECK-NEXT: ret <2 x i64> [[TMP1]] 3206; 3207 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3208 %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1) 3209 ret <2 x i64> %2 3210} 3211 3212define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) { 3213; CHECK-LABEL: @avx2_psrl_w_var( 3214; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) 3215; CHECK-NEXT: ret <16 x i16> [[TMP1]] 3216; 3217 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 3218 %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1) 3219 ret <16 x i16> %2 3220} 3221 3222define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) { 3223; CHECK-LABEL: @avx2_psrl_w_var_bc( 3224; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x i16> 3225; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) 3226; CHECK-NEXT: ret <16 x i16> [[TMP2]] 3227; 3228 %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3229 %2 = bitcast <16 x i8> %1 to <8 x i16> 3230 %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2) 3231 ret <16 x i16> %3 3232} 
3233 3234define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) { 3235; CHECK-LABEL: @avx2_psrl_d_var( 3236; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) 3237; CHECK-NEXT: ret <8 x i32> [[TMP1]] 3238; 3239 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 3240 %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1) 3241 ret <8 x i32> %2 3242} 3243 3244define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) { 3245; CHECK-LABEL: @avx2_psrl_d_var_bc( 3246; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32> 3247; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) 3248; CHECK-NEXT: ret <8 x i32> [[TMP2]] 3249; 3250 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3251 %2 = bitcast <2 x i64> %1 to <4 x i32> 3252 %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2) 3253 ret <8 x i32> %3 3254} 3255 3256define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) { 3257; CHECK-LABEL: @avx2_psrl_q_var( 3258; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) 3259; CHECK-NEXT: ret <4 x i64> [[TMP1]] 3260; 3261 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3262 %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1) 3263 ret <4 x i64> %2 3264} 3265 3266define <32 x i16> @avx512_psrl_w_512_var(<32 x i16> %v, <8 x i16> %a) { 3267; CHECK-LABEL: @avx512_psrl_w_512_var( 3268; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) 3269; CHECK-NEXT: ret <32 x i16> [[TMP1]] 3270; 3271 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 3272 %2 = tail call <32 x i16> 
@llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %1) 3273 ret <32 x i16> %2 3274} 3275 3276define <32 x i16> @avx512_psrl_w_512_var_bc(<32 x i16> %v, <16 x i8> %a) { 3277; CHECK-LABEL: @avx512_psrl_w_512_var_bc( 3278; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x i16> 3279; CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[TMP1]]) 3280; CHECK-NEXT: ret <32 x i16> [[TMP2]] 3281; 3282 %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3283 %2 = bitcast <16 x i8> %1 to <8 x i16> 3284 %3 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %2) 3285 ret <32 x i16> %3 3286} 3287 3288define <16 x i32> @avx512_psrl_d_512_var(<16 x i32> %v, <4 x i32> %a) { 3289; CHECK-LABEL: @avx512_psrl_d_512_var( 3290; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) 3291; CHECK-NEXT: ret <16 x i32> [[TMP1]] 3292; 3293 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 3294 %2 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %1) 3295 ret <16 x i32> %2 3296} 3297 3298define <16 x i32> @avx512_psrl_d_512_var_bc(<16 x i32> %v, <2 x i64> %a) { 3299; CHECK-LABEL: @avx512_psrl_d_512_var_bc( 3300; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32> 3301; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[TMP1]]) 3302; CHECK-NEXT: ret <16 x i32> [[TMP2]] 3303; 3304 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3305 %2 = bitcast <2 x i64> %1 to <4 x i32> 3306 %3 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %2) 3307 ret <16 x i32> %3 3308} 3309 3310define <8 x i64> @avx512_psrl_q_512_var(<8 x i64> %v, <2 x i64> 
%a) { 3311; CHECK-LABEL: @avx512_psrl_q_512_var( 3312; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) 3313; CHECK-NEXT: ret <8 x i64> [[TMP1]] 3314; 3315 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3316 %2 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> %1) 3317 ret <8 x i64> %2 3318} 3319 3320define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) { 3321; CHECK-LABEL: @sse2_psll_w_var( 3322; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) 3323; CHECK-NEXT: ret <8 x i16> [[TMP1]] 3324; 3325 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 3326 %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1) 3327 ret <8 x i16> %2 3328} 3329 3330define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) { 3331; CHECK-LABEL: @sse2_psll_d_var( 3332; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) 3333; CHECK-NEXT: ret <4 x i32> [[TMP1]] 3334; 3335 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 3336 %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1) 3337 ret <4 x i32> %2 3338} 3339 3340define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) { 3341; CHECK-LABEL: @sse2_psll_q_var( 3342; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) 3343; CHECK-NEXT: ret <2 x i64> [[TMP1]] 3344; 3345 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3346 %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1) 3347 ret <2 x i64> %2 3348} 3349 3350define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) { 3351; CHECK-LABEL: @avx2_psll_w_var( 3352; CHECK-NEXT: [[TMP1:%.*]] = 
tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) 3353; CHECK-NEXT: ret <16 x i16> [[TMP1]] 3354; 3355 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 3356 %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1) 3357 ret <16 x i16> %2 3358} 3359 3360define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) { 3361; CHECK-LABEL: @avx2_psll_d_var( 3362; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) 3363; CHECK-NEXT: ret <8 x i32> [[TMP1]] 3364; 3365 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 3366 %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1) 3367 ret <8 x i32> %2 3368} 3369 3370define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) { 3371; CHECK-LABEL: @avx2_psll_q_var( 3372; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) 3373; CHECK-NEXT: ret <4 x i64> [[TMP1]] 3374; 3375 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3376 %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1) 3377 ret <4 x i64> %2 3378} 3379 3380define <32 x i16> @avx512_psll_w_512_var(<32 x i16> %v, <8 x i16> %a) { 3381; CHECK-LABEL: @avx512_psll_w_512_var( 3382; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]]) 3383; CHECK-NEXT: ret <32 x i16> [[TMP1]] 3384; 3385 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 3386 %2 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> %1) 3387 ret <32 x i16> %2 3388} 3389 3390define <16 x i32> @avx512_psll_d_512_var(<16 x i32> %v, <4 x i32> %a) { 3391; CHECK-LABEL: @avx512_psll_d_512_var( 3392; CHECK-NEXT: [[TMP1:%.*]] = 
tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]]) 3393; CHECK-NEXT: ret <16 x i32> [[TMP1]] 3394; 3395 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 3396 %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %1) 3397 ret <16 x i32> %2 3398} 3399 3400define <8 x i64> @avx512_psll_q_512_var(<8 x i64> %v, <2 x i64> %a) { 3401; CHECK-LABEL: @avx512_psll_q_512_var( 3402; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]]) 3403; CHECK-NEXT: ret <8 x i64> [[TMP1]] 3404; 3405 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3406 %2 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> %1) 3407 ret <8 x i64> %2 3408} 3409 3410; 3411; Constant Folding 3412; 3413 3414define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) { 3415; CHECK-LABEL: @test_sse2_psra_w_0( 3416; CHECK-NEXT: ret <8 x i16> [[A:%.*]] 3417; 3418 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0) 3419 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>) 3420 %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0) 3421 ret <8 x i16> %3 3422} 3423 3424define <8 x i16> @test_sse2_psra_w_8() { 3425; CHECK-LABEL: @test_sse2_psra_w_8( 3426; CHECK-NEXT: ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16> 3427; 3428 %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16> 3429 %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3) 3430 %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>) 3431 %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2) 3432 ret <8 x i16> %4 3433} 3434 3435define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) { 
; CHECK-LABEL: @test_sse2_psra_d_0(
; CHECK-NEXT:    ret <4 x i32> [[A:%.*]]
;
; Zero-shift chain psrai.d -> psra.d -> psrai.d. Each call consumes the result
; of the previous one, so all three intrinsic calls have to be folded for the
; function to reduce to a plain return of %A. The last call must take %2 (not
; %1); otherwise the psra.d call is dead and is never exercised by the fold.
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
  %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
  %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %2, i32 0)
  ret <4 x i32> %3
}

; Constant-input chain psrai.d -> psra.d -> psrai.d; expected to fold to a
; single constant vector.
define <4 x i32> @sse2_psra_d_8() {
; CHECK-LABEL: @sse2_psra_d_8(
; CHECK-NEXT:    ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
;
  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
  %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
  %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
  ret <4 x i32> %4
}

; Zero-shift chain psrai.w -> psra.w -> psrai.w on a 256-bit vector; expected
; to fold to a plain return of %A.
define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
; CHECK-LABEL: @test_avx2_psra_w_0(
; CHECK-NEXT:    ret <16 x i16> [[A:%.*]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
  ret <16 x i16> %3
}

; Constant-input chain psrai.w -> psra.w -> psrai.w on a 256-bit vector;
; expected to fold to a single constant vector.
define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
; CHECK-LABEL: @test_avx2_psra_w_8(
; CHECK-NEXT:    ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
;
  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7,
i16 0, i16 0, i16 0>) 3473 %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2) 3474 ret <16 x i16> %4 3475} 3476 3477define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) { 3478; CHECK-LABEL: @test_avx2_psra_d_0( 3479; CHECK-NEXT: ret <8 x i32> [[A:%.*]] 3480; 3481 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0) 3482 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>) 3483 %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0) 3484 ret <8 x i32> %3 3485} 3486 3487define <8 x i32> @test_avx2_psra_d_8() { 3488; CHECK-LABEL: @test_avx2_psra_d_8( 3489; CHECK-NEXT: ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608> 3490; 3491 %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32> 3492 %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3) 3493 %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>) 3494 %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2) 3495 ret <8 x i32> %4 3496} 3497 3498define <32 x i16> @test_avx512_psra_w_512_0(<32 x i16> %A) { 3499; CHECK-LABEL: @test_avx512_psra_w_512_0( 3500; CHECK-NEXT: ret <32 x i16> [[A:%.*]] 3501; 3502 %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %A, i32 0) 3503 %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>) 3504 %3 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %2, i32 0) 3505 ret <32 x i16> %3 3506} 3507 3508define <32 x i16> @test_avx512_psra_w_512_8(<32 x i16> %A) { 3509; CHECK-LABEL: @test_avx512_psra_w_512_8( 3510; CHECK-NEXT: ret <32 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 
16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16> 3511; 3512 %1 = bitcast <8 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <32 x i16> 3513 %2 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %1, i32 3) 3514 %3 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>) 3515 %4 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %3, i32 2) 3516 ret <32 x i16> %4 3517} 3518 3519define <16 x i32> @test_avx512_psra_d_512_0(<16 x i32> %A) { 3520; CHECK-LABEL: @test_avx512_psra_d_512_0( 3521; CHECK-NEXT: ret <16 x i32> [[A:%.*]] 3522; 3523 %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %A, i32 0) 3524 %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>) 3525 %3 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %2, i32 0) 3526 ret <16 x i32> %3 3527} 3528 3529define <16 x i32> @test_avx512_psra_d_512_8() { 3530; CHECK-LABEL: @test_avx512_psra_d_512_8( 3531; CHECK-NEXT: ret <16 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608> 3532; 3533 %1 = bitcast <8 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i32> 3534 %2 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %1, i32 3) 3535 %3 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>) 3536 %4 = 
tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %3, i32 2) 3537 ret <16 x i32> %4 3538} 3539 3540; 3541; Old Tests 3542; 3543 3544define <2 x i64> @test_sse2_1() { 3545; CHECK-LABEL: @test_sse2_1( 3546; CHECK-NEXT: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624> 3547; 3548 %S = bitcast i32 1 to i32 3549 %1 = zext i32 %S to i64 3550 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 3551 %3 = insertelement <2 x i64> %2, i64 0, i32 1 3552 %4 = bitcast <2 x i64> %3 to <8 x i16> 3553 %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4) 3554 %6 = bitcast <8 x i16> %5 to <4 x i32> 3555 %7 = bitcast <2 x i64> %3 to <4 x i32> 3556 %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7) 3557 %9 = bitcast <4 x i32> %8 to <2 x i64> 3558 %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3) 3559 %11 = bitcast <2 x i64> %10 to <8 x i16> 3560 %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S) 3561 %13 = bitcast <8 x i16> %12 to <4 x i32> 3562 %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S) 3563 %15 = bitcast <4 x i32> %14 to <2 x i64> 3564 %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S) 3565 ret <2 x i64> %16 3566} 3567 3568define <4 x i64> @test_avx2_1() { 3569; CHECK-LABEL: @test_avx2_1( 3570; CHECK-NEXT: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256> 3571; 3572 %S = bitcast i32 1 to i32 3573 %1 = zext i32 %S to i64 3574 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 3575 %3 = insertelement <2 x i64> %2, i64 0, i32 1 3576 %4 = bitcast <2 x i64> %3 to <8 x i16> 3577 %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4) 3578 %6 = bitcast <16 x i16> %5 to <8 x i32> 3579 %7 = bitcast <2 x i64> %3 to <4 x i32> 3580 %8 = tail call <8 x 
i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}

; Constant-folding test for the 128-bit SSE2 psll/pslli intrinsics.
; The count %S is the constant 128 (named through a same-type bitcast). The
; vector-count operand is built as <zext(%S), 0> and bitcast to each lane type.
; A constant <8 x i16> input is pushed through psll.w, psll.d, psll.q and then
; pslli.w, pslli.d, pslli.q, bitcasting between lane widths in between.
; Expected result: the whole chain folds to zeroinitializer.
; NOTE(review): presumably because 128 is out of range for every lane width
; used here - confirm against the intrinsic semantics.
define <2 x i64> @test_sse2_0() {
; CHECK-LABEL: @test_sse2_0(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}

; 256-bit AVX2 counterpart of @test_sse2_0: the same psll.w/d/q followed by
; pslli.w/d/q chain with the count fixed at 128. The <16 x i16> constant input
; has one nonzero i16 (1, 2, 3, 4) at the start of each group of four.
; Expected result: zeroinitializer.
define <4 x i64> @test_avx2_0() {
; CHECK-LABEL: @test_avx2_0(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}

; Constant-folding test for the 128-bit SSE2 psrl/psrli intrinsics with the
; count fixed at 1. A constant <8 x i16> input goes through psrl.w, psrl.d,
; psrl.q (vector count <1, 0>) and then psrli.w, psrli.d, psrli.q (immediate
; count 1), bitcasting between lane widths in between.
; Expected result: the chain folds to the constant <2 x i64> in the CHECK line.
define <2 x i64> @test_sse2_psrl_1() {
; CHECK-LABEL: @test_sse2_psrl_1(
; CHECK-NEXT:    ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
;
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}

; 256-bit AVX2 counterpart of @test_sse2_psrl_1. The <16 x i16> constant input
; has one nonzero i16 (1024, 2048, 4096, 8192) at the start of each group of
; four, and the chain applies six shifts with count 1.
; Expected result: <16, 32, 64, 128>, i.e. each nonzero input value divided
; by 64.
define <4 x i64> @test_avx2_psrl_1() {
; CHECK-LABEL: @test_avx2_psrl_1(
; CHECK-NEXT:    ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
;
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}

; Same psrl/psrli chain as @test_sse2_psrl_1 but with the count fixed at 128
; and a different constant input.
; Expected result: zeroinitializer.
define <2 x i64> @test_sse2_psrl_0() {
; CHECK-LABEL: @test_sse2_psrl_0(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}

; 256-bit AVX2 counterpart of @test_sse2_psrl_0: the input constant of
; @test_avx2_psrl_1 with the count fixed at 128.
; Expected result: zeroinitializer.
define <4 x i64> @test_avx2_psrl_0() {
; CHECK-LABEL: @test_avx2_psrl_0(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}

; Intrinsic declarations. In each of the next three groups the "i"-suffixed
; names (pslli/psrli/psrai) take an i32 count, while the plain names
; (psll/psrl/psra) take a 128-bit vector count whose element type matches the
; data operand. Each group lists the 512-bit, then 256-bit, then 128-bit forms.

; psll / pslli
declare <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64>, i32) #1
declare <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32>, i32) #1
declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) #1
declare <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64>, <2 x i64>) #1
declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) #1
declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) #1
declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1

; psrl / psrli
declare <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64>, i32) #1
declare <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32>, i32) #1
declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) #1
declare <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64>, <2 x i64>) #1
declare <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32>, <4 x i32>) #1
declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) #1
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1

; psra / psrai (the i64-element forms are declared under llvm.x86.avx512 at
; every vector width)
declare <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64>, i32) #1
declare <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32>, i32) #1
declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) #1
declare <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64>, <2 x i64>) #1
declare <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32>, <4 x i32>) #1
declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) #1
declare <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64>, i32) #1
declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
declare <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64>, <2 x i64>) #1
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
declare <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1

; psrav: the count operand has the same vector type as the data operand.
declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) #1
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) #1
declare <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32>, <16 x i32>) #1
declare <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64>, <2 x i64>) #1
; Declarations of the remaining variable-shift intrinsics. In every signature
; below the shift-count operand has the same vector type as the data operand.

; llvm.x86.avx512.psrav.q, 256- and 512-bit vectors.
declare <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64>, <4 x i64>) #1
declare <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64>, <8 x i64>) #1

; psrlv: i32-element forms first, then i64-element forms, narrowest to widest.
declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) #1
declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) #1
declare <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32>, <16 x i32>) #1
declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) #1
declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) #1
declare <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64>, <8 x i64>) #1

; psllv: same ordering as the psrlv group.
declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) #1
declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) #1
declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) #1
declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) #1
declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) #1
declare <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64>, <8 x i64>) #1

; i16-element psrav/psrlv/psllv, grouped by vector width.
declare <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16>, <8 x i16>) #1
declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>) #1
declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>) #1
declare <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16>, <16 x i16>) #1
declare <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16>, <16 x i16>) #1
declare <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16>, <16 x i16>) #1
declare <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16>, <32 x i16>) #1
declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) #1
declare <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16>, <32 x i16>) #1

; Attribute group referenced as #1 by the declarations above.
attributes #1 = { nounwind readnone }