1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BWVL 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQVL 10 11; 12; vXi64 13; 14 15define i64 @test_v2i64(<2 x i64> %a0) { 16; SSE-LABEL: test_v2i64: 17; SSE: # %bb.0: 18; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 19; SSE-NEXT: movdqa %xmm0, %xmm2 20; SSE-NEXT: psrlq $32, %xmm2 21; SSE-NEXT: pmuludq %xmm1, %xmm2 22; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 23; SSE-NEXT: pmuludq %xmm0, %xmm3 24; SSE-NEXT: paddq %xmm2, %xmm3 25; SSE-NEXT: psllq $32, %xmm3 26; SSE-NEXT: pmuludq %xmm1, %xmm0 27; SSE-NEXT: paddq %xmm3, %xmm0 28; SSE-NEXT: movq %xmm0, %rax 29; SSE-NEXT: retq 30; 31; AVX1OR2-LABEL: test_v2i64: 32; AVX1OR2: # %bb.0: 33; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 34; AVX1OR2-NEXT: vpsrlq $32, %xmm0, %xmm2 35; AVX1OR2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 36; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 37; AVX1OR2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 38; AVX1OR2-NEXT: vpaddq %xmm2, %xmm3, %xmm2 39; AVX1OR2-NEXT: vpsllq $32, %xmm2, %xmm2 40; AVX1OR2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 41; AVX1OR2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 42; AVX1OR2-NEXT: vmovq %xmm0, %rax 43; AVX1OR2-NEXT: retq 44; 45; AVX512BW-LABEL: test_v2i64: 46; AVX512BW: # %bb.0: 47; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 48; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2 49; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 50; AVX512BW-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 51; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 52; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2 53; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2 54; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 55; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0 56; AVX512BW-NEXT: vmovq %xmm0, %rax 57; AVX512BW-NEXT: retq 58; 59; AVX512BWVL-LABEL: test_v2i64: 60; AVX512BWVL: # %bb.0: 61; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 62; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2 63; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 64; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 65; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 66; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2 67; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2 68; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 69; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 70; AVX512BWVL-NEXT: vmovq %xmm0, %rax 71; AVX512BWVL-NEXT: retq 72; 73; AVX512DQ-LABEL: test_v2i64: 74; AVX512DQ: # %bb.0: 75; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 76; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 77; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0 78; AVX512DQ-NEXT: vmovq %xmm0, %rax 79; AVX512DQ-NEXT: vzeroupper 80; AVX512DQ-NEXT: retq 81; 82; AVX512DQVL-LABEL: test_v2i64: 83; AVX512DQVL: # %bb.0: 84; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 85; AVX512DQVL-NEXT: vpmullq %xmm1, %xmm0, %xmm0 86; AVX512DQVL-NEXT: vmovq %xmm0, %rax 87; AVX512DQVL-NEXT: retq 88 %1 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %a0) 89 ret i64 %1 90} 91 92define i64 @test_v4i64(<4 x i64> %a0) { 93; SSE-LABEL: test_v4i64: 94; SSE: # %bb.0: 95; SSE-NEXT: movdqa %xmm0, %xmm2 96; SSE-NEXT: psrlq $32, %xmm2 97; SSE-NEXT: pmuludq %xmm1, %xmm2 98; SSE-NEXT: movdqa %xmm1, %xmm3 99; SSE-NEXT: psrlq $32, %xmm3 100; SSE-NEXT: pmuludq %xmm0, %xmm3 101; SSE-NEXT: paddq %xmm2, %xmm3 102; SSE-NEXT: psllq $32, %xmm3 103; SSE-NEXT: pmuludq %xmm1, %xmm0 104; SSE-NEXT: paddq %xmm3, %xmm0 105; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 106; SSE-NEXT: movdqa %xmm0, %xmm2 107; SSE-NEXT: psrlq $32, %xmm2 108; SSE-NEXT: pmuludq %xmm1, %xmm2 109; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 110; SSE-NEXT: pmuludq %xmm0, %xmm3 111; SSE-NEXT: paddq %xmm2, %xmm3 112; SSE-NEXT: psllq $32, %xmm3 113; SSE-NEXT: pmuludq %xmm1, %xmm0 114; SSE-NEXT: paddq %xmm3, %xmm0 115; SSE-NEXT: movq %xmm0, %rax 116; SSE-NEXT: retq 117; 118; AVX1-LABEL: test_v4i64: 119; AVX1: # %bb.0: 120; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 121; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm2 122; AVX1-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 123; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm3 124; AVX1-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 125; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2 126; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2 127; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 128; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0 129; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 130; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm2 131; AVX1-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 132; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 133; AVX1-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 134; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2 135; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2 136; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 137; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0 138; AVX1-NEXT: vmovq %xmm0, %rax 139; AVX1-NEXT: vzeroupper 140; AVX1-NEXT: retq 141; 142; AVX2-LABEL: test_v4i64: 143; AVX2: # %bb.0: 144; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 145; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2 146; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 147; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm3 148; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 149; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2 150; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2 151; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 152; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 153; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 154; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2 155; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 156; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 157; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 158; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2 159; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2 160; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 161; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 162; AVX2-NEXT: vmovq %xmm0, %rax 163; AVX2-NEXT: vzeroupper 164; AVX2-NEXT: retq 165; 166; AVX512BW-LABEL: test_v4i64: 167; AVX512BW: # %bb.0: 168; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 169; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2 170; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 171; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm3 172; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 173; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2 174; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2 175; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 176; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0 177; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 178; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2 179; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 180; AVX512BW-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 181; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 182; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2 183; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2 184; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 185; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0 186; AVX512BW-NEXT: vmovq %xmm0, %rax 187; AVX512BW-NEXT: vzeroupper 188; AVX512BW-NEXT: retq 189; 190; AVX512BWVL-LABEL: test_v4i64: 191; AVX512BWVL: # %bb.0: 192; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1 193; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2 194; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 195; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3 196; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 197; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2 198; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2 199; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 200; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 201; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 202; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2 203; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 204; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 205; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 206; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2 207; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2 208; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 209; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 210; AVX512BWVL-NEXT: vmovq %xmm0, %rax 211; AVX512BWVL-NEXT: vzeroupper 212; AVX512BWVL-NEXT: retq 213; 214; AVX512DQ-LABEL: test_v4i64: 215; AVX512DQ: # %bb.0: 216; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 217; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 218; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0 219; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 220; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0 221; AVX512DQ-NEXT: vmovq %xmm0, %rax 222; AVX512DQ-NEXT: vzeroupper 223; AVX512DQ-NEXT: retq 224; 225; AVX512DQVL-LABEL: test_v4i64: 226; AVX512DQVL: # %bb.0: 227; AVX512DQVL-NEXT: vextracti128 $1, %ymm0, %xmm1 228; AVX512DQVL-NEXT: vpmullq %xmm1, %xmm0, %xmm0 229; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 230; AVX512DQVL-NEXT: vpmullq %xmm1, %xmm0, %xmm0 231; AVX512DQVL-NEXT: vmovq %xmm0, %rax 232; AVX512DQVL-NEXT: vzeroupper 233; AVX512DQVL-NEXT: retq 234 %1 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> %a0) 235 ret i64 %1 236} 237 238define i64 @test_v8i64(<8 x i64> %a0) { 239; SSE-LABEL: test_v8i64: 240; SSE: # %bb.0: 241; SSE-NEXT: movdqa %xmm1, %xmm4 242; SSE-NEXT: psrlq $32, %xmm4 243; SSE-NEXT: pmuludq %xmm3, %xmm4 244; SSE-NEXT: movdqa %xmm3, %xmm5 245; SSE-NEXT: psrlq $32, %xmm5 246; SSE-NEXT: pmuludq %xmm1, %xmm5 247; SSE-NEXT: paddq %xmm4, %xmm5 248; SSE-NEXT: psllq $32, %xmm5 249; SSE-NEXT: pmuludq %xmm3, %xmm1 250; SSE-NEXT: paddq %xmm5, %xmm1 251; SSE-NEXT: movdqa %xmm0, %xmm3 252; SSE-NEXT: psrlq $32, %xmm3 253; SSE-NEXT: pmuludq %xmm2, %xmm3 254; SSE-NEXT: movdqa %xmm2, %xmm4 255; SSE-NEXT: psrlq $32, %xmm4 256; SSE-NEXT: pmuludq %xmm0, %xmm4 257; SSE-NEXT: paddq %xmm3, %xmm4 258; SSE-NEXT: psllq $32, %xmm4 259; SSE-NEXT: pmuludq %xmm2, %xmm0 260; SSE-NEXT: paddq %xmm4, %xmm0 261; SSE-NEXT: movdqa %xmm0, %xmm2 262; SSE-NEXT: psrlq $32, %xmm2 263; SSE-NEXT: pmuludq %xmm1, %xmm2 264; SSE-NEXT: movdqa %xmm1, %xmm3 265; SSE-NEXT: psrlq $32, %xmm3 266; SSE-NEXT: pmuludq %xmm0, %xmm3 267; SSE-NEXT: paddq %xmm2, %xmm3 268; SSE-NEXT: psllq $32, %xmm3 269; SSE-NEXT: pmuludq %xmm1, %xmm0 270; SSE-NEXT: paddq %xmm3, %xmm0 271; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 272; SSE-NEXT: movdqa %xmm0, %xmm2 273; SSE-NEXT: psrlq $32, %xmm2 274; SSE-NEXT: pmuludq %xmm1, %xmm2 275; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 276; SSE-NEXT: pmuludq %xmm0, %xmm3 277; SSE-NEXT: paddq %xmm2, %xmm3 278; SSE-NEXT: psllq $32, %xmm3 279; SSE-NEXT: pmuludq %xmm1, %xmm0 280; SSE-NEXT: paddq %xmm3, %xmm0 281; SSE-NEXT: movq %xmm0, %rax 282; SSE-NEXT: retq 283; 284; AVX1-LABEL: test_v8i64: 285; AVX1: # %bb.0: 286; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 287; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 288; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm4 289; AVX1-NEXT: vpmuludq %xmm2, %xmm4, %xmm4 290; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm5 291; AVX1-NEXT: vpmuludq %xmm5, %xmm3, %xmm5 292; AVX1-NEXT: vpaddq %xmm4, %xmm5, %xmm4 293; AVX1-NEXT: vpsllq $32, %xmm4, %xmm4 294; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 295; AVX1-NEXT: vpaddq %xmm4, %xmm2, %xmm2 296; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm3 297; AVX1-NEXT: vpmuludq %xmm1, %xmm3, %xmm3 298; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm4 299; AVX1-NEXT: vpmuludq %xmm4, %xmm0, %xmm4 300; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3 301; AVX1-NEXT: vpsllq $32, %xmm3, %xmm3 302; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 303; AVX1-NEXT: vpaddq %xmm3, %xmm0, %xmm0 304; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm1 305; AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 306; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm3 307; AVX1-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 308; AVX1-NEXT: vpaddq %xmm1, %xmm3, %xmm1 309; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1 310; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm0 311; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 312; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 313; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm2 314; AVX1-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 315; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 316; AVX1-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 317; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2 318; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2 319; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 320; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0 321; AVX1-NEXT: vmovq %xmm0, %rax 322; AVX1-NEXT: vzeroupper 323; AVX1-NEXT: retq 324; 325; AVX2-LABEL: test_v8i64: 326; AVX2: # %bb.0: 327; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm2 328; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2 329; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3 330; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3 331; AVX2-NEXT: vpaddq %ymm2, %ymm3, %ymm2 332; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2 333; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 334; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0 335; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 336; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2 337; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 338; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm3 339; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 340; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2 341; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2 342; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 343; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 344; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 345; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2 346; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 347; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 348; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 349; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2 350; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2 351; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 352; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 353; AVX2-NEXT: vmovq %xmm0, %rax 354; AVX2-NEXT: vzeroupper 355; AVX2-NEXT: retq 356; 357; AVX512BW-LABEL: test_v8i64: 358; AVX512BW: # %bb.0: 359; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 360; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2 361; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2 362; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3 363; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3 364; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2 365; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2 366; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 367; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0 368; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 369; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2 370; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 371; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm3 372; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 373; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2 374; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2 375; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 376; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0 377; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 378; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2 379; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 380; AVX512BW-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 381; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 382; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2 383; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2 384; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 385; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0 386; AVX512BW-NEXT: vmovq %xmm0, %rax 387; AVX512BW-NEXT: vzeroupper 388; AVX512BW-NEXT: retq 389; 390; AVX512BWVL-LABEL: test_v8i64: 391; AVX512BWVL: # %bb.0: 392; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 393; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2 394; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2 395; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3 396; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3 397; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2 398; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2 399; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 400; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0 401; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1 402; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2 403; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 404; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3 405; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 406; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2 407; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2 408; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 409; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 410; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 411; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2 412; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 413; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 414; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 415; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2 416; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2 417; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 418; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 419; AVX512BWVL-NEXT: vmovq %xmm0, %rax 420; AVX512BWVL-NEXT: vzeroupper 421; AVX512BWVL-NEXT: retq 422; 423; AVX512DQ-LABEL: test_v8i64: 424; AVX512DQ: # %bb.0: 425; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1 426; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0 427; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 428; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0 429; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 430; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0 431; AVX512DQ-NEXT: vmovq %xmm0, %rax 432; AVX512DQ-NEXT: vzeroupper 433; AVX512DQ-NEXT: retq 434; 435; AVX512DQVL-LABEL: test_v8i64: 436; AVX512DQVL: # %bb.0: 437; AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 438; AVX512DQVL-NEXT: vpmullq %zmm1, %zmm0, %zmm0 439; AVX512DQVL-NEXT: vextracti128 $1, %ymm0, %xmm1 440; AVX512DQVL-NEXT: vpmullq %xmm1, %xmm0, %xmm0 441; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 442; AVX512DQVL-NEXT: vpmullq %xmm1, %xmm0, %xmm0 443; AVX512DQVL-NEXT: vmovq %xmm0, %rax 444; AVX512DQVL-NEXT: vzeroupper 445; AVX512DQVL-NEXT: retq 446 %1 = call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> %a0) 447 ret i64 %1 448} 449 450define i64 @test_v16i64(<16 x i64> %a0) { 451; SSE-LABEL: test_v16i64: 452; SSE: # %bb.0: 453; SSE-NEXT: movdqa %xmm2, %xmm8 454; SSE-NEXT: psrlq $32, %xmm8 455; SSE-NEXT: pmuludq %xmm6, %xmm8 456; SSE-NEXT: movdqa %xmm6, %xmm9 457; SSE-NEXT: psrlq $32, %xmm9 458; SSE-NEXT: pmuludq %xmm2, %xmm9 459; SSE-NEXT: paddq %xmm8, %xmm9 460; SSE-NEXT: psllq $32, %xmm9 461; SSE-NEXT: pmuludq %xmm6, %xmm2 462; SSE-NEXT: paddq %xmm9, %xmm2 463; SSE-NEXT: movdqa %xmm0, %xmm6 464; SSE-NEXT: psrlq $32, %xmm6 465; SSE-NEXT: pmuludq %xmm4, %xmm6 466; SSE-NEXT: movdqa %xmm4, %xmm8 467; SSE-NEXT: psrlq $32, %xmm8 468; SSE-NEXT: pmuludq %xmm0, %xmm8 469; SSE-NEXT: paddq %xmm6, %xmm8 470; SSE-NEXT: psllq $32, %xmm8 471; SSE-NEXT: pmuludq %xmm4, %xmm0 472; SSE-NEXT: paddq %xmm8, %xmm0 473; SSE-NEXT: movdqa %xmm3, %xmm4 474; SSE-NEXT: psrlq $32, %xmm4 475; SSE-NEXT: pmuludq %xmm7, %xmm4 476; SSE-NEXT: movdqa %xmm7, %xmm6 477; SSE-NEXT: psrlq $32, %xmm6 478; SSE-NEXT: pmuludq %xmm3, %xmm6 479; SSE-NEXT: paddq %xmm4, %xmm6 480; SSE-NEXT: psllq $32, %xmm6 481; SSE-NEXT: pmuludq %xmm7, %xmm3 482; SSE-NEXT: paddq %xmm6, %xmm3 483; SSE-NEXT: movdqa %xmm1, %xmm4 484; SSE-NEXT: psrlq $32, %xmm4 485; SSE-NEXT: pmuludq %xmm5, %xmm4 486; SSE-NEXT: movdqa %xmm5, %xmm6 487; SSE-NEXT: psrlq $32, %xmm6 488; SSE-NEXT: pmuludq %xmm1, %xmm6 489; SSE-NEXT: paddq %xmm4, %xmm6 490; SSE-NEXT: psllq $32, %xmm6 491; SSE-NEXT: pmuludq %xmm5, %xmm1 492; SSE-NEXT: paddq %xmm6, %xmm1 493; SSE-NEXT: movdqa %xmm1, %xmm4 494; SSE-NEXT: psrlq $32, %xmm4 495; SSE-NEXT: pmuludq %xmm3, %xmm4 496; SSE-NEXT: movdqa %xmm3, %xmm5 497; SSE-NEXT: psrlq $32, %xmm5 498; SSE-NEXT: pmuludq %xmm1, %xmm5 499; SSE-NEXT: paddq %xmm4, %xmm5 500; SSE-NEXT: psllq $32, %xmm5 501; SSE-NEXT: pmuludq %xmm3, %xmm1 502; SSE-NEXT: paddq %xmm5, %xmm1 503; SSE-NEXT: movdqa %xmm0, %xmm3 504; SSE-NEXT: psrlq $32, %xmm3 505; SSE-NEXT: pmuludq %xmm2, %xmm3 506; SSE-NEXT: movdqa %xmm2, %xmm4 507; SSE-NEXT: psrlq $32, %xmm4 508; SSE-NEXT: pmuludq %xmm0, %xmm4 509; SSE-NEXT: paddq %xmm3, %xmm4 510; SSE-NEXT: psllq $32, %xmm4 511; SSE-NEXT: pmuludq %xmm2, %xmm0 512; SSE-NEXT: paddq %xmm4, %xmm0 513; SSE-NEXT: movdqa %xmm0, %xmm2 514; SSE-NEXT: psrlq $32, %xmm2 515; SSE-NEXT: pmuludq %xmm1, %xmm2 516; SSE-NEXT: movdqa %xmm1, %xmm3 517; SSE-NEXT: psrlq $32, %xmm3 518; SSE-NEXT: pmuludq %xmm0, %xmm3 519; SSE-NEXT: paddq %xmm2, %xmm3 520; SSE-NEXT: psllq $32, %xmm3 521; SSE-NEXT: pmuludq %xmm1, %xmm0 522; SSE-NEXT: paddq %xmm3, %xmm0 523; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 524; SSE-NEXT: movdqa %xmm0, %xmm2 525; SSE-NEXT: psrlq $32, %xmm2 526; SSE-NEXT: pmuludq %xmm1, %xmm2 527; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 528; SSE-NEXT: pmuludq %xmm0, %xmm3 529; SSE-NEXT: paddq %xmm2, %xmm3 530; SSE-NEXT: psllq $32, %xmm3 531; SSE-NEXT: pmuludq %xmm1, %xmm0 532; SSE-NEXT: paddq %xmm3, %xmm0 533; SSE-NEXT: movq %xmm0, %rax 534; SSE-NEXT: retq 535; 536; AVX1-LABEL: test_v16i64: 537; AVX1: # %bb.0: 538; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm4 539; AVX1-NEXT: vpmuludq %xmm3, %xmm4, %xmm4 540; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm5 541; AVX1-NEXT: vpmuludq %xmm5, %xmm1, %xmm5 542; AVX1-NEXT: vpaddq %xmm4, %xmm5, %xmm4 543; AVX1-NEXT: vpsllq $32, %xmm4, %xmm4 544; AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm5 545; AVX1-NEXT: vpaddq %xmm4, %xmm5, %xmm4 546; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm5 547; AVX1-NEXT: vpmuludq %xmm2, %xmm5, %xmm5 548; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm6 549; AVX1-NEXT: vpmuludq %xmm6, %xmm0, %xmm6 550; AVX1-NEXT: vpaddq %xmm5, %xmm6, %xmm5 551; AVX1-NEXT: vpsllq $32, %xmm5, %xmm5 552; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm6 553; AVX1-NEXT: vpaddq %xmm5, %xmm6, %xmm5 554; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 555; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 556; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm6 557; AVX1-NEXT: vpmuludq %xmm3, %xmm6, %xmm6 558; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm7 559; AVX1-NEXT: vpmuludq %xmm7, %xmm1, %xmm7 560; AVX1-NEXT: vpaddq %xmm6, %xmm7, %xmm6 561; AVX1-NEXT: vpsllq $32, %xmm6, %xmm6 562; AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 563; AVX1-NEXT: vpaddq %xmm6, %xmm1, %xmm1 564; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 565; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 566; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm3 567; AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm3 568; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm6 569; AVX1-NEXT: vpmuludq %xmm6, %xmm0, %xmm6 570; AVX1-NEXT: vpaddq %xmm3, %xmm6, %xmm3 571; AVX1-NEXT: vpsllq $32, %xmm3, %xmm3 572; AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm0 573; AVX1-NEXT: vpaddq %xmm3, %xmm0, %xmm0 574; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm2 575; AVX1-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 576; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm3 577; AVX1-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 578; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2 579; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2 580; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 581; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0 582; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm1 583; AVX1-NEXT: vpmuludq %xmm4, %xmm1, %xmm1 584; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm2 585; AVX1-NEXT: vpmuludq %xmm2, %xmm5, %xmm2 586; AVX1-NEXT: vpaddq %xmm1, %xmm2, %xmm1 587; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1 588; AVX1-NEXT: vpmuludq %xmm4, %xmm5, %xmm2 589; AVX1-NEXT: vpaddq %xmm1, %xmm2, %xmm1 590; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm2 591; AVX1-NEXT: vpmuludq %xmm0, %xmm2, %xmm2 592; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm3 593; AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm3 594; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2 595; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2 596; AVX1-NEXT: vpmuludq %xmm0, %xmm1, %xmm0 597; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0 598; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 599; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm2 600; AVX1-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 601; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 602; AVX1-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 603; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2 604; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2 605; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 606; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0 607; AVX1-NEXT: vmovq %xmm0, %rax 608; AVX1-NEXT: vzeroupper 609; AVX1-NEXT: retq 610; 611; AVX2-LABEL: test_v16i64: 612; AVX2: # %bb.0: 613; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm4 614; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm4 615; AVX2-NEXT: vpsrlq $32, %ymm3, %ymm5 616; AVX2-NEXT: vpmuludq %ymm5, %ymm1, %ymm5 617; AVX2-NEXT: vpaddq %ymm4, %ymm5, %ymm4 618; AVX2-NEXT: vpsllq $32, %ymm4, %ymm4 619; AVX2-NEXT: vpmuludq %ymm3, %ymm1, %ymm1 620; AVX2-NEXT: vpaddq %ymm4, %ymm1, %ymm1 621; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm3 622; AVX2-NEXT: vpmuludq %ymm2, %ymm3, %ymm3 623; AVX2-NEXT: vpsrlq $32, %ymm2, %ymm4 624; AVX2-NEXT: vpmuludq %ymm4, %ymm0, %ymm4 625; AVX2-NEXT: vpaddq %ymm3, %ymm4, %ymm3 626; AVX2-NEXT: vpsllq $32, %ymm3, %ymm3 627; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm0 628; AVX2-NEXT: vpaddq %ymm3, %ymm0, %ymm0 629; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm2 630; AVX2-NEXT: vpmuludq %ymm1, %ymm2, %ymm2 631; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3 632; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3 633; AVX2-NEXT: vpaddq %ymm2, %ymm3, %ymm2 634; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2 635; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 636; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0 637; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 638; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2 639; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 640; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm3 641; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 642; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2 643; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2 644; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 645; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 646; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 647; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2 648; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 649; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 650; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 651; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2 652; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2 653; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 654; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 655; AVX2-NEXT: vmovq %xmm0, %rax 656; AVX2-NEXT: vzeroupper 657; AVX2-NEXT: retq 658; 659; AVX512BW-LABEL: test_v16i64: 660; AVX512BW: # %bb.0: 661; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2 662; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2 663; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3 664; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3 665; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2 666; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2 667; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 668; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0 669; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 670; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2 671; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2 672; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3 673; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3 674; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2 675; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2 676; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 677; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0 678; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 679; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2 680; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 681; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm3 682; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 683; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2 684; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2 685; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 686; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0 687; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 688; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2 689; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 690; AVX512BW-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 691; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 692; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2 693; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2 694; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 695; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0 696; AVX512BW-NEXT: vmovq %xmm0, %rax 697; AVX512BW-NEXT: vzeroupper 698; AVX512BW-NEXT: retq 699; 700; AVX512BWVL-LABEL: test_v16i64: 701; AVX512BWVL: # %bb.0: 702; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2 703; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2 704; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3 705; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3 706; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2 707; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2 708; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 709; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0 710; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 711; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2 712; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2 713; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3 714; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3 715; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2 716; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2 717; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 718; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0 719; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1 720; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2 721; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 722; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3 723; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 724; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2 725; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2 726; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 727; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 728; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 729; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2 730; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2 731; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3] 732; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3 733; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2 734; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2 735; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 736; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 737; AVX512BWVL-NEXT: vmovq %xmm0, %rax 738; AVX512BWVL-NEXT: vzeroupper 739; AVX512BWVL-NEXT: retq 740; 741; AVX512DQ-LABEL: test_v16i64: 742; AVX512DQ: # %bb.0: 743; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0 744; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1 745; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0 746; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 747; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0 748; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 749; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0 750; AVX512DQ-NEXT: vmovq %xmm0, %rax 751; AVX512DQ-NEXT: vzeroupper 752; AVX512DQ-NEXT: retq 753; 754; AVX512DQVL-LABEL: test_v16i64: 755; AVX512DQVL: # %bb.0: 756; AVX512DQVL-NEXT: vpmullq %zmm1, %zmm0, %zmm0 757; AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 758; AVX512DQVL-NEXT: vpmullq %zmm1, %zmm0, %zmm0 759; AVX512DQVL-NEXT: vextracti128 $1, %ymm0, %xmm1 760; AVX512DQVL-NEXT: vpmullq %xmm1, %xmm0, %xmm0 761; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 762; AVX512DQVL-NEXT: vpmullq %xmm1, %xmm0, %xmm0 763; AVX512DQVL-NEXT: vmovq %xmm0, %rax 764; AVX512DQVL-NEXT: vzeroupper 765; AVX512DQVL-NEXT: retq 766 %1 = call i64 @llvm.vector.reduce.mul.v16i64(<16 x i64> %a0) 767 ret i64 %1 768} 769 770; 771; vXi32 772; 773 774define i32 @test_v2i32(<2 x i32> %a0) { 775; SSE2-LABEL: test_v2i32: 776; SSE2: # %bb.0: 777; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 778; SSE2-NEXT: pmuludq %xmm0, %xmm1 779; SSE2-NEXT: movd %xmm1, %eax 780; SSE2-NEXT: retq 781; 782; SSE41-LABEL: test_v2i32: 783; SSE41: # %bb.0: 784; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 785; SSE41-NEXT: pmulld %xmm0, %xmm1 786; SSE41-NEXT: movd %xmm1, %eax 787; SSE41-NEXT: retq 788; 789; AVX-LABEL: test_v2i32: 790; AVX: # %bb.0: 791; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 792; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 793; AVX-NEXT: vmovd %xmm0, %eax 794; AVX-NEXT: retq 795 %1 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %a0) 796 ret i32 %1 797} 798 799define i32 @test_v4i32(<4 x i32> %a0) { 800; SSE2-LABEL: test_v4i32: 801; SSE2: # %bb.0: 802; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 803; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3] 804; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 805; SSE2-NEXT: pmuludq %xmm2, %xmm3 806; SSE2-NEXT: pmuludq %xmm0, %xmm1 807; SSE2-NEXT: pmuludq %xmm3, %xmm1 808; SSE2-NEXT: movd %xmm1, %eax 809; SSE2-NEXT: retq 810; 811; SSE41-LABEL: test_v4i32: 812; SSE41: # %bb.0: 813; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 814; SSE41-NEXT: pmulld %xmm0, %xmm1 815; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 816; SSE41-NEXT: pmulld %xmm1, %xmm0 817; SSE41-NEXT: movd %xmm0, %eax 818; SSE41-NEXT: retq 819; 820; AVX-LABEL: test_v4i32: 821; AVX: # %bb.0: 822; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 823; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 824; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 825; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 826; AVX-NEXT: vmovd %xmm0, %eax 827; AVX-NEXT: retq 828 %1 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %a0) 829 ret i32 %1 830} 831 832define i32 @test_v8i32(<8 x i32> %a0) { 833; SSE2-LABEL: test_v8i32: 834; SSE2: # %bb.0: 835; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 836; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 837; SSE2-NEXT: pmuludq %xmm2, %xmm3 838; SSE2-NEXT: pmuludq %xmm1, %xmm0 839; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 840; SSE2-NEXT: pmuludq %xmm0, %xmm1 841; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,2,2,2] 842; SSE2-NEXT: pmuludq %xmm3, %xmm0 843; SSE2-NEXT: pmuludq %xmm1, %xmm0 844; SSE2-NEXT: movd %xmm0, %eax 845; SSE2-NEXT: retq 846; 847; SSE41-LABEL: test_v8i32: 848; SSE41: # %bb.0: 849; SSE41-NEXT: pmulld %xmm1, %xmm0 850; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 851; SSE41-NEXT: pmulld %xmm0, %xmm1 852; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 853; SSE41-NEXT: pmulld %xmm1, %xmm0 854; SSE41-NEXT: movd %xmm0, %eax 855; SSE41-NEXT: retq 856; 857; AVX1-LABEL: test_v8i32: 858; AVX1: # %bb.0: 859; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 860; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 861; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 862; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 863; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 864; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 865; AVX1-NEXT: vmovd %xmm0, %eax 866; AVX1-NEXT: vzeroupper 867; AVX1-NEXT: retq 868; 869; AVX2-LABEL: test_v8i32: 870; AVX2: # %bb.0: 871; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 872; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 873; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 874; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 875; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 876; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 877; AVX2-NEXT: vmovd %xmm0, %eax 878; AVX2-NEXT: vzeroupper 879; AVX2-NEXT: retq 880; 881; AVX512-LABEL: test_v8i32: 882; AVX512: # %bb.0: 883; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 884; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0 885; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 886; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0 887; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 888; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0 889; AVX512-NEXT: vmovd %xmm0, %eax 890; AVX512-NEXT: vzeroupper 891; AVX512-NEXT: retq 892 %1 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %a0) 893 ret i32 %1 894} 895 896define i32 @test_v16i32(<16 x i32> %a0) { 897; SSE2-LABEL: test_v16i32: 898; SSE2: # %bb.0: 899; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3] 900; SSE2-NEXT: pmuludq %xmm3, %xmm1 901; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3] 902; SSE2-NEXT: pmuludq %xmm2, %xmm0 903; SSE2-NEXT: pmuludq %xmm1, %xmm0 904; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 905; SSE2-NEXT: pmuludq %xmm4, %xmm1 906; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 907; SSE2-NEXT: pmuludq %xmm5, %xmm2 908; SSE2-NEXT: pmuludq %xmm1, %xmm2 909; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 910; SSE2-NEXT: pmuludq %xmm0, %xmm1 911; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,2,2,2] 912; SSE2-NEXT: pmuludq %xmm2, %xmm0 913; SSE2-NEXT: pmuludq %xmm1, %xmm0 914; SSE2-NEXT: movd %xmm0, %eax 915; SSE2-NEXT: retq 916; 917; SSE41-LABEL: test_v16i32: 918; SSE41: # %bb.0: 919; SSE41-NEXT: pmulld %xmm3, %xmm1 920; SSE41-NEXT: pmulld %xmm2, %xmm0 921; SSE41-NEXT: pmulld %xmm1, %xmm0 922; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 923; SSE41-NEXT: pmulld %xmm0, %xmm1 924; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 925; SSE41-NEXT: pmulld %xmm1, %xmm0 926; SSE41-NEXT: movd %xmm0, %eax 927; SSE41-NEXT: retq 928; 929; AVX1-LABEL: test_v16i32: 930; AVX1: # %bb.0: 931; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 932; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 933; AVX1-NEXT: vpmulld %xmm2, %xmm3, %xmm2 934; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 935; AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0 936; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 937; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 938; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 939; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 940; AVX1-NEXT: vmovd %xmm0, %eax 941; AVX1-NEXT: vzeroupper 942; AVX1-NEXT: retq 943; 944; AVX2-LABEL: test_v16i32: 945; AVX2: # %bb.0: 946; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0 947; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 948; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 949; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 950; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 951; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 952; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 953; AVX2-NEXT: vmovd %xmm0, %eax 954; AVX2-NEXT: vzeroupper 955; AVX2-NEXT: retq 956; 957; AVX512-LABEL: test_v16i32: 958; AVX512: # %bb.0: 959; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 960; AVX512-NEXT: vpmulld %zmm1, %zmm0, %zmm0 961; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 962; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0 963; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 964; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0 965; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 966; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0 967; AVX512-NEXT: vmovd %xmm0, %eax 968; AVX512-NEXT: vzeroupper 969; AVX512-NEXT: retq 970 %1 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %a0) 971 ret i32 %1 972} 973 974define i32 @test_v32i32(<32 x i32> %a0) { 975; SSE2-LABEL: test_v32i32: 976; SSE2: # %bb.0: 977; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3] 978; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm2[1,1,3,3] 979; SSE2-NEXT: pmuludq %xmm8, %xmm9 980; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm4[1,1,3,3] 981; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm0[1,1,3,3] 982; SSE2-NEXT: pmuludq %xmm8, %xmm10 983; SSE2-NEXT: pmuludq %xmm9, %xmm10 984; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[1,1,3,3] 985; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm3[1,1,3,3] 986; SSE2-NEXT: pmuludq %xmm8, %xmm9 987; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm5[1,1,3,3] 988; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm1[1,1,3,3] 989; SSE2-NEXT: pmuludq %xmm8, %xmm11 990; SSE2-NEXT: pmuludq %xmm9, %xmm11 991; SSE2-NEXT: pmuludq %xmm10, %xmm11 992; SSE2-NEXT: pmuludq %xmm6, %xmm2 993; SSE2-NEXT: pmuludq %xmm4, %xmm0 994; SSE2-NEXT: pmuludq %xmm2, %xmm0 995; SSE2-NEXT: pmuludq %xmm7, %xmm3 996; SSE2-NEXT: pmuludq %xmm5, %xmm1 997; SSE2-NEXT: pmuludq %xmm3, %xmm1 998; SSE2-NEXT: pmuludq %xmm0, %xmm1 999; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1000; SSE2-NEXT: pmuludq %xmm1, %xmm0 1001; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm11[2,2,2,2] 1002; SSE2-NEXT: pmuludq %xmm11, %xmm1 1003; SSE2-NEXT: pmuludq %xmm0, %xmm1 1004; SSE2-NEXT: movd %xmm1, %eax 1005; SSE2-NEXT: retq 1006; 1007; SSE41-LABEL: test_v32i32: 1008; SSE41: # %bb.0: 1009; SSE41-NEXT: pmulld %xmm6, %xmm2 1010; SSE41-NEXT: pmulld %xmm4, %xmm0 1011; SSE41-NEXT: pmulld %xmm2, %xmm0 1012; SSE41-NEXT: pmulld %xmm7, %xmm3 1013; SSE41-NEXT: pmulld %xmm5, %xmm1 1014; SSE41-NEXT: pmulld %xmm3, %xmm1 1015; SSE41-NEXT: pmulld %xmm0, %xmm1 1016; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1017; SSE41-NEXT: pmulld %xmm1, %xmm0 1018; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1019; SSE41-NEXT: pmulld %xmm0, %xmm1 1020; SSE41-NEXT: movd %xmm1, %eax 1021; SSE41-NEXT: retq 1022; 1023; AVX1-LABEL: test_v32i32: 1024; AVX1: # %bb.0: 1025; AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm4 1026; AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm5 1027; AVX1-NEXT: vpmulld %xmm4, %xmm5, %xmm4 1028; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 1029; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1030; AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm1 1031; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 1032; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1033; AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0 1034; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1035; AVX1-NEXT: vpmulld %xmm0, %xmm4, %xmm0 1036; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1037; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1038; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1039; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1040; AVX1-NEXT: vmovd %xmm0, %eax 1041; AVX1-NEXT: vzeroupper 1042; AVX1-NEXT: retq 1043; 1044; AVX2-LABEL: test_v32i32: 1045; AVX2: # %bb.0: 1046; AVX2-NEXT: vpmulld %ymm3, %ymm1, %ymm1 1047; AVX2-NEXT: vpmulld %ymm2, %ymm0, %ymm0 1048; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0 1049; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1050; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1051; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1052; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1053; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1054; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1055; AVX2-NEXT: vmovd %xmm0, %eax 1056; AVX2-NEXT: vzeroupper 1057; AVX2-NEXT: retq 1058; 1059; AVX512-LABEL: test_v32i32: 1060; AVX512: # %bb.0: 1061; AVX512-NEXT: vpmulld %zmm1, %zmm0, %zmm0 1062; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1063; AVX512-NEXT: vpmulld %zmm1, %zmm0, %zmm0 1064; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1065; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1066; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1067; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1068; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1069; AVX512-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1070; AVX512-NEXT: vmovd %xmm0, %eax 1071; AVX512-NEXT: vzeroupper 1072; AVX512-NEXT: retq 1073 %1 = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> %a0) 1074 ret i32 %1 1075} 1076 1077; 1078; vXi16 1079; 1080 1081define i16 @test_v2i16(<2 x i16> %a0) { 1082; SSE-LABEL: test_v2i16: 1083; SSE: # %bb.0: 1084; SSE-NEXT: movdqa %xmm0, %xmm1 1085; SSE-NEXT: psrld $16, %xmm1 1086; SSE-NEXT: pmullw %xmm0, %xmm1 1087; SSE-NEXT: movd %xmm1, %eax 1088; SSE-NEXT: # kill: def $ax killed $ax killed $eax 1089; SSE-NEXT: retq 1090; 1091; AVX-LABEL: test_v2i16: 1092; AVX: # %bb.0: 1093; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 1094; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1095; AVX-NEXT: vmovd %xmm0, %eax 1096; AVX-NEXT: # kill: def $ax killed $ax killed $eax 1097; AVX-NEXT: retq 1098 %1 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> %a0) 1099 ret i16 %1 1100} 1101 1102define i16 @test_v4i16(<4 x i16> %a0) { 1103; SSE-LABEL: test_v4i16: 1104; SSE: # %bb.0: 1105; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1106; SSE-NEXT: pmullw %xmm0, %xmm1 1107; SSE-NEXT: movdqa %xmm1, %xmm0 1108; SSE-NEXT: psrld $16, %xmm0 1109; SSE-NEXT: pmullw %xmm1, %xmm0 1110; SSE-NEXT: movd %xmm0, %eax 1111; SSE-NEXT: # kill: def $ax killed $ax killed $eax 1112; SSE-NEXT: retq 1113; 1114; AVX-LABEL: test_v4i16: 1115; AVX: # %bb.0: 1116; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1117; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1118; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 1119; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1120; AVX-NEXT: vmovd %xmm0, %eax 1121; AVX-NEXT: # kill: def $ax killed $ax killed $eax 1122; AVX-NEXT: retq 1123 %1 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> %a0) 1124 ret i16 %1 1125} 1126 1127define i16 @test_v8i16(<8 x i16> %a0) { 1128; SSE-LABEL: test_v8i16: 1129; SSE: # %bb.0: 1130; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1131; SSE-NEXT: pmullw %xmm0, %xmm1 1132; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1133; SSE-NEXT: pmullw %xmm1, %xmm0 1134; SSE-NEXT: movdqa %xmm0, %xmm1 1135; SSE-NEXT: psrld $16, %xmm1 1136; SSE-NEXT: pmullw %xmm0, %xmm1 1137; SSE-NEXT: movd %xmm1, %eax 1138; SSE-NEXT: # kill: def $ax killed $ax killed $eax 1139; SSE-NEXT: retq 1140; 1141; AVX-LABEL: test_v8i16: 1142; AVX: # %bb.0: 1143; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1144; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1145; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1146; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1147; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 1148; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1149; AVX-NEXT: vmovd %xmm0, %eax 1150; AVX-NEXT: # kill: def $ax killed $ax killed $eax 1151; AVX-NEXT: retq 1152 %1 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %a0) 1153 ret i16 %1 1154} 1155 1156define i16 @test_v16i16(<16 x i16> %a0) { 1157; SSE-LABEL: test_v16i16: 1158; SSE: # %bb.0: 1159; SSE-NEXT: pmullw %xmm1, %xmm0 1160; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1161; SSE-NEXT: pmullw %xmm0, %xmm1 1162; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1163; SSE-NEXT: pmullw %xmm1, %xmm0 1164; SSE-NEXT: movdqa %xmm0, %xmm1 1165; SSE-NEXT: psrld $16, %xmm1 1166; SSE-NEXT: pmullw %xmm0, %xmm1 1167; SSE-NEXT: movd %xmm1, %eax 1168; SSE-NEXT: # kill: def $ax killed $ax killed $eax 1169; SSE-NEXT: retq 1170; 1171; AVX1-LABEL: test_v16i16: 1172; AVX1: # %bb.0: 1173; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1174; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1175; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1176; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1177; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1178; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1179; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 1180; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1181; AVX1-NEXT: vmovd %xmm0, %eax 1182; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 1183; AVX1-NEXT: vzeroupper 1184; AVX1-NEXT: retq 1185; 1186; AVX2-LABEL: test_v16i16: 1187; AVX2: # %bb.0: 1188; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1189; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1190; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1191; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1192; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1193; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1194; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 1195; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1196; AVX2-NEXT: vmovd %xmm0, %eax 1197; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 1198; AVX2-NEXT: vzeroupper 1199; AVX2-NEXT: retq 1200; 1201; AVX512-LABEL: test_v16i16: 1202; AVX512: # %bb.0: 1203; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1204; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1205; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1206; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1207; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1208; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1209; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 1210; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1211; AVX512-NEXT: vmovd %xmm0, %eax 1212; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 1213; AVX512-NEXT: vzeroupper 1214; AVX512-NEXT: retq 1215 %1 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %a0) 1216 ret i16 %1 1217} 1218 1219define i16 @test_v32i16(<32 x i16> %a0) { 1220; SSE-LABEL: test_v32i16: 1221; SSE: # %bb.0: 1222; SSE-NEXT: pmullw %xmm3, %xmm1 1223; SSE-NEXT: pmullw %xmm2, %xmm0 1224; SSE-NEXT: pmullw %xmm1, %xmm0 1225; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1226; SSE-NEXT: pmullw %xmm0, %xmm1 1227; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1228; SSE-NEXT: pmullw %xmm1, %xmm0 1229; SSE-NEXT: movdqa %xmm0, %xmm1 1230; SSE-NEXT: psrld $16, %xmm1 1231; SSE-NEXT: pmullw %xmm0, %xmm1 1232; SSE-NEXT: movd %xmm1, %eax 1233; SSE-NEXT: # kill: def $ax killed $ax killed $eax 1234; SSE-NEXT: retq 1235; 1236; AVX1-LABEL: test_v32i16: 1237; AVX1: # %bb.0: 1238; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1239; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1240; AVX1-NEXT: vpmullw %xmm2, %xmm3, %xmm2 1241; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1242; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0 1243; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1244; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1245; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1246; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1247; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 1248; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1249; AVX1-NEXT: vmovd %xmm0, %eax 1250; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 1251; AVX1-NEXT: vzeroupper 1252; AVX1-NEXT: retq 1253; 1254; AVX2-LABEL: test_v32i16: 1255; AVX2: # %bb.0: 1256; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0 1257; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1258; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1259; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1260; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1261; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1262; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1263; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 1264; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1265; AVX2-NEXT: vmovd %xmm0, %eax 1266; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 1267; AVX2-NEXT: vzeroupper 1268; AVX2-NEXT: retq 1269; 1270; AVX512BW-LABEL: test_v32i16: 1271; AVX512BW: # %bb.0: 1272; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1273; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0 1274; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 1275; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1276; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1277; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1278; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1279; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1280; AVX512BW-NEXT: vpsrld $16, %xmm0, %xmm1 1281; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1282; AVX512BW-NEXT: vmovd %xmm0, %eax 1283; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax 1284; AVX512BW-NEXT: vzeroupper 1285; AVX512BW-NEXT: retq 1286; 1287; AVX512BWVL-LABEL: test_v32i16: 1288; AVX512BWVL: # %bb.0: 1289; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1290; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0 1291; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1 1292; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1293; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1294; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1295; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1296; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1297; AVX512BWVL-NEXT: vpsrld $16, %xmm0, %xmm1 1298; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1299; AVX512BWVL-NEXT: vmovd %xmm0, %eax 1300; AVX512BWVL-NEXT: # kill: def $ax killed $ax killed $eax 1301; AVX512BWVL-NEXT: vzeroupper 1302; AVX512BWVL-NEXT: retq 1303; 1304; AVX512DQ-LABEL: test_v32i16: 1305; AVX512DQ: # %bb.0: 1306; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1307; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0 1308; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 1309; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1310; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1311; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1312; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1313; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1314; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm1 1315; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1316; AVX512DQ-NEXT: vmovd %xmm0, %eax 1317; AVX512DQ-NEXT: # kill: def $ax killed $ax killed $eax 1318; AVX512DQ-NEXT: vzeroupper 1319; AVX512DQ-NEXT: retq 1320; 1321; AVX512DQVL-LABEL: test_v32i16: 1322; AVX512DQVL: # %bb.0: 1323; AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1324; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 1325; AVX512DQVL-NEXT: vextracti128 $1, %ymm0, %xmm1 1326; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1327; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1328; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1329; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1330; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1331; AVX512DQVL-NEXT: vpsrld $16, %xmm0, %xmm1 1332; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1333; AVX512DQVL-NEXT: vmovd %xmm0, %eax 1334; AVX512DQVL-NEXT: # kill: def $ax killed $ax killed $eax 1335; AVX512DQVL-NEXT: vzeroupper 1336; AVX512DQVL-NEXT: retq 1337 %1 = call i16 @llvm.vector.reduce.mul.v32i16(<32 x i16> %a0) 1338 ret i16 %1 1339} 1340 1341define i16 @test_v64i16(<64 x i16> %a0) { 1342; SSE-LABEL: test_v64i16: 1343; SSE: # %bb.0: 1344; SSE-NEXT: pmullw %xmm6, %xmm2 1345; SSE-NEXT: pmullw %xmm4, %xmm0 1346; SSE-NEXT: pmullw %xmm2, %xmm0 1347; SSE-NEXT: pmullw %xmm7, %xmm3 1348; SSE-NEXT: pmullw %xmm5, %xmm1 1349; SSE-NEXT: pmullw %xmm3, %xmm1 1350; SSE-NEXT: pmullw %xmm0, %xmm1 1351; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1352; SSE-NEXT: pmullw %xmm1, %xmm0 1353; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1354; SSE-NEXT: pmullw %xmm0, %xmm1 1355; SSE-NEXT: movdqa %xmm1, %xmm0 1356; SSE-NEXT: psrld $16, %xmm0 1357; SSE-NEXT: pmullw %xmm1, %xmm0 1358; SSE-NEXT: movd %xmm0, %eax 1359; SSE-NEXT: # kill: def $ax killed $ax killed $eax 1360; SSE-NEXT: retq 1361; 1362; AVX1-LABEL: test_v64i16: 1363; AVX1: # %bb.0: 1364; AVX1-NEXT: vpmullw %xmm3, %xmm1, %xmm4 1365; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm5 1366; AVX1-NEXT: vpmullw %xmm4, %xmm5, %xmm4 1367; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 1368; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1369; AVX1-NEXT: vpmullw %xmm3, %xmm1, %xmm1 1370; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 1371; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1372; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0 1373; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1374; AVX1-NEXT: vpmullw %xmm0, %xmm4, %xmm0 1375; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1376; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1377; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1378; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1379; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 1380; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1381; AVX1-NEXT: vmovd %xmm0, %eax 1382; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 1383; AVX1-NEXT: vzeroupper 1384; AVX1-NEXT: retq 1385; 1386; AVX2-LABEL: test_v64i16: 1387; AVX2: # %bb.0: 1388; AVX2-NEXT: vpmullw %ymm3, %ymm1, %ymm1 1389; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0 1390; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0 1391; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1392; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1393; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1394; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1395; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1396; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1397; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 1398; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1399; AVX2-NEXT: vmovd %xmm0, %eax 1400; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 1401; AVX2-NEXT: vzeroupper 1402; AVX2-NEXT: retq 1403; 1404; AVX512BW-LABEL: test_v64i16: 1405; AVX512BW: # %bb.0: 1406; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0 1407; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1408; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0 1409; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 1410; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1411; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1412; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1413; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1414; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1415; AVX512BW-NEXT: vpsrld $16, %xmm0, %xmm1 1416; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1417; AVX512BW-NEXT: vmovd %xmm0, %eax 1418; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax 1419; AVX512BW-NEXT: vzeroupper 1420; AVX512BW-NEXT: retq 1421; 1422; AVX512BWVL-LABEL: test_v64i16: 1423; AVX512BWVL: # %bb.0: 1424; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0 1425; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1426; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0 1427; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1 1428; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1429; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1430; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1431; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1432; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1433; AVX512BWVL-NEXT: vpsrld $16, %xmm0, %xmm1 1434; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1435; AVX512BWVL-NEXT: vmovd %xmm0, %eax 1436; AVX512BWVL-NEXT: # kill: def $ax killed $ax killed $eax 1437; AVX512BWVL-NEXT: vzeroupper 1438; AVX512BWVL-NEXT: retq 1439; 1440; AVX512DQ-LABEL: test_v64i16: 1441; AVX512DQ: # %bb.0: 1442; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 1443; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3 1444; AVX512DQ-NEXT: vpmullw %ymm2, %ymm3, %ymm2 1445; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0 1446; AVX512DQ-NEXT: vpmullw %ymm2, %ymm0, %ymm0 1447; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 1448; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1449; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1450; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1451; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1452; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1453; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm1 1454; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1455; AVX512DQ-NEXT: vmovd %xmm0, %eax 1456; AVX512DQ-NEXT: # kill: def $ax killed $ax killed $eax 1457; AVX512DQ-NEXT: vzeroupper 1458; AVX512DQ-NEXT: retq 1459; 1460; AVX512DQVL-LABEL: test_v64i16: 1461; AVX512DQVL: # %bb.0: 1462; AVX512DQVL-NEXT: vextracti64x4 $1, %zmm1, %ymm2 1463; AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 1464; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm3, %ymm2 1465; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 1466; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0 1467; AVX512DQVL-NEXT: vextracti128 $1, %ymm0, %xmm1 1468; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1469; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1470; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1471; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1472; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1473; AVX512DQVL-NEXT: vpsrld $16, %xmm0, %xmm1 1474; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1475; AVX512DQVL-NEXT: vmovd %xmm0, %eax 1476; AVX512DQVL-NEXT: # kill: def $ax killed $ax killed $eax 1477; AVX512DQVL-NEXT: vzeroupper 1478; AVX512DQVL-NEXT: retq 1479 %1 = call i16 @llvm.vector.reduce.mul.v64i16(<64 x i16> %a0) 1480 ret i16 %1 1481} 1482 1483; 1484; vXi8 1485; 1486 1487define i8 @test_v2i8(<2 x i8> %a0) { 1488; SSE-LABEL: test_v2i8: 1489; SSE: # %bb.0: 1490; SSE-NEXT: movdqa %xmm0, %xmm1 1491; SSE-NEXT: psrlw $8, %xmm1 1492; SSE-NEXT: pmullw %xmm0, %xmm1 1493; SSE-NEXT: movd %xmm1, %eax 1494; SSE-NEXT: # kill: def $al killed $al killed $eax 1495; SSE-NEXT: retq 1496; 1497; AVX-LABEL: test_v2i8: 1498; AVX: # %bb.0: 1499; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 1500; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1501; AVX-NEXT: vmovd %xmm0, %eax 1502; AVX-NEXT: # kill: def $al killed $al killed $eax 1503; AVX-NEXT: retq 1504 %1 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> %a0) 1505 ret i8 %1 1506} 1507 1508define i8 @test_v4i8(<4 x i8> %a0) { 1509; SSE2-LABEL: test_v4i8: 1510; SSE2: # %bb.0: 1511; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1512; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1513; SSE2-NEXT: pmullw %xmm0, %xmm1 1514; SSE2-NEXT: movdqa %xmm1, %xmm0 1515; SSE2-NEXT: psrld $16, %xmm0 1516; SSE2-NEXT: pmullw %xmm1, %xmm0 1517; SSE2-NEXT: movd %xmm0, %eax 1518; SSE2-NEXT: # kill: def $al killed $al killed $eax 1519; SSE2-NEXT: retq 1520; 1521; SSE41-LABEL: test_v4i8: 1522; SSE41: # %bb.0: 1523; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1524; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1525; SSE41-NEXT: pmullw %xmm0, %xmm1 1526; SSE41-NEXT: movdqa %xmm1, %xmm0 1527; SSE41-NEXT: psrld $16, %xmm0 1528; SSE41-NEXT: pmullw %xmm1, %xmm0 1529; SSE41-NEXT: movd %xmm0, %eax 1530; SSE41-NEXT: # kill: def $al killed $al killed $eax 1531; SSE41-NEXT: retq 1532; 1533; AVX-LABEL: test_v4i8: 1534; AVX: # %bb.0: 1535; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1536; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1537; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1538; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 1539; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1540; AVX-NEXT: vmovd %xmm0, %eax 1541; AVX-NEXT: # kill: def $al killed $al killed $eax 1542; AVX-NEXT: retq 1543 %1 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> %a0) 1544 ret i8 %1 1545} 1546 1547define i8 @test_v8i8(<8 x i8> %a0) { 1548; SSE2-LABEL: test_v8i8: 1549; SSE2: # %bb.0: 1550; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1551; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1552; SSE2-NEXT: pmullw %xmm0, %xmm1 1553; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1554; SSE2-NEXT: pmullw %xmm1, %xmm0 1555; SSE2-NEXT: movdqa %xmm0, %xmm1 1556; SSE2-NEXT: psrld $16, %xmm1 1557; SSE2-NEXT: pmullw %xmm0, %xmm1 1558; SSE2-NEXT: movd %xmm1, %eax 1559; SSE2-NEXT: # kill: def $al killed $al killed $eax 1560; SSE2-NEXT: retq 1561; 1562; SSE41-LABEL: test_v8i8: 1563; SSE41: # %bb.0: 1564; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1565; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1566; SSE41-NEXT: pmullw %xmm0, %xmm1 1567; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1568; SSE41-NEXT: pmullw %xmm1, %xmm0 1569; SSE41-NEXT: movdqa %xmm0, %xmm1 1570; SSE41-NEXT: psrld $16, %xmm1 1571; SSE41-NEXT: pmullw %xmm0, %xmm1 1572; SSE41-NEXT: movd %xmm1, %eax 1573; SSE41-NEXT: # kill: def $al killed $al killed $eax 1574; SSE41-NEXT: retq 1575; 1576; AVX-LABEL: test_v8i8: 1577; AVX: # %bb.0: 1578; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1579; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1580; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1581; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1582; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1583; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 1584; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1585; AVX-NEXT: vmovd %xmm0, %eax 1586; AVX-NEXT: # kill: def $al killed $al killed $eax 1587; AVX-NEXT: retq 1588 %1 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> %a0) 1589 ret i8 %1 1590} 1591 1592define i8 @test_v16i8(<16 x i8> %a0) { 1593; SSE2-LABEL: test_v16i8: 1594; SSE2: # %bb.0: 1595; SSE2-NEXT: movdqa %xmm0, %xmm1 1596; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1597; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1598; SSE2-NEXT: pmullw %xmm1, %xmm0 1599; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1600; SSE2-NEXT: pmullw %xmm0, %xmm1 1601; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1602; SSE2-NEXT: pmullw %xmm1, %xmm0 1603; SSE2-NEXT: movdqa %xmm0, %xmm1 1604; SSE2-NEXT: psrld $16, %xmm1 1605; SSE2-NEXT: pmullw %xmm0, %xmm1 1606; SSE2-NEXT: movd %xmm1, %eax 1607; SSE2-NEXT: # kill: def $al killed $al killed $eax 1608; SSE2-NEXT: retq 1609; 1610; SSE41-LABEL: test_v16i8: 1611; SSE41: # %bb.0: 1612; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1613; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1614; SSE41-NEXT: pmullw %xmm1, %xmm0 1615; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1616; SSE41-NEXT: pmullw %xmm0, %xmm1 1617; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1618; SSE41-NEXT: pmullw %xmm1, %xmm0 1619; SSE41-NEXT: movdqa %xmm0, %xmm1 1620; SSE41-NEXT: psrld $16, %xmm1 1621; SSE41-NEXT: pmullw %xmm0, %xmm1 1622; SSE41-NEXT: movd %xmm1, %eax 1623; SSE41-NEXT: # kill: def $al killed $al killed $eax 1624; SSE41-NEXT: retq 1625; 1626; AVX-LABEL: test_v16i8: 1627; AVX: # %bb.0: 1628; AVX-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1629; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1630; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1631; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1632; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1633; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1634; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1635; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 1636; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1637; AVX-NEXT: vmovd %xmm0, %eax 1638; AVX-NEXT: # kill: def $al killed $al killed $eax 1639; AVX-NEXT: retq 1640 %1 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %a0) 1641 ret i8 %1 1642} 1643 1644define i8 @test_v32i8(<32 x i8> %a0) { 1645; SSE2-LABEL: test_v32i8: 1646; SSE2: # %bb.0: 1647; SSE2-NEXT: movdqa %xmm1, %xmm2 1648; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1649; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1650; SSE2-NEXT: pmullw %xmm2, %xmm1 1651; SSE2-NEXT: movdqa %xmm0, %xmm2 1652; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1653; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1654; SSE2-NEXT: pmullw %xmm2, %xmm0 1655; SSE2-NEXT: pmullw %xmm1, %xmm0 1656; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1657; SSE2-NEXT: pmullw %xmm0, %xmm1 1658; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1659; SSE2-NEXT: pmullw %xmm1, %xmm0 1660; SSE2-NEXT: movdqa %xmm0, %xmm1 1661; SSE2-NEXT: psrld $16, %xmm1 1662; SSE2-NEXT: pmullw %xmm0, %xmm1 1663; SSE2-NEXT: movd %xmm1, %eax 1664; SSE2-NEXT: # kill: def $al killed $al killed $eax 1665; SSE2-NEXT: retq 1666; 1667; SSE41-LABEL: test_v32i8: 1668; SSE41: # %bb.0: 1669; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 1670; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1671; SSE41-NEXT: pmullw %xmm2, %xmm1 1672; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1673; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1674; SSE41-NEXT: pmullw %xmm2, %xmm0 1675; SSE41-NEXT: pmullw %xmm1, %xmm0 1676; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1677; SSE41-NEXT: pmullw %xmm0, %xmm1 1678; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1679; SSE41-NEXT: pmullw %xmm1, %xmm0 1680; SSE41-NEXT: movdqa %xmm0, %xmm1 1681; SSE41-NEXT: psrld $16, %xmm1 1682; SSE41-NEXT: pmullw %xmm0, %xmm1 1683; SSE41-NEXT: movd %xmm1, %eax 1684; SSE41-NEXT: # kill: def $al killed $al killed $eax 1685; SSE41-NEXT: retq 1686; 1687; AVX1-LABEL: test_v32i8: 1688; AVX1: # %bb.0: 1689; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1690; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1691; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 1692; AVX1-NEXT: vpmullw %xmm2, %xmm1, %xmm1 1693; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1694; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1695; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0 1696; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1697; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1698; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1699; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1700; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1701; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 1702; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1703; AVX1-NEXT: vmovd %xmm0, %eax 1704; AVX1-NEXT: # kill: def $al killed $al killed $eax 1705; AVX1-NEXT: vzeroupper 1706; AVX1-NEXT: retq 1707; 1708; AVX2-LABEL: test_v32i8: 1709; AVX2: # %bb.0: 1710; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 1711; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 1712; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0 1713; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1714; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1715; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1716; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1717; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1718; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1719; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 1720; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1721; AVX2-NEXT: vmovd %xmm0, %eax 1722; AVX2-NEXT: # kill: def $al killed $al killed $eax 1723; AVX2-NEXT: vzeroupper 1724; AVX2-NEXT: retq 1725; 1726; AVX512-LABEL: test_v32i8: 1727; AVX512: # %bb.0: 1728; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 1729; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 1730; AVX512-NEXT: vpmullw %ymm1, %ymm0, %ymm0 1731; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1732; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1733; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1734; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1735; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1736; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1737; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 1738; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1739; AVX512-NEXT: vmovd %xmm0, %eax 1740; AVX512-NEXT: # kill: def $al killed $al killed $eax 1741; AVX512-NEXT: vzeroupper 1742; AVX512-NEXT: retq 1743 %1 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %a0) 1744 ret i8 %1 1745} 1746 1747define i8 @test_v64i8(<64 x i8> %a0) { 1748; SSE2-LABEL: test_v64i8: 1749; SSE2: # %bb.0: 1750; SSE2-NEXT: movdqa %xmm3, %xmm4 1751; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1752; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1753; SSE2-NEXT: pmullw %xmm4, %xmm3 1754; SSE2-NEXT: movdqa %xmm1, %xmm4 1755; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1756; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1757; SSE2-NEXT: pmullw %xmm4, %xmm1 1758; SSE2-NEXT: pmullw %xmm3, %xmm1 1759; SSE2-NEXT: movdqa %xmm2, %xmm3 1760; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1761; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1762; SSE2-NEXT: pmullw %xmm3, %xmm2 1763; SSE2-NEXT: movdqa %xmm0, %xmm3 1764; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1765; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1766; SSE2-NEXT: pmullw %xmm3, %xmm0 1767; SSE2-NEXT: pmullw %xmm2, %xmm0 1768; SSE2-NEXT: pmullw %xmm1, %xmm0 1769; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1770; SSE2-NEXT: pmullw %xmm0, %xmm1 1771; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1772; SSE2-NEXT: pmullw %xmm1, %xmm0 1773; SSE2-NEXT: movdqa %xmm0, %xmm1 1774; SSE2-NEXT: psrld $16, %xmm1 1775; SSE2-NEXT: pmullw %xmm0, %xmm1 1776; SSE2-NEXT: movd %xmm1, %eax 1777; SSE2-NEXT: # kill: def $al killed $al killed $eax 1778; SSE2-NEXT: retq 1779; 1780; SSE41-LABEL: test_v64i8: 1781; SSE41: # %bb.0: 1782; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero 1783; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1784; SSE41-NEXT: pmullw %xmm4, %xmm3 1785; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 1786; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1787; SSE41-NEXT: pmullw %xmm4, %xmm1 1788; SSE41-NEXT: pmullw %xmm3, %xmm1 1789; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero 1790; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1791; SSE41-NEXT: pmullw %xmm3, %xmm2 1792; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1793; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1794; SSE41-NEXT: pmullw %xmm3, %xmm0 1795; SSE41-NEXT: pmullw %xmm2, %xmm0 1796; SSE41-NEXT: pmullw %xmm1, %xmm0 1797; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1798; SSE41-NEXT: pmullw %xmm0, %xmm1 1799; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1800; SSE41-NEXT: pmullw %xmm1, %xmm0 1801; SSE41-NEXT: movdqa %xmm0, %xmm1 1802; SSE41-NEXT: psrld $16, %xmm1 1803; SSE41-NEXT: pmullw %xmm0, %xmm1 1804; SSE41-NEXT: movd %xmm1, %eax 1805; SSE41-NEXT: # kill: def $al killed $al killed $eax 1806; SSE41-NEXT: retq 1807; 1808; AVX1-LABEL: test_v64i8: 1809; AVX1: # %bb.0: 1810; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1811; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1812; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero 1813; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2 1814; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1815; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1816; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero 1817; AVX1-NEXT: vpmullw %xmm4, %xmm3, %xmm3 1818; AVX1-NEXT: vpmullw %xmm2, %xmm3, %xmm2 1819; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1820; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 1821; AVX1-NEXT: vpmullw %xmm3, %xmm1, %xmm1 1822; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1823; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1824; AVX1-NEXT: vpmullw %xmm3, %xmm0, %xmm0 1825; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1826; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0 1827; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1828; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1829; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1830; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1831; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 1832; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1833; AVX1-NEXT: vmovd %xmm0, %eax 1834; AVX1-NEXT: # kill: def $al killed $al killed $eax 1835; AVX1-NEXT: vzeroupper 1836; AVX1-NEXT: retq 1837; 1838; AVX2-LABEL: test_v64i8: 1839; AVX2: # %bb.0: 1840; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 1841; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 1842; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1 1843; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 1844; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 1845; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0 1846; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0 1847; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1848; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1849; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1850; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1851; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1852; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1853; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 1854; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1855; AVX2-NEXT: vmovd %xmm0, %eax 1856; AVX2-NEXT: # kill: def $al killed $al killed $eax 1857; AVX2-NEXT: vzeroupper 1858; AVX2-NEXT: retq 1859; 1860; AVX512BW-LABEL: test_v64i8: 1861; AVX512BW: # %bb.0: 1862; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63] 1863; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55] 1864; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0 1865; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1866; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0 1867; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 1868; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1869; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1870; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1871; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1872; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1873; AVX512BW-NEXT: vpsrld $16, %xmm0, %xmm1 1874; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1875; AVX512BW-NEXT: vmovd %xmm0, %eax 1876; AVX512BW-NEXT: # kill: def $al killed $al killed $eax 1877; AVX512BW-NEXT: vzeroupper 1878; AVX512BW-NEXT: retq 1879; 1880; AVX512BWVL-LABEL: test_v64i8: 1881; AVX512BWVL: # %bb.0: 1882; AVX512BWVL-NEXT: vpunpckhbw {{.*#+}} zmm1 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63] 1883; AVX512BWVL-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55] 1884; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0 1885; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1886; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 1887; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1 1888; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1889; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1890; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1891; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1892; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1893; AVX512BWVL-NEXT: vpsrld $16, %xmm0, %xmm1 1894; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1895; AVX512BWVL-NEXT: vmovd %xmm0, %eax 1896; AVX512BWVL-NEXT: # kill: def $al killed $al killed $eax 1897; AVX512BWVL-NEXT: vzeroupper 1898; AVX512BWVL-NEXT: retq 1899; 1900; AVX512DQ-LABEL: test_v64i8: 1901; AVX512DQ: # %bb.0: 1902; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1903; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 1904; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 1905; AVX512DQ-NEXT: vpmullw %ymm2, %ymm1, %ymm1 1906; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 1907; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 1908; AVX512DQ-NEXT: vpmullw %ymm2, %ymm0, %ymm0 1909; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0 1910; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 1911; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1912; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1913; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1914; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1915; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1916; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm1 1917; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1918; AVX512DQ-NEXT: vmovd %xmm0, %eax 1919; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax 1920; AVX512DQ-NEXT: vzeroupper 1921; AVX512DQ-NEXT: retq 1922; 1923; AVX512DQVL-LABEL: test_v64i8: 1924; AVX512DQVL: # %bb.0: 1925; AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1926; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 1927; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 1928; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm1, %ymm1 1929; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 1930; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 1931; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0 1932; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 1933; AVX512DQVL-NEXT: vextracti128 $1, %ymm0, %xmm1 1934; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1935; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1936; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1937; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1938; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1939; AVX512DQVL-NEXT: vpsrld $16, %xmm0, %xmm1 1940; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1941; AVX512DQVL-NEXT: vmovd %xmm0, %eax 1942; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax 1943; AVX512DQVL-NEXT: vzeroupper 1944; AVX512DQVL-NEXT: retq 1945 %1 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> %a0) 1946 ret i8 %1 1947} 1948 1949define i8 @test_v128i8(<128 x i8> %a0) { 1950; SSE2-LABEL: test_v128i8: 1951; SSE2: # %bb.0: 1952; SSE2-NEXT: movdqa %xmm7, %xmm8 1953; SSE2-NEXT: punpckhbw {{.*#+}} xmm8 = xmm8[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1954; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1955; SSE2-NEXT: pmullw %xmm8, %xmm7 1956; SSE2-NEXT: movdqa %xmm3, %xmm8 1957; SSE2-NEXT: punpckhbw {{.*#+}} xmm8 = xmm8[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1958; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1959; SSE2-NEXT: pmullw %xmm8, %xmm3 1960; SSE2-NEXT: pmullw %xmm7, %xmm3 1961; SSE2-NEXT: movdqa %xmm5, %xmm7 1962; SSE2-NEXT: punpckhbw {{.*#+}} xmm7 = xmm7[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1963; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1964; SSE2-NEXT: pmullw %xmm7, %xmm5 1965; SSE2-NEXT: movdqa %xmm1, %xmm7 1966; SSE2-NEXT: punpckhbw {{.*#+}} xmm7 = xmm7[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1967; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1968; SSE2-NEXT: pmullw %xmm7, %xmm1 1969; SSE2-NEXT: pmullw %xmm5, %xmm1 1970; SSE2-NEXT: pmullw %xmm3, %xmm1 1971; SSE2-NEXT: movdqa %xmm6, %xmm3 1972; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1973; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1974; SSE2-NEXT: pmullw %xmm3, %xmm6 1975; SSE2-NEXT: movdqa %xmm2, %xmm3 1976; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1977; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1978; SSE2-NEXT: pmullw %xmm3, %xmm2 1979; SSE2-NEXT: pmullw %xmm6, %xmm2 1980; SSE2-NEXT: movdqa %xmm4, %xmm3 1981; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1982; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1983; SSE2-NEXT: pmullw %xmm3, %xmm4 1984; SSE2-NEXT: movdqa %xmm0, %xmm3 1985; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1986; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1987; SSE2-NEXT: pmullw %xmm3, %xmm0 1988; SSE2-NEXT: pmullw %xmm4, %xmm0 1989; SSE2-NEXT: pmullw %xmm2, %xmm0 1990; SSE2-NEXT: pmullw %xmm1, %xmm0 1991; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1992; SSE2-NEXT: pmullw %xmm0, %xmm1 1993; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1994; SSE2-NEXT: pmullw %xmm1, %xmm0 1995; SSE2-NEXT: movdqa %xmm0, %xmm1 1996; SSE2-NEXT: psrld $16, %xmm1 1997; SSE2-NEXT: pmullw %xmm0, %xmm1 1998; SSE2-NEXT: movd %xmm1, %eax 1999; SSE2-NEXT: # kill: def $al killed $al killed $eax 2000; SSE2-NEXT: retq 2001; 2002; SSE41-LABEL: test_v128i8: 2003; SSE41: # %bb.0: 2004; SSE41-NEXT: pmovzxbw {{.*#+}} xmm8 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero 2005; SSE41-NEXT: punpckhbw {{.*#+}} xmm7 = xmm7[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2006; SSE41-NEXT: pmullw %xmm8, %xmm7 2007; SSE41-NEXT: pmovzxbw {{.*#+}} xmm8 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero 2008; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2009; SSE41-NEXT: pmullw %xmm8, %xmm3 2010; SSE41-NEXT: pmullw %xmm7, %xmm3 2011; SSE41-NEXT: pmovzxbw {{.*#+}} xmm7 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero 2012; SSE41-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2013; SSE41-NEXT: pmullw %xmm7, %xmm5 2014; SSE41-NEXT: pmovzxbw {{.*#+}} xmm7 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 2015; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2016; SSE41-NEXT: pmullw %xmm7, %xmm1 2017; SSE41-NEXT: pmullw %xmm5, %xmm1 2018; SSE41-NEXT: pmullw %xmm3, %xmm1 2019; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero 2020; SSE41-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2021; SSE41-NEXT: pmullw %xmm3, %xmm6 2022; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero 2023; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2024; SSE41-NEXT: pmullw %xmm3, %xmm2 2025; SSE41-NEXT: pmullw %xmm6, %xmm2 2026; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero 2027; SSE41-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2028; SSE41-NEXT: pmullw %xmm3, %xmm4 2029; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2030; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2031; SSE41-NEXT: pmullw %xmm3, %xmm0 2032; SSE41-NEXT: pmullw %xmm4, %xmm0 2033; SSE41-NEXT: pmullw %xmm2, %xmm0 2034; SSE41-NEXT: pmullw %xmm1, %xmm0 2035; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2036; SSE41-NEXT: pmullw %xmm0, %xmm1 2037; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 2038; SSE41-NEXT: pmullw %xmm1, %xmm0 2039; SSE41-NEXT: movdqa %xmm0, %xmm1 2040; SSE41-NEXT: psrld $16, %xmm1 2041; SSE41-NEXT: pmullw %xmm0, %xmm1 2042; SSE41-NEXT: movd %xmm1, %eax 2043; SSE41-NEXT: # kill: def $al killed $al killed $eax 2044; SSE41-NEXT: retq 2045; 2046; AVX1-LABEL: test_v128i8: 2047; AVX1: # %bb.0: 2048; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 2049; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2050; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero 2051; AVX1-NEXT: vpmullw %xmm5, %xmm4, %xmm4 2052; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 2053; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2054; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero 2055; AVX1-NEXT: vpmullw %xmm6, %xmm5, %xmm5 2056; AVX1-NEXT: vpmullw %xmm4, %xmm5, %xmm4 2057; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 2058; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2059; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero 2060; AVX1-NEXT: vpmullw %xmm6, %xmm5, %xmm5 2061; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 2062; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm7 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2063; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero 2064; AVX1-NEXT: vpmullw %xmm7, %xmm6, %xmm6 2065; AVX1-NEXT: vpmullw %xmm5, %xmm6, %xmm5 2066; AVX1-NEXT: vpmullw %xmm4, %xmm5, %xmm4 2067; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2068; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero 2069; AVX1-NEXT: vpmullw %xmm5, %xmm3, %xmm3 2070; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2071; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 2072; AVX1-NEXT: vpmullw %xmm5, %xmm1, %xmm1 2073; AVX1-NEXT: vpmullw %xmm3, %xmm1, %xmm1 2074; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2075; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero 2076; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2 2077; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2078; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2079; AVX1-NEXT: vpmullw %xmm3, %xmm0, %xmm0 2080; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0 2081; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2082; AVX1-NEXT: vpmullw %xmm4, %xmm0, %xmm0 2083; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2084; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2085; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 2086; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2087; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 2088; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2089; AVX1-NEXT: vmovd %xmm0, %eax 2090; AVX1-NEXT: # kill: def $al killed $al killed $eax 2091; AVX1-NEXT: vzeroupper 2092; AVX1-NEXT: retq 2093; 2094; AVX2-LABEL: test_v128i8: 2095; AVX2: # %bb.0: 2096; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 2097; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 2098; AVX2-NEXT: vpmullw %ymm4, %ymm3, %ymm3 2099; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 2100; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 2101; AVX2-NEXT: vpmullw %ymm4, %ymm1, %ymm1 2102; AVX2-NEXT: vpmullw %ymm3, %ymm1, %ymm1 2103; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 2104; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 2105; AVX2-NEXT: vpmullw %ymm3, %ymm2, %ymm2 2106; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 2107; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 2108; AVX2-NEXT: vpmullw %ymm3, %ymm0, %ymm0 2109; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0 2110; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0 2111; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2112; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2113; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2114; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2115; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 2116; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2117; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 2118; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2119; AVX2-NEXT: vmovd %xmm0, %eax 2120; AVX2-NEXT: # kill: def $al killed $al killed $eax 2121; AVX2-NEXT: vzeroupper 2122; AVX2-NEXT: retq 2123; 2124; AVX512BW-LABEL: test_v128i8: 2125; AVX512BW: # %bb.0: 2126; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63] 2127; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55] 2128; AVX512BW-NEXT: vpmullw %zmm2, %zmm1, %zmm1 2129; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63] 2130; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55] 2131; AVX512BW-NEXT: vpmullw %zmm2, %zmm0, %zmm0 2132; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0 2133; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 2134; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0 2135; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 2136; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2137; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2138; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2139; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 2140; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2141; AVX512BW-NEXT: vpsrld $16, %xmm0, %xmm1 2142; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2143; AVX512BW-NEXT: vmovd %xmm0, %eax 2144; AVX512BW-NEXT: # kill: def $al killed $al killed $eax 2145; AVX512BW-NEXT: vzeroupper 2146; AVX512BW-NEXT: retq 2147; 2148; AVX512BWVL-LABEL: test_v128i8: 2149; AVX512BWVL: # %bb.0: 2150; AVX512BWVL-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63] 2151; AVX512BWVL-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55] 2152; AVX512BWVL-NEXT: vpmullw %zmm2, %zmm1, %zmm1 2153; AVX512BWVL-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63] 2154; AVX512BWVL-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55] 2155; AVX512BWVL-NEXT: vpmullw %zmm2, %zmm0, %zmm0 2156; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0 2157; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 2158; AVX512BWVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 2159; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1 2160; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2161; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2162; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2163; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 2164; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2165; AVX512BWVL-NEXT: vpsrld $16, %xmm0, %xmm1 2166; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2167; AVX512BWVL-NEXT: vmovd %xmm0, %eax 2168; AVX512BWVL-NEXT: # kill: def $al killed $al killed $eax 2169; AVX512BWVL-NEXT: vzeroupper 2170; AVX512BWVL-NEXT: retq 2171; 2172; AVX512DQ-LABEL: test_v128i8: 2173; AVX512DQ: # %bb.0: 2174; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm2 2175; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 2176; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 2177; AVX512DQ-NEXT: vpmullw %ymm3, %ymm2, %ymm2 2178; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3 2179; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 2180; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 2181; AVX512DQ-NEXT: vpmullw %ymm4, %ymm3, %ymm3 2182; AVX512DQ-NEXT: vpmullw %ymm2, %ymm3, %ymm2 2183; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 2184; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 2185; AVX512DQ-NEXT: vpmullw %ymm3, %ymm1, %ymm1 2186; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 2187; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 2188; AVX512DQ-NEXT: vpmullw %ymm3, %ymm0, %ymm0 2189; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0 2190; AVX512DQ-NEXT: vpmullw %ymm2, %ymm0, %ymm0 2191; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 2192; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2193; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2194; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2195; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 2196; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2197; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm1 2198; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2199; AVX512DQ-NEXT: vmovd %xmm0, %eax 2200; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax 2201; AVX512DQ-NEXT: vzeroupper 2202; AVX512DQ-NEXT: retq 2203; 2204; AVX512DQVL-LABEL: test_v128i8: 2205; AVX512DQVL: # %bb.0: 2206; AVX512DQVL-NEXT: vextracti64x4 $1, %zmm1, %ymm2 2207; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 2208; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 2209; AVX512DQVL-NEXT: vpmullw %ymm3, %ymm2, %ymm2 2210; AVX512DQVL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 2211; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 2212; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 2213; AVX512DQVL-NEXT: vpmullw %ymm4, %ymm3, %ymm3 2214; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm3, %ymm2 2215; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 2216; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 2217; AVX512DQVL-NEXT: vpmullw %ymm3, %ymm1, %ymm1 2218; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 2219; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 2220; AVX512DQVL-NEXT: vpmullw %ymm3, %ymm0, %ymm0 2221; AVX512DQVL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 2222; AVX512DQVL-NEXT: vpmullw %ymm2, %ymm0, %ymm0 2223; AVX512DQVL-NEXT: vextracti128 $1, %ymm0, %xmm1 2224; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2225; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2226; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2227; AVX512DQVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 2228; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2229; AVX512DQVL-NEXT: vpsrld $16, %xmm0, %xmm1 2230; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2231; AVX512DQVL-NEXT: vmovd %xmm0, %eax 2232; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax 2233; AVX512DQVL-NEXT: vzeroupper 2234; AVX512DQVL-NEXT: retq 2235 %1 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> %a0) 2236 ret i8 %1 2237} 2238 2239; 2240; Legalization 2241; 2242 2243define i8 @illegal_v4i8(i8 %a0, ptr %a1) { 2244; SSE2-LABEL: illegal_v4i8: 2245; SSE2: # %bb.0: 2246; SSE2-NEXT: movl %edi, %eax 2247; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2248; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2249; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 2250; SSE2-NEXT: pmullw %xmm0, %xmm1 2251; SSE2-NEXT: movdqa %xmm1, %xmm0 2252; SSE2-NEXT: psrld $16, %xmm0 2253; SSE2-NEXT: pmullw %xmm1, %xmm0 2254; SSE2-NEXT: movd %xmm0, %ecx 2255; SSE2-NEXT: # kill: def $al killed $al killed $eax 2256; SSE2-NEXT: mulb %cl 2257; SSE2-NEXT: retq 2258; 2259; SSE41-LABEL: illegal_v4i8: 2260; SSE41: # %bb.0: 2261; SSE41-NEXT: movl %edi, %eax 2262; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2263; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2264; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 2265; SSE41-NEXT: pmullw %xmm0, %xmm1 2266; SSE41-NEXT: movdqa %xmm1, %xmm0 2267; SSE41-NEXT: psrld $16, %xmm0 2268; SSE41-NEXT: pmullw %xmm1, %xmm0 2269; SSE41-NEXT: movd %xmm0, %ecx 2270; SSE41-NEXT: # kill: def $al killed $al killed $eax 2271; SSE41-NEXT: mulb %cl 2272; SSE41-NEXT: retq 2273; 2274; AVX-LABEL: illegal_v4i8: 2275; AVX: # %bb.0: 2276; AVX-NEXT: movl %edi, %eax 2277; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2278; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2279; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 2280; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2281; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 2282; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2283; AVX-NEXT: vmovd %xmm0, %ecx 2284; AVX-NEXT: # kill: def $al killed $al killed $eax 2285; AVX-NEXT: mulb %cl 2286; AVX-NEXT: retq 2287 %ld = load <4 x i8>, ptr %a1, align 4 2288 %rdx = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> %ld) 2289 %mul = mul i8 %a0, %rdx 2290 ret i8 %mul 2291} 2292 2293define i8 @illegal_v8i8(i8 %a0, ptr %a1) { 2294; SSE2-LABEL: illegal_v8i8: 2295; SSE2: # %bb.0: 2296; SSE2-NEXT: movl %edi, %eax 2297; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2298; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2299; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2300; SSE2-NEXT: pmullw %xmm0, %xmm1 2301; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 2302; SSE2-NEXT: pmullw %xmm1, %xmm0 2303; SSE2-NEXT: movdqa %xmm0, %xmm1 2304; SSE2-NEXT: psrld $16, %xmm1 2305; SSE2-NEXT: pmullw %xmm0, %xmm1 2306; SSE2-NEXT: movd %xmm1, %ecx 2307; SSE2-NEXT: # kill: def $al killed $al killed $eax 2308; SSE2-NEXT: mulb %cl 2309; SSE2-NEXT: retq 2310; 2311; SSE41-LABEL: illegal_v8i8: 2312; SSE41: # %bb.0: 2313; SSE41-NEXT: movl %edi, %eax 2314; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 2315; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2316; SSE41-NEXT: pmullw %xmm0, %xmm1 2317; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 2318; SSE41-NEXT: pmullw %xmm1, %xmm0 2319; SSE41-NEXT: movdqa %xmm0, %xmm1 2320; SSE41-NEXT: psrld $16, %xmm1 2321; SSE41-NEXT: pmullw %xmm0, %xmm1 2322; SSE41-NEXT: movd %xmm1, %ecx 2323; SSE41-NEXT: # kill: def $al killed $al killed $eax 2324; SSE41-NEXT: mulb %cl 2325; SSE41-NEXT: retq 2326; 2327; AVX-LABEL: illegal_v8i8: 2328; AVX: # %bb.0: 2329; AVX-NEXT: movl %edi, %eax 2330; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 2331; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2332; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2333; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 2334; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2335; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 2336; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2337; AVX-NEXT: vmovd %xmm0, %ecx 2338; AVX-NEXT: # kill: def $al killed $al killed $eax 2339; AVX-NEXT: mulb %cl 2340; AVX-NEXT: retq 2341 %ld = load <8 x i8>, ptr %a1, align 4 2342 %rdx = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> %ld) 2343 %mul = mul i8 %a0, %rdx 2344 ret i8 %mul 2345} 2346 2347define i8 @PR51858(i128 %arg) { 2348; SSE2-LABEL: PR51858: 2349; SSE2: # %bb.0: 2350; SSE2-NEXT: movq %rdi, %xmm0 2351; SSE2-NEXT: movq %rsi, %xmm1 2352; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2353; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2354; SSE2-NEXT: pmullw %xmm1, %xmm0 2355; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2356; SSE2-NEXT: pmullw %xmm0, %xmm1 2357; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 2358; SSE2-NEXT: pmullw %xmm1, %xmm0 2359; SSE2-NEXT: movdqa %xmm0, %xmm1 2360; SSE2-NEXT: psrld $16, %xmm1 2361; SSE2-NEXT: pmullw %xmm0, %xmm1 2362; SSE2-NEXT: movd %xmm1, %eax 2363; SSE2-NEXT: # kill: def $al killed $al killed $eax 2364; SSE2-NEXT: retq 2365; 2366; SSE41-LABEL: PR51858: 2367; SSE41: # %bb.0: 2368; SSE41-NEXT: movq %rdi, %xmm0 2369; SSE41-NEXT: movq %rsi, %xmm1 2370; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 2371; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2372; SSE41-NEXT: pmullw %xmm1, %xmm0 2373; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2374; SSE41-NEXT: pmullw %xmm0, %xmm1 2375; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 2376; SSE41-NEXT: pmullw %xmm1, %xmm0 2377; SSE41-NEXT: movdqa %xmm0, %xmm1 2378; SSE41-NEXT: psrld $16, %xmm1 2379; SSE41-NEXT: pmullw %xmm0, %xmm1 2380; SSE41-NEXT: movd %xmm1, %eax 2381; SSE41-NEXT: # kill: def $al killed $al killed $eax 2382; SSE41-NEXT: retq 2383; 2384; AVX-LABEL: PR51858: 2385; AVX: # %bb.0: 2386; AVX-NEXT: vmovq %rdi, %xmm0 2387; AVX-NEXT: vmovq %rsi, %xmm1 2388; AVX-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 2389; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2390; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2391; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2392; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2393; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 2394; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2395; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 2396; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2397; AVX-NEXT: vmovd %xmm0, %eax 2398; AVX-NEXT: # kill: def $al killed $al killed $eax 2399; AVX-NEXT: retq 2400 %vec = bitcast i128 %arg to <16 x i8> 2401 %red = tail call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %vec) 2402 ret i8 %red 2403} 2404 2405declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>) 2406declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>) 2407declare i64 @llvm.vector.reduce.mul.v8i64(<8 x i64>) 2408declare i64 @llvm.vector.reduce.mul.v16i64(<16 x i64>) 2409 2410declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>) 2411declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>) 2412declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>) 2413declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>) 2414declare i32 @llvm.vector.reduce.mul.v32i32(<32 x i32>) 2415 2416declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>) 2417declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>) 2418declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>) 2419declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>) 2420declare i16 @llvm.vector.reduce.mul.v32i16(<32 x i16>) 2421declare i16 @llvm.vector.reduce.mul.v64i16(<64 x i16>) 2422 2423declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>) 2424declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>) 2425declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>) 2426declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>) 2427declare i8 @llvm.vector.reduce.mul.v32i8(<32 x i8>) 2428declare i8 @llvm.vector.reduce.mul.v64i8(<64 x i8>) 2429declare i8 @llvm.vector.reduce.mul.v128i8(<128 x i8>) 2430