; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 --check-prefix=AVX2NOBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX2 --check-prefix=AVX512BW

;
; sdiv by 7
;

define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: test_div7_4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpextrq $1, %xmm1, %rax
; AVX1-NEXT:    movabsq $5270498306774157605, %rcx # imm = 0x4924924924924925
; AVX1-NEXT:    imulq %rcx
; AVX1-NEXT:    movq %rdx, %rax
; AVX1-NEXT:    shrq $63, %rax
; AVX1-NEXT:    sarq %rdx
; AVX1-NEXT:    addq %rax, %rdx
; AVX1-NEXT:    vmovq %rdx, %xmm2
; AVX1-NEXT:    vmovq %xmm1, %rax
; AVX1-NEXT:    imulq %rcx
; AVX1-NEXT:    movq %rdx, %rax
; AVX1-NEXT:    shrq $63, %rax
; AVX1-NEXT:    sarq %rdx
; AVX1-NEXT:    addq %rax, %rdx
; AVX1-NEXT:    vmovq %rdx, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    imulq %rcx
; AVX1-NEXT:    movq %rdx, %rax
; AVX1-NEXT:    shrq $63, %rax
; AVX1-NEXT:    sarq %rdx
; AVX1-NEXT:    addq %rax, %rdx
; AVX1-NEXT:    vmovq %rdx, %xmm2
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    imulq %rcx
; AVX1-NEXT:    movq %rdx, %rax
; AVX1-NEXT:    shrq $63, %rax
; AVX1-NEXT:    sarq %rdx
; AVX1-NEXT:    addq %rax, %rdx
; AVX1-NEXT:    vmovq %rdx, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_div7_4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpextrq $1, %xmm1, %rax
; AVX2-NEXT:    movabsq $5270498306774157605, %rcx # imm = 0x4924924924924925
; AVX2-NEXT:    imulq %rcx
; AVX2-NEXT:    movq %rdx, %rax
; AVX2-NEXT:    shrq $63, %rax
; AVX2-NEXT:    sarq %rdx
; AVX2-NEXT:    addq %rax, %rdx
; AVX2-NEXT:    vmovq %rdx, %xmm2
; AVX2-NEXT:    vmovq %xmm1, %rax
; AVX2-NEXT:    imulq %rcx
; AVX2-NEXT:    movq %rdx, %rax
; AVX2-NEXT:    shrq $63, %rax
; AVX2-NEXT:    sarq %rdx
; AVX2-NEXT:    addq %rax, %rdx
; AVX2-NEXT:    vmovq %rdx, %xmm1
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
; AVX2-NEXT:    imulq %rcx
; AVX2-NEXT:    movq %rdx, %rax
; AVX2-NEXT:    shrq $63, %rax
; AVX2-NEXT:    sarq %rdx
; AVX2-NEXT:    addq %rax, %rdx
; AVX2-NEXT:    vmovq %rdx, %xmm2
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    imulq %rcx
; AVX2-NEXT:    movq %rdx, %rax
; AVX2-NEXT:    shrq $63, %rax
; AVX2-NEXT:    sarq %rdx
; AVX2-NEXT:    addq %rax, %rdx
; AVX2-NEXT:    vmovq %rdx, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %res = sdiv <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
  ret <4 x i64> %res
}
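
; Each lane above uses the standard signed magic-number division:
; 7 * 0x4924924924924925 == 2^65 + 3, so the high half of the 64x64
; multiply, arithmetically shifted right once, is the quotient rounded
; toward -inf, and adding its sign bit rounds toward zero instead. A
; minimal C sketch of one lane (helper name is illustrative, not from this
; test; assumes the usual arithmetic right shift on signed types):
;
;   int64_t sdiv7_lane64(int64_t n) {
;     int64_t hi = (int64_t)(((__int128)n * 0x4924924924924925LL) >> 64); // imulq
;     return (hi >> 1) + (int64_t)((uint64_t)hi >> 63); // sarq + shrq $63 + addq
;   }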

define <8 x i32> @test_div7_8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: test_div7_8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
; AVX1-NEXT:    vpmuldq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmuldq %xmm3, %xmm1, %xmm4
; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1],xmm2[2,3],xmm4[4,5],xmm2[6,7]
; AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm2
; AVX1-NEXT:    vpsrad $2, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpmuldq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmuldq %xmm3, %xmm0, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; AVX1-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm2
; AVX1-NEXT:    vpsrad $2, %xmm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_div7_8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[1,1,3,3,5,5,7,7]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027]
; AVX2-NEXT:    vpmuldq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpmuldq %ymm2, %ymm0, %ymm2
; AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[1,1,3,3,5,5,7,7]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7]
; AVX2-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm1
; AVX2-NEXT:    vpsrad $2, %ymm0, %ymm0
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %res = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x i32> %res
}
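
; For i32 the magic constant 2454267027 (0x92492493) is negative as an i32,
; so the dividend is added back after the multiply-high; vpmuldq multiplies
; only the even lanes, hence the [1,1,3,3] shuffles and the blend that
; gathers the odd-lane high halves. One lane in C (illustrative helper):
;
;   int32_t sdiv7_lane32(int32_t n) {
;     int32_t hi = (int32_t)(((int64_t)n * (int32_t)0x92492493) >> 32); // vpmuldq
;     hi += n;                              // negative magic: vpaddd the dividend
;     return (hi >> 2) + (int32_t)((uint32_t)hi >> 31); // vpsrad $2 + vpsrld $31
;   }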

define <16 x i16> @test_div7_16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: test_div7_16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
; AVX1-NEXT:    vpmulhw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $15, %xmm1, %xmm3
; AVX1-NEXT:    vpsraw $1, %xmm1, %xmm1
; AVX1-NEXT:    vpaddw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpmulhw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm2
; AVX1-NEXT:    vpsraw $1, %xmm0, %xmm0
; AVX1-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_div7_16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725]
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm1
; AVX2-NEXT:    vpsraw $1, %ymm0, %ymm0
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %res = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <16 x i16> %res
}
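
; i16 has a native vector multiply-high (vpmulhw), and 7 * 18725 == 2^17 + 3,
; so a lane reduces to the plain scalar pattern (illustrative helper):
;
;   int16_t sdiv7_lane16(int16_t n) {
;     int16_t hi = (int16_t)(((int32_t)n * 18725) >> 16);   // vpmulhw
;     return (int16_t)((hi >> 1) + ((uint16_t)hi >> 15));   // vpsraw $1 + vpsrlw $15
;   }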

define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: test_div7_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm4 = [37632,37632,37632,37632,37632,37632,37632,37632]
; AVX1-NEXT:    vpmulhw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm5 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; AVX1-NEXT:    vpmulhw %xmm4, %xmm5, %xmm5
; AVX1-NEXT:    vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT:    vpackuswb %xmm3, %xmm5, %xmm3
; AVX1-NEXT:    vpaddb %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm3
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlw $2, %xmm1, %xmm1
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX1-NEXT:    vpand %xmm6, %xmm1, %xmm1
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX1-NEXT:    vpxor %xmm7, %xmm1, %xmm1
; AVX1-NEXT:    vpaddb %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpsubb %xmm7, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
; AVX1-NEXT:    vpmulhw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; AVX1-NEXT:    vpmulhw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpaddb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm2
; AVX1-NEXT:    vpand %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm6, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm7, %xmm0, %xmm0
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsubb %xmm7, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2NOBW-LABEL: test_div7_32i8:
; AVX2NOBW:       # %bb.0:
; AVX2NOBW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2NOBW-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
; AVX2NOBW-NEXT:    vpbroadcastw {{.*#+}} ymm3 = [37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632]
; AVX2NOBW-NEXT:    vpmulhw %ymm3, %ymm2, %ymm2
; AVX2NOBW-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
; AVX2NOBW-NEXT:    vpmulhw %ymm3, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpackuswb %ymm2, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX2NOBW-NEXT:    vpsrlw $2, %ymm0, %ymm1
; AVX2NOBW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpbroadcastb {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX2NOBW-NEXT:    vpxor %ymm2, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2NOBW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2NOBW-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX2NOBW-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
; AVX2NOBW-NEXT:    retq
;
; AVX512BW-LABEL: test_div7_32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm1
; AVX512BW-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427]
; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovwb %zmm1, %ymm1
; AVX512BW-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX512BW-NEXT:    vpsrlw $2, %ymm0, %ymm1
; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX512BW-NEXT:    vpxor %ymm2, %ymm1, %ymm1
; AVX512BW-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512BW-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX512BW-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
  %res = sdiv <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %res
}
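
; There is no i8 multiply-high or i8 shift, so the bytes are interleaved
; with zeros into the high halves of i16 lanes and multiplied by 37632
; (0x9300, the byte magic -109 placed in the high byte); vpmulhw then leaves
; n * -109 in each word (the AVX512BW form gets the same product via
; vpmovsxbw and vpmullw by 65427, i.e. -109 as a word). The 16-bit shifts
; are masked back to byte granularity, and the xor-32/sub-32 pair
; sign-extends the 6-bit result of the logical shift, standing in for the
; missing byte arithmetic shift. One lane in C (illustrative helper):
;
;   int8_t sdiv7_lane8(int8_t n) {
;     int8_t  hi = (int8_t)(((int16_t)n * -109) >> 8); // widen + vpmulhw + pack
;     uint8_t s  = (uint8_t)(hi + n);                  // negative magic: vpaddb
;     uint8_t t  = (uint8_t)((s >> 2) & 63);           // vpsrlw $2 + vpand 63
;     return (int8_t)(((t ^ 32) - 32) + (s >> 7));     // sign-extend bit 5, add sign
;   }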

;
; sdiv by non-splat constant
;

define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: test_divconstant_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; AVX1-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37632,20224,11008,47872,26368,14592,33024,37632]
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; AVX1-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [47872,12544,26368,6912,14592,30976,33024,35072]
; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpaddb %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT:    vpsraw $8, %xmm4, %xmm4
; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [32,64,128,32,64,128,64,64]
; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm5 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpsraw $8, %xmm5, %xmm5
; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [16,64,32,128,64,32,32,32]
; AVX1-NEXT:    vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT:    vpackuswb %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpaddb %xmm1, %xmm4, %xmm1
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
; AVX1-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [35072,33024,30976,14592,6912,26368,12544,47872]
; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; AVX1-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [37632,33024,14592,26368,47872,11008,20224,37632]
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpaddb %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT:    vpsraw $8, %xmm2, %xmm2
; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [32,32,32,64,128,32,64,16]
; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm3 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpsraw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [64,64,128,64,32,128,64,32]
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpackuswb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vpaddb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2NOBW-LABEL: test_divconstant_32i8:
; AVX2NOBW:       # %bb.0:
; AVX2NOBW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2NOBW-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
; AVX2NOBW-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [35072,33024,30976,14592,6912,26368,12544,47872,37632,20224,11008,47872,26368,14592,33024,37632]
; AVX2NOBW-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
; AVX2NOBW-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [37632,33024,14592,26368,47872,11008,20224,37632,47872,12544,26368,6912,14592,30976,33024,35072]
; AVX2NOBW-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpackuswb %ymm2, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2NOBW-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX2NOBW-NEXT:    vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2NOBW-NEXT:    vpsraw $8, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [32,32,32,64,128,32,64,16,32,64,128,32,64,128,64,64]
; AVX2NOBW-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX2NOBW-NEXT:    vpsraw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [64,64,128,64,32,128,64,32,16,64,32,128,64,32,32,32]
; AVX2NOBW-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT:    vpackuswb %ymm1, %ymm2, %ymm1
; AVX2NOBW-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2NOBW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2NOBW-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
; AVX2NOBW-NEXT:    retq
;
; AVX512BW-LABEL: test_divconstant_32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm0
; AVX512BW-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [65427,65409,57,103,65467,43,79,65427,65417,65409,121,57,27,103,49,65467,65467,49,103,27,57,121,65409,65417,65427,79,43,65467,103,57,65409,65427]
; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    vpsrlw $7, %ymm0, %ymm1
; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm0
; AVX512BW-NEXT:    vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
  %res = sdiv <32 x i8> %a, <i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7>
  ret <32 x i8> %res
}
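
; With a different divisor in every lane, both the magic and the shift vary
; per lane: the constant pool holds per-lane magics, the vpand/vandps of the
; dividend keeps only the lanes whose magic is negative (those needing the
; add-back fixup), and the per-lane arithmetic shift is done by
; sign-extending each byte into a word and multiplying by a per-lane power
; of two. For a shift by s the tables hold 1 << (8 - s), since
; (illustrative):
;
;   int8_t sra_lane8(int8_t v, int s) {  // s in [1,4] in the tables above
;     return (int8_t)(((int16_t)v * (1 << (8 - s))) >> 8);
;     // vpsraw $8 (sign-extend) + vpmullw (table) + vpsrlw $8
;   }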

;
; srem by 7
;

define <4 x i64> @test_rem7_4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: test_rem7_4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpextrq $1, %xmm1, %rcx
; AVX1-NEXT:    movabsq $5270498306774157605, %rsi # imm = 0x4924924924924925
; AVX1-NEXT:    movq %rcx, %rax
; AVX1-NEXT:    imulq %rsi
; AVX1-NEXT:    movq %rdx, %rax
; AVX1-NEXT:    shrq $63, %rax
; AVX1-NEXT:    sarq %rdx
; AVX1-NEXT:    addq %rax, %rdx
; AVX1-NEXT:    leaq (,%rdx,8), %rax
; AVX1-NEXT:    subq %rax, %rdx
; AVX1-NEXT:    addq %rcx, %rdx
; AVX1-NEXT:    vmovq %rdx, %xmm2
; AVX1-NEXT:    vmovq %xmm1, %rcx
; AVX1-NEXT:    movq %rcx, %rax
; AVX1-NEXT:    imulq %rsi
; AVX1-NEXT:    movq %rdx, %rax
; AVX1-NEXT:    shrq $63, %rax
; AVX1-NEXT:    sarq %rdx
; AVX1-NEXT:    addq %rax, %rdx
; AVX1-NEXT:    leaq (,%rdx,8), %rax
; AVX1-NEXT:    subq %rax, %rdx
; AVX1-NEXT:    addq %rcx, %rdx
; AVX1-NEXT:    vmovq %rdx, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT:    vpextrq $1, %xmm0, %rcx
; AVX1-NEXT:    movq %rcx, %rax
; AVX1-NEXT:    imulq %rsi
; AVX1-NEXT:    movq %rdx, %rax
; AVX1-NEXT:    shrq $63, %rax
; AVX1-NEXT:    sarq %rdx
; AVX1-NEXT:    addq %rax, %rdx
; AVX1-NEXT:    leaq (,%rdx,8), %rax
; AVX1-NEXT:    subq %rax, %rdx
; AVX1-NEXT:    addq %rcx, %rdx
; AVX1-NEXT:    vmovq %rdx, %xmm2
; AVX1-NEXT:    vmovq %xmm0, %rcx
; AVX1-NEXT:    movq %rcx, %rax
; AVX1-NEXT:    imulq %rsi
; AVX1-NEXT:    movq %rdx, %rax
; AVX1-NEXT:    shrq $63, %rax
; AVX1-NEXT:    sarq %rdx
; AVX1-NEXT:    addq %rax, %rdx
; AVX1-NEXT:    leaq (,%rdx,8), %rax
; AVX1-NEXT:    subq %rax, %rdx
; AVX1-NEXT:    addq %rcx, %rdx
; AVX1-NEXT:    vmovq %rdx, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_rem7_4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpextrq $1, %xmm1, %rcx
; AVX2-NEXT:    movabsq $5270498306774157605, %rsi # imm = 0x4924924924924925
; AVX2-NEXT:    movq %rcx, %rax
; AVX2-NEXT:    imulq %rsi
; AVX2-NEXT:    movq %rdx, %rax
; AVX2-NEXT:    shrq $63, %rax
; AVX2-NEXT:    sarq %rdx
; AVX2-NEXT:    addq %rax, %rdx
; AVX2-NEXT:    leaq (,%rdx,8), %rax
; AVX2-NEXT:    subq %rax, %rdx
; AVX2-NEXT:    addq %rcx, %rdx
; AVX2-NEXT:    vmovq %rdx, %xmm2
; AVX2-NEXT:    vmovq %xmm1, %rcx
; AVX2-NEXT:    movq %rcx, %rax
; AVX2-NEXT:    imulq %rsi
; AVX2-NEXT:    movq %rdx, %rax
; AVX2-NEXT:    shrq $63, %rax
; AVX2-NEXT:    sarq %rdx
; AVX2-NEXT:    addq %rax, %rdx
; AVX2-NEXT:    leaq (,%rdx,8), %rax
; AVX2-NEXT:    subq %rax, %rdx
; AVX2-NEXT:    addq %rcx, %rdx
; AVX2-NEXT:    vmovq %rdx, %xmm1
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT:    vpextrq $1, %xmm0, %rcx
; AVX2-NEXT:    movq %rcx, %rax
; AVX2-NEXT:    imulq %rsi
; AVX2-NEXT:    movq %rdx, %rax
; AVX2-NEXT:    shrq $63, %rax
; AVX2-NEXT:    sarq %rdx
; AVX2-NEXT:    addq %rax, %rdx
; AVX2-NEXT:    leaq (,%rdx,8), %rax
; AVX2-NEXT:    subq %rax, %rdx
; AVX2-NEXT:    addq %rcx, %rdx
; AVX2-NEXT:    vmovq %rdx, %xmm2
; AVX2-NEXT:    vmovq %xmm0, %rcx
; AVX2-NEXT:    movq %rcx, %rax
; AVX2-NEXT:    imulq %rsi
; AVX2-NEXT:    movq %rdx, %rax
; AVX2-NEXT:    shrq $63, %rax
; AVX2-NEXT:    sarq %rdx
; AVX2-NEXT:    addq %rax, %rdx
; AVX2-NEXT:    leaq (,%rdx,8), %rax
; AVX2-NEXT:    subq %rax, %rdx
; AVX2-NEXT:    addq %rcx, %rdx
; AVX2-NEXT:    vmovq %rdx, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %res = srem <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
  ret <4 x i64> %res
}
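
; srem reuses the sdiv quotient and then forms n - 7*q without a multiply:
; leaq (,%rdx,8) builds 8*q, subq leaves -7*q, and addq adds the dividend
; back. One lane in C (sdiv7_lane64 as sketched earlier; illustrative):
;
;   int64_t srem7_lane64(int64_t n) {
;     int64_t q = sdiv7_lane64(n);
;     return n + (q - 8 * q);   // leaq + subq + addq: n - 7*q
;   }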

define <8 x i32> @test_rem7_8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: test_rem7_8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
; AVX1-NEXT:    vpmuldq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmuldq %xmm3, %xmm1, %xmm4
; AVX1-NEXT:    vpshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1],xmm2[2,3],xmm4[4,5],xmm2[6,7]
; AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpsrld $31, %xmm2, %xmm4
; AVX1-NEXT:    vpsrad $2, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpslld $3, %xmm2, %xmm4
; AVX1-NEXT:    vpsubd %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpmuldq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpmuldq %xmm3, %xmm0, %xmm3
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; AVX1-NEXT:    vpaddd %xmm0, %xmm2, %xmm2
; AVX1-NEXT:    vpsrld $31, %xmm2, %xmm3
; AVX1-NEXT:    vpsrad $2, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpslld $3, %xmm2, %xmm3
; AVX1-NEXT:    vpsubd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_rem7_8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[1,1,3,3,5,5,7,7]
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027,2454267027]
; AVX2-NEXT:    vpmuldq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpmuldq %ymm2, %ymm0, %ymm2
; AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[1,1,3,3,5,5,7,7]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7]
; AVX2-NEXT:    vpaddd %ymm0, %ymm1, %ymm1
; AVX2-NEXT:    vpsrld $31, %ymm1, %ymm2
; AVX2-NEXT:    vpsrad $2, %ymm1, %ymm1
; AVX2-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpslld $3, %ymm1, %ymm2
; AVX2-NEXT:    vpsubd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %res = srem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x i32> %res
}

define <16 x i16> @test_rem7_16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: test_rem7_16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
; AVX1-NEXT:    vpmulhw %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpsrlw $15, %xmm3, %xmm4
; AVX1-NEXT:    vpsraw $1, %xmm3, %xmm3
; AVX1-NEXT:    vpaddw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsllw $3, %xmm3, %xmm4
; AVX1-NEXT:    vpsubw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpaddw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpmulhw %xmm2, %xmm0, %xmm2
; AVX1-NEXT:    vpsrlw $15, %xmm2, %xmm3
; AVX1-NEXT:    vpsraw $1, %xmm2, %xmm2
; AVX1-NEXT:    vpaddw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsllw $3, %xmm2, %xmm3
; AVX1-NEXT:    vpsubw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_rem7_16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 # [18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725]
; AVX2-NEXT:    vpsrlw $15, %ymm1, %ymm2
; AVX2-NEXT:    vpsraw $1, %ymm1, %ymm1
; AVX2-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsllw $3, %ymm1, %ymm2
; AVX2-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %res = srem <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <16 x i16> %res
}

define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: test_rem7_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm4 = [37632,37632,37632,37632,37632,37632,37632,37632]
; AVX1-NEXT:    vpmulhw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm5 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; AVX1-NEXT:    vpmulhw %xmm4, %xmm5, %xmm5
; AVX1-NEXT:    vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT:    vpackuswb %xmm3, %xmm5, %xmm3
; AVX1-NEXT:    vpaddb %xmm1, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlw $7, %xmm3, %xmm5
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm5
; AVX1-NEXT:    vpsrlw $2, %xmm3, %xmm3
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm7 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm8 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX1-NEXT:    vpxor %xmm3, %xmm8, %xmm3
; AVX1-NEXT:    vpaddb %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpsubb %xmm8, %xmm3, %xmm3
; AVX1-NEXT:    vpsllw $3, %xmm3, %xmm5
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm9 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
; AVX1-NEXT:    vpand %xmm5, %xmm9, %xmm5
; AVX1-NEXT:    vpsubb %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpaddb %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
; AVX1-NEXT:    vpmulhw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; AVX1-NEXT:    vpmulhw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpaddb %xmm0, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $7, %xmm2, %xmm3
; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlw $2, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm7, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm2, %xmm8, %xmm2
; AVX1-NEXT:    vpaddb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsubb %xmm8, %xmm2, %xmm2
; AVX1-NEXT:    vpsllw $3, %xmm2, %xmm3
; AVX1-NEXT:    vpand %xmm3, %xmm9, %xmm3
; AVX1-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2NOBW-LABEL: test_rem7_32i8:
; AVX2NOBW:       # %bb.0:
; AVX2NOBW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2NOBW-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
; AVX2NOBW-NEXT:    vpbroadcastw {{.*#+}} ymm3 = [37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632,37632]
; AVX2NOBW-NEXT:    vpmulhw %ymm3, %ymm2, %ymm2
; AVX2NOBW-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
; AVX2NOBW-NEXT:    vpmulhw %ymm3, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpackuswb %ymm2, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpaddb %ymm0, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX2NOBW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX2NOBW-NEXT:    vpbroadcastb {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX2NOBW-NEXT:    vpxor %ymm3, %ymm2, %ymm2
; AVX2NOBW-NEXT:    vpsrlw $7, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX2NOBW-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpsllw $3, %ymm1, %ymm2
; AVX2NOBW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX2NOBW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX2NOBW-NEXT:    retq
;
; AVX512BW-LABEL: test_rem7_32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm1
; AVX512BW-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427]
; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovwb %zmm1, %ymm1
; AVX512BW-NEXT:    vpaddb %ymm0, %ymm1, %ymm1
; AVX512BW-NEXT:    vpsrlw $2, %ymm1, %ymm2
; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX512BW-NEXT:    vpbroadcastb {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX512BW-NEXT:    vpxor %ymm3, %ymm2, %ymm2
; AVX512BW-NEXT:    vpsrlw $7, %ymm1, %ymm1
; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX512BW-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512BW-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
; AVX512BW-NEXT:    vpsllw $3, %ymm1, %ymm2
; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX512BW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
; AVX512BW-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
  %res = srem <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %res
}
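
; For bytes the 8*q term again needs a shift that x86 lacks, so it is done
; as a 16-bit shift masked back to bytes: vpsllw $3 + vpand 248 clears the
; bits that crossed in from the neighboring byte. One lane in C, with the
; byte operations kept unsigned to model the wrapping vector arithmetic
; (illustrative):
;
;   int8_t srem7_lane8(int8_t n) {
;     int8_t  q  = sdiv7_lane8(n);                      // quotient as above
;     uint8_t q8 = (uint8_t)(((uint8_t)q << 3) & 0xF8); // vpsllw $3 + vpand 248
;     return (int8_t)((uint8_t)n + (uint8_t)((uint8_t)q - q8)); // n - 7*q
;   }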

;
; srem by non-splat constant
;

define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: test_remconstant_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; AVX1-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37632,20224,11008,47872,26368,14592,33024,37632]
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX1-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [47872,12544,26368,6912,14592,30976,33024,35072]
; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT:    vpackuswb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm4
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm5
; AVX1-NEXT:    vpaddb %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT:    vpsraw $8, %xmm5, %xmm5
; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [32,64,128,32,64,128,64,64]
; AVX1-NEXT:    vpsrlw $8, %xmm5, %xmm5
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm6 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpsraw $8, %xmm6, %xmm6
; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [16,64,32,128,64,32,32,32]
; AVX1-NEXT:    vpsrlw $8, %xmm6, %xmm6
; AVX1-NEXT:    vpackuswb %xmm5, %xmm6, %xmm5
; AVX1-NEXT:    vpsrlw $7, %xmm3, %xmm3
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT:    vpaddb %xmm3, %xmm5, %xmm3
; AVX1-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm5 # [22,0,20,0,18,0,16,0,14,0,12,0,10,0,8,0]
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT:    vpand %xmm7, %xmm5, %xmm5
; AVX1-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,21,0,19,0,17,0,15,0,13,0,11,0,9,0,7]
; AVX1-NEXT:    vpsllw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpor %xmm3, %xmm5, %xmm3
; AVX1-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
; AVX1-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [35072,33024,30976,14592,6912,26368,12544,47872]
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [37632,33024,14592,26368,47872,11008,20224,37632]
; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpaddb %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT:    vpsraw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [32,32,32,64,128,32,64,16]
; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm4 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpsraw $8, %xmm4, %xmm4
; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [64,64,128,64,32,128,64,32]
; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT:    vpackuswb %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm6, %xmm1, %xmm1
; AVX1-NEXT:    vpaddb %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3 # [7,0,9,0,11,0,13,0,15,0,17,0,19,0,21,0]
; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [0,8,0,10,0,12,0,14,0,16,0,18,0,20,0,22]
; AVX1-NEXT:    vpsllw $8, %xmm1, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2NOBW-LABEL: test_remconstant_32i8:
; AVX2NOBW:       # %bb.0:
; AVX2NOBW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2NOBW-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
; AVX2NOBW-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [35072,33024,30976,14592,6912,26368,12544,47872,37632,20224,11008,47872,26368,14592,33024,37632]
; AVX2NOBW-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
; AVX2NOBW-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [37632,33024,14592,26368,47872,11008,20224,37632,47872,12544,26368,6912,14592,30976,33024,35072]
; AVX2NOBW-NEXT:    vpsrlw $8, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpackuswb %ymm2, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
; AVX2NOBW-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; AVX2NOBW-NEXT:    vpsraw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [32,32,32,64,128,32,64,16,32,64,128,32,64,128,64,64]
; AVX2NOBW-NEXT:    vpsrlw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT:    vpunpcklbw {{.*#+}} ymm3 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; AVX2NOBW-NEXT:    vpsraw $8, %ymm3, %ymm3
; AVX2NOBW-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [64,64,128,64,32,128,64,32,16,64,32,128,64,32,32,32]
; AVX2NOBW-NEXT:    vpsrlw $8, %ymm3, %ymm3
; AVX2NOBW-NEXT:    vpackuswb %ymm2, %ymm3, %ymm2
; AVX2NOBW-NEXT:    vpsrlw $7, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX2NOBW-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 # [0,8,0,10,0,12,0,14,0,16,0,18,0,20,0,22,0,21,0,19,0,17,0,15,0,13,0,11,0,9,0,7]
; AVX2NOBW-NEXT:    vpsllw $8, %ymm2, %ymm2
; AVX2NOBW-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [7,0,9,0,11,0,13,0,15,0,17,0,19,0,21,0,22,0,20,0,18,0,16,0,14,0,12,0,10,0,8,0]
; AVX2NOBW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpor %ymm2, %ymm1, %ymm1
; AVX2NOBW-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; AVX2NOBW-NEXT:    retq
;
; AVX512BW-LABEL: test_remconstant_32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm2
; AVX512BW-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [65427,65409,57,103,65467,43,79,65427,65417,65409,121,57,27,103,49,65467,65467,49,103,27,57,121,65409,65417,65427,79,43,65467,103,57,65409,65427]
; AVX512BW-NEXT:    vpsrlw $8, %zmm2, %zmm2
; AVX512BW-NEXT:    vpmovwb %zmm2, %ymm2
; AVX512BW-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
; AVX512BW-NEXT:    vpsrlw $7, %ymm1, %ymm2
; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX512BW-NEXT:    vpmovsxbw %ymm1, %zmm1
; AVX512BW-NEXT:    vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovwb %zmm1, %ymm1
; AVX512BW-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero
; AVX512BW-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7]
; AVX512BW-NEXT:    vpmovwb %zmm1, %ymm1
; AVX512BW-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
  %res = srem <32 x i8> %a, <i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7>
  ret <32 x i8> %res
}