; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefix=CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefix=CHECK-SSE41
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefix=CHECK-AVX1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefix=CHECK-AVX2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefix=CHECK-AVX512VL

; Odd+Even divisors
; Divisor lanes are 5, 14, 25, 100 (a mix of odd and even values). Each result
; lane is 1 when the signed remainder of that lane is zero, otherwise 0.
define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_even:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_srem_odd_even:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: por %xmm2, %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,306783378,171798690,42949672]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: psrld $31, %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: test_srem_odd_even:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_srem_odd_even:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 25, i32 100>
  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

;==============================================================================;

; One all-ones divisor in odd divisor
; Lane 2 divides by 4294967295, the all-ones i32 bit pattern (-1 when read as
; signed); the other lanes divide by 5. Each result lane is 1 when the
; remainder is zero, otherwise 0.
define <4 x i32> @test_srem_odd_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_allones_eq:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_srem_odd_allones_eq:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: psrld $31, %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: test_srem_odd_allones_eq:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_srem_odd_allones_eq:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729]
; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_srem_odd_allones_eq:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
; Same divisors as test_srem_odd_allones_eq, but compared with icmp ne: each
; result lane is 1 when the remainder is non-zero, otherwise 0.
define <4 x i32> @test_srem_odd_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_allones_ne:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: psrld $31, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_srem_odd_allones_ne:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: test_srem_odd_allones_ne:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_srem_odd_allones_ne:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729]
; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; CHECK-AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_srem_odd_allones_ne:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
  %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; One all-ones divisor in even divisor
; Lane 2 divides by 4294967295, the all-ones i32 bit pattern (-1 when read as
; signed); the other lanes divide by 14. Each result lane is 1 when the
; remainder is zero, otherwise 0.
define <4 x i32> @test_srem_even_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_even_allones_eq:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE2-NEXT: psrld $1, %xmm1
; CHECK-SSE2-NEXT: pslld $31, %xmm0
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_srem_even_allones_eq:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $1, %xmm1
; CHECK-SSE41-NEXT: pslld $31, %xmm0
; CHECK-SSE41-NEXT: por %xmm1, %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,306783378,4294967295,306783378]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: psrld $31, %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: test_srem_even_allones_eq:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_srem_even_allones_eq:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783]
; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [306783378,306783378,306783378,306783378]
; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_srem_even_allones_eq:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
; Same divisors as test_srem_even_allones_eq, but compared with icmp ne: each
; result lane is 1 when the remainder is non-zero, otherwise 0.
define <4 x i32> @test_srem_even_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_even_allones_ne:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE2-NEXT: psrld $1, %xmm1
; CHECK-SSE2-NEXT: pslld $31, %xmm0
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: psrld $31, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_srem_even_allones_ne:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $1, %xmm1
; CHECK-SSE41-NEXT: pslld $31, %xmm0
; CHECK-SSE41-NEXT: por %xmm1, %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,306783378,4294967295,306783378]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: test_srem_even_allones_ne:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_srem_even_allones_ne:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783]
; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [306783378,306783378,306783378,306783378]
; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; CHECK-AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_srem_even_allones_ne:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
  %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; One all-ones divisor in odd+even divisor
; Divisor lanes are 5, 14, 4294967295 (the all-ones i32 bit pattern), and 100.
; Each result lane is 1 when the remainder is zero, otherwise 0.
define <4 x i32> @test_srem_odd_even_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_even_allones_eq:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_srem_odd_even_allones_eq:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: por %xmm2, %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,306783378,4294967295,42949672]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: psrld $31, %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: test_srem_odd_even_allones_eq:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_srem_odd_even_allones_eq:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_eq:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
  %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
; Same divisors as test_srem_odd_even_allones_eq, but compared with icmp ne:
; each result lane is 1 when the remainder is non-zero, otherwise 0.
define <4 x i32> @test_srem_odd_even_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_srem_odd_even_allones_ne:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: psrld $31, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_srem_odd_even_allones_ne:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: por %xmm2, %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,306783378,4294967295,42949672]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: test_srem_odd_even_allones_ne:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_srem_odd_even_allones_ne:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; CHECK-AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_ne:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
  %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

530;------------------------------------------------------------------------------; 531 532; One power-of-two divisor in odd divisor 533define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind { 534; CHECK-SSE2-LABEL: test_srem_odd_poweroftwo: 535; CHECK-SSE2: # %bb.0: 536; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 537; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 538; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 539; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 540; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 541; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 542; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 543; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3] 544; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 545; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] 546; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 547; CHECK-SSE2-NEXT: psrlq $32, %xmm0 548; CHECK-SSE2-NEXT: por %xmm2, %xmm0 549; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 550; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 551; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 552; CHECK-SSE2-NEXT: retq 553; 554; CHECK-SSE41-LABEL: test_srem_odd_poweroftwo: 555; CHECK-SSE41: # %bb.0: 556; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 557; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 558; CHECK-SSE41-NEXT: pmovsxdq {{.*#+}} xmm1 = [1,268435456] 559; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1 560; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] 561; CHECK-SSE41-NEXT: psrlq $32, %xmm1 562; CHECK-SSE41-NEXT: por %xmm1, %xmm0 563; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,268435455,858993458] 564; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 565; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 566; CHECK-SSE41-NEXT: 
psrld $31, %xmm0 567; CHECK-SSE41-NEXT: retq 568; 569; CHECK-AVX1-LABEL: test_srem_odd_poweroftwo: 570; CHECK-AVX1: # %bb.0: 571; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 572; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 573; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 574; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] 575; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1 576; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 577; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 578; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 579; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 580; CHECK-AVX1-NEXT: retq 581; 582; CHECK-AVX2-LABEL: test_srem_odd_poweroftwo: 583; CHECK-AVX2: # %bb.0: 584; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 585; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 586; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 587; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 588; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 589; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 590; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 591; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 592; CHECK-AVX2-NEXT: retq 593; 594; CHECK-AVX512VL-LABEL: test_srem_odd_poweroftwo: 595; CHECK-AVX512VL: # %bb.0: 596; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 597; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 598; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 599; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 600; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 601; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 602; CHECK-AVX512VL-NEXT: retq 603 %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 16, i32 5> 604 %cmp = icmp eq <4 x i32> %srem, <i32 
0, i32 0, i32 0, i32 0> 605 %ret = zext <4 x i1> %cmp to <4 x i32> 606 ret <4 x i32> %ret 607} 608 609; One power-of-two divisor in even divisor 610define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind { 611; CHECK-SSE2-LABEL: test_srem_even_poweroftwo: 612; CHECK-SSE2: # %bb.0: 613; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 614; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 615; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 616; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 617; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 618; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 619; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 620; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,268435456,2147483648] 621; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1 622; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3] 623; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 624; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] 625; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3 626; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3] 627; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 628; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 629; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3] 630; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 631; CHECK-SSE2-NEXT: por %xmm2, %xmm0 632; CHECK-SSE2-NEXT: pxor %xmm4, %xmm0 633; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 634; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 635; CHECK-SSE2-NEXT: retq 636; 637; CHECK-SSE41-LABEL: test_srem_even_poweroftwo: 638; CHECK-SSE41: # %bb.0: 639; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 640; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 641; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 642; 
CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 643; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 644; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 645; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 646; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 647; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 648; CHECK-SSE41-NEXT: por %xmm2, %xmm0 649; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,306783378,268435455,306783378] 650; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 651; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 652; CHECK-SSE41-NEXT: psrld $31, %xmm0 653; CHECK-SSE41-NEXT: retq 654; 655; CHECK-AVX1-LABEL: test_srem_even_poweroftwo: 656; CHECK-AVX1: # %bb.0: 657; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 658; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 659; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 660; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 661; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 662; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 663; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 664; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 665; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 666; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 667; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 668; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 669; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 670; CHECK-AVX1-NEXT: retq 671; 672; CHECK-AVX2-LABEL: test_srem_even_poweroftwo: 673; CHECK-AVX2: # %bb.0: 674; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 675; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 676; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 
677; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 678; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 679; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 680; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 681; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 682; CHECK-AVX2-NEXT: retq 683; 684; CHECK-AVX512VL-LABEL: test_srem_even_poweroftwo: 685; CHECK-AVX512VL: # %bb.0: 686; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 687; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 688; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 689; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 690; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 691; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 692; CHECK-AVX512VL-NEXT: retq 693 %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 16, i32 14> 694 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 695 %ret = zext <4 x i1> %cmp to <4 x i32> 696 ret <4 x i32> %ret 697} 698 699; One power-of-two divisor in odd+even divisor 700define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind { 701; CHECK-SSE2-LABEL: test_srem_odd_even_poweroftwo: 702; CHECK-SSE2: # %bb.0: 703; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 704; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 705; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 706; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 707; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 708; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 709; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 710; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2147483648,268435456,1073741824] 711; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1 712; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3] 713; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 714; CHECK-SSE2-NEXT: 
pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 715; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3] 716; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 717; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 718; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3] 719; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 720; CHECK-SSE2-NEXT: por %xmm2, %xmm0 721; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 722; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 723; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 724; CHECK-SSE2-NEXT: retq 725; 726; CHECK-SSE41-LABEL: test_srem_odd_even_poweroftwo: 727; CHECK-SSE41: # %bb.0: 728; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 729; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 730; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 731; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 732; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 733; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 734; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 735; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 736; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 737; CHECK-SSE41-NEXT: por %xmm2, %xmm0 738; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,306783378,268435455,42949672] 739; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 740; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 741; CHECK-SSE41-NEXT: psrld $31, %xmm0 742; CHECK-SSE41-NEXT: retq 743; 744; CHECK-AVX1-LABEL: test_srem_odd_even_poweroftwo: 745; CHECK-AVX1: # %bb.0: 746; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 747; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 748; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 749; CHECK-AVX1-NEXT: vpmuludq 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 750; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 751; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 752; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 753; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 754; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 755; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 756; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 757; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 758; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 759; CHECK-AVX1-NEXT: retq 760; 761; CHECK-AVX2-LABEL: test_srem_odd_even_poweroftwo: 762; CHECK-AVX2: # %bb.0: 763; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 764; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 765; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 766; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 767; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 768; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 769; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 770; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 771; CHECK-AVX2-NEXT: retq 772; 773; CHECK-AVX512VL-LABEL: test_srem_odd_even_poweroftwo: 774; CHECK-AVX512VL: # %bb.0: 775; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 776; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 777; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 778; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 779; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 780; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 781; CHECK-AVX512VL-NEXT: retq 782 %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 16, i32 100> 783 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 784 %ret = zext <4 x i1> %cmp to 
<4 x i32> 785 ret <4 x i32> %ret 786} 787 788;------------------------------------------------------------------------------; 789 790; One one divisor in odd divisor 791define <4 x i32> @test_srem_odd_one(<4 x i32> %X) nounwind { 792; CHECK-SSE2-LABEL: test_srem_odd_one: 793; CHECK-SSE2: # %bb.0: 794; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837] 795; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 796; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0 797; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 798; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 799; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] 800; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 801; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 802; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 803; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 804; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 805; CHECK-SSE2-NEXT: retq 806; 807; CHECK-SSE41-LABEL: test_srem_odd_one: 808; CHECK-SSE41: # %bb.0: 809; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 810; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 811; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458] 812; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 813; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 814; CHECK-SSE41-NEXT: psrld $31, %xmm0 815; CHECK-SSE41-NEXT: retq 816; 817; CHECK-AVX1-LABEL: test_srem_odd_one: 818; CHECK-AVX1: # %bb.0: 819; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 820; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 821; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 822; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 823; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 824; CHECK-AVX1-NEXT: retq 825; 826; CHECK-AVX2-LABEL: test_srem_odd_one: 827; CHECK-AVX2: # %bb.0: 828; CHECK-AVX2-NEXT: 
vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837] 829; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 830; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729] 831; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 832; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 833; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 834; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 835; CHECK-AVX2-NEXT: retq 836; 837; CHECK-AVX512VL-LABEL: test_srem_odd_one: 838; CHECK-AVX512VL: # %bb.0: 839; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 840; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 841; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 842; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 843; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 844; CHECK-AVX512VL-NEXT: retq 845 %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 1, i32 5> 846 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 847 %ret = zext <4 x i1> %cmp to <4 x i32> 848 ret <4 x i32> %ret 849} 850 851; One one divisor in even divisor 852define <4 x i32> @test_srem_even_one(<4 x i32> %X) nounwind { 853; CHECK-SSE2-LABEL: test_srem_even_one: 854; CHECK-SSE2: # %bb.0: 855; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783] 856; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 857; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0 858; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 859; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 860; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] 861; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 862; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 863; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 864; CHECK-SSE2-NEXT: psrld $1, %xmm1 865; CHECK-SSE2-NEXT: pslld $31, %xmm0 866; CHECK-SSE2-NEXT: por %xmm1, %xmm0 867; CHECK-SSE2-NEXT: pxor 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 868; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 869; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 870; CHECK-SSE2-NEXT: retq 871; 872; CHECK-SSE41-LABEL: test_srem_even_one: 873; CHECK-SSE41: # %bb.0: 874; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 875; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 876; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 877; CHECK-SSE41-NEXT: psrld $1, %xmm1 878; CHECK-SSE41-NEXT: pslld $31, %xmm0 879; CHECK-SSE41-NEXT: por %xmm1, %xmm0 880; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,306783378,4294967295,306783378] 881; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 882; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 883; CHECK-SSE41-NEXT: psrld $31, %xmm0 884; CHECK-SSE41-NEXT: retq 885; 886; CHECK-AVX1-LABEL: test_srem_even_one: 887; CHECK-AVX1: # %bb.0: 888; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 889; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 890; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1 891; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0 892; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 893; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 894; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 895; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 896; CHECK-AVX1-NEXT: retq 897; 898; CHECK-AVX2-LABEL: test_srem_even_one: 899; CHECK-AVX2: # %bb.0: 900; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783] 901; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 902; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [306783378,306783378,306783378,306783378] 903; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 904; CHECK-AVX2-NEXT: vpsrld $1, %xmm0, %xmm1 905; CHECK-AVX2-NEXT: vpslld $31, %xmm0, %xmm0 906; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 907; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 908; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, 
%xmm0, %xmm0 909; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 910; CHECK-AVX2-NEXT: retq 911; 912; CHECK-AVX512VL-LABEL: test_srem_even_one: 913; CHECK-AVX512VL: # %bb.0: 914; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 915; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 916; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0 917; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 918; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 919; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 920; CHECK-AVX512VL-NEXT: retq 921 %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 1, i32 14> 922 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 923 %ret = zext <4 x i1> %cmp to <4 x i32> 924 ret <4 x i32> %ret 925} 926 927; One one divisor in odd+even divisor 928define <4 x i32> @test_srem_odd_even_one(<4 x i32> %X) nounwind { 929; CHECK-SSE2-LABEL: test_srem_odd_even_one: 930; CHECK-SSE2: # %bb.0: 931; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 932; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 933; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 934; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 935; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 936; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 937; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 938; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3] 939; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 940; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 941; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] 942; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 943; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 944; CHECK-SSE2-NEXT: por %xmm1, %xmm0 945; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 946; CHECK-SSE2-NEXT: pcmpgtd 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 947; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 948; CHECK-SSE2-NEXT: retq 949; 950; CHECK-SSE41-LABEL: test_srem_odd_even_one: 951; CHECK-SSE41: # %bb.0: 952; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 953; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 954; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 955; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 956; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 957; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 958; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 959; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 960; CHECK-SSE41-NEXT: por %xmm2, %xmm0 961; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,306783378,4294967295,42949672] 962; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 963; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 964; CHECK-SSE41-NEXT: psrld $31, %xmm0 965; CHECK-SSE41-NEXT: retq 966; 967; CHECK-AVX1-LABEL: test_srem_odd_even_one: 968; CHECK-AVX1: # %bb.0: 969; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 970; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 971; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 972; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 973; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 974; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 975; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 976; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 977; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 978; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 979; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 980; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 981; CHECK-AVX1-NEXT: retq 982; 983; CHECK-AVX2-LABEL: test_srem_odd_even_one: 984; CHECK-AVX2: # 
%bb.0: 985; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 986; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 987; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 988; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 989; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 990; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 991; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 992; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 993; CHECK-AVX2-NEXT: retq 994; 995; CHECK-AVX512VL-LABEL: test_srem_odd_even_one: 996; CHECK-AVX512VL: # %bb.0: 997; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 998; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 999; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1000; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1001; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1002; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1003; CHECK-AVX512VL-NEXT: retq 1004 %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 1, i32 100> 1005 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1006 %ret = zext <4 x i1> %cmp to <4 x i32> 1007 ret <4 x i32> %ret 1008} 1009 1010;------------------------------------------------------------------------------; 1011 1012; One INT_MIN divisor in odd divisor 1013define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind { 1014; CHECK-SSE2-LABEL: test_srem_odd_INT_MIN: 1015; CHECK-SSE2: # %bb.0: 1016; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1017; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647] 1018; CHECK-SSE2-NEXT: pand %xmm0, %xmm2 1019; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm2 1020; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1021; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1022; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3] 1023; 
CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1024; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 1025; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 1026; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 1027; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 1028; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 1029; CHECK-SSE2-NEXT: pcmpeqd %xmm0, %xmm0 1030; CHECK-SSE2-NEXT: pxor %xmm3, %xmm0 1031; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[3,0] 1032; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0,2] 1033; CHECK-SSE2-NEXT: psrld $31, %xmm0 1034; CHECK-SSE2-NEXT: retq 1035; 1036; CHECK-SSE41-LABEL: test_srem_odd_INT_MIN: 1037; CHECK-SSE41: # %bb.0: 1038; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 1039; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647] 1040; CHECK-SSE41-NEXT: pand %xmm0, %xmm2 1041; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm2 1042; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1043; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1044; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,1,858993458] 1045; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1046; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1047; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7] 1048; CHECK-SSE41-NEXT: psrld $31, %xmm0 1049; CHECK-SSE41-NEXT: retq 1050; 1051; CHECK-AVX1-LABEL: test_srem_odd_INT_MIN: 1052; CHECK-AVX1: # %bb.0: 1053; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1054; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 1055; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1 1056; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1057; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1058; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 1059; CHECK-AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 1060; 
CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] 1061; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1062; CHECK-AVX1-NEXT: retq 1063; 1064; CHECK-AVX2-LABEL: test_srem_odd_INT_MIN: 1065; CHECK-AVX2: # %bb.0: 1066; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1067; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647] 1068; CHECK-AVX2-NEXT: vpand %xmm2, %xmm0, %xmm2 1069; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1 1070; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1071; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1072; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 1073; CHECK-AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 1074; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] 1075; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1076; CHECK-AVX2-NEXT: retq 1077; 1078; CHECK-AVX512VL-LABEL: test_srem_odd_INT_MIN: 1079; CHECK-AVX512VL: # %bb.0: 1080; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1081; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2 1082; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1 1083; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1084; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1085; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 1086; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 1087; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] 1088; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1089; CHECK-AVX512VL-NEXT: retq 1090 %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 2147483648, i32 5> 1091 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1092 %ret = zext <4 x i1> %cmp to <4 x i32> 1093 ret <4 x i32> %ret 1094} 1095 1096; One INT_MIN divisor in even divisor 1097define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind { 1098; CHECK-SSE2-LABEL: 
test_srem_even_INT_MIN: 1099; CHECK-SSE2: # %bb.0: 1100; CHECK-SSE2-NEXT: pxor %xmm2, %xmm2 1101; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3067833783,u,1,u] 1102; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1 1103; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1104; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1105; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 1106; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 1107; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1108; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1109; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2,2147483648] 1110; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 1111; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3] 1112; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1113; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648] 1114; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1 1115; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,3,2,3] 1116; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1] 1117; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 1118; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1119; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] 1120; CHECK-SSE2-NEXT: por %xmm4, %xmm3 1121; CHECK-SSE2-NEXT: pxor %xmm5, %xmm3 1122; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 1123; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 1124; CHECK-SSE2-NEXT: pxor %xmm3, %xmm1 1125; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1126; CHECK-SSE2-NEXT: pcmpeqd %xmm2, %xmm0 1127; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0] 1128; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2] 1129; CHECK-SSE2-NEXT: psrld $31, %xmm1 1130; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 1131; CHECK-SSE2-NEXT: retq 1132; 1133; CHECK-SSE41-LABEL: test_srem_even_INT_MIN: 1134; CHECK-SSE41: # %bb.0: 1135; 
CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 1136; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [3067833783,3067833783,1,3067833783] 1137; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2 1138; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 1139; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 1140; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 1141; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 1142; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 1143; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7] 1144; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,2,2] 1145; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] 1146; CHECK-SSE41-NEXT: por %xmm4, %xmm3 1147; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [306783378,306783378,1,306783378] 1148; CHECK-SSE41-NEXT: pminud %xmm3, %xmm2 1149; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm2 1150; CHECK-SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1151; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1152; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7] 1153; CHECK-SSE41-NEXT: psrld $31, %xmm0 1154; CHECK-SSE41-NEXT: retq 1155; 1156; CHECK-AVX1-LABEL: test_srem_even_INT_MIN: 1157; CHECK-AVX1: # %bb.0: 1158; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1159; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 1160; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1161; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 1162; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 1163; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1164; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 1165; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7] 1166; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2] 1167; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] 
1168; CHECK-AVX1-NEXT: vpor %xmm4, %xmm2, %xmm2 1169; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 1170; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 1171; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1172; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1173; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7] 1174; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1175; CHECK-AVX1-NEXT: retq 1176; 1177; CHECK-AVX2-LABEL: test_srem_even_INT_MIN: 1178; CHECK-AVX2: # %bb.0: 1179; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1180; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 1181; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1182; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 1183; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1184; CHECK-AVX2-NEXT: vpor %xmm3, %xmm2, %xmm2 1185; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 1186; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 1187; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647] 1188; CHECK-AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0 1189; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1190; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3] 1191; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1192; CHECK-AVX2-NEXT: retq 1193; 1194; CHECK-AVX512VL-LABEL: test_srem_even_INT_MIN: 1195; CHECK-AVX512VL: # %bb.0: 1196; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1197; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2 1198; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1 1199; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1200; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1201; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1202; CHECK-AVX512VL-NEXT: vpminud 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 1203; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 1204; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] 1205; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1206; CHECK-AVX512VL-NEXT: retq 1207 %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 2147483648, i32 14> 1208 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1209 %ret = zext <4 x i1> %cmp to <4 x i32> 1210 ret <4 x i32> %ret 1211} 1212 1213; One INT_MIN divisor in odd+even divisor 1214define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind { 1215; CHECK-SSE2-LABEL: test_srem_odd_even_INT_MIN: 1216; CHECK-SSE2: # %bb.0: 1217; CHECK-SSE2-NEXT: pxor %xmm2, %xmm2 1218; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3067833783,1,3264175145] 1219; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1 1220; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1221; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1222; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 1223; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 1224; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1225; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1226; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,2147483648,2,1073741824] 1227; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 1228; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,3,2,3] 1229; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1230; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1231; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3] 1232; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] 1233; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 1234; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1235; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] 1236; CHECK-SSE2-NEXT: por %xmm4, %xmm3 1237; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 
1238; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 1239; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 1240; CHECK-SSE2-NEXT: pxor %xmm3, %xmm1 1241; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1242; CHECK-SSE2-NEXT: pcmpeqd %xmm2, %xmm0 1243; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0] 1244; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2] 1245; CHECK-SSE2-NEXT: psrld $31, %xmm1 1246; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 1247; CHECK-SSE2-NEXT: retq 1248; 1249; CHECK-SSE41-LABEL: test_srem_odd_even_INT_MIN: 1250; CHECK-SSE41: # %bb.0: 1251; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 1252; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [3435973837,3067833783,1,3264175145] 1253; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2 1254; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 1255; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 1256; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 1257; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 1258; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 1259; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7] 1260; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,2,2] 1261; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] 1262; CHECK-SSE41-NEXT: por %xmm4, %xmm3 1263; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [858993458,306783378,1,42949672] 1264; CHECK-SSE41-NEXT: pminud %xmm3, %xmm2 1265; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm2 1266; CHECK-SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1267; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1268; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7] 1269; CHECK-SSE41-NEXT: psrld $31, %xmm0 1270; CHECK-SSE41-NEXT: retq 1271; 1272; CHECK-AVX1-LABEL: test_srem_odd_even_INT_MIN: 1273; CHECK-AVX1: # %bb.0: 1274; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1275; CHECK-AVX1-NEXT: vpmulld 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 1276; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1277; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 1278; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 1279; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1280; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 1281; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7] 1282; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,2,2] 1283; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] 1284; CHECK-AVX1-NEXT: vpor %xmm4, %xmm2, %xmm2 1285; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 1286; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 1287; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1288; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1289; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7] 1290; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1291; CHECK-AVX1-NEXT: retq 1292; 1293; CHECK-AVX2-LABEL: test_srem_odd_even_INT_MIN: 1294; CHECK-AVX2: # %bb.0: 1295; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1296; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 1297; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1298; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 1299; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1300; CHECK-AVX2-NEXT: vpor %xmm3, %xmm2, %xmm2 1301; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 1302; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 1303; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647] 1304; CHECK-AVX2-NEXT: vpand %xmm3, %xmm0, %xmm0 1305; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1306; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3] 1307; 
CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1308; CHECK-AVX2-NEXT: retq 1309; 1310; CHECK-AVX512VL-LABEL: test_srem_odd_even_INT_MIN: 1311; CHECK-AVX512VL: # %bb.0: 1312; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1313; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm2 1314; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1 1315; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1316; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1317; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1318; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 1319; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 1320; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] 1321; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1322; CHECK-AVX512VL-NEXT: retq 1323 %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 2147483648, i32 100> 1324 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1325 %ret = zext <4 x i1> %cmp to <4 x i32> 1326 ret <4 x i32> %ret 1327} 1328 1329;==============================================================================; 1330 1331; One all-ones divisor and one power-of-two divisor in odd divisor 1332define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind { 1333; CHECK-SSE2-LABEL: test_srem_odd_allones_and_poweroftwo: 1334; CHECK-SSE2: # %bb.0: 1335; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1336; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1337; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1338; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1339; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1340; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1341; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1342; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3] 1343; CHECK-SSE2-NEXT:
pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1344; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] 1345; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1346; CHECK-SSE2-NEXT: psrlq $32, %xmm0 1347; CHECK-SSE2-NEXT: por %xmm2, %xmm0 1348; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1349; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1350; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1351; CHECK-SSE2-NEXT: retq 1352; 1353; CHECK-SSE41-LABEL: test_srem_odd_allones_and_poweroftwo: 1354; CHECK-SSE41: # %bb.0: 1355; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1356; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1357; CHECK-SSE41-NEXT: pmovsxdq {{.*#+}} xmm1 = [1,268435456] 1358; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1 1359; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] 1360; CHECK-SSE41-NEXT: psrlq $32, %xmm1 1361; CHECK-SSE41-NEXT: por %xmm1, %xmm0 1362; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,4294967295,268435455,858993458] 1363; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1364; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1365; CHECK-SSE41-NEXT: psrld $31, %xmm0 1366; CHECK-SSE41-NEXT: retq 1367; 1368; CHECK-AVX1-LABEL: test_srem_odd_allones_and_poweroftwo: 1369; CHECK-AVX1: # %bb.0: 1370; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1371; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1372; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1373; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] 1374; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1 1375; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 1376; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1377; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1378; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1379; CHECK-AVX1-NEXT: retq 1380; 1381; CHECK-AVX2-LABEL: 
test_srem_odd_allones_and_poweroftwo: 1382; CHECK-AVX2: # %bb.0: 1383; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1384; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1385; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1386; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1387; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1388; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1389; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1390; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1391; CHECK-AVX2-NEXT: retq 1392; 1393; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_poweroftwo: 1394; CHECK-AVX512VL: # %bb.0: 1395; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1396; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1397; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1398; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1399; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1400; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1401; CHECK-AVX512VL-NEXT: retq 1402 %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 5> 1403 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1404 %ret = zext <4 x i1> %cmp to <4 x i32> 1405 ret <4 x i32> %ret 1406} 1407 1408; One all-ones divisor and one power-of-two divisor in even divisor 1409define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind { 1410; CHECK-SSE2-LABEL: test_srem_even_allones_and_poweroftwo: 1411; CHECK-SSE2: # %bb.0: 1412; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1413; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1414; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1415; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1416; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1417; CHECK-SSE2-NEXT: punpckldq
{{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1418; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1419; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,1,268435456,2147483648] 1420; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1 1421; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3] 1422; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1423; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 1424; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3] 1425; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 1426; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 1427; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3] 1428; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1429; CHECK-SSE2-NEXT: por %xmm2, %xmm0 1430; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1431; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1432; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1433; CHECK-SSE2-NEXT: retq 1434; 1435; CHECK-SSE41-LABEL: test_srem_even_allones_and_poweroftwo: 1436; CHECK-SSE41: # %bb.0: 1437; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1438; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1439; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1440; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1441; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1442; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1443; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1444; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1445; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1446; CHECK-SSE41-NEXT: por %xmm2, %xmm0 1447; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,4294967295,268435455,306783378] 1448; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1449; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 
1450; CHECK-SSE41-NEXT: psrld $31, %xmm0 1451; CHECK-SSE41-NEXT: retq 1452; 1453; CHECK-AVX1-LABEL: test_srem_even_allones_and_poweroftwo: 1454; CHECK-AVX1: # %bb.0: 1455; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1456; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1457; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1458; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1459; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1460; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1461; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1462; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1463; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1464; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1465; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1466; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1467; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1468; CHECK-AVX1-NEXT: retq 1469; 1470; CHECK-AVX2-LABEL: test_srem_even_allones_and_poweroftwo: 1471; CHECK-AVX2: # %bb.0: 1472; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1473; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1474; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1475; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1476; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1477; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1478; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1479; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1480; CHECK-AVX2-NEXT: retq 1481; 1482; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_poweroftwo: 1483; CHECK-AVX512VL: # %bb.0: 1484; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1485; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 
1486; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1487; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1488; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1489; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1490; CHECK-AVX512VL-NEXT: retq 1491 %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 14> 1492 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1493 %ret = zext <4 x i1> %cmp to <4 x i32> 1494 ret <4 x i32> %ret 1495} 1496 1497; One all-ones divisor and one power-of-two divisor in odd+even divisor 1498define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwind { 1499; CHECK-SSE2-LABEL: test_srem_odd_even_allones_and_poweroftwo: 1500; CHECK-SSE2: # %bb.0: 1501; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1502; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1503; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1504; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1505; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1506; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1507; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1508; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,268435456,1073741824] 1509; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1 1510; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3] 1511; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1512; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 1513; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3] 1514; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 1515; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 1516; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3] 1517; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1518; CHECK-SSE2-NEXT: por %xmm2, %xmm0 1519; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip),
%xmm0 1520; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1521; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1522; CHECK-SSE2-NEXT: retq 1523; 1524; CHECK-SSE41-LABEL: test_srem_odd_even_allones_and_poweroftwo: 1525; CHECK-SSE41: # %bb.0: 1526; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1527; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1528; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1529; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1530; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1531; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1532; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1533; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1534; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1535; CHECK-SSE41-NEXT: por %xmm2, %xmm0 1536; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,4294967295,268435455,42949672] 1537; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1538; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1539; CHECK-SSE41-NEXT: psrld $31, %xmm0 1540; CHECK-SSE41-NEXT: retq 1541; 1542; CHECK-AVX1-LABEL: test_srem_odd_even_allones_and_poweroftwo: 1543; CHECK-AVX1: # %bb.0: 1544; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1545; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1546; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1547; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1548; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1549; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1550; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1551; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1552; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1553; CHECK-AVX1-NEXT: vpor %xmm2, 
%xmm0, %xmm0 1554; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1555; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1556; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1557; CHECK-AVX1-NEXT: retq 1558; 1559; CHECK-AVX2-LABEL: test_srem_odd_even_allones_and_poweroftwo: 1560; CHECK-AVX2: # %bb.0: 1561; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1562; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1563; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1564; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1565; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1566; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1567; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1568; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1569; CHECK-AVX2-NEXT: retq 1570; 1571; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_and_poweroftwo: 1572; CHECK-AVX512VL: # %bb.0: 1573; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1574; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1575; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1576; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1577; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1578; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1579; CHECK-AVX512VL-NEXT: retq 1580 %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 100> 1581 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1582 %ret = zext <4 x i1> %cmp to <4 x i32> 1583 ret <4 x i32> %ret 1584} 1585 1586;------------------------------------------------------------------------------; 1587 1588; One all-ones divisor and one one divisor in odd divisor 1589define <4 x i32> @test_srem_odd_allones_and_one(<4 x i32> %X) nounwind { 1590; CHECK-SSE2-LABEL: test_srem_odd_allones_and_one: 1591; CHECK-SSE2: # %bb.0: 1592; CHECK-SSE2-NEXT: movdqa 
{{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837] 1593; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1594; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0 1595; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1596; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 1597; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] 1598; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1599; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1600; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1601; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1602; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1603; CHECK-SSE2-NEXT: retq 1604; 1605; CHECK-SSE41-LABEL: test_srem_odd_allones_and_one: 1606; CHECK-SSE41: # %bb.0: 1607; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1608; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1609; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,4294967295,4294967295,858993458] 1610; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1611; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1612; CHECK-SSE41-NEXT: psrld $31, %xmm0 1613; CHECK-SSE41-NEXT: retq 1614; 1615; CHECK-AVX1-LABEL: test_srem_odd_allones_and_one: 1616; CHECK-AVX1: # %bb.0: 1617; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1618; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1619; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1620; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1621; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1622; CHECK-AVX1-NEXT: retq 1623; 1624; CHECK-AVX2-LABEL: test_srem_odd_allones_and_one: 1625; CHECK-AVX2: # %bb.0: 1626; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837] 1627; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1628; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729] 1629; CHECK-AVX2-NEXT: vpaddd %xmm1, 
%xmm0, %xmm0 1630; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1631; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1632; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1633; CHECK-AVX2-NEXT: retq 1634; 1635; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_one: 1636; CHECK-AVX512VL: # %bb.0: 1637; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1638; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1639; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1640; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1641; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1642; CHECK-AVX512VL-NEXT: retq 1643 %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 5> 1644 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1645 %ret = zext <4 x i1> %cmp to <4 x i32> 1646 ret <4 x i32> %ret 1647} 1648 1649; One all-ones divisor and one one divisor in even divisor 1650define <4 x i32> @test_srem_even_allones_and_one(<4 x i32> %X) nounwind { 1651; CHECK-SSE2-LABEL: test_srem_even_allones_and_one: 1652; CHECK-SSE2: # %bb.0: 1653; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783] 1654; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1655; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0 1656; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1657; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 1658; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] 1659; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1660; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1661; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 1662; CHECK-SSE2-NEXT: psrld $1, %xmm1 1663; CHECK-SSE2-NEXT: pslld $31, %xmm0 1664; CHECK-SSE2-NEXT: por %xmm1, %xmm0 1665; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1666; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1667; CHECK-SSE2-NEXT: pandn 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1668; CHECK-SSE2-NEXT: retq 1669; 1670; CHECK-SSE41-LABEL: test_srem_even_allones_and_one: 1671; CHECK-SSE41: # %bb.0: 1672; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1673; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1674; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 1675; CHECK-SSE41-NEXT: psrld $1, %xmm1 1676; CHECK-SSE41-NEXT: pslld $31, %xmm0 1677; CHECK-SSE41-NEXT: por %xmm1, %xmm0 1678; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,4294967295,4294967295,306783378] 1679; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1680; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1681; CHECK-SSE41-NEXT: psrld $31, %xmm0 1682; CHECK-SSE41-NEXT: retq 1683; 1684; CHECK-AVX1-LABEL: test_srem_even_allones_and_one: 1685; CHECK-AVX1: # %bb.0: 1686; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1687; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1688; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1 1689; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0 1690; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 1691; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1692; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1693; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1694; CHECK-AVX1-NEXT: retq 1695; 1696; CHECK-AVX2-LABEL: test_srem_even_allones_and_one: 1697; CHECK-AVX2: # %bb.0: 1698; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783] 1699; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1700; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [306783378,306783378,306783378,306783378] 1701; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 1702; CHECK-AVX2-NEXT: vpsrld $1, %xmm0, %xmm1 1703; CHECK-AVX2-NEXT: vpslld $31, %xmm0, %xmm0 1704; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1705; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1706; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1707; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 
1708; CHECK-AVX2-NEXT: retq 1709; 1710; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_one: 1711; CHECK-AVX512VL: # %bb.0: 1712; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1713; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1714; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0 1715; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1716; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1717; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1718; CHECK-AVX512VL-NEXT: retq 1719 %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 1, i32 14> 1720 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1721 %ret = zext <4 x i1> %cmp to <4 x i32> 1722 ret <4 x i32> %ret 1723} 1724 1725; One all-ones divisor and one one divisor in odd+even divisor 1726define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind { 1727; CHECK-SSE2-LABEL: test_srem_odd_even_allones_and_one: 1728; CHECK-SSE2: # %bb.0: 1729; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1730; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1731; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1732; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1733; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1734; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1735; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1736; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3] 1737; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1738; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1739; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] 1740; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1741; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1742; CHECK-SSE2-NEXT: por %xmm1, %xmm0 1743; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1744; 
CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1745; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1746; CHECK-SSE2-NEXT: retq 1747; 1748; CHECK-SSE41-LABEL: test_srem_odd_even_allones_and_one: 1749; CHECK-SSE41: # %bb.0: 1750; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1751; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1752; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1753; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1754; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 1755; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1756; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1757; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1758; CHECK-SSE41-NEXT: por %xmm2, %xmm0 1759; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,4294967295,4294967295,42949672] 1760; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1761; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1762; CHECK-SSE41-NEXT: psrld $31, %xmm0 1763; CHECK-SSE41-NEXT: retq 1764; 1765; CHECK-AVX1-LABEL: test_srem_odd_even_allones_and_one: 1766; CHECK-AVX1: # %bb.0: 1767; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1768; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1769; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1770; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1771; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1772; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1773; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1774; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1775; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1776; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1777; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1778; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1779; 
CHECK-AVX1-NEXT: retq 1780; 1781; CHECK-AVX2-LABEL: test_srem_odd_even_allones_and_one: 1782; CHECK-AVX2: # %bb.0: 1783; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1784; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1785; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1786; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1787; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1788; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1789; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1790; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1791; CHECK-AVX2-NEXT: retq 1792; 1793; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_and_one: 1794; CHECK-AVX512VL: # %bb.0: 1795; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1796; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1797; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1798; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1799; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1800; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1801; CHECK-AVX512VL-NEXT: retq 1802 %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 100> 1803 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1804 %ret = zext <4 x i1> %cmp to <4 x i32> 1805 ret <4 x i32> %ret 1806} 1807 1808;------------------------------------------------------------------------------; 1809 1810; One power-of-two divisor and one one divisor in odd divisor 1811define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind { 1812; CHECK-SSE2-LABEL: test_srem_odd_poweroftwo_and_one: 1813; CHECK-SSE2: # %bb.0: 1814; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1815; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1816; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1817; CHECK-SSE2-NEXT: pmuludq
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1818; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1819; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1820; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1821; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3] 1822; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1823; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1824; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] 1825; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1826; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1827; CHECK-SSE2-NEXT: por %xmm1, %xmm0 1828; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1829; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1830; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1831; CHECK-SSE2-NEXT: retq 1832; 1833; CHECK-SSE41-LABEL: test_srem_odd_poweroftwo_and_one: 1834; CHECK-SSE41: # %bb.0: 1835; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1836; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1837; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1838; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1839; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 1840; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1841; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1842; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1843; CHECK-SSE41-NEXT: por %xmm2, %xmm0 1844; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,268435455,4294967295,858993458] 1845; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1846; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1847; CHECK-SSE41-NEXT: psrld $31, %xmm0 1848; CHECK-SSE41-NEXT: retq 1849; 1850; CHECK-AVX1-LABEL: test_srem_odd_poweroftwo_and_one: 1851; CHECK-AVX1: # %bb.0: 1852; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%xmm0, %xmm0 1853; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1854; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1855; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1856; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1857; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1858; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1859; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1860; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1861; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1862; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1863; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1864; CHECK-AVX1-NEXT: retq 1865; 1866; CHECK-AVX2-LABEL: test_srem_odd_poweroftwo_and_one: 1867; CHECK-AVX2: # %bb.0: 1868; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1869; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1870; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1871; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1872; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1873; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1874; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1875; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1876; CHECK-AVX2-NEXT: retq 1877; 1878; CHECK-AVX512VL-LABEL: test_srem_odd_poweroftwo_and_one: 1879; CHECK-AVX512VL: # %bb.0: 1880; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1881; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1882; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1883; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1884; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1885; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1886; CHECK-AVX512VL-NEXT: retq 1887 %srem = srem <4 x i32> %X, 
<i32 5, i32 16, i32 1, i32 5> 1888 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1889 %ret = zext <4 x i1> %cmp to <4 x i32> 1890 ret <4 x i32> %ret 1891} 1892 1893; One power-of-two divisor and one divisor of one among even divisors 1894define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind { 1895; CHECK-SSE2-LABEL: test_srem_even_poweroftwo_and_one: 1896; CHECK-SSE2: # %bb.0: 1897; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1898; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1899; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1900; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1901; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1902; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1903; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1904; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,268435456,1,2147483648] 1905; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1 1906; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3] 1907; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1908; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 1909; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,3,2,3] 1910; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 1911; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 1912; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3] 1913; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1914; CHECK-SSE2-NEXT: por %xmm2, %xmm0 1915; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1916; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1917; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1918; CHECK-SSE2-NEXT: retq 1919; 1920; CHECK-SSE41-LABEL: test_srem_even_poweroftwo_and_one: 1921; CHECK-SSE41: # %bb.0: 1922; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1923; CHECK-SSE41-NEXT: paddd 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1924; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1925; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1926; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1927; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1928; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1929; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1930; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1931; CHECK-SSE41-NEXT: por %xmm2, %xmm0 1932; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,268435455,4294967295,306783378] 1933; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1934; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1935; CHECK-SSE41-NEXT: psrld $31, %xmm0 1936; CHECK-SSE41-NEXT: retq 1937; 1938; CHECK-AVX1-LABEL: test_srem_even_poweroftwo_and_one: 1939; CHECK-AVX1: # %bb.0: 1940; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1941; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1942; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1943; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1944; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1945; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1946; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1947; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1948; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1949; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1950; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1951; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1952; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1953; CHECK-AVX1-NEXT: retq 1954; 1955; CHECK-AVX2-LABEL: test_srem_even_poweroftwo_and_one: 1956; CHECK-AVX2: # %bb.0: 1957; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, 
%xmm0 1958; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1959; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1960; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1961; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1962; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1963; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1964; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1965; CHECK-AVX2-NEXT: retq 1966; 1967; CHECK-AVX512VL-LABEL: test_srem_even_poweroftwo_and_one: 1968; CHECK-AVX512VL: # %bb.0: 1969; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1970; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1971; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1972; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1973; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1974; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1975; CHECK-AVX512VL-NEXT: retq 1976 %srem = srem <4 x i32> %X, <i32 14, i32 16, i32 1, i32 14> 1977 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1978 %ret = zext <4 x i1> %cmp to <4 x i32> 1979 ret <4 x i32> %ret 1980} 1981 1982; One power-of-two divisor and one divisor of one among odd+even divisors 1983define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind { 1984; CHECK-SSE2-LABEL: test_srem_odd_even_poweroftwo_and_one: 1985; CHECK-SSE2: # %bb.0: 1986; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1987; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1988; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1989; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1990; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1991; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1992; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1993; CHECK-SSE2-NEXT: pshufd 
{{.*#+}} xmm1 = xmm0[0,2,2,3] 1994; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1995; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1996; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] 1997; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1998; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1999; CHECK-SSE2-NEXT: por %xmm1, %xmm0 2000; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2001; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2002; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2003; CHECK-SSE2-NEXT: retq 2004; 2005; CHECK-SSE41-LABEL: test_srem_odd_even_poweroftwo_and_one: 2006; CHECK-SSE41: # %bb.0: 2007; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2008; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2009; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 2010; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 2011; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 2012; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 2013; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 2014; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 2015; CHECK-SSE41-NEXT: por %xmm2, %xmm0 2016; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,268435455,4294967295,42949672] 2017; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 2018; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 2019; CHECK-SSE41-NEXT: psrld $31, %xmm0 2020; CHECK-SSE41-NEXT: retq 2021; 2022; CHECK-AVX1-LABEL: test_srem_odd_even_poweroftwo_and_one: 2023; CHECK-AVX1: # %bb.0: 2024; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2025; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2026; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 2027; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 2028; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, 
%xmm2 2029; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 2030; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 2031; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 2032; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 2033; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2034; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2035; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 2036; CHECK-AVX1-NEXT: retq 2037; 2038; CHECK-AVX2-LABEL: test_srem_odd_even_poweroftwo_and_one: 2039; CHECK-AVX2: # %bb.0: 2040; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2041; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2042; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2043; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2044; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 2045; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2046; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2047; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 2048; CHECK-AVX2-NEXT: retq 2049; 2050; CHECK-AVX512VL-LABEL: test_srem_odd_even_poweroftwo_and_one: 2051; CHECK-AVX512VL: # %bb.0: 2052; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2053; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2054; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2055; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2056; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2057; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 2058; CHECK-AVX512VL-NEXT: retq 2059 %srem = srem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 100> 2060 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 2061 %ret = zext <4 x i1> %cmp to <4 x i32> 2062 ret <4 x i32> %ret 2063} 2064 
2065;------------------------------------------------------------------------------; 2066; One all-ones divisor, one power-of-two divisor and one divisor of one alongside an odd divisor 2067define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind { 2068; CHECK-SSE2-LABEL: test_srem_odd_allones_and_poweroftwo_and_one: 2069; CHECK-SSE2: # %bb.0: 2070; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2071; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2072; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2073; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 2074; CHECK-SSE2-NEXT: psrlq $32, %xmm1 2075; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2076; CHECK-SSE2-NEXT: por %xmm1, %xmm0 2077; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2078; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2079; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2080; CHECK-SSE2-NEXT: retq 2081; 2082; CHECK-SSE41-LABEL: test_srem_odd_allones_and_poweroftwo_and_one: 2083; CHECK-SSE41: # %bb.0: 2084; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2085; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2086; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2087; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 2088; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 2089; CHECK-SSE41-NEXT: psrlq $32, %xmm0 2090; CHECK-SSE41-NEXT: por %xmm1, %xmm0 2091; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,4294967295,268435455,4294967295] 2092; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 2093; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 2094; CHECK-SSE41-NEXT: psrld $31, %xmm0 2095; CHECK-SSE41-NEXT: retq 2096; 2097; CHECK-AVX1-LABEL: test_srem_odd_allones_and_poweroftwo_and_one: 2098; CHECK-AVX1: # %bb.0: 2099; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2100; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2101; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%xmm0, %xmm0 2102; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2103; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 2104; CHECK-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0 2105; CHECK-AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0 2106; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2107; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2108; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 2109; CHECK-AVX1-NEXT: retq 2110; 2111; CHECK-AVX2-LABEL: test_srem_odd_allones_and_poweroftwo_and_one: 2112; CHECK-AVX2: # %bb.0: 2113; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2114; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2115; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2116; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2117; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 2118; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2119; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2120; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 2121; CHECK-AVX2-NEXT: retq 2122; 2123; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_poweroftwo_and_one: 2124; CHECK-AVX512VL: # %bb.0: 2125; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2126; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2127; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2128; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2129; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2130; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 2131; CHECK-AVX512VL-NEXT: retq 2132 %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 1> 2133 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 2134 %ret = zext <4 x i1> %cmp to <4 x i32> 2135 ret <4 x i32> %ret 2136} 2137; One all-ones divisor, one power-of-two divisor and one divisor of one alongside an even divisor 2138define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind { 2139; 
CHECK-SSE2-LABEL: test_srem_even_allones_and_poweroftwo_and_one: 2140; CHECK-SSE2: # %bb.0: 2141; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2142; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2143; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2144; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 2145; CHECK-SSE2-NEXT: psrlq $32, %xmm1 2146; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2147; CHECK-SSE2-NEXT: por %xmm1, %xmm0 2148; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2149; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2150; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2151; CHECK-SSE2-NEXT: retq 2152; 2153; CHECK-SSE41-LABEL: test_srem_even_allones_and_poweroftwo_and_one: 2154; CHECK-SSE41: # %bb.0: 2155; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2156; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2157; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2158; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 2159; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 2160; CHECK-SSE41-NEXT: psrlq $32, %xmm0 2161; CHECK-SSE41-NEXT: por %xmm1, %xmm0 2162; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,4294967295,268435455,4294967295] 2163; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 2164; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 2165; CHECK-SSE41-NEXT: psrld $31, %xmm0 2166; CHECK-SSE41-NEXT: retq 2167; 2168; CHECK-AVX1-LABEL: test_srem_even_allones_and_poweroftwo_and_one: 2169; CHECK-AVX1: # %bb.0: 2170; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2171; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2172; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2173; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2174; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 2175; CHECK-AVX1-NEXT: vpsrlq $32, 
%xmm0, %xmm0 2176; CHECK-AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0 2177; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2178; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2179; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 2180; CHECK-AVX1-NEXT: retq 2181; 2182; CHECK-AVX2-LABEL: test_srem_even_allones_and_poweroftwo_and_one: 2183; CHECK-AVX2: # %bb.0: 2184; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2185; CHECK-AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2186; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2187; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2188; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 2189; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2190; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2191; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 2192; CHECK-AVX2-NEXT: retq 2193; 2194; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_poweroftwo_and_one: 2195; CHECK-AVX512VL: # %bb.0: 2196; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2197; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2198; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2199; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2200; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2201; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 2202; CHECK-AVX512VL-NEXT: retq 2203 %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 1> 2204 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 2205 %ret = zext <4 x i1> %cmp to <4 x i32> 2206 ret <4 x i32> %ret 2207} 2208 2209; PR51133: the VSELECT should have i1 element type (srem of <32 x i8> by non-splat constants, compared ne zero and ANDed with a second ne-zero compare) 2210define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) { 2211; CHECK-SSE2-LABEL: pr51133: 2212; CHECK-SSE2: # %bb.0: 2213; CHECK-SSE2-NEXT: movq %rdi, %rax 2214; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm5 2215; CHECK-SSE2-NEXT: 
punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2216; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5 # [9,0,41,183,1,1,161,221] 2217; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] 2218; CHECK-SSE2-NEXT: pand %xmm4, %xmm5 2219; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm6 2220; CHECK-SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2221; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 # [171,103,183,171,61,1,127,183] 2222; CHECK-SSE2-NEXT: pand %xmm4, %xmm6 2223; CHECK-SSE2-NEXT: packuswb %xmm5, %xmm6 2224; CHECK-SSE2-NEXT: paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 2225; CHECK-SSE2-NEXT: movdqa %xmm6, %xmm5 2226; CHECK-SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm6[8],xmm5[9],xmm6[9],xmm5[10],xmm6[10],xmm5[11],xmm6[11],xmm5[12],xmm6[12],xmm5[13],xmm6[13],xmm5[14],xmm6[14],xmm5[15],xmm6[15] 2227; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5 # [128,1,128,1,128,32,1,1] 2228; CHECK-SSE2-NEXT: psrlw $8, %xmm5 2229; CHECK-SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2230; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 # [1,1,1,128,64,2,1,32] 2231; CHECK-SSE2-NEXT: psrlw $8, %xmm6 2232; CHECK-SSE2-NEXT: packuswb %xmm5, %xmm6 2233; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm7 = [84,2,36,42,2,1,2,4,2,255,4,36,127,31,2,2] 2234; CHECK-SSE2-NEXT: pminub %xmm6, %xmm7 2235; CHECK-SSE2-NEXT: pcmpeqb %xmm6, %xmm7 2236; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] 2237; CHECK-SSE2-NEXT: pandn %xmm5, %xmm7 2238; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 2239; CHECK-SSE2-NEXT: pxor %xmm6, %xmm6 2240; CHECK-SSE2-NEXT: pcmpgtb %xmm6, %xmm1 2241; CHECK-SSE2-NEXT: pandn %xmm1, %xmm5 2242; CHECK-SSE2-NEXT: por %xmm7, %xmm5 2243; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 2244; CHECK-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = 
xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2245; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [223,223,205,183,161,1,171,239] 2246; CHECK-SSE2-NEXT: pand %xmm4, %xmm1 2247; CHECK-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2248; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [197,205,27,241,1,1,1,163] 2249; CHECK-SSE2-NEXT: pand %xmm4, %xmm0 2250; CHECK-SSE2-NEXT: packuswb %xmm1, %xmm0 2251; CHECK-SSE2-NEXT: paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2252; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 2253; CHECK-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 2254; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [128,128,1,1,1,128,1,64] 2255; CHECK-SSE2-NEXT: psrlw $8, %xmm1 2256; CHECK-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2257; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,1,1,128,128,32,128,32] 2258; CHECK-SSE2-NEXT: psrlw $8, %xmm0 2259; CHECK-SSE2-NEXT: packuswb %xmm1, %xmm0 2260; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [19,51,13,7,128,32,128,3,5,5,51,37,3,128,85,5] 2261; CHECK-SSE2-NEXT: pmaxub %xmm0, %xmm1 2262; CHECK-SSE2-NEXT: pcmpeqb %xmm0, %xmm1 2263; CHECK-SSE2-NEXT: pcmpeqb %xmm6, %xmm3 2264; CHECK-SSE2-NEXT: pandn %xmm5, %xmm3 2265; CHECK-SSE2-NEXT: pcmpeqb %xmm6, %xmm2 2266; CHECK-SSE2-NEXT: pandn %xmm1, %xmm2 2267; CHECK-SSE2-NEXT: pmovmskb %xmm2, %ecx 2268; CHECK-SSE2-NEXT: pmovmskb %xmm3, %edx 2269; CHECK-SSE2-NEXT: shll $16, %edx 2270; CHECK-SSE2-NEXT: orl %ecx, %edx 2271; CHECK-SSE2-NEXT: movl %edx, (%rdi) 2272; CHECK-SSE2-NEXT: retq 2273; 2274; CHECK-SSE41-LABEL: pr51133: 2275; CHECK-SSE41: # %bb.0: 2276; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm4 2277; CHECK-SSE41-NEXT: movq %rdi, %rax 2278; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm0 2279; CHECK-SSE41-NEXT: pmaddubsw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [171,0,183,0,61,0,127,0,9,0,41,0,1,0,161,0] 2280; CHECK-SSE41-NEXT: pmovzxbw {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255] 2281; CHECK-SSE41-NEXT: pand %xmm5, %xmm0 2282; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm6 2283; CHECK-SSE41-NEXT: pmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 # [0,103,0,171,0,1,0,183,0,0,0,183,0,1,0,221] 2284; CHECK-SSE41-NEXT: psllw $8, %xmm6 2285; CHECK-SSE41-NEXT: por %xmm0, %xmm6 2286; CHECK-SSE41-NEXT: paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 2287; CHECK-SSE41-NEXT: movdqa %xmm6, %xmm0 2288; CHECK-SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm6[8],xmm0[9],xmm6[9],xmm0[10],xmm6[10],xmm0[11],xmm6[11],xmm0[12],xmm6[12],xmm0[13],xmm6[13],xmm0[14],xmm6[14],xmm0[15],xmm6[15] 2289; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,1,128,1,128,32,1,1] 2290; CHECK-SSE41-NEXT: psrlw $8, %xmm0 2291; CHECK-SSE41-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2292; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 # [1,1,1,128,64,2,1,32] 2293; CHECK-SSE41-NEXT: psrlw $8, %xmm6 2294; CHECK-SSE41-NEXT: packuswb %xmm0, %xmm6 2295; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm0 = [84,2,36,42,2,1,2,4,2,255,4,36,127,31,2,2] 2296; CHECK-SSE41-NEXT: pminub %xmm6, %xmm0 2297; CHECK-SSE41-NEXT: pcmpeqb %xmm6, %xmm0 2298; CHECK-SSE41-NEXT: pcmpeqd %xmm7, %xmm7 2299; CHECK-SSE41-NEXT: pxor %xmm0, %xmm7 2300; CHECK-SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 2301; CHECK-SSE41-NEXT: pxor %xmm6, %xmm6 2302; CHECK-SSE41-NEXT: pcmpgtb %xmm6, %xmm1 2303; CHECK-SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] 2304; CHECK-SSE41-NEXT: pblendvb %xmm0, %xmm7, %xmm1 2305; CHECK-SSE41-NEXT: movdqa %xmm4, %xmm0 2306; CHECK-SSE41-NEXT: pmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [197,0,27,0,1,0,1,0,223,0,205,0,161,0,171,0] 2307; CHECK-SSE41-NEXT: pand %xmm5, %xmm0 2308; CHECK-SSE41-NEXT: pmaddubsw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [0,205,0,241,0,1,0,163,0,223,0,183,0,1,0,239] 2309; CHECK-SSE41-NEXT: psllw $8, %xmm4 2310; CHECK-SSE41-NEXT: por %xmm0, %xmm4 2311; CHECK-SSE41-NEXT: paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 2312; CHECK-SSE41-NEXT: movdqa %xmm4, %xmm0 2313; CHECK-SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm4[8],xmm0[9],xmm4[9],xmm0[10],xmm4[10],xmm0[11],xmm4[11],xmm0[12],xmm4[12],xmm0[13],xmm4[13],xmm0[14],xmm4[14],xmm0[15],xmm4[15] 2314; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,128,1,1,1,128,1,64] 2315; CHECK-SSE41-NEXT: psrlw $8, %xmm0 2316; CHECK-SSE41-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2317; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [1,1,1,128,128,32,128,32] 2318; CHECK-SSE41-NEXT: psrlw $8, %xmm4 2319; CHECK-SSE41-NEXT: packuswb %xmm0, %xmm4 2320; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm0 = [19,51,13,7,128,32,128,3,5,5,51,37,3,128,85,5] 2321; CHECK-SSE41-NEXT: pmaxub %xmm4, %xmm0 2322; CHECK-SSE41-NEXT: pcmpeqb %xmm4, %xmm0 2323; CHECK-SSE41-NEXT: pcmpeqb %xmm6, %xmm3 2324; CHECK-SSE41-NEXT: pandn %xmm1, %xmm3 2325; CHECK-SSE41-NEXT: pcmpeqb %xmm6, %xmm2 2326; CHECK-SSE41-NEXT: pandn %xmm0, %xmm2 2327; CHECK-SSE41-NEXT: pmovmskb %xmm2, %ecx 2328; CHECK-SSE41-NEXT: pmovmskb %xmm3, %edx 2329; CHECK-SSE41-NEXT: shll $16, %edx 2330; CHECK-SSE41-NEXT: orl %ecx, %edx 2331; CHECK-SSE41-NEXT: movl %edx, (%rdi) 2332; CHECK-SSE41-NEXT: retq 2333; 2334; CHECK-AVX1-LABEL: pr51133: 2335; CHECK-AVX1: # %bb.0: 2336; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 2337; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] 2338; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [34048,34048,26368,37632,21760,33024,22016,35072] 2339; CHECK-AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 2340; CHECK-AVX1-NEXT: vpunpcklbw {{.*#+}} 
xmm4 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2341; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [20224,26368,6912,30976,33024,33024,33024,12032] 2342; CHECK-AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 2343; CHECK-AVX1-NEXT: vpackuswb %xmm3, %xmm4, %xmm4 2344; CHECK-AVX1-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm5 # [0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0] 2345; CHECK-AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] 2346; CHECK-AVX1-NEXT: vpand %xmm3, %xmm5, %xmm5 2347; CHECK-AVX1-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm6 # [0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,1] 2348; CHECK-AVX1-NEXT: vpsllw $8, %xmm6, %xmm6 2349; CHECK-AVX1-NEXT: vpor %xmm6, %xmm5, %xmm5 2350; CHECK-AVX1-NEXT: vpaddb %xmm5, %xmm4, %xmm4 2351; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2352; CHECK-AVX1-NEXT: vpsraw $8, %xmm5, %xmm5 2353; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [8,8,128,64,8,256,256,8] 2354; CHECK-AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5 2355; CHECK-AVX1-NEXT: vpunpcklbw {{.*#+}} xmm6 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2356; CHECK-AVX1-NEXT: vpsraw $8, %xmm6, %xmm6 2357; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [64,128,128,16,256,64,256,16] 2358; CHECK-AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6 2359; CHECK-AVX1-NEXT: vpackuswb %xmm5, %xmm6, %xmm6 2360; CHECK-AVX1-NEXT: vpsrlw $7, %xmm4, %xmm4 2361; CHECK-AVX1-NEXT: vbroadcastss {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 2362; CHECK-AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4 2363; CHECK-AVX1-NEXT: vpaddb %xmm4, %xmm6, %xmm4 2364; CHECK-AVX1-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm6 # [13,0,19,0,2,0,2,0,62,0,5,0,97,0,3,0] 2365; CHECK-AVX1-NEXT: vpand %xmm3, %xmm6, %xmm6 2366; CHECK-AVX1-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # 
[0,5,0,34,0,8,0,88,0,62,0,7,0,2,0,60] 2367; CHECK-AVX1-NEXT: vpsllw $8, %xmm4, %xmm4 2368; CHECK-AVX1-NEXT: vpor %xmm4, %xmm6, %xmm4 2369; CHECK-AVX1-NEXT: vpsubb %xmm4, %xmm0, %xmm4 2370; CHECK-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2371; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] 2372; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [2304,0,10496,37632,33024,33024,21760,36096] 2373; CHECK-AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6 2374; CHECK-AVX1-NEXT: vpunpcklbw {{.*#+}} xmm7 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2375; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7, %xmm7 # [22016,24320,37632,11008,12544,32512,16640,37632] 2376; CHECK-AVX1-NEXT: vpsrlw $8, %xmm7, %xmm7 2377; CHECK-AVX1-NEXT: vpackuswb %xmm6, %xmm7, %xmm6 2378; CHECK-AVX1-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm7 # [0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0] 2379; CHECK-AVX1-NEXT: vpand %xmm3, %xmm7, %xmm7 2380; CHECK-AVX1-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm8 # [0,0,0,0,0,255,0,1,0,1,0,1,0,1,0,1] 2381; CHECK-AVX1-NEXT: vpsllw $8, %xmm8, %xmm8 2382; CHECK-AVX1-NEXT: vpor %xmm7, %xmm8, %xmm7 2383; CHECK-AVX1-NEXT: vpaddb %xmm7, %xmm6, %xmm6 2384; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm7 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2385; CHECK-AVX1-NEXT: vpsraw $8, %xmm7, %xmm7 2386; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7, %xmm7 # [64,256,32,64,256,64,8,4] 2387; CHECK-AVX1-NEXT: vpsrlw $8, %xmm7, %xmm7 2388; CHECK-AVX1-NEXT: vpunpcklbw {{.*#+}} xmm8 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2389; CHECK-AVX1-NEXT: vpsraw $8, %xmm8, %xmm8 2390; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm8, %xmm8 # [256,8,64,256,16,4,8,8] 2391; CHECK-AVX1-NEXT: vpsrlw $8, 
%xmm8, %xmm8 2392; CHECK-AVX1-NEXT: vpackuswb %xmm7, %xmm8, %xmm7 2393; CHECK-AVX1-NEXT: vpsrlw $7, %xmm6, %xmm6 2394; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 2395; CHECK-AVX1-NEXT: vpand %xmm5, %xmm6, %xmm5 2396; CHECK-AVX1-NEXT: vpaddb %xmm5, %xmm7, %xmm5 2397; CHECK-AVX1-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm6 # [3,0,7,0,84,0,127,0,114,0,50,0,2,0,97,0] 2398; CHECK-AVX1-NEXT: vpand %xmm3, %xmm6, %xmm3 2399; CHECK-AVX1-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [0,87,0,6,0,128,0,56,0,1,0,7,0,8,0,117] 2400; CHECK-AVX1-NEXT: vpsllw $8, %xmm5, %xmm5 2401; CHECK-AVX1-NEXT: vpor %xmm5, %xmm3, %xmm3 2402; CHECK-AVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0 2403; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0 2404; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm4, %xmm3 2405; CHECK-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 2406; CHECK-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 2407; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3 2408; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1 2409; CHECK-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2410; CHECK-AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0 2411; CHECK-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 2412; CHECK-AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 2413; CHECK-AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0 2414; CHECK-AVX1-NEXT: retq 2415; 2416; CHECK-AVX2-LABEL: pr51133: 2417; CHECK-AVX2: # %bb.0: 2418; CHECK-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 2419; CHECK-AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31] 2420; CHECK-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [34048,34048,26368,37632,21760,33024,22016,35072,2304,0,10496,37632,33024,33024,21760,36096] 2421; CHECK-AVX2-NEXT: vpsrlw $8, %ymm3, %ymm3 
2422; CHECK-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23] 2423; CHECK-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [20224,26368,6912,30976,33024,33024,33024,12032,22016,24320,37632,11008,12544,32512,16640,37632] 2424; CHECK-AVX2-NEXT: vpsrlw $8, %ymm4, %ymm4 2425; CHECK-AVX2-NEXT: vpackuswb %ymm3, %ymm4, %ymm3 2426; CHECK-AVX2-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm4 # [0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0] 2427; CHECK-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] 2428; CHECK-AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4 2429; CHECK-AVX2-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm6 # [0,0,0,0,0,1,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,255,0,1,0,1,0,1,0,1,0,1] 2430; CHECK-AVX2-NEXT: vpsllw $8, %ymm6, %ymm6 2431; CHECK-AVX2-NEXT: vpor %ymm6, %ymm4, %ymm4 2432; CHECK-AVX2-NEXT: vpaddb %ymm4, %ymm3, %ymm3 2433; CHECK-AVX2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 2434; CHECK-AVX2-NEXT: vpsraw $8, %ymm4, %ymm4 2435; CHECK-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [8,8,128,64,8,256,256,8,64,256,32,64,256,64,8,4] 2436; CHECK-AVX2-NEXT: vpsrlw $8, %ymm4, %ymm4 2437; CHECK-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm6 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 2438; CHECK-AVX2-NEXT: vpsraw $8, %ymm6, %ymm6 2439; CHECK-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 # [64,128,128,16,256,64,256,16,256,8,64,256,16,4,8,8] 2440; CHECK-AVX2-NEXT: vpsrlw $8, %ymm6, %ymm6 2441; CHECK-AVX2-NEXT: vpackuswb %ymm4, %ymm6, %ymm4 2442; 
CHECK-AVX2-NEXT: vpsrlw $7, %ymm3, %ymm3 2443; CHECK-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 2444; CHECK-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 2445; CHECK-AVX2-NEXT: vpaddb %ymm3, %ymm4, %ymm3 2446; CHECK-AVX2-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm4 # [13,0,19,0,2,0,2,0,62,0,5,0,97,0,3,0,3,0,7,0,84,0,127,0,114,0,50,0,2,0,97,0] 2447; CHECK-AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4 2448; CHECK-AVX2-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,5,0,34,0,8,0,88,0,62,0,7,0,2,0,60,0,87,0,6,0,128,0,56,0,1,0,7,0,8,0,117] 2449; CHECK-AVX2-NEXT: vpsllw $8, %ymm3, %ymm3 2450; CHECK-AVX2-NEXT: vpor %ymm3, %ymm4, %ymm3 2451; CHECK-AVX2-NEXT: vpsubb %ymm3, %ymm0, %ymm0 2452; CHECK-AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 2453; CHECK-AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1 2454; CHECK-AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 2455; CHECK-AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 2456; CHECK-AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 2457; CHECK-AVX2-NEXT: retq 2458; 2459; CHECK-AVX512VL-LABEL: pr51133: 2460; CHECK-AVX512VL: # %bb.0: 2461; CHECK-AVX512VL-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 # [197,0,27,0,1,0,1,0,223,0,205,0,161,0,171,0,171,0,183,0,61,0,127,0,9,0,41,0,1,0,161,0] 2462; CHECK-AVX512VL-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3 # [0,205,0,241,0,1,0,163,0,223,0,183,0,1,0,239,0,103,0,171,0,1,0,183,0,0,0,183,0,1,0,221] 2463; CHECK-AVX512VL-NEXT: vpsllw $8, %ymm3, %ymm3 2464; CHECK-AVX512VL-NEXT: vpternlogd {{.*#+}} ymm3 = ymm3 | (ymm2 & mem) 2465; CHECK-AVX512VL-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm2 2466; CHECK-AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] 2467; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [128,128,1,1,1,128,1,64,128,1,128,1,128,32,1,1] 2468; CHECK-AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3 2469; 
CHECK-AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] 2470; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [1,1,1,128,128,32,128,32,1,1,1,128,64,2,1,32] 2471; CHECK-AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2 2472; CHECK-AVX512VL-NEXT: vpackuswb %ymm3, %ymm2, %ymm2 2473; CHECK-AVX512VL-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 2474; CHECK-AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2 2475; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] 2476; CHECK-AVX512VL-NEXT: vpandn %ymm3, %ymm2, %ymm2 2477; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 2478; CHECK-AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4 2479; CHECK-AVX512VL-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 2480; CHECK-AVX512VL-NEXT: vpandn %ymm0, %ymm3, %ymm3 2481; CHECK-AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm0 2482; CHECK-AVX512VL-NEXT: vpternlogq {{.*#+}} ymm0 = ~ymm0 & (ymm2 | ymm3) 2483; CHECK-AVX512VL-NEXT: retq 2484 %rem = srem <32 x i8> %x, <i8 13, i8 5, i8 19, i8 34, i8 2, i8 8, i8 2, i8 88, i8 62, i8 62, i8 5, i8 7, i8 97, i8 2, i8 3, i8 60, i8 3, i8 87, i8 7, i8 6, i8 84, i8 -128, i8 127, i8 56, i8 114, i8 1, i8 50, i8 7, i8 2, i8 8, i8 97, i8 117> 2485 %cmp = icmp ne <32 x i8> %rem, zeroinitializer 2486 %cmp4 = icmp ne <32 x i8> %y, zeroinitializer 2487 %cmpres = and <32 x i1> %cmp4, %cmp 2488 ret <32 x i1> %cmpres 2489} 2490 