; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefix=CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefix=CHECK-SSE41
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX512VL

; Odd+Even divisors
; Returns zext(icmp eq (urem %X, <5, 14, 25, 100>), 0) as <4 x i32>.
define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    por %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_even:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,306783378,171798691,42949672]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_odd_even:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_odd_even:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 25, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

;==============================================================================;

; One all-ones divisor in odd divisor
; Returns zext(icmp eq (urem %X, <5, 5, 4294967295, 5>), 0) as <4 x i32>.
define <4 x i32> @test_urem_odd_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_allones_eq:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_allones_eq:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,858993459,1,858993459]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX-LABEL: test_urem_odd_allones_eq:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
; Returns zext(icmp ne (urem %X, <5, 5, 4294967295, 5>), 0) as <4 x i32>.
define <4 x i32> @test_urem_odd_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_allones_ne:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_allones_ne:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993460,858993460,2,858993460]
; CHECK-SSE41-NEXT:    pmaxud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX-LABEL: test_urem_odd_allones_ne:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
  %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; One all-ones divisor in even divisor
; Returns zext(icmp eq (urem %X, <14, 14, 4294967295, 14>), 0) as <4 x i32>.
define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_allones_eq:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    por %xmm4, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_even_allones_eq:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,306783378,1,306783378]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_even_allones_eq:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_even_allones_eq:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_allones_eq:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
; Returns zext(icmp ne (urem %X, <14, 14, 4294967295, 14>), 0) as <4 x i32>.
define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_allones_ne:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    por %xmm4, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_even_allones_ne:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783379,306783379,2,306783379]
; CHECK-SSE41-NEXT:    pmaxud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_even_allones_ne:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_even_allones_ne:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_allones_ne:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
  %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; One all-ones divisor in odd+even divisor
; Returns zext(icmp eq (urem %X, <5, 14, 4294967295, 100>), 0) as <4 x i32>.
define <4 x i32> @test_urem_odd_even_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even_allones_eq:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    por %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_even_allones_eq:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,306783378,1,42949672]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_odd_even_allones_eq:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_odd_even_allones_eq:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_eq:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
; Returns zext(icmp ne (urem %X, <5, 14, 4294967295, 100>), 0) as <4 x i32>.
define <4 x i32> @test_urem_odd_even_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even_allones_ne:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    por %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    psrld $31, %xmm1
; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_even_allones_ne:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    pxor %xmm2, %xmm2
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993460,306783379,2,42949673]
; CHECK-SSE41-NEXT:    pmaxud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_odd_even_allones_ne:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_odd_even_allones_ne:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_ne:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
  %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

;------------------------------------------------------------------------------;

; One power-of-two divisor in odd divisor
; Returns zext(icmp eq (urem %X, <5, 5, 16, 5>), 0) as <4 x i32>.
define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_poweroftwo:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psrlq $32, %xmm0
; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_poweroftwo:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    pmovsxdq {{.*#+}} xmm1 = [1,268435456]
; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; CHECK-SSE41-NEXT:    psrlq $32, %xmm1
; CHECK-SSE41-NEXT:    por %xmm1, %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,858993459,268435455,858993459]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_odd_poweroftwo:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; CHECK-AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_odd_poweroftwo:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_poweroftwo:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 16, i32 5>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; One power-of-two divisor in even divisor
; Returns zext(icmp eq (urem %X, <14, 14, 16, 14>), 0) as <4 x i32>.
define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_poweroftwo:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    por %xmm4, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_even_poweroftwo:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [306783378,306783378,268435455,306783378]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_even_poweroftwo:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_even_poweroftwo:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_poweroftwo:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 16, i32 14>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; One power-of-two divisor in odd+even divisor
; Returns zext(icmp eq (urem %X, <5, 14, 16, 100>), 0) as <4 x i32>.
define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even_poweroftwo:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    por %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_even_poweroftwo:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    por %xmm2, %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,306783378,268435455,42949672]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_odd_even_poweroftwo:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_odd_even_poweroftwo:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_poweroftwo:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 16, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

;------------------------------------------------------------------------------;

; One one divisor in odd divisor
define <4 x i32> @test_urem_odd_one(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_one:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_one:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,858993459,4294967295,858993459]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_odd_one:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld
$31, %xmm0, %xmm0 726; CHECK-AVX1-NEXT: retq 727; 728; CHECK-AVX2-LABEL: test_urem_odd_one: 729; CHECK-AVX2: # %bb.0: 730; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837] 731; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 732; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 733; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 734; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 735; CHECK-AVX2-NEXT: retq 736; 737; CHECK-AVX512VL-LABEL: test_urem_odd_one: 738; CHECK-AVX512VL: # %bb.0: 739; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 740; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 741; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 742; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 743; CHECK-AVX512VL-NEXT: retq 744 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 1, i32 5> 745 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 746 %ret = zext <4 x i1> %cmp to <4 x i32> 747 ret <4 x i32> %ret 748} 749 750; One one divisor in even divisor 751define <4 x i32> @test_urem_even_one(<4 x i32> %X) nounwind { 752; CHECK-SSE2-LABEL: test_urem_even_one: 753; CHECK-SSE2: # %bb.0: 754; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783] 755; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 756; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0 757; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 758; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 759; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] 760; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 761; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 762; CHECK-SSE2-NEXT: psrld $1, %xmm1 763; CHECK-SSE2-NEXT: pslld $31, %xmm0 764; CHECK-SSE2-NEXT: por %xmm1, %xmm0 765; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 766; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 767; CHECK-SSE2-NEXT: pandn 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 768; CHECK-SSE2-NEXT: retq 769; 770; CHECK-SSE41-LABEL: test_urem_even_one: 771; CHECK-SSE41: # %bb.0: 772; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 773; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 774; CHECK-SSE41-NEXT: psrld $1, %xmm1 775; CHECK-SSE41-NEXT: pslld $31, %xmm0 776; CHECK-SSE41-NEXT: por %xmm1, %xmm0 777; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,306783378,4294967295,306783378] 778; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 779; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 780; CHECK-SSE41-NEXT: psrld $31, %xmm0 781; CHECK-SSE41-NEXT: retq 782; 783; CHECK-AVX1-LABEL: test_urem_even_one: 784; CHECK-AVX1: # %bb.0: 785; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 786; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1 787; CHECK-AVX1-NEXT: vpslld $31, %xmm0, %xmm0 788; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 789; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 790; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 791; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 792; CHECK-AVX1-NEXT: retq 793; 794; CHECK-AVX2-LABEL: test_urem_even_one: 795; CHECK-AVX2: # %bb.0: 796; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3067833783,3067833783,3067833783,3067833783] 797; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 798; CHECK-AVX2-NEXT: vpsrld $1, %xmm0, %xmm1 799; CHECK-AVX2-NEXT: vpslld $31, %xmm0, %xmm0 800; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 801; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 802; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 803; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 804; CHECK-AVX2-NEXT: retq 805; 806; CHECK-AVX512VL-LABEL: test_urem_even_one: 807; CHECK-AVX512VL: # %bb.0: 808; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 809; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0 810; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 811; CHECK-AVX512VL-NEXT: vpcmpeqd 
%xmm1, %xmm0, %xmm0 812; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 813; CHECK-AVX512VL-NEXT: retq 814 %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 1, i32 14> 815 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 816 %ret = zext <4 x i1> %cmp to <4 x i32> 817 ret <4 x i32> %ret 818} 819 820; One one divisor in odd+even divisor 821define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind { 822; CHECK-SSE2-LABEL: test_urem_odd_even_one: 823; CHECK-SSE2: # %bb.0: 824; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 825; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 826; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 827; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 828; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 829; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3] 830; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 831; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 832; CHECK-SSE2-NEXT: por %xmm0, %xmm1 833; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 834; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 835; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 836; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 837; CHECK-SSE2-NEXT: retq 838; 839; CHECK-SSE41-LABEL: test_urem_odd_even_one: 840; CHECK-SSE41: # %bb.0: 841; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 842; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 843; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 844; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 845; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 846; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 847; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 848; CHECK-SSE41-NEXT: por %xmm2, %xmm0 849; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = 
[858993459,306783378,4294967295,42949672] 850; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 851; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 852; CHECK-SSE41-NEXT: psrld $31, %xmm0 853; CHECK-SSE41-NEXT: retq 854; 855; CHECK-AVX1-LABEL: test_urem_odd_even_one: 856; CHECK-AVX1: # %bb.0: 857; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 858; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 859; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 860; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 861; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 862; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 863; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 864; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 865; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 866; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 867; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 868; CHECK-AVX1-NEXT: retq 869; 870; CHECK-AVX2-LABEL: test_urem_odd_even_one: 871; CHECK-AVX2: # %bb.0: 872; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 873; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 874; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 875; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 876; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 877; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 878; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 879; CHECK-AVX2-NEXT: retq 880; 881; CHECK-AVX512VL-LABEL: test_urem_odd_even_one: 882; CHECK-AVX512VL: # %bb.0: 883; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 884; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 885; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 886; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 887; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 888; 
CHECK-AVX512VL-NEXT: retq 889 %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 1, i32 100> 890 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 891 %ret = zext <4 x i1> %cmp to <4 x i32> 892 ret <4 x i32> %ret 893} 894 895;------------------------------------------------------------------------------; 896 897; One INT_MIN divisor in odd divisor 898define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind { 899; CHECK-SSE2-LABEL: test_urem_odd_INT_MIN: 900; CHECK-SSE2: # %bb.0: 901; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 902; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 903; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 904; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 905; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 906; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] 907; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 908; CHECK-SSE2-NEXT: psrlq $32, %xmm0 909; CHECK-SSE2-NEXT: por %xmm2, %xmm0 910; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 911; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 912; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 913; CHECK-SSE2-NEXT: retq 914; 915; CHECK-SSE41-LABEL: test_urem_odd_INT_MIN: 916; CHECK-SSE41: # %bb.0: 917; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 918; CHECK-SSE41-NEXT: pmovsxbq {{.*#+}} xmm1 = [1,2] 919; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1 920; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] 921; CHECK-SSE41-NEXT: psrlq $32, %xmm1 922; CHECK-SSE41-NEXT: por %xmm1, %xmm0 923; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,1,858993459] 924; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 925; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 926; CHECK-SSE41-NEXT: psrld $31, %xmm0 927; CHECK-SSE41-NEXT: retq 928; 929; CHECK-AVX1-LABEL: test_urem_odd_INT_MIN: 930; CHECK-AVX1: # %bb.0: 931; 
CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 932; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 933; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] 934; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1 935; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 936; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 937; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 938; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 939; CHECK-AVX1-NEXT: retq 940; 941; CHECK-AVX2-LABEL: test_urem_odd_INT_MIN: 942; CHECK-AVX2: # %bb.0: 943; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 944; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 945; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 946; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 947; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 948; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 949; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 950; CHECK-AVX2-NEXT: retq 951; 952; CHECK-AVX512VL-LABEL: test_urem_odd_INT_MIN: 953; CHECK-AVX512VL: # %bb.0: 954; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 955; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 956; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 957; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 958; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 959; CHECK-AVX512VL-NEXT: retq 960 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 2147483648, i32 5> 961 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 962 %ret = zext <4 x i1> %cmp to <4 x i32> 963 ret <4 x i32> %ret 964} 965 966; One INT_MIN divisor in even divisor 967define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind { 968; CHECK-SSE2-LABEL: test_urem_even_INT_MIN: 969; CHECK-SSE2: # %bb.0: 970; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 971; 
CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 972; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] 973; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1 974; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] 975; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 976; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 977; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3] 978; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] 979; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 980; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 981; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 982; CHECK-SSE2-NEXT: por %xmm4, %xmm0 983; CHECK-SSE2-NEXT: pxor %xmm2, %xmm0 984; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 985; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 986; CHECK-SSE2-NEXT: retq 987; 988; CHECK-SSE41-LABEL: test_urem_even_INT_MIN: 989; CHECK-SSE41: # %bb.0: 990; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 991; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 992; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 993; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 994; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 995; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 996; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 997; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 998; CHECK-SSE41-NEXT: por %xmm2, %xmm0 999; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,306783378,1,306783378] 1000; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1001; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1002; CHECK-SSE41-NEXT: psrld $31, %xmm0 1003; CHECK-SSE41-NEXT: retq 1004; 1005; CHECK-AVX1-LABEL: test_urem_even_INT_MIN: 1006; CHECK-AVX1: # %bb.0: 1007; CHECK-AVX1-NEXT: vpmulld 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1008; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1009; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1010; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1011; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1012; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1013; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1014; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1015; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1016; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1017; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1018; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1019; CHECK-AVX1-NEXT: retq 1020; 1021; CHECK-AVX2-LABEL: test_urem_even_INT_MIN: 1022; CHECK-AVX2: # %bb.0: 1023; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1024; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1025; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1026; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1027; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1028; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1029; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1030; CHECK-AVX2-NEXT: retq 1031; 1032; CHECK-AVX512VL-LABEL: test_urem_even_INT_MIN: 1033; CHECK-AVX512VL: # %bb.0: 1034; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1035; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1036; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1037; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1038; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1039; CHECK-AVX512VL-NEXT: retq 1040 %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 2147483648, i32 14> 1041 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1042 %ret = zext <4 
x i1> %cmp to <4 x i32> 1043 ret <4 x i32> %ret 1044} 1045 1046; One INT_MIN divisor in odd+even divisor 1047define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind { 1048; CHECK-SSE2-LABEL: test_urem_odd_even_INT_MIN: 1049; CHECK-SSE2: # %bb.0: 1050; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1051; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1052; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1053; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] 1054; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1055; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1056; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] 1057; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 1058; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1059; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1060; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1061; CHECK-SSE2-NEXT: por %xmm2, %xmm0 1062; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1063; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1064; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1065; CHECK-SSE2-NEXT: retq 1066; 1067; CHECK-SSE41-LABEL: test_urem_odd_even_INT_MIN: 1068; CHECK-SSE41: # %bb.0: 1069; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1070; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1071; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1072; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1073; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1074; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1075; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1076; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1077; CHECK-SSE41-NEXT: por %xmm2, %xmm0 1078; CHECK-SSE41-NEXT: movdqa 
{{.*#+}} xmm1 = [858993459,306783378,1,42949672] 1079; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1080; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1081; CHECK-SSE41-NEXT: psrld $31, %xmm0 1082; CHECK-SSE41-NEXT: retq 1083; 1084; CHECK-AVX1-LABEL: test_urem_odd_even_INT_MIN: 1085; CHECK-AVX1: # %bb.0: 1086; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1087; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1088; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1089; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1090; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1091; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1092; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1093; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1094; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1095; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1096; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1097; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1098; CHECK-AVX1-NEXT: retq 1099; 1100; CHECK-AVX2-LABEL: test_urem_odd_even_INT_MIN: 1101; CHECK-AVX2: # %bb.0: 1102; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1103; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1104; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1105; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1106; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1107; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1108; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1109; CHECK-AVX2-NEXT: retq 1110; 1111; CHECK-AVX512VL-LABEL: test_urem_odd_even_INT_MIN: 1112; CHECK-AVX512VL: # %bb.0: 1113; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1114; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1115; CHECK-AVX512VL-NEXT: vpminud 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1116; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1117; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1118; CHECK-AVX512VL-NEXT: retq 1119 %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 2147483648, i32 100> 1120 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1121 %ret = zext <4 x i1> %cmp to <4 x i32> 1122 ret <4 x i32> %ret 1123} 1124 1125;==============================================================================; 1126 1127; One all-ones divisor and one power-of-two divisor in odd divisor 1128define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind { 1129; CHECK-SSE2-LABEL: test_urem_odd_allones_and_poweroftwo: 1130; CHECK-SSE2: # %bb.0: 1131; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1132; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1133; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1134; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1135; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1136; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] 1137; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1138; CHECK-SSE2-NEXT: psrlq $32, %xmm0 1139; CHECK-SSE2-NEXT: por %xmm2, %xmm0 1140; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1141; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1142; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1143; CHECK-SSE2-NEXT: retq 1144; 1145; CHECK-SSE41-LABEL: test_urem_odd_allones_and_poweroftwo: 1146; CHECK-SSE41: # %bb.0: 1147; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1148; CHECK-SSE41-NEXT: pmovsxdq {{.*#+}} xmm1 = [1,268435456] 1149; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1 1150; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] 1151; CHECK-SSE41-NEXT: psrlq $32, %xmm1 1152; CHECK-SSE41-NEXT: por %xmm1, %xmm0 1153; CHECK-SSE41-NEXT: movdqa
{{.*#+}} xmm1 = [858993459,1,268435455,858993459] 1154; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1155; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1156; CHECK-SSE41-NEXT: psrld $31, %xmm0 1157; CHECK-SSE41-NEXT: retq 1158; 1159; CHECK-AVX1-LABEL: test_urem_odd_allones_and_poweroftwo: 1160; CHECK-AVX1: # %bb.0: 1161; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1162; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1163; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] 1164; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1 1165; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 1166; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1167; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1168; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1169; CHECK-AVX1-NEXT: retq 1170; 1171; CHECK-AVX2-LABEL: test_urem_odd_allones_and_poweroftwo: 1172; CHECK-AVX2: # %bb.0: 1173; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1174; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1175; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1176; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1177; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1178; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1179; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1180; CHECK-AVX2-NEXT: retq 1181; 1182; CHECK-AVX512VL-LABEL: test_urem_odd_allones_and_poweroftwo: 1183; CHECK-AVX512VL: # %bb.0: 1184; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1185; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1186; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1187; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1188; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1189; CHECK-AVX512VL-NEXT: retq 1190 %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 5> 1191 %cmp = icmp eq <4 x 
i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1192 %ret = zext <4 x i1> %cmp to <4 x i32> 1193 ret <4 x i32> %ret 1194} 1195 1196; One all-ones divisor and one power-of-two divisor in even divisor 1197define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind { 1198; CHECK-SSE2-LABEL: test_urem_even_allones_and_poweroftwo: 1199; CHECK-SSE2: # %bb.0: 1200; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1201; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1202; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1203; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] 1204; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1205; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1206; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] 1207; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 1208; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1209; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1210; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1211; CHECK-SSE2-NEXT: por %xmm2, %xmm0 1212; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1213; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1214; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1215; CHECK-SSE2-NEXT: retq 1216; 1217; CHECK-SSE41-LABEL: test_urem_even_allones_and_poweroftwo: 1218; CHECK-SSE41: # %bb.0: 1219; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1220; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1221; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1222; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1223; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1224; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1225; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1226; CHECK-SSE41-NEXT: pblendw {{.*#+}}
xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1227; CHECK-SSE41-NEXT: por %xmm2, %xmm0 1228; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,1,268435455,306783378] 1229; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1230; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1231; CHECK-SSE41-NEXT: psrld $31, %xmm0 1232; CHECK-SSE41-NEXT: retq 1233; 1234; CHECK-AVX1-LABEL: test_urem_even_allones_and_poweroftwo: 1235; CHECK-AVX1: # %bb.0: 1236; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1237; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1238; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1239; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1240; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1241; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1242; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1243; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1244; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1245; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1246; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1247; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1248; CHECK-AVX1-NEXT: retq 1249; 1250; CHECK-AVX2-LABEL: test_urem_even_allones_and_poweroftwo: 1251; CHECK-AVX2: # %bb.0: 1252; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1253; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1254; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1255; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1256; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1257; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1258; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1259; CHECK-AVX2-NEXT: retq 1260; 1261; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_poweroftwo: 1262; CHECK-AVX512VL: # %bb.0: 1263; CHECK-AVX512VL-NEXT: vpmulld 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1264; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1265; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1266; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1267; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1268; CHECK-AVX512VL-NEXT: retq 1269 %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 14> 1270 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1271 %ret = zext <4 x i1> %cmp to <4 x i32> 1272 ret <4 x i32> %ret 1273} 1274 1275; One all-ones divisor and one power-of-two divisor in odd+even divisor 1276define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwind { 1277; CHECK-SSE2-LABEL: test_urem_odd_even_allones_and_poweroftwo: 1278; CHECK-SSE2: # %bb.0: 1279; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1280; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1281; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1282; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] 1283; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1284; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1285; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] 1286; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 1287; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1288; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1289; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1290; CHECK-SSE2-NEXT: por %xmm2, %xmm0 1291; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1292; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1293; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1294; CHECK-SSE2-NEXT: retq 1295; 1296; CHECK-SSE41-LABEL: test_urem_odd_even_allones_and_poweroftwo: 1297; CHECK-SSE41: # %bb.0: 1298; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip),
%xmm0 1299; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1300; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1301; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1302; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1303; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1304; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1305; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1306; CHECK-SSE41-NEXT: por %xmm2, %xmm0 1307; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993459,1,268435455,42949672] 1308; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1309; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1310; CHECK-SSE41-NEXT: psrld $31, %xmm0 1311; CHECK-SSE41-NEXT: retq 1312; 1313; CHECK-AVX1-LABEL: test_urem_odd_even_allones_and_poweroftwo: 1314; CHECK-AVX1: # %bb.0: 1315; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1316; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1317; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1318; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1319; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1320; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1321; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1322; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1323; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1324; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1325; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1326; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1327; CHECK-AVX1-NEXT: retq 1328; 1329; CHECK-AVX2-LABEL: test_urem_odd_even_allones_and_poweroftwo: 1330; CHECK-AVX2: # %bb.0: 1331; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1332; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1333; 
CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1334; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1335; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1336; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1337; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1338; CHECK-AVX2-NEXT: retq 1339; 1340; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_and_poweroftwo: 1341; CHECK-AVX512VL: # %bb.0: 1342; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1343; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1344; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1345; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1346; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1347; CHECK-AVX512VL-NEXT: retq 1348 %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 100> 1349 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1350 %ret = zext <4 x i1> %cmp to <4 x i32> 1351 ret <4 x i32> %ret 1352} 1353 1354;------------------------------------------------------------------------------; 1355 1356; One all-ones divisor and one one divisor in odd divisor 1357define <4 x i32> @test_urem_odd_allones_and_one(<4 x i32> %X) nounwind { 1358; CHECK-SSE2-LABEL: test_urem_odd_allones_and_one: 1359; CHECK-SSE2: # %bb.0: 1360; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1361; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1362; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1363; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1364; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1365; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1366; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1367; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1368; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1369; CHECK-SSE2-NEXT: retq 1370; 1371; CHECK-SSE41-LABEL: 
test_urem_odd_allones_and_one: 1372; CHECK-SSE41: # %bb.0: 1373; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1374; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993459,1,4294967295,858993459] 1375; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1376; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1377; CHECK-SSE41-NEXT: psrld $31, %xmm0 1378; CHECK-SSE41-NEXT: retq 1379; 1380; CHECK-AVX-LABEL: test_urem_odd_allones_and_one: 1381; CHECK-AVX: # %bb.0: 1382; CHECK-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1383; CHECK-AVX-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1384; CHECK-AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1385; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0 1386; CHECK-AVX-NEXT: retq 1387 %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 5> 1388 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1389 %ret = zext <4 x i1> %cmp to <4 x i32> 1390 ret <4 x i32> %ret 1391} 1392 1393; One all-ones divisor and one one divisor in even divisor 1394define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind { 1395; CHECK-SSE2-LABEL: test_urem_even_allones_and_one: 1396; CHECK-SSE2: # %bb.0: 1397; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1398; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1399; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1400; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] 1401; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1402; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1403; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] 1404; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 1405; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1406; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1407; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1408; CHECK-SSE2-NEXT: por %xmm2, %xmm0 1409; CHECK-SSE2-NEXT: pxor 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1410; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1411; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1412; CHECK-SSE2-NEXT: retq 1413; 1414; CHECK-SSE41-LABEL: test_urem_even_allones_and_one: 1415; CHECK-SSE41: # %bb.0: 1416; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1417; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1418; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1419; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1420; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1421; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1422; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1423; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1424; CHECK-SSE41-NEXT: por %xmm2, %xmm0 1425; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,1,4294967295,306783378] 1426; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1427; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1428; CHECK-SSE41-NEXT: psrld $31, %xmm0 1429; CHECK-SSE41-NEXT: retq 1430; 1431; CHECK-AVX1-LABEL: test_urem_even_allones_and_one: 1432; CHECK-AVX1: # %bb.0: 1433; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1434; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1435; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1436; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1437; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1438; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1439; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1440; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1441; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1442; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1443; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 
1444; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1445; CHECK-AVX1-NEXT: retq 1446; 1447; CHECK-AVX2-LABEL: test_urem_even_allones_and_one: 1448; CHECK-AVX2: # %bb.0: 1449; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1450; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1451; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1452; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1453; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1454; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1455; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1456; CHECK-AVX2-NEXT: retq 1457; 1458; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_one: 1459; CHECK-AVX512VL: # %bb.0: 1460; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1461; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1462; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1463; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1464; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1465; CHECK-AVX512VL-NEXT: retq 1466 %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 1, i32 14> 1467 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1468 %ret = zext <4 x i1> %cmp to <4 x i32> 1469 ret <4 x i32> %ret 1470} 1471 1472; One all-ones divisor and one one divisor in odd+even divisor 1473define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind { 1474; CHECK-SSE2-LABEL: test_urem_odd_even_allones_and_one: 1475; CHECK-SSE2: # %bb.0: 1476; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1477; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1478; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1479; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1480; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1481; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3] 1482; CHECK-SSE2-NEXT: punpckldq 
{{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1483; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1484; CHECK-SSE2-NEXT: por %xmm0, %xmm1 1485; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1486; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1487; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1488; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 1489; CHECK-SSE2-NEXT: retq 1490; 1491; CHECK-SSE41-LABEL: test_urem_odd_even_allones_and_one: 1492; CHECK-SSE41: # %bb.0: 1493; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1494; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1495; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1496; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 1497; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1498; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1499; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1500; CHECK-SSE41-NEXT: por %xmm2, %xmm0 1501; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993459,1,4294967295,42949672] 1502; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1503; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1504; CHECK-SSE41-NEXT: psrld $31, %xmm0 1505; CHECK-SSE41-NEXT: retq 1506; 1507; CHECK-AVX1-LABEL: test_urem_odd_even_allones_and_one: 1508; CHECK-AVX1: # %bb.0: 1509; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1510; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1511; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1512; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1513; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1514; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1515; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1516; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1517; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 
1518; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1519; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1520; CHECK-AVX1-NEXT: retq 1521; 1522; CHECK-AVX2-LABEL: test_urem_odd_even_allones_and_one: 1523; CHECK-AVX2: # %bb.0: 1524; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1525; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1526; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1527; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1528; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1529; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1530; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1531; CHECK-AVX2-NEXT: retq 1532; 1533; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_and_one: 1534; CHECK-AVX512VL: # %bb.0: 1535; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1536; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1537; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1538; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1539; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1540; CHECK-AVX512VL-NEXT: retq 1541 %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 100> 1542 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1543 %ret = zext <4 x i1> %cmp to <4 x i32> 1544 ret <4 x i32> %ret 1545} 1546 1547;------------------------------------------------------------------------------; 1548 1549; One power-of-two divisor and one one divisor in odd divisor 1550define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind { 1551; CHECK-SSE2-LABEL: test_urem_odd_poweroftwo_and_one: 1552; CHECK-SSE2: # %bb.0: 1553; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1554; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1555; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1556; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1557; 
CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1558; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3] 1559; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1560; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1561; CHECK-SSE2-NEXT: por %xmm0, %xmm1 1562; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1563; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1564; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1565; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 1566; CHECK-SSE2-NEXT: retq 1567; 1568; CHECK-SSE41-LABEL: test_urem_odd_poweroftwo_and_one: 1569; CHECK-SSE41: # %bb.0: 1570; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1571; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1572; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1573; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 1574; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1575; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1576; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1577; CHECK-SSE41-NEXT: por %xmm2, %xmm0 1578; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993459,268435455,4294967295,858993459] 1579; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1580; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1581; CHECK-SSE41-NEXT: psrld $31, %xmm0 1582; CHECK-SSE41-NEXT: retq 1583; 1584; CHECK-AVX1-LABEL: test_urem_odd_poweroftwo_and_one: 1585; CHECK-AVX1: # %bb.0: 1586; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1587; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1588; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1589; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1590; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1591; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1592; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = 
xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1593; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1594; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1595; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1596; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1597; CHECK-AVX1-NEXT: retq 1598; 1599; CHECK-AVX2-LABEL: test_urem_odd_poweroftwo_and_one: 1600; CHECK-AVX2: # %bb.0: 1601; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1602; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1603; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1604; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1605; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1606; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1607; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1608; CHECK-AVX2-NEXT: retq 1609; 1610; CHECK-AVX512VL-LABEL: test_urem_odd_poweroftwo_and_one: 1611; CHECK-AVX512VL: # %bb.0: 1612; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1613; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1614; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1615; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1616; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1617; CHECK-AVX512VL-NEXT: retq 1618 %urem = urem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 5> 1619 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1620 %ret = zext <4 x i1> %cmp to <4 x i32> 1621 ret <4 x i32> %ret 1622} 1623 1624; One power-of-two divisor and one one divisor in even divisor 1625define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind { 1626; CHECK-SSE2-LABEL: test_urem_even_poweroftwo_and_one: 1627; CHECK-SSE2: # %bb.0: 1628; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1629; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1630; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1631; 
CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3] 1632; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1633; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1634; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] 1635; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 1636; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1637; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1638; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1639; CHECK-SSE2-NEXT: por %xmm2, %xmm0 1640; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1641; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1642; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1643; CHECK-SSE2-NEXT: retq 1644; 1645; CHECK-SSE41-LABEL: test_urem_even_poweroftwo_and_one: 1646; CHECK-SSE41: # %bb.0: 1647; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1648; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1649; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1650; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1651; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1652; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1653; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1654; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1655; CHECK-SSE41-NEXT: por %xmm2, %xmm0 1656; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,268435455,4294967295,306783378] 1657; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1658; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1659; CHECK-SSE41-NEXT: psrld $31, %xmm0 1660; CHECK-SSE41-NEXT: retq 1661; 1662; CHECK-AVX1-LABEL: test_urem_even_poweroftwo_and_one: 1663; CHECK-AVX1: # %bb.0: 1664; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1665; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1666; 
CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1667; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1668; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1669; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1670; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1671; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1672; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1673; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1674; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1675; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1676; CHECK-AVX1-NEXT: retq 1677; 1678; CHECK-AVX2-LABEL: test_urem_even_poweroftwo_and_one: 1679; CHECK-AVX2: # %bb.0: 1680; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1681; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1682; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1683; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1684; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1685; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1686; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1687; CHECK-AVX2-NEXT: retq 1688; 1689; CHECK-AVX512VL-LABEL: test_urem_even_poweroftwo_and_one: 1690; CHECK-AVX512VL: # %bb.0: 1691; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1692; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1693; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1694; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1695; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1696; CHECK-AVX512VL-NEXT: retq 1697 %urem = urem <4 x i32> %X, <i32 14, i32 16, i32 1, i32 14> 1698 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1699 %ret = zext <4 x i1> %cmp to <4 x i32> 1700 ret <4 x i32> %ret 1701} 1702 1703; One power-of-two divisor 
and one one divisor in odd+even divisor 1704define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind { 1705; CHECK-SSE2-LABEL: test_urem_odd_even_poweroftwo_and_one: 1706; CHECK-SSE2: # %bb.0: 1707; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1708; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1709; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1710; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1711; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1712; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3] 1713; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1714; CHECK-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1715; CHECK-SSE2-NEXT: por %xmm0, %xmm1 1716; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1717; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1718; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1719; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 1720; CHECK-SSE2-NEXT: retq 1721; 1722; CHECK-SSE41-LABEL: test_urem_odd_even_poweroftwo_and_one: 1723; CHECK-SSE41: # %bb.0: 1724; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1725; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1726; CHECK-SSE41-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1727; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 1728; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1729; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1730; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1731; CHECK-SSE41-NEXT: por %xmm2, %xmm0 1732; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993459,268435455,4294967295,42949672] 1733; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1734; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1735; CHECK-SSE41-NEXT: psrld $31, %xmm0 1736; CHECK-SSE41-NEXT: retq 1737; 1738; CHECK-AVX1-LABEL: test_urem_odd_even_poweroftwo_and_one: 1739; 
CHECK-AVX1: # %bb.0: 1740; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1741; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1742; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 1743; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1744; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1745; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,2,2] 1746; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1747; CHECK-AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 1748; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1749; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1750; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1751; CHECK-AVX1-NEXT: retq 1752; 1753; CHECK-AVX2-LABEL: test_urem_odd_even_poweroftwo_and_one: 1754; CHECK-AVX2: # %bb.0: 1755; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1756; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1757; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1758; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1759; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1760; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1761; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1762; CHECK-AVX2-NEXT: retq 1763; 1764; CHECK-AVX512VL-LABEL: test_urem_odd_even_poweroftwo_and_one: 1765; CHECK-AVX512VL: # %bb.0: 1766; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1767; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1768; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1769; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1770; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1771; CHECK-AVX512VL-NEXT: retq 1772 %urem = urem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 100> 1773 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1774 %ret = zext <4 x i1> %cmp to <4 
x i32> 1775 ret <4 x i32> %ret 1776} 1777 1778;------------------------------------------------------------------------------; 1779 ; One all-ones divisor, one power-of-two divisor and one one divisor in odd divisor 1780define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind { 1781; CHECK-SSE2-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: 1782; CHECK-SSE2: # %bb.0: 1783; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1784; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1785; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1786; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1787; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1788; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] 1789; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1790; CHECK-SSE2-NEXT: psrlq $32, %xmm0 1791; CHECK-SSE2-NEXT: por %xmm2, %xmm0 1792; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1793; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1794; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1795; CHECK-SSE2-NEXT: retq 1796; 1797; CHECK-SSE41-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: 1798; CHECK-SSE41: # %bb.0: 1799; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1800; CHECK-SSE41-NEXT: pmovsxdq {{.*#+}} xmm1 = [1,268435456] 1801; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1 1802; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] 1803; CHECK-SSE41-NEXT: psrlq $32, %xmm1 1804; CHECK-SSE41-NEXT: por %xmm1, %xmm0 1805; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993459,1,268435455,4294967295] 1806; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1807; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1808; CHECK-SSE41-NEXT: psrld $31, %xmm0 1809; CHECK-SSE41-NEXT: retq 1810; 1811; CHECK-AVX1-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: 1812; CHECK-AVX1: # %bb.0: 1813; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1814; CHECK-AVX1-NEXT: 
vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1815; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] 1816; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1 1817; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 1818; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1819; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1820; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1821; CHECK-AVX1-NEXT: retq 1822; 1823; CHECK-AVX2-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: 1824; CHECK-AVX2: # %bb.0: 1825; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1826; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1827; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1828; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1829; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1830; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1831; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1832; CHECK-AVX2-NEXT: retq 1833; 1834; CHECK-AVX512VL-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: 1835; CHECK-AVX512VL: # %bb.0: 1836; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1837; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1838; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1839; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1840; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1841; CHECK-AVX512VL-NEXT: retq 1842 %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 1> 1843 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1844 %ret = zext <4 x i1> %cmp to <4 x i32> 1845 ret <4 x i32> %ret 1846} 1847 ; One all-ones divisor, one power-of-two divisor and one one divisor in even divisor 1848define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind { 1849; CHECK-SSE2-LABEL: test_urem_even_allones_and_poweroftwo_and_one: 1850; CHECK-SSE2: # %bb.0: 1851; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1852; 
CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 1853; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1854; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1855; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1856; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] 1857; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1858; CHECK-SSE2-NEXT: psrlq $32, %xmm0 1859; CHECK-SSE2-NEXT: por %xmm2, %xmm0 1860; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1861; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1862; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1863; CHECK-SSE2-NEXT: retq 1864; 1865; CHECK-SSE41-LABEL: test_urem_even_allones_and_poweroftwo_and_one: 1866; CHECK-SSE41: # %bb.0: 1867; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1868; CHECK-SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = [2147483648,268435456] 1869; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1 1870; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] 1871; CHECK-SSE41-NEXT: psrlq $32, %xmm1 1872; CHECK-SSE41-NEXT: por %xmm1, %xmm0 1873; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [306783378,1,268435455,4294967295] 1874; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1875; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1876; CHECK-SSE41-NEXT: psrld $31, %xmm0 1877; CHECK-SSE41-NEXT: retq 1878; 1879; CHECK-AVX1-LABEL: test_urem_even_allones_and_poweroftwo_and_one: 1880; CHECK-AVX1: # %bb.0: 1881; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1882; CHECK-AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1883; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7] 1884; CHECK-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1 1885; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 1886; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1887; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1888; 
CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1889; CHECK-AVX1-NEXT: retq 1890; 1891; CHECK-AVX2-LABEL: test_urem_even_allones_and_poweroftwo_and_one: 1892; CHECK-AVX2: # %bb.0: 1893; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1894; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1895; CHECK-AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1896; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 1897; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1898; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1899; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1900; CHECK-AVX2-NEXT: retq 1901; 1902; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_poweroftwo_and_one: 1903; CHECK-AVX512VL: # %bb.0: 1904; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1905; CHECK-AVX512VL-NEXT: vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1906; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1907; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1908; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1909; CHECK-AVX512VL-NEXT: retq 1910 %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 1> 1911 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1912 %ret = zext <4 x i1> %cmp to <4 x i32> 1913 ret <4 x i32> %ret 1914} 1915