; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=CHECK-SSE,CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK-SSE,CHECK-SSE41
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX512VL

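; The checks below exercise what appears to be the multiply-by-modular-inverse
; fold for 'urem by constant == 0' comparisons. For an odd divisor D:
;   (X urem D) == 0   <=>   (X * D^-1 mod 2^32) ule (2^32 - 1) / D
; and for an even divisor D = D' * 2^K with D' odd:
;   (X urem D) == 0   <=>   rotr(X * D'^-1 mod 2^32, K) ule (2^32 - 1) / D
; Concretely, 25^-1 mod 2^32 = 3264175145 and (2^32 - 1) / 25 = 171798691;
; for 100 the same inverse is used, followed by a rotate right by 2, with the
; bound (2^32 - 1) / 100 = 42949672. These are the constants in the checks.
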
; Odd divisor
define <4 x i32> @test_urem_odd_25(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_25:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_urem_odd_25:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [171798691,171798691,171798691,171798691]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: psrld $31, %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: test_urem_odd_25:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_urem_odd_25:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [171798691,171798691,171798691,171798691]
; CHECK-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_25:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %urem = urem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; Even divisors
define <4 x i32> @test_urem_even_100(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_100:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE2-NEXT: psrld $2, %xmm1
; CHECK-SSE2-NEXT: pslld $30, %xmm0
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_urem_even_100:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $2, %xmm1
; CHECK-SSE41-NEXT: pslld $30, %xmm0
; CHECK-SSE41-NEXT: por %xmm1, %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [42949672,42949672,42949672,42949672]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: psrld $31, %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: test_urem_even_100:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $2, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $30, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_urem_even_100:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $2, %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpslld $30, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [42949672,42949672,42949672,42949672]
; CHECK-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_100:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprord $2, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %urem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; Negative divisors should be negated, and thus this is still splat vectors.
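; Note: urem is unsigned, so the -25 and -100 lanes act as the large divisors
; 4294967271 and 4294967196, whose bound (2^32 - 1) / D is 1. That is why the
; pminud constants in the checks below are the non-splat
; <171798691,1,1,171798691> and <1,42949672,1,42949672>.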

; Odd divisor
define <4 x i32> @test_urem_odd_neg25(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_neg25:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_urem_odd_neg25:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [171798691,1,1,171798691]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: psrld $31, %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX-LABEL: test_urem_odd_neg25:
; CHECK-AVX: # %bb.0:
; CHECK-AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX-NEXT: retq
  %urem = urem <4 x i32> %X, <i32 25, i32 -25, i32 -25, i32 25>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; Even divisors
define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_neg100:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE2-NEXT: psrld $2, %xmm1
; CHECK-SSE2-NEXT: pslld $30, %xmm0
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_urem_even_neg100:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $2, %xmm1
; CHECK-SSE41-NEXT: pslld $30, %xmm0
; CHECK-SSE41-NEXT: por %xmm1, %xmm0
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,42949672,1,42949672]
; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: psrld $31, %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: test_urem_even_neg100:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $2, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpslld $30, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_urem_even_neg100:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $2, %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpslld $30, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_neg100:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vprord $2, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %urem = urem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

;------------------------------------------------------------------------------;
; Comparison constant has undef elements.
;------------------------------------------------------------------------------;
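; Note: with undef lanes in the comparison constant, the inverse-based fold is
; not used below. Instead the urem is expanded the usual way: multiply-high by
; the magic constant 1374389535 (ceil(2^35 / 25), which is also ceil(2^37 / 100)),
; shift right by 3 (for 25) or by 5 (for 100), multiply back by the divisor,
; subtract, and compare the remainder against zero.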

define <4 x i32> @test_urem_odd_undef1(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_undef1:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psrld $3, %xmm2
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [25,25,25,25]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT: psrld $31, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_urem_odd_undef1:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: psrld $3, %xmm2
; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: psrld $31, %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: test_urem_odd_undef1:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpsrld $3, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_urem_odd_undef1:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX2-NEXT: vpsrld $3, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [25,25,25,25]
; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_undef1:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX512VL-NEXT: vpsrld $3, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %urem = urem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 undef, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

define <4 x i32> @test_urem_even_undef1(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_undef1:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psrld $5, %xmm2
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [100,100,100,100]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT: psrld $31, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_urem_even_undef1:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: psrld $5, %xmm2
; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: psrld $31, %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: test_urem_even_undef1:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpsrld $5, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_urem_even_undef1:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX2-NEXT: vpsrld $5, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [100,100,100,100]
; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_undef1:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX512VL-NEXT: vpsrld $5, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %urem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 undef, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

;------------------------------------------------------------------------------;
; Negative tests
;------------------------------------------------------------------------------;
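; The divisors below (1, a power of two, INT_MIN, all-ones) are special cases
; that are handled (or, per the comments, could be handled) by cheaper
; lowerings than the generic fold above.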

define <4 x i32> @test_urem_one_eq(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_one_eq:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX-LABEL: test_urem_one_eq:
; CHECK-AVX: # %bb.0:
; CHECK-AVX-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-AVX-NEXT: retq
  %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
define <4 x i32> @test_urem_one_ne(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_one_ne:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX-LABEL: test_urem_one_ne:
; CHECK-AVX: # %bb.0:
; CHECK-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-AVX-NEXT: retq
  %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; We can lower remainder of division by powers of two much better elsewhere.
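; For a power-of-two divisor the check reduces to a mask: (X urem 16) == 0 is
; just (X & 15) == 0, hence the pand/vpand with 15 and the compare against zero
; in the checks below.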
define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_pow2:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE-NEXT: psrld $31, %xmm0
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX1-LABEL: test_urem_pow2:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_urem_pow2:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
; CHECK-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_urem_pow2:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %urem = urem <4 x i32> %X, <i32 16, i32 16, i32 16, i32 16>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; We could lower remainder of division by INT_MIN much better elsewhere.
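; 2147483648 is 2^31, so this is the same power-of-two case as above: the
; checks below mask with 2^31 - 1 = 2147483647 and compare against zero.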
define <4 x i32> @test_urem_int_min(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_int_min:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE-NEXT: psrld $31, %xmm0
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX1-LABEL: test_urem_int_min:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_urem_int_min:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_urem_int_min:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %urem = urem <4 x i32> %X, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; We could lower remainder of division by all-ones much better elsewhere.
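; Here the generic sequence degenerates: 4294967295^-1 mod 2^32 is 4294967295
; itself, so the multiply by the inverse becomes a negation, and the compare
; bound (2^32 - 1) / 4294967295 is 1, hence the pminud against <1,1,1,1> below.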
define <4 x i32> @test_urem_allones(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_allones:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT: psubd %xmm0, %xmm1
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_urem_allones:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT: psubd %xmm0, %xmm1
; CHECK-SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-SSE41-NEXT: pminud %xmm1, %xmm0
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: psrld $31, %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: test_urem_allones:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_urem_allones:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; CHECK-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_urem_allones:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %urem = urem <4 x i32> %X, <i32 4294967295, i32 4294967295, i32 4294967295, i32 4294967295>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}