; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; PR33276 - https://bugs.llvm.org/show_bug.cgi?id=33276
; If both operands of an unsigned icmp are known non-negative, then
; we don't need to flip the sign bits in order to map to signed pcmpgt*.
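;
; As a sketch of the idea (not the exact DAG-combine code): once the
; lshr-by-1 in the tests below clears the sign bit of every element,
;   (icmp ugt (lshr %x, 1), (lshr %y, 1)) == (icmp sgt (lshr %x, 1), (lshr %y, 1))
; so the backend can select pcmpgt*/vpcmpgt* directly, instead of first
; biasing both operands with a pxor of the sign-bit mask as the legacy
; unsigned-compare expansion does.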

define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: ugt_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlq $1, %xmm0
; SSE2-NEXT:    psrlq $1, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ugt_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrlq $1, %xmm0
; SSE41-NEXT:    psrlq $1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT:    pxor %xmm2, %xmm1
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ugt_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
  %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
  %cmp = icmp ugt <2 x i64> %sh1, %sh2
  ret <2 x i1> %cmp
}

define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: ult_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlq $1, %xmm0
; SSE2-NEXT:    psrlq $1, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ult_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrlq $1, %xmm0
; SSE41-NEXT:    psrlq $1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ult_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
  %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
  %cmp = icmp ult <2 x i64> %sh1, %sh2
  ret <2 x i1> %cmp
}

define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: uge_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlq $1, %xmm0
; SSE2-NEXT:    psrlq $1, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uge_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrlq $1, %xmm0
; SSE41-NEXT:    psrlq $1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uge_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
  %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
  %cmp = icmp uge <2 x i64> %sh1, %sh2
  ret <2 x i1> %cmp
}

define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: ule_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlq $1, %xmm0
; SSE2-NEXT:    psrlq $1, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ule_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrlq $1, %xmm0
; SSE41-NEXT:    psrlq $1, %xmm1
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE41-NEXT:    pxor %xmm2, %xmm1
; SSE41-NEXT:    pxor %xmm2, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT:    pand %xmm3, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ule_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
  %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
  %cmp = icmp ule <2 x i64> %sh1, %sh2
  ret <2 x i1> %cmp
}

define <4 x i1> @ugt_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: ugt_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $1, %xmm0
; SSE-NEXT:    psrld $1, %xmm1
; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ugt_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ugt <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}

define <4 x i1> @ult_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: ult_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $1, %xmm0
; SSE-NEXT:    psrld $1, %xmm1
; SSE-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ult_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ult <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}

define <4 x i1> @uge_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: uge_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrld $1, %xmm0
; SSE2-NEXT:    psrld $1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uge_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrld $1, %xmm0
; SSE41-NEXT:    psrld $1, %xmm1
; SSE41-NEXT:    pmaxud %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uge_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp uge <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}

define <4 x i1> @ule_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: ule_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrld $1, %xmm0
; SSE2-NEXT:    psrld $1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ule_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrld $1, %xmm0
; SSE41-NEXT:    psrld $1, %xmm1
; SSE41-NEXT:    pminud %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ule_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ule <4 x i32> %sh1, %sh2
  ret <4 x i1> %cmp
}

define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: ugt_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ugt_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %cmp = icmp ugt <8 x i16> %sh1, %sh2
  ret <8 x i1> %cmp
}

define <8 x i1> @ult_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: ult_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pcmpgtw %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ult_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %cmp = icmp ult <8 x i16> %sh1, %sh2
  ret <8 x i1> %cmp
}

define <8 x i1> @uge_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: uge_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlw $1, %xmm0
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pcmpgtw %xmm0, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uge_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrlw $1, %xmm0
; SSE41-NEXT:    psrlw $1, %xmm1
; SSE41-NEXT:    pmaxuw %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: uge_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %cmp = icmp uge <8 x i16> %sh1, %sh2
  ret <8 x i1> %cmp
}

define <8 x i1> @ule_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: ule_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrlw $1, %xmm0
; SSE2-NEXT:    psrlw $1, %xmm1
; SSE2-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ule_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    psrlw $1, %xmm0
; SSE41-NEXT:    psrlw $1, %xmm1
; SSE41-NEXT:    pminuw %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: ule_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %cmp = icmp ule <8 x i16> %sh1, %sh2
  ret <8 x i1> %cmp
}

define <16 x i1> @ugt_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ugt_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: ugt_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ugt_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %cmp = icmp ugt <16 x i8> %sh1, %sh2
  ret <16 x i1> %cmp
}

define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ult_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pand %xmm1, %xmm2
; SSE-NEXT:    pcmpgtb %xmm0, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: ult_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ult_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
  %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %cmp = icmp ult <16 x i8> %sh1, %sh2
  ret <16 x i1> %cmp
}

define <16 x i1> @uge_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: uge_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pand %xmm1, %xmm2
; SSE-NEXT:    pmaxub %xmm0, %xmm2
; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: uge_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: uge_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %cmp = icmp uge <16 x i8> %sh1, %sh2
  ret <16 x i1> %cmp
}

define <16 x i1> @ule_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ule_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    psrlw $1, %xmm1
; SSE-NEXT:    pand %xmm1, %xmm2
; SSE-NEXT:    pminub %xmm0, %xmm2
; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: ule_v16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ule_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $1, %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm1
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %cmp = icmp ule <16 x i8> %sh1, %sh2
  ret <16 x i1> %cmp
}

define <8 x i16> @PR47448_uge(i16 signext %0) {
; SSE2-LABEL: PR47448_uge:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andl $7, %edi
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
; SSE2-NEXT:    pcmpgtw %xmm0, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: PR47448_uge:
; SSE41:       # %bb.0:
; SSE41-NEXT:    andl $7, %edi
; SSE41-NEXT:    movd %edi, %xmm0
; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE41-NEXT:    pmovsxbw {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7]
; SSE41-NEXT:    pmaxuw %xmm1, %xmm0
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: PR47448_uge:
; AVX1:       # %bb.0:
; AVX1-NEXT:    andl $7, %edi
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR47448_uge:
; AVX2:       # %bb.0:
; AVX2-NEXT:    andl $7, %edi
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vpmaxuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %2 = and i16 %0, 7
  %3 = insertelement <8 x i16> undef, i16 %2, i32 0
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
  %5 = icmp uge <8 x i16> %4, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  %6 = sext <8 x i1> %5 to <8 x i16>
  ret <8 x i16> %6
}

define <8 x i16> @PR47448_ugt(i16 signext %0) {
; SSE-LABEL: PR47448_ugt:
; SSE:       # %bb.0:
; SSE-NEXT:    andl $7, %edi
; SSE-NEXT:    movd %edi, %xmm0
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    pcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR47448_ugt:
; AVX1:       # %bb.0:
; AVX1-NEXT:    andl $7, %edi
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR47448_ugt:
; AVX2:       # %bb.0:
; AVX2-NEXT:    andl $7, %edi
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vpcmpgtw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
  %2 = and i16 %0, 7
  %3 = insertelement <8 x i16> undef, i16 %2, i32 0
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
  %5 = icmp ugt <8 x i16> %4, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  %6 = sext <8 x i1> %5 to <8 x i16>
  ret <8 x i16> %6
}

; Recognise the knownbits from X86ISD::AND in previous block.
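;
; A sketch of the reasoning (the exact KnownBits query in the backend may
; differ): the "andq $7" in %entry bounds %sub to [0,7], and that fact is
; still visible in %if.then, so both unsigned "icmp ugt" compares against
; small non-negative constants can be lowered as signed pcmpgtd/vpcmpgtd
; against the constant pool (or, on AVX1, via the equivalent vcvtdq2ps +
; vcmpltps sequence seen below).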
define void @PR54171(ptr %mask0, ptr %mask1, i64 %i) {
; SSE-LABEL: PR54171:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    andq $7, %rdx
; SSE-NEXT:    je .LBB18_2
; SSE-NEXT:  # %bb.1: # %if.then
; SSE-NEXT:    movd %edx, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT:    movdqa %xmm2, (%rdi)
; SSE-NEXT:    movdqa %xmm1, 16(%rdi)
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    movdqa %xmm0, (%rsi)
; SSE-NEXT:    movdqa %xmm1, 16(%rsi)
; SSE-NEXT:  .LBB18_2: # %if.end
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR54171:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    andq $7, %rdx
; AVX1-NEXT:    je .LBB18_2
; AVX1-NEXT:  # %bb.1: # %if.then
; AVX1-NEXT:    vmovd %edx, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [0.0E+0,0.0E+0,1.0E+0,1.0E+0,2.0E+0,2.0E+0,3.0E+0,3.0E+0]
; AVX1-NEXT:    vcmpltps %ymm0, %ymm1, %ymm1
; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
; AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [4.0E+0,4.0E+0,5.0E+0,5.0E+0,6.0E+0,6.0E+0,7.0E+0,7.0E+0]
; AVX1-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovaps %ymm0, (%rsi)
; AVX1-NEXT:  .LBB18_2: # %if.end
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR54171:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    andq $7, %rdx
; AVX2-NEXT:    je .LBB18_2
; AVX2-NEXT:  # %bb.1: # %if.then
; AVX2-NEXT:    vmovd %edx, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
; AVX2-NEXT:    vmovdqa %ymm1, (%rdi)
; AVX2-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vmovdqa %ymm0, (%rsi)
; AVX2-NEXT:  .LBB18_2: # %if.end
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
entry:
  %sub = and i64 %i, 7
  %cmp.not = icmp eq i64 %sub, 0
  br i1 %cmp.not, label %if.end, label %if.then

if.then:
  %conv = trunc i64 %sub to i32
  %vecinit.i.i = insertelement <8 x i32> undef, i32 %conv, i64 0
  %vecinit7.i.i = shufflevector <8 x i32> %vecinit.i.i, <8 x i32> poison, <8 x i32> zeroinitializer
  %cmp.i = icmp ugt <8 x i32> %vecinit7.i.i, <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  %sext.i = sext <8 x i1> %cmp.i to <8 x i32>
  store <8 x i32> %sext.i, ptr %mask0, align 32
  %cmp.i18 = icmp ugt <8 x i32> %vecinit7.i.i, <i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
  %sext.i19 = sext <8 x i1> %cmp.i18 to <8 x i32>
  store <8 x i32> %sext.i19, ptr %mask1, align 32
  br label %if.end

if.end:
  ret void
}