; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -o - -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: llc < %s -o - -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE41

; For a setult against a constant, turn it into a setule and lower via psubusw.
; Here "icmp ult x, splat(26)" is expected to compare against 25: psubusw plus
; a pcmpeqw against zero with -mattr=+sse2, pminuw plus pcmpeqw with
; -mattr=+sse4.2. In both expected outputs the constant is materialized once in
; the loop preheader, not inside the loop body.

define void @loop_no_const_reload(ptr %in, ptr %out, i32 %n) {
; SSE2-LABEL: loop_no_const_reload:
; SSE2:       ## %bb.0: ## %entry
; SSE2-NEXT:    testl %edx, %edx
; SSE2-NEXT:    je LBB0_3
; SSE2-NEXT:  ## %bb.1: ## %for.body.preheader
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [25,25,25,25,25,25,25,25]
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    .p2align 4
; SSE2-NEXT:  LBB0_2: ## %for.body
; SSE2-NEXT:    ## =>This Inner Loop Header: Depth=1
; SSE2-NEXT:    movdqa (%rdi,%rax), %xmm2
; SSE2-NEXT:    psubusw %xmm0, %xmm2
; SSE2-NEXT:    pcmpeqw %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm2, (%rsi,%rax)
; SSE2-NEXT:    addq $16, %rax
; SSE2-NEXT:    decl %edx
; SSE2-NEXT:    jne LBB0_2
; SSE2-NEXT:  LBB0_3: ## %for.end
; SSE2-NEXT:    retq
;
; SSE41-LABEL: loop_no_const_reload:
; SSE41:       ## %bb.0: ## %entry
; SSE41-NEXT:    testl %edx, %edx
; SSE41-NEXT:    je LBB0_3
; SSE41-NEXT:  ## %bb.1: ## %for.body.preheader
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    pmovsxbw {{.*#+}} xmm0 = [25,25,25,25,25,25,25,25]
; SSE41-NEXT:    .p2align 4
; SSE41-NEXT:  LBB0_2: ## %for.body
; SSE41-NEXT:    ## =>This Inner Loop Header: Depth=1
; SSE41-NEXT:    movdqa (%rdi,%rax), %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    pminuw %xmm0, %xmm2
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm2, (%rsi,%rax)
; SSE41-NEXT:    addq $16, %rax
; SSE41-NEXT:    decl %edx
; SSE41-NEXT:    jne LBB0_2
; SSE41-NEXT:  LBB0_3: ## %for.end
; SSE41-NEXT:    retq
entry:
  %cmp9 = icmp eq i32 %n, 0
  br i1 %cmp9, label %for.end, label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx1 = getelementptr inbounds <2 x i64>, ptr %in, i64 %indvars.iv
  %arrayidx1.val = load <2 x i64>, ptr %arrayidx1, align 16
  %0 = bitcast <2 x i64> %arrayidx1.val to <8 x i16>
  %cmp.i.i = icmp ult <8 x i16> %0, <i16 26, i16 26, i16 26, i16 26, i16 26, i16 26, i16 26, i16 26>
  %sext.i.i = sext <8 x i1> %cmp.i.i to <8 x i16>
  %1 = bitcast <8 x i16> %sext.i.i to <2 x i64>
  %arrayidx5 = getelementptr inbounds <2 x i64>, ptr %out, i64 %indvars.iv
  store <2 x i64> %1, ptr %arrayidx5, align 16
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; Be careful if decrementing the constant would underflow.
; Lane 0 of the constant is 0, so "x ult C" cannot be rewritten as
; "x ule C-1" here. The expected output keeps the undecremented constant:
; a sign-bit flip plus pcmpgtw with -mattr=+sse2, and pmaxuw + pcmpeqw followed
; by an inversion (pxor with all-ones) with -mattr=+sse4.2.

define void @loop_const_folding_underflow(ptr %in, ptr %out, i32 %n) {
; SSE2-LABEL: loop_const_folding_underflow:
; SSE2:       ## %bb.0: ## %entry
; SSE2-NEXT:    testl %edx, %edx
; SSE2-NEXT:    je LBB1_3
; SSE2-NEXT:  ## %bb.1: ## %for.body.preheader
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [32768,32794,32794,32794,32794,32794,32794,32794]
; SSE2-NEXT:    .p2align 4
; SSE2-NEXT:  LBB1_2: ## %for.body
; SSE2-NEXT:    ## =>This Inner Loop Header: Depth=1
; SSE2-NEXT:    movdqa (%rdi,%rax), %xmm2
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pcmpgtw %xmm2, %xmm3
; SSE2-NEXT:    movdqa %xmm3, (%rsi,%rax)
; SSE2-NEXT:    addq $16, %rax
; SSE2-NEXT:    decl %edx
; SSE2-NEXT:    jne LBB1_2
; SSE2-NEXT:  LBB1_3: ## %for.end
; SSE2-NEXT:    retq
;
; SSE41-LABEL: loop_const_folding_underflow:
; SSE41:       ## %bb.0: ## %entry
; SSE41-NEXT:    testl %edx, %edx
; SSE41-NEXT:    je LBB1_3
; SSE41-NEXT:  ## %bb.1: ## %for.body.preheader
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    pmovsxbw {{.*#+}} xmm0 = [0,26,26,26,26,26,26,26]
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    .p2align 4
; SSE41-NEXT:  LBB1_2: ## %for.body
; SSE41-NEXT:    ## =>This Inner Loop Header: Depth=1
; SSE41-NEXT:    movdqa (%rdi,%rax), %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm3
; SSE41-NEXT:    pmaxuw %xmm0, %xmm3
; SSE41-NEXT:    pcmpeqw %xmm2, %xmm3
; SSE41-NEXT:    pxor %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm3, (%rsi,%rax)
; SSE41-NEXT:    addq $16, %rax
; SSE41-NEXT:    decl %edx
; SSE41-NEXT:    jne LBB1_2
; SSE41-NEXT:  LBB1_3: ## %for.end
; SSE41-NEXT:    retq
entry:
  %cmp9 = icmp eq i32 %n, 0
  br i1 %cmp9, label %for.end, label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx1 = getelementptr inbounds <2 x i64>, ptr %in, i64 %indvars.iv
  %arrayidx1.val = load <2 x i64>, ptr %arrayidx1, align 16
  %0 = bitcast <2 x i64> %arrayidx1.val to <8 x i16>
  %cmp.i.i = icmp ult <8 x i16> %0, <i16 0, i16 26, i16 26, i16 26, i16 26, i16 26, i16 26, i16 26>
  %sext.i.i = sext <8 x i1> %cmp.i.i to <8 x i16>
  %1 = bitcast <8 x i16> %sext.i.i to <2 x i64>
  %arrayidx5 = getelementptr inbounds <2 x i64>, ptr %out, i64 %indvars.iv
  store <2 x i64> %1, ptr %arrayidx5, align 16
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; Test for PSUBUSB
; Byte-element variant of the fold above: "icmp ult x, splat(11)" is expected
; to compare against 10.
; NOTE(review): despite the PSUBUSB wording, the expected output below uses
; pminub + pcmpeqb for both run lines - confirm whether the heading is stale.

define <16 x i8> @test_ult_byte(<16 x i8> %a) {
; CHECK-LABEL: test_ult_byte:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10]
; CHECK-NEXT:    pminub %xmm0, %xmm1
; CHECK-NEXT:    pcmpeqb %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %icmp = icmp ult <16 x i8> %a, <i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11>
  %sext = sext <16 x i1> %icmp to <16 x i8>
  ret <16 x i8> %sext
}

; Only do this when we can turn the comparison into a setule. I.e. not for
; register operands.

define <8 x i16> @test_ult_register(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: test_ult_register:
; SSE2:       ## %bb.0: ## %entry
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtw %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_ult_register:
; SSE41:       ## %bb.0: ## %entry
; SSE41-NEXT:    pmaxuw %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    retq
entry:
  %icmp = icmp ult <8 x i16> %a, %b
  %sext = sext <8 x i1> %icmp to <8 x i16>
  ret <8 x i16> %sext
}

; The tests below compare each 128-bit integer vector type against a splat
; constant with ugt/uge/ult/ule and return the <N x i1> mask directly.

; icmp ugt <16 x i8> against splat 42: expected as pmaxub with 43 + pcmpeqb.
define <16 x i1> @ugt_v16i8_splat(<16 x i8> %x) {
; CHECK-LABEL: ugt_v16i8_splat:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43]
; CHECK-NEXT:    pmaxub %xmm0, %xmm1
; CHECK-NEXT:    pcmpeqb %xmm1, %xmm0
; CHECK-NEXT:    retq
  %cmp = icmp ugt <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i1> %cmp
}

; icmp ugt <8 x i16> against splat 242: the expected constant is 243, used by
; psubusw + compare-with-zero (+sse2) or pmaxuw + pcmpeqw (+sse4.2).
define <8 x i1> @ugt_v8i16_splat(<8 x i16> %x) {
; SSE2-LABEL: ugt_v8i16_splat:
; SSE2:       ## %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [243,243,243,243,243,243,243,243]
; SSE2-NEXT:    psubusw %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ugt_v8i16_splat:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = [243,243,243,243,243,243,243,243]
; SSE41-NEXT:    pmaxuw %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    retq
  %cmp = icmp ugt <8 x i16> %x, <i16 242, i16 242, i16 242, i16 242, i16 242, i16 242, i16 242, i16 242>
  ret <8 x i1> %cmp
}

; icmp ugt <4 x i32> against splat -42: pxor + pcmpgtd on constant-pool
; operands (+sse2), or pmaxud with -41 (4294967255) + pcmpeqd (+sse4.2).
define <4 x i1> @ugt_v4i32_splat(<4 x i32> %x) {
; SSE2-LABEL: ugt_v4i32_splat:
; SSE2:       ## %bb.0:
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ugt_v4i32_splat:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    pmovsxbd {{.*#+}} xmm1 = [4294967255,4294967255,4294967255,4294967255]
; SSE41-NEXT:    pmaxud %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    retq
  %cmp = icmp ugt <4 x i32> %x, <i32 -42, i32 -42, i32 -42, i32 -42>
  ret <4 x i1> %cmp
}

; icmp ugt <2 x i64> against splat 442: the +sse4.2 output is pxor + pcmpgtq;
; the +sse2 output combines 32-bit pcmpgtd/pcmpeqd results via pshufd/pand/por.
define <2 x i1> @ugt_v2i64_splat(<2 x i64> %x) {
; SSE2-LABEL: ugt_v2i64_splat:
; SSE2:       ## %bb.0:
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ugt_v2i64_splat:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    pcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    retq
  %cmp = icmp ugt <2 x i64> %x, <i64 442, i64 442>
  ret <2 x i1> %cmp
}

; icmp uge <16 x i8> against splat 42: pmaxub with the unchanged constant 42
; + pcmpeqb.
define <16 x i1> @uge_v16i8_splat(<16 x i8> %x) {
; CHECK-LABEL: uge_v16i8_splat:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; CHECK-NEXT:    pmaxub %xmm0, %xmm1
; CHECK-NEXT:    pcmpeqb %xmm1, %xmm0
; CHECK-NEXT:    retq
  %cmp = icmp uge <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i1> %cmp
}

; icmp uge <8 x i16> against splat 242: same shape as the ugt case, but the
; expected constant stays 242.
define <8 x i1> @uge_v8i16_splat(<8 x i16> %x) {
; SSE2-LABEL: uge_v8i16_splat:
; SSE2:       ## %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [242,242,242,242,242,242,242,242]
; SSE2-NEXT:    psubusw %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uge_v8i16_splat:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = [242,242,242,242,242,242,242,242]
; SSE41-NEXT:    pmaxuw %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    retq
  %cmp = icmp uge <8 x i16> %x, <i16 242, i16 242, i16 242, i16 242, i16 242, i16 242, i16 242, i16 242>
  ret <8 x i1> %cmp
}

; icmp uge <4 x i32> against splat -42: the +sse2 output computes a pcmpgtd
; and inverts it (pcmpeqd all-ones + pxor); the +sse4.2 output is pmaxud with
; 4294967254 (-42) + pcmpeqd.
define <4 x i1> @uge_v4i32_splat(<4 x i32> %x) {
; SSE2-LABEL: uge_v4i32_splat:
; SSE2:       ## %bb.0:
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483606,2147483606,2147483606,2147483606]
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uge_v4i32_splat:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    pmovsxbd {{.*#+}} xmm1 = [4294967254,4294967254,4294967254,4294967254]
; SSE41-NEXT:    pmaxud %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    retq
  %cmp = icmp uge <4 x i32> %x, <i32 -42, i32 -42, i32 -42, i32 -42>
  ret <4 x i1> %cmp
}

; icmp uge <2 x i64> against splat 442: both expected outputs end by
; inverting an intermediate mask (pcmpeqd all-ones + pxor); +sse4.2 uses
; pcmpgtq, +sse2 uses 32-bit compares combined with pshufd/pandn.
define <2 x i1> @uge_v2i64_splat(<2 x i64> %x) {
; SSE2-LABEL: uge_v2i64_splat:
; SSE2:       ## %bb.0:
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: uge_v2i64_splat:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [9223372036854776250,9223372036854776250]
; SSE41-NEXT:    pcmpgtq %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    retq
  %cmp = icmp uge <2 x i64> %x, <i64 442, i64 442>
  ret <2 x i1> %cmp
}

; icmp ult <16 x i8> against splat 42: expected as pminub with 41 + pcmpeqb.
define <16 x i1> @ult_v16i8_splat(<16 x i8> %x) {
; CHECK-LABEL: ult_v16i8_splat:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [41,41,41,41,41,41,41,41,41,41,41,41,41,41,41,41]
; CHECK-NEXT:    pminub %xmm0, %xmm1
; CHECK-NEXT:    pcmpeqb %xmm1, %xmm0
; CHECK-NEXT:    retq
  %cmp = icmp ult <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i1> %cmp
}

; icmp ult <8 x i16> against splat 242: psubusw with a constant-pool operand
; + compare-with-zero (+sse2), or pminuw with 241 + pcmpeqw (+sse4.2).
define <8 x i1> @ult_v8i16_splat(<8 x i16> %x) {
; SSE2-LABEL: ult_v8i16_splat:
; SSE2:       ## %bb.0:
; SSE2-NEXT:    psubusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ult_v8i16_splat:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = [241,241,241,241,241,241,241,241]
; SSE41-NEXT:    pminuw %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    retq
  %cmp = icmp ult <8 x i16> %x, <i16 242, i16 242, i16 242, i16 242, i16 242, i16 242, i16 242, i16 242>
  ret <8 x i1> %cmp
}

; icmp ult <4 x i32> against splat -42: pxor + pcmpgtd against 2147483606
; (+sse2), or pminud with 4294967253 (-43) + pcmpeqd (+sse4.2).
define <4 x i1> @ult_v4i32_splat(<4 x i32> %x) {
; SSE2-LABEL: ult_v4i32_splat:
; SSE2:       ## %bb.0:
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483606,2147483606,2147483606,2147483606]
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ult_v4i32_splat:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    pmovsxbd {{.*#+}} xmm1 = [4294967253,4294967253,4294967253,4294967253]
; SSE41-NEXT:    pminud %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    retq
  %cmp = icmp ult <4 x i32> %x, <i32 -42, i32 -42, i32 -42, i32 -42>
  ret <4 x i1> %cmp
}

; icmp ult <2 x i64> against splat 442: pxor + pcmpgtq against
; 9223372036854776250 (+sse4.2), or 32-bit compares combined with
; pshufd/pandn (+sse2).
define <2 x i1> @ult_v2i64_splat(<2 x i64> %x) {
; SSE2-LABEL: ult_v2i64_splat:
; SSE2:       ## %bb.0:
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ult_v2i64_splat:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [9223372036854776250,9223372036854776250]
; SSE41-NEXT:    pcmpgtq %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
  %cmp = icmp ult <2 x i64> %x, <i64 442, i64 442>
  ret <2 x i1> %cmp
}

; icmp ule <16 x i8> against splat 42: pminub with the unchanged constant 42
; + pcmpeqb.
define <16 x i1> @ule_v16i8_splat(<16 x i8> %x) {
; CHECK-LABEL: ule_v16i8_splat:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; CHECK-NEXT:    pminub %xmm0, %xmm1
; CHECK-NEXT:    pcmpeqb %xmm1, %xmm0
; CHECK-NEXT:    retq
  %cmp = icmp ule <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i1> %cmp
}

; icmp ule <8 x i16> against splat 242: psubusw with a constant-pool operand
; + compare-with-zero (+sse2), or pminuw with 242 + pcmpeqw (+sse4.2).
define <8 x i1> @ule_v8i16_splat(<8 x i16> %x) {
; SSE2-LABEL: ule_v8i16_splat:
; SSE2:       ## %bb.0:
; SSE2-NEXT:    psubusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ule_v8i16_splat:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = [242,242,242,242,242,242,242,242]
; SSE41-NEXT:    pminuw %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE41-NEXT:    retq
  %cmp = icmp ule <8 x i16> %x, <i16 242, i16 242, i16 242, i16 242, i16 242, i16 242, i16 242, i16 242>
  ret <8 x i1> %cmp
}

; icmp ule <4 x i32> against splat -42: pxor + pcmpgtd followed by an
; inversion (+sse2), or pminud with 4294967254 (-42) + pcmpeqd (+sse4.2).
define <4 x i1> @ule_v4i32_splat(<4 x i32> %x) {
; SSE2-LABEL: ule_v4i32_splat:
; SSE2:       ## %bb.0:
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ule_v4i32_splat:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    pmovsxbd {{.*#+}} xmm1 = [4294967254,4294967254,4294967254,4294967254]
; SSE41-NEXT:    pminud %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    retq
  %cmp = icmp ule <4 x i32> %x, <i32 -42, i32 -42, i32 -42, i32 -42>
  ret <4 x i1> %cmp
}

; icmp ule <2 x i64> against splat 442: both expected outputs compute a
; greater-than mask and invert it (pcmpeqd all-ones + pxor); +sse4.2 uses
; pcmpgtq, +sse2 uses 32-bit compares combined with pshufd/pand/por.
define <2 x i1> @ule_v2i64_splat(<2 x i64> %x) {
; SSE2-LABEL: ule_v2i64_splat:
; SSE2:       ## %bb.0:
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
; SSE2-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ule_v2i64_splat:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    pcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    pxor %xmm1, %xmm0
; SSE41-NEXT:    retq
  %cmp = icmp ule <2 x i64> %x, <i64 442, i64 442>
  ret <2 x i1> %cmp
}

; This should be simplified before we reach lowering, but
; make sure that we are not getting it wrong by underflowing.
; ("x ult 0" is always false; the expected output is an all-zero register.)

define <4 x i1> @ult_v4i32_splat_0_simplify(<4 x i32> %x) {
; CHECK-LABEL: ult_v4i32_splat_0_simplify:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %cmp = icmp ult <4 x i32> %x, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i1> %cmp
}

; This should be simplified before we reach lowering, but
; make sure that we are not getting it wrong by overflowing.
; ("x ugt -1" is always false; the expected output is an all-zero register.)

define <4 x i1> @ugt_v4i32_splat_maxval_simplify(<4 x i32> %x) {
; CHECK-LABEL: ugt_v4i32_splat_maxval_simplify:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    xorps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %cmp = icmp ugt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i1> %cmp
}

; Non-splat constant <-43,-42,-41,-40>: in the +sse4.2 output every lane is
; incremented individually (4294967254..4294967257, i.e. -42..-39) for the
; pmaxud + pcmpeqd form.
define <4 x i1> @ugt_v4i32_nonsplat(<4 x i32> %x) {
; SSE2-LABEL: ugt_v4i32_nonsplat:
; SSE2:       ## %bb.0:
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ugt_v4i32_nonsplat:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    pmovsxbd {{.*#+}} xmm1 = [4294967254,4294967255,4294967256,4294967257]
; SSE41-NEXT:    pmaxud %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    retq
  %cmp = icmp ugt <4 x i32> %x, <i32 -43, i32 -42, i32 -41, i32 -40>
  ret <4 x i1> %cmp
}

; Constant on the left-hand side: "splat(4) ugt x". The +sse4.2 output uses
; pminud with 3 + pcmpeqd (i.e. x ule 3); the +sse2 output uses pxor +
; pcmpgtd against 2147483652.
define <4 x i1> @ugt_v4i32_splat_commute(<4 x i32> %x) {
; SSE2-LABEL: ugt_v4i32_splat_commute:
; SSE2:       ## %bb.0:
; SSE2-NEXT:    pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2147483652,2147483652,2147483652,2147483652]
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: ugt_v4i32_splat_commute:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    pmovsxbd {{.*#+}} xmm1 = [3,3,3,3]
; SSE41-NEXT:    pminud %xmm0, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    retq
  %cmp = icmp ugt <4 x i32> <i32 4, i32 4, i32 4, i32 4>, %x
  ret <4 x i1> %cmp
}

; The compare result feeds a vector select (x ugt 42 ? y : x) instead of being
; returned directly. The expected constant is 43 in both outputs; the select is
; pand/pandn/por with +sse2 and pblendvb with +sse4.2.
; NOTE(review): the name suggests a regression test for bug report PR39859 -
; the report itself is not visible in this file.
define <8 x i16> @PR39859(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: PR39859:
; SSE2:       ## %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [43,43,43,43,43,43,43,43]
; SSE2-NEXT:    psubusw %xmm0, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    pcmpeqw %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pandn %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: PR39859:
; SSE41:       ## %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    pmovsxbw {{.*#+}} xmm0 = [43,43,43,43,43,43,43,43]
; SSE41-NEXT:    pmaxuw %xmm2, %xmm0
; SSE41-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
  %cmp = icmp ugt <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %sel = select <8 x i1> %cmp, <8 x i16> %y, <8 x i16> %x
  ret <8 x i16> %sel
}