; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s -check-prefixes=CHECK,SSE
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -early-live-intervals | FileCheck %s -check-prefixes=CHECK,SSE
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s -check-prefixes=CHECK,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64-v3 | FileCheck %s -check-prefixes=CHECK,AVX,AVX2

; fold (or x, x) -> x
define i32 @or_self(i32 %x) {
; CHECK-LABEL: or_self:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
  %or = or i32 %x, %x
  ret i32 %or
}

define <4 x i32> @or_self_vec(<4 x i32> %x) {
; CHECK-LABEL: or_self_vec:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %or = or <4 x i32> %x, %x
  ret <4 x i32> %or
}

; fold (or x, c) -> c iff (x & ~c) == 0

define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
; SSE-LABEL: or_zext_v2i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967295]
; SSE-NEXT:    retq
;
; AVX1-LABEL: or_zext_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [4294967295,0,4294967295,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: or_zext_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = [4294967295,4294967295]
; AVX2-NEXT:    # xmm0 = mem[0,0]
; AVX2-NEXT:    retq
  %1 = zext <2 x i32> %a0 to <2 x i64>
  %2 = or <2 x i64> %1, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %2
}

define <4 x i32> @or_zext_v4i16(<4 x i16> %a0) {
; SSE-LABEL: or_zext_v4i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
; SSE-NEXT:    retq
;
; AVX-LABEL: or_zext_v4i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm0 = [65535,65535,65535,65535]
; AVX-NEXT:    retq
  %1 = zext <4 x i16> %a0 to <4 x i32>
  %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  ret <4 x i32> %2
}

; fold (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))

define i32 @or_and_and_i32(i32 %x, i32 %y) {
; CHECK-LABEL: or_and_and_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl $-11, %esi
; CHECK-NEXT:    andl $-3, %eax
; CHECK-NEXT:    orl %esi, %eax
; CHECK-NEXT:    retq
  %xy = or i32 %x, %y
  %mx = and i32 %x, 8
  %mxy = and i32 %xy, -11
  %r = or i32 %mx, %mxy
  ret i32 %r
}

define i64 @or_and_and_commute_i64(i64 %x, i64 %y) {
; CHECK-LABEL: or_and_and_commute_i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    orq %rsi, %rax
; CHECK-NEXT:    andq $-3, %rax
; CHECK-NEXT:    retq
  %xy = or i64 %x, %y
  %mx = and i64 %x, 8
  %mxy = and i64 %xy, -3
  %r = or i64 %mxy, %mx
  ret i64 %r
}

define <4 x i32> @or_and_and_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: or_and_and_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: or_and_and_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %xy = or <4 x i32> %x, %y
  %mx = and <4 x i32> %x, <i32 2, i32 4, i32 8, i32 16>
  %mxy = and <4 x i32> %xy, <i32 1, i32 -1, i32 -5, i32 -25>
  %r = or <4 x i32> %mx, %mxy
  ret <4 x i32> %r
}

define i32 @or_and_and_multiuse_i32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: or_and_and_multiuse_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    orl %edi, %esi
; CHECK-NEXT:    andl $8, %edi
; CHECK-NEXT:    andl $-11, %esi
; CHECK-NEXT:    leal (%rdi,%rsi), %ebx
; CHECK-NEXT:    movl %esi, %edi
; CHECK-NEXT:    callq use_i32@PLT
; CHECK-NEXT:    movl %ebx, %eax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %xy = or i32 %x, %y
  %mx = and i32 %x, 8
  %mxy = and i32 %xy, -11
  %r = or i32 %mx, %mxy
  call void @use_i32(i32 %mxy)
  ret i32 %r
}

define i32 @or_and_multiuse_and_i32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: or_and_multiuse_and_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    orl %edi, %esi
; CHECK-NEXT:    andl $8, %edi
; CHECK-NEXT:    andl $-11, %esi
; CHECK-NEXT:    leal (%rsi,%rdi), %ebx
; CHECK-NEXT:    # kill: def $edi killed $edi killed $rdi
; CHECK-NEXT:    callq use_i32@PLT
; CHECK-NEXT:    movl %ebx, %eax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %xy = or i32 %x, %y
  %mx = and i32 %x, 8
  %mxy = and i32 %xy, -11
  %r = or i32 %mx, %mxy
  call void @use_i32(i32 %mx)
  ret i32 %r
}

define i32 @or_and_multiuse_and_multiuse_i32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: or_and_multiuse_and_multiuse_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    movl %esi, %ebx
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    orl %edi, %ebx
; CHECK-NEXT:    andl $8, %edi
; CHECK-NEXT:    andl $-11, %ebx
; CHECK-NEXT:    leal (%rdi,%rbx), %ebp
; CHECK-NEXT:    # kill: def $edi killed $edi killed $rdi
; CHECK-NEXT:    callq use_i32@PLT
; CHECK-NEXT:    movl %ebx, %edi
; CHECK-NEXT:    callq use_i32@PLT
; CHECK-NEXT:    movl %ebp, %eax
; CHECK-NEXT:    addq $8, %rsp
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    retq
  %xy = or i32 %x, %y
  %mx = and i32 %x, 8
  %mxy = and i32 %xy, -11
  %r = or i32 %mx, %mxy
  call void @use_i32(i32 %mx)
  call void @use_i32(i32 %mxy)
  ret i32 %r
}

define i64 @or_build_pair_not(i32 %a0, i32 %a1) {
; CHECK-LABEL: or_build_pair_not:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
; CHECK-NEXT:    shlq $32, %rsi
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    orq %rsi, %rax
; CHECK-NEXT:    notq %rax
; CHECK-NEXT:    retq
  %n0 = xor i32 %a0, -1
  %n1 = xor i32 %a1, -1
  %x0 = zext i32 %n0 to i64
  %x1 = zext i32 %n1 to i64
  %hi = shl i64 %x1, 32
  %r = or i64 %hi, %x0
  ret i64 %r
}

define i64 @PR89533(<64 x i8> %a0) {
; SSE-LABEL: PR89533:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95]
; SSE-NEXT:    pcmpeqb %xmm4, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; SSE-NEXT:    pcmpeqb %xmm4, %xmm1
; SSE-NEXT:    pmovmskb %xmm1, %ecx
; SSE-NEXT:    notl %ecx
; SSE-NEXT:    shll $16, %ecx
; SSE-NEXT:    orl %eax, %ecx
; SSE-NEXT:    pcmpeqb %xmm4, %xmm2
; SSE-NEXT:    pmovmskb %xmm2, %eax
; SSE-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; SSE-NEXT:    pcmpeqb %xmm4, %xmm3
; SSE-NEXT:    pmovmskb %xmm3, %edx
; SSE-NEXT:    notl %edx
; SSE-NEXT:    shll $16, %edx
; SSE-NEXT:    orl %eax, %edx
; SSE-NEXT:    shlq $32, %rdx
; SSE-NEXT:    orq %rcx, %rdx
; SSE-NEXT:    movl $64, %eax
; SSE-NEXT:    rep bsfq %rdx, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR89533:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95]
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpmovmskb %xmm3, %eax
; AVX1-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    notl %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %edx
; AVX1-NEXT:    notl %edx
; AVX1-NEXT:    shll $16, %edx
; AVX1-NEXT:    orl %eax, %edx
; AVX1-NEXT:    shlq $32, %rdx
; AVX1-NEXT:    orq %rcx, %rdx
; AVX1-NEXT:    movl $64, %eax
; AVX1-NEXT:    rep bsfq %rdx, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR89533:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb {{.*#+}} ymm2 = [95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95]
; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    shlq $32, %rcx
; AVX2-NEXT:    orq %rax, %rcx
; AVX2-NEXT:    notq %rcx
; AVX2-NEXT:    xorl %eax, %eax
; AVX2-NEXT:    tzcntq %rcx, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %cmp = icmp ne <64 x i8> %a0, <i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95, i8 95>
  %mask = bitcast <64 x i1> %cmp to i64
  %tz = tail call i64 @llvm.cttz.i64(i64 %mask, i1 false)
  ret i64 %tz
}

declare void @use_i32(i32)