; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-- < %s | FileCheck %s --check-prefix=X86
; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s --check-prefix=X64

; Optimize expanded SRL/SHL used as an input of
; SETCC comparing it with zero by removing rotation.
;
; See https://bugs.llvm.org/show_bug.cgi?id=50197
define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
; X86-LABEL: opt_setcc_lt_power_of_2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    .p2align 4
; X86-NEXT:  .LBB0_1: # %loop
; X86-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-NEXT:    addl $1, %edi
; X86-NEXT:    adcl $0, %esi
; X86-NEXT:    adcl $0, %edx
; X86-NEXT:    adcl $0, %ecx
; X86-NEXT:    movl %edx, %ebx
; X86-NEXT:    orl %ecx, %ebx
; X86-NEXT:    movl %esi, %ebp
; X86-NEXT:    orl %edx, %ebp
; X86-NEXT:    orl %ecx, %ebp
; X86-NEXT:    shrdl $28, %ebx, %ebp
; X86-NEXT:    jne .LBB0_1
; X86-NEXT:  # %bb.2: # %exit
; X86-NEXT:    movl %edi, (%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %edx, 8(%eax)
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
;
; X64-LABEL: opt_setcc_lt_power_of_2:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rdx
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    .p2align 4
; X64-NEXT:  .LBB0_1: # %loop
; X64-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-NEXT:    addq $1, %rax
; X64-NEXT:    adcq $0, %rdx
; X64-NEXT:    movq %rax, %rcx
; X64-NEXT:    shrq $60, %rcx
; X64-NEXT:    orq %rdx, %rcx
; X64-NEXT:    jne .LBB0_1
; X64-NEXT:  # %bb.2: # %exit
; X64-NEXT:    retq
  br label %loop

loop:
  %phi.a = phi i128 [ %a, %0 ], [ %inc, %loop ]
  %inc = add i128 %phi.a, 1
  %cmp = icmp ult i128 %inc, 1152921504606846976
  br i1 %cmp, label %exit, label %loop

exit:
  ret i128 %inc
}

define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_srl_eq_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    orl %eax, %edx
; X86-NEXT:    orl %ecx, %edx
; X86-NEXT:    orl %eax, %ecx
; X86-NEXT:    shldl $15, %edx, %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: opt_setcc_srl_eq_zero:
; X64:       # %bb.0:
; X64-NEXT:    shrq $17, %rdi
; X64-NEXT:    orq %rsi, %rdi
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %srl = lshr i128 %a, 17
  %cmp = icmp eq i128 %srl, 0
  ret i1 %cmp
}

define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_srl_ne_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    orl %eax, %edx
; X86-NEXT:    orl %ecx, %edx
; X86-NEXT:    orl %eax, %ecx
; X86-NEXT:    shldl $15, %edx, %ecx
; X86-NEXT:    setne %al
; X86-NEXT:    retl
;
; X64-LABEL: opt_setcc_srl_ne_zero:
; X64:       # %bb.0:
; X64-NEXT:    shrq $17, %rdi
; X64-NEXT:    orq %rsi, %rdi
; X64-NEXT:    setne %al
; X64-NEXT:    retq
  %srl = lshr i128 %a, 17
  %cmp = icmp ne i128 %srl, 0
  ret i1 %cmp
}

define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_eq_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $17, %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: opt_setcc_shl_eq_zero:
; X64:       # %bb.0:
; X64-NEXT:    shlq $17, %rsi
; X64-NEXT:    orq %rdi, %rsi
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %shl = shl i128 %a, 17
  %cmp = icmp eq i128 %shl, 0
  ret i1 %cmp
}

define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_ne_zero:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $17, %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    setne %al
; X86-NEXT:    retl
;
; X64-LABEL: opt_setcc_shl_ne_zero:
; X64:       # %bb.0:
; X64-NEXT:    shlq $17, %rsi
; X64-NEXT:    orq %rdi, %rsi
; X64-NEXT:    setne %al
; X64-NEXT:    retq
  %shl = shl i128 %a, 17
  %cmp = icmp ne i128 %shl, 0
  ret i1 %cmp
}

; Negative test: the optimization should not be applied if the shift has multiple users.
define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
; X86-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    shldl $17, %edx, %esi
; X86-NEXT:    shldl $17, %ecx, %edx
; X86-NEXT:    shldl $17, %eax, %ecx
; X86-NEXT:    shll $17, %eax
; X86-NEXT:    movl %ecx, %edi
; X86-NEXT:    orl %esi, %edi
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    orl %edx, %ebx
; X86-NEXT:    orl %edi, %ebx
; X86-NEXT:    sete %bl
; X86-NEXT:    pushl %esi
; X86-NEXT:    pushl %edx
; X86-NEXT:    pushl %ecx
; X86-NEXT:    pushl %eax
; X86-NEXT:    calll use@PLT
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
;
; X64-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    shldq $17, %rdi, %rsi
; X64-NEXT:    shlq $17, %rdi
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    orq %rsi, %rax
; X64-NEXT:    sete %bl
; X64-NEXT:    callq use@PLT
; X64-NEXT:    movl %ebx, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
  %shl = shl i128 %a, 17
  %cmp = icmp eq i128 %shl, 0
  call void @use(i128 %shl)
  ret i1 %cmp
}

; Check that the optimization is applied to a DAG of the appropriate shape
; even if no shift was actually expanded.
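; In the function below, (shl i64 %b, 17) and (lshr i64 %b, 47) together form a
; rotate of %b, and a rotate is zero iff its input is zero, so the zero-compare
; can be reduced to (shl i64 %a, 17) | %b, as the X64 assertions show.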
define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
; X86-LABEL: opt_setcc_expanded_shl_correct_shifts:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $17, %ecx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: opt_setcc_expanded_shl_correct_shifts:
; X64:       # %bb.0:
; X64-NEXT:    shlq $17, %rdi
; X64-NEXT:    orq %rsi, %rdi
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %shl.a = shl i64 %a, 17
  %srl.b = lshr i64 %b, 47
  %or.0 = or i64 %shl.a, %srl.b
  %shl.b = shl i64 %b, 17
  %or.1 = or i64 %or.0, %shl.b
  %cmp = icmp eq i64 %or.1, 0
  ret i1 %cmp
}

; Negative test: the optimization should not be applied because the shift
; amounts do not match.
define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) nounwind {
; X86-LABEL: opt_setcc_expanded_shl_wrong_shifts:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    shldl $17, %edx, %esi
; X86-NEXT:    shldl $17, %ecx, %edx
; X86-NEXT:    shldl $18, %eax, %ecx
; X86-NEXT:    shll $18, %eax
; X86-NEXT:    orl %edx, %eax
; X86-NEXT:    orl %esi, %ecx
; X86-NEXT:    orl %eax, %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: opt_setcc_expanded_shl_wrong_shifts:
; X64:       # %bb.0:
; X64-NEXT:    shldq $17, %rsi, %rdi
; X64-NEXT:    shlq $18, %rsi
; X64-NEXT:    orq %rdi, %rsi
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %shl.a = shl i64 %a, 17
  %srl.b = lshr i64 %b, 47
  %or.0 = or i64 %shl.a, %srl.b
  %shl.b = shl i64 %b, 18
  %or.1 = or i64 %or.0, %shl.b
  %cmp = icmp eq i64 %or.1, 0
  ret i1 %cmp
}

declare void @use(i128 %a)