; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64

; Regression tests for SelectionDAG known-bits (computeKnownBits) driven
; simplifications on x86. The CHECK lines are autogenerated — regenerate with
; update_llc_test_checks.py instead of editing them by hand.

; Fuzzer-reduced case: known bits of zero-extended udiv-by-constant results.
; The udivs are lowered to multiply-by-magic-constant (imull $101 / imull $177
; plus shrl $14); the vector srem/icmp chain is dead (%E156 is unused), so only
; the division sequence and the loop skeleton survive in the output.
define void @knownbits_zext_in_reg(ptr) nounwind {
; X86-LABEL: knownbits_zext_in_reg:
; X86:       # %bb.0: # %BB
; X86-NEXT:    pushl %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl (%eax), %ecx
; X86-NEXT:    imull $101, %ecx, %eax
; X86-NEXT:    shrl $14, %eax
; X86-NEXT:    imull $177, %ecx, %edx
; X86-NEXT:    shrl $14, %edx
; X86-NEXT:    movzbl %al, %ecx
; X86-NEXT:    xorl %ebx, %ebx
; X86-NEXT:    .p2align 4
; X86-NEXT:  .LBB0_1: # %CF
; X86-NEXT:    # =>This Loop Header: Depth=1
; X86-NEXT:    # Child Loop BB0_2 Depth 2
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    divb %dl
; X86-NEXT:    .p2align 4
; X86-NEXT:  .LBB0_2: # %CF237
; X86-NEXT:    # Parent Loop BB0_1 Depth=1
; X86-NEXT:    # => This Inner Loop Header: Depth=2
; X86-NEXT:    testb %bl, %bl
; X86-NEXT:    jne .LBB0_2
; X86-NEXT:    jmp .LBB0_1
;
; X64-LABEL: knownbits_zext_in_reg:
; X64:       # %bb.0: # %BB
; X64-NEXT:    movzbl (%rdi), %eax
; X64-NEXT:    imull $101, %eax, %ecx
; X64-NEXT:    shrl $14, %ecx
; X64-NEXT:    imull $177, %eax, %edx
; X64-NEXT:    shrl $14, %edx
; X64-NEXT:    movzbl %cl, %ecx
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    .p2align 4
; X64-NEXT:  .LBB0_1: # %CF
; X64-NEXT:    # =>This Loop Header: Depth=1
; X64-NEXT:    # Child Loop BB0_2 Depth 2
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    divb %dl
; X64-NEXT:    .p2align 4
; X64-NEXT:  .LBB0_2: # %CF237
; X64-NEXT:    # Parent Loop BB0_1 Depth=1
; X64-NEXT:    # => This Inner Loop Header: Depth=2
; X64-NEXT:    testb %sil, %sil
; X64-NEXT:    jne .LBB0_2
; X64-NEXT:    jmp .LBB0_1
BB:
  %L5 = load i8, ptr %0
  %Sl9 = select i1 true, i8 %L5, i8 undef
  %B21 = udiv i8 %Sl9, -93
  %B22 = udiv i8 %Sl9, 93
  br label %CF

CF:                                               ; preds = %CF246, %BB
  %I40 = insertelement <4 x i8> zeroinitializer, i8 %B21, i32 1
  %I41 = insertelement <4 x i8> zeroinitializer, i8 %B22, i32 1
  %B41 = srem <4 x i8> %I40, %I41
  br label %CF237

CF237:                                            ; preds = %CF237, %CF
  %Cmp73 = icmp ne i1 undef, undef
  br i1 %Cmp73, label %CF237, label %CF246

CF246:                                            ; preds = %CF237
  %Cmp117 = icmp ult <4 x i8> %B41, undef
  %E156 = extractelement <4 x i1> %Cmp117, i32 2
  br label %CF
}

; The masked addends are at most 32767 and 32766, so their sum is below 2^16
; and the lshr by 17 is known zero — the whole function folds to "return 0".
define i32 @knownbits_mask_add_lshr(i32 %a0, i32 %a1) nounwind {
; CHECK-LABEL: knownbits_mask_add_lshr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = and i32 %a0, 32767
  %2 = and i32 %a1, 32766
  %3 = add i32 %1, %2
  %4 = lshr i32 %3, 17
  ret i32 %4
}

; Both addends have their low 10 bits cleared (and -1024), so after the
; i128 shl by 54 the low 64 bits of the result are known zero (only upper
; bits need computing — shld on X86, shldq + xorl %eax on X64).
define i128 @knownbits_mask_addc_shl(i64 %a0, i64 %a1, i64 %a2) nounwind {
; X86-LABEL: knownbits_mask_addc_shl:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl $-1024, %esi # imm = 0xFC00
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    andl %esi, %edi
; X86-NEXT:    andl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    addl %edi, %esi
; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    adcl $0, %ecx
; X86-NEXT:    shldl $22, %edx, %ecx
; X86-NEXT:    shldl $22, %esi, %edx
; X86-NEXT:    movl %edx, 8(%eax)
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl $0, 4(%eax)
; X86-NEXT:    movl $0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl $4
;
; X64-LABEL: knownbits_mask_addc_shl:
; X64:       # %bb.0:
; X64-NEXT:    andq $-1024, %rdi # imm = 0xFC00
; X64-NEXT:    andq $-1024, %rsi # imm = 0xFC00
; X64-NEXT:    addq %rdi, %rsi
; X64-NEXT:    adcl $0, %edx
; X64-NEXT:    shldq $54, %rsi, %rdx
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %1 = and i64 %a0, -1024
  %2 = zext i64 %1 to i128
  %3 = and i64 %a1, -1024
  %4 = zext i64 %3 to i128
  %5 = add i128 %2, %4
  %6 = zext i64 %a2 to i128
  %7 = shl i128 %6, 64
  %8 = add i128 %5, %7
  %9 = shl i128 %8, 54
  ret i128 %9
}

; After shl by 32 both operands have low 32 bits known zero, so the low 32
; bits of uval+sval are known zero and the truncated i32 result folds to 0;
; only the overflow flags (setb/seto/orb) need to be computed.
define {i32, i1} @knownbits_uaddo_saddo(i64 %a0, i64 %a1) nounwind {
; X86-LABEL: knownbits_uaddo_saddo:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    setb %al
; X86-NEXT:    seto %dl
; X86-NEXT:    orb %al, %dl
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_uaddo_saddo:
; X64:       # %bb.0:
; X64-NEXT:    shlq $32, %rdi
; X64-NEXT:    shlq $32, %rsi
; X64-NEXT:    addq %rdi, %rsi
; X64-NEXT:    setb %al
; X64-NEXT:    seto %dl
; X64-NEXT:    orb %al, %dl
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %1 = shl i64 %a0, 32
  %2 = shl i64 %a1, 32
  %u = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %1, i64 %2)
  %uval = extractvalue {i64, i1} %u, 0
  %uovf = extractvalue {i64, i1} %u, 1
  %s = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %1, i64 %2)
  %sval = extractvalue {i64, i1} %s, 0
  %sovf = extractvalue {i64, i1} %s, 1
  %sum = add i64 %uval, %sval
  %3 = trunc i64 %sum to i32
  %4 = or i1 %uovf, %sovf
  %ret0 = insertvalue {i32, i1} undef, i32 %3, 0
  %ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
  ret {i32, i1} %ret1
}

; Subtraction analogue of the test above: the low 32 bits of uval+sval are
; known zero, so the value result folds to 0 and the subtraction itself
; degrades to a flag-only cmp.
define {i32, i1} @knownbits_usubo_ssubo(i64 %a0, i64 %a1) nounwind {
; X86-LABEL: knownbits_usubo_ssubo:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    setb %al
; X86-NEXT:    seto %dl
; X86-NEXT:    orb %al, %dl
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: knownbits_usubo_ssubo:
; X64:       # %bb.0:
; X64-NEXT:    shlq $32, %rdi
; X64-NEXT:    shlq $32, %rsi
; X64-NEXT:    cmpq %rsi, %rdi
; X64-NEXT:    setb %al
; X64-NEXT:    seto %dl
; X64-NEXT:    orb %al, %dl
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %1 = shl i64 %a0, 32
  %2 = shl i64 %a1, 32
  %u = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %1, i64 %2)
  %uval = extractvalue {i64, i1} %u, 0
  %uovf = extractvalue {i64, i1} %u, 1
  %s = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %1, i64 %2)
  %sval = extractvalue {i64, i1} %s, 0
  %sovf = extractvalue {i64, i1} %s, 1
  %sum = add i64 %uval, %sval
  %3 = trunc i64 %sum to i32
  %4 = or i1 %uovf, %sovf
  %ret0 = insertvalue {i32, i1} undef, i32 %3, 0
  %ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
  ret {i32, i1} %ret1
}

declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone

; fshl(%a0, -1, 5) fills the low 5 result bits from the all-ones second
; operand, so the low 2 bits are known ones and (result & 3) folds to 3.
define i32 @knownbits_fshl(i32 %a0) nounwind {
; CHECK-LABEL: knownbits_fshl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl $3, %eax
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = tail call i32 @llvm.fshl.i32(i32 %a0, i32 -1, i32 5)
  %2 = and i32 %1, 3
  ret i32 %2
}

; fshr(%a0, -1, 5) takes its low result bits from -1 lshr 5, which is still
; all ones in the low bits, so (result & 3) folds to 3 as well.
define i32 @knownbits_fshr(i32 %a0) nounwind {
; CHECK-LABEL: knownbits_fshr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl $3, %eax
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = tail call i32 @llvm.fshr.i32(i32 %a0, i32 -1, i32 5)
  %2 = and i32 %1, 3
  ret i32 %2
}

declare i32 @llvm.fshl.i32(i32, i32, i32) nounwind readnone
declare i32 @llvm.fshr.i32(i32, i32, i32) nounwind readnone