; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64

%WideUInt32 = type { i32, i32 }

define void @PR25858_i32(ptr sret(%WideUInt32), ptr, ptr) nounwind {
; X86-LABEL: PR25858_i32:
; X86:       # %bb.0: # %top
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %esi
; X86-NEXT:    movl 4(%edx), %edx
; X86-NEXT:    subl (%ecx), %esi
; X86-NEXT:    sbbl 4(%ecx), %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %esi, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    retl $4
;
; X64-LABEL: PR25858_i32:
; X64:       # %bb.0: # %top
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    movl (%rsi), %ecx
; X64-NEXT:    movl 4(%rsi), %esi
; X64-NEXT:    subl (%rdx), %ecx
; X64-NEXT:    sbbl 4(%rdx), %esi
; X64-NEXT:    movl %esi, 4(%rdi)
; X64-NEXT:    movl %ecx, (%rdi)
; X64-NEXT:    retq
top:
  %3 = load i32, ptr %1, align 4
  %4 = load i32, ptr %2, align 4
  %5 = sub i32 %3, %4
  %6 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %3, i32 %4)
  %7 = extractvalue { i32, i1 } %6, 1
  %8 = getelementptr inbounds %WideUInt32, ptr %1, i32 0, i32 1
  %9 = load i32, ptr %8, align 8
  %10 = getelementptr inbounds %WideUInt32, ptr %2, i32 0, i32 1
  %11 = load i32, ptr %10, align 8
  %12 = sub i32 %9, %11
  %.neg1 = sext i1 %7 to i32
  %13 = add i32 %12, %.neg1
  %14 = insertvalue %WideUInt32 undef, i32 %5, 0
  %15 = insertvalue %WideUInt32 %14, i32 %13, 1
  store %WideUInt32 %15, ptr %0, align 4
  ret void
}

declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)

%WideUInt64 = type { i64, i64 }

define void @PR25858_i64(ptr sret(%WideUInt64), ptr, ptr) nounwind {
; X86-LABEL: PR25858_i64:
; X86:       # %bb.0: # %top
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl (%edi), %edx
; X86-NEXT:    movl 4(%edi), %esi
; X86-NEXT:    movl 12(%edi), %ecx
; X86-NEXT:    movl 8(%edi), %edi
; X86-NEXT:    subl 8(%ebx), %edi
; X86-NEXT:    sbbl 12(%ebx), %ecx
; X86-NEXT:    subl (%ebx), %edx
; X86-NEXT:    sbbl 4(%ebx), %esi
; X86-NEXT:    sbbl $0, %edi
; X86-NEXT:    sbbl $0, %ecx
; X86-NEXT:    movl %edx, (%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %edi, 8(%eax)
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl $4
;
; X64-LABEL: PR25858_i64:
; X64:       # %bb.0: # %top
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    movq (%rsi), %rcx
; X64-NEXT:    movq 8(%rsi), %rsi
; X64-NEXT:    subq (%rdx), %rcx
; X64-NEXT:    sbbq 8(%rdx), %rsi
; X64-NEXT:    movq %rsi, 8(%rdi)
; X64-NEXT:    movq %rcx, (%rdi)
; X64-NEXT:    retq
top:
  %3 = load i64, ptr %1, align 8
  %4 = load i64, ptr %2, align 8
  %5 = sub i64 %3, %4
  %6 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %3, i64 %4)
  %7 = extractvalue { i64, i1 } %6, 1
  %8 = getelementptr inbounds %WideUInt64, ptr %1, i64 0, i32 1
  %9 = load i64, ptr %8, align 8
  %10 = getelementptr inbounds %WideUInt64, ptr %2, i64 0, i32 1
  %11 = load i64, ptr %10, align 8
  %12 = sub i64 %9, %11
  %.neg1 = sext i1 %7 to i64
  %13 = add i64 %12, %.neg1
  %14 = insertvalue %WideUInt64 undef, i64 %5, 0
  %15 = insertvalue %WideUInt64 %14, i64 %13, 1
  store %WideUInt64 %15, ptr %0, align 8
  ret void
}

declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64)

; PR24545 less_than_ideal()
define i8 @PR24545(i32, i32, ptr nocapture readonly) {
; X86-LABEL: PR24545:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    cmpl (%ecx), %edx
; X86-NEXT:    sbbl 4(%ecx), %eax
; X86-NEXT:    setb %al
; X86-NEXT:    retl
;
; X64-LABEL: PR24545:
; X64:       # %bb.0:
; X64-NEXT:    cmpl (%rdx), %edi
; X64-NEXT:    sbbl 4(%rdx), %esi
; X64-NEXT:    setb %al
; X64-NEXT:    retq
  %4 = load i32, ptr %2
  %5 = icmp ugt i32 %4, %0
  %6 = zext i1 %5 to i8
  %7 = getelementptr inbounds i32, ptr %2, i32 1
  %8 = load i32, ptr %7
  %9 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 %6, i32 %1, i32 %8)
  %10 = extractvalue { i8, i32 } %9, 0
  %11 = icmp ne i8 %10, 0
  %12 = zext i1 %11 to i8
  ret i8 %12
}

define i32 @PR40483_sub1(ptr, i32) nounwind {
; X86-LABEL: PR40483_sub1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    subl %eax, (%ecx)
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: PR40483_sub1:
; X64:       # %bb.0:
; X64-NEXT:    subl %esi, (%rdi)
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %3 = load i32, ptr %0, align 4
  %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)
  %5 = extractvalue { i8, i32 } %4, 1
  store i32 %5, ptr %0, align 4
  %6 = sub i32 %1, %3
  %7 = add i32 %6, %5
  ret i32 %7
}

define i32 @PR40483_sub2(ptr, i32) nounwind {
; X86-LABEL: PR40483_sub2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    subl %eax, (%ecx)
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: PR40483_sub2:
; X64:       # %bb.0:
; X64-NEXT:    subl %esi, (%rdi)
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %3 = load i32, ptr %0, align 4
  %4 = sub i32 %3, %1
  %5 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)
  %6 = extractvalue { i8, i32 } %5, 1
  store i32 %6, ptr %0, align 4
  %7 = sub i32 %4, %6
  ret i32 %7
}

define i32 @PR40483_sub3(ptr, i32) nounwind {
; X86-LABEL: PR40483_sub3:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    subl %esi, %ecx
; X86-NEXT:    subl %esi, %edx
; X86-NEXT:    movl %edx, (%eax)
; X86-NEXT:    jae .LBB5_1
; X86-NEXT:  # %bb.2:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
; X86-NEXT:  .LBB5_1:
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    negl %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: PR40483_sub3:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %ecx
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    movl %eax, %edx
; X64-NEXT:    negl %edx
; X64-NEXT:    orl %eax, %edx
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    subl %esi, %ecx
; X64-NEXT:    movl %ecx, (%rdi)
; X64-NEXT:    cmovael %edx, %eax
; X64-NEXT:    retq
  %3 = load i32, ptr %0, align 8
  %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)
  %5 = extractvalue { i8, i32 } %4, 1
  store i32 %5, ptr %0, align 8
  %6 = extractvalue { i8, i32 } %4, 0
  %7 = icmp eq i8 %6, 0
  %8 = sub i32 %1, %3
  %9 = or i32 %5, %8
  %10 = select i1 %7, i32 %9, i32 0
  ret i32 %10
}

define i32 @PR40483_sub4(ptr, i32) nounwind {
; X86-LABEL: PR40483_sub4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %ecx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, (%edx)
; X86-NEXT:    jae .LBB6_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:  .LBB6_2:
; X86-NEXT:    retl
;
; X64-LABEL: PR40483_sub4:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    movl %eax, (%rdi)
; X64-NEXT:    cmovael %ecx, %eax
; X64-NEXT:    retq
  %3 = load i32, ptr %0, align 8
  %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)
  %5 = extractvalue { i8, i32 } %4, 1
  store i32 %5, ptr %0, align 8
  %6 = extractvalue { i8, i32 } %4, 0
  %7 = icmp eq i8 %6, 0
  %8 = sub i32 %3, %1
  %9 = or i32 %5, %8
  %10 = select i1 %7, i32 0, i32 %9
  ret i32 %10
}

; Verify that a bogus cmov is simplified.

define i32 @PR40483_sub5(ptr, i32) nounwind {
; X86-LABEL: PR40483_sub5:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    subl %eax, (%ecx)
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: PR40483_sub5:
; X64:       # %bb.0:
; X64-NEXT:    subl %esi, (%rdi)
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
  %3 = load i32, ptr %0, align 8
  %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)
  %5 = extractvalue { i8, i32 } %4, 1
  store i32 %5, ptr %0, align 8
  %6 = extractvalue { i8, i32 } %4, 0
  %7 = icmp eq i8 %6, 0
  %8 = sub i32 %1, %3
  %9 = add i32 %8, %5
  %10 = select i1 %7, i32 %9, i32 0
  ret i32 %10
}

define i32 @PR40483_sub6(ptr, i32) nounwind {
; X86-LABEL: PR40483_sub6:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %ecx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, (%edx)
; X86-NEXT:    jae .LBB8_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    leal (%ecx,%ecx), %eax
; X86-NEXT:  .LBB8_2:
; X86-NEXT:    retl
;
; X64-LABEL: PR40483_sub6:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    movl %eax, (%rdi)
; X64-NEXT:    leal (%rax,%rax), %eax
; X64-NEXT:    cmovael %ecx, %eax
; X64-NEXT:    retq
  %3 = load i32, ptr %0, align 8
  %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)
  %5 = extractvalue { i8, i32 } %4, 1
  store i32 %5, ptr %0, align 8
  %6 = extractvalue { i8, i32 } %4, 0
  %7 = icmp eq i8 %6, 0
  %8 = sub i32 %3, %1
  %9 = add i32 %8, %5
  %10 = select i1 %7, i32 0, i32 %9
  ret i32 %10
}

declare { i8, i32 } @llvm.x86.subborrow.32(i8, i32, i32)