; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG
; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefixes=CHECK,LINUX,FAST
; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -mcpu=knl < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG
; RUN: llc -disable-peephole -mtriple=x86_64-pc-win32 < %s | FileCheck %s --check-prefixes=CHECK,WIN64
; RUN: llc -disable-peephole -mtriple=i386-pc-win32 < %s | FileCheck %s --check-prefix=WIN32

; Constant operands 9 and 8: the expected output is the constant 72 with a zero overflow bit.
define {i64, i1} @t1() nounwind {
; CHECK-LABEL: t1:
; CHECK: # %bb.0:
; CHECK-NEXT: movl $72, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: retq
;
; WIN32-LABEL: t1:
; WIN32: # %bb.0:
; WIN32-NEXT: movl $72, %eax
; WIN32-NEXT: xorl %edx, %edx
; WIN32-NEXT: xorl %ecx, %ecx
; WIN32-NEXT: retl
  %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 8)
  ret {i64, i1} %1
}

; Constant operands 9 and 0: the expected output is a zero product with a zero overflow bit.
define {i64, i1} @t2() nounwind {
; CHECK-LABEL: t2:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: retq
;
; WIN32-LABEL: t2:
; WIN32: # %bb.0:
; WIN32-NEXT: xorl %eax, %eax
; WIN32-NEXT: xorl %edx, %edx
; WIN32-NEXT: xorl %ecx, %ecx
; WIN32-NEXT: retl
  %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 0)
  ret {i64, i1} %1
}

; Constant operands 9 and -1 (all ones when read as unsigned): the expected
; output is the wrapped product -9 with the overflow bit set to 1.
define {i64, i1} @t3() nounwind {
; CHECK-LABEL: t3:
; CHECK: # %bb.0:
; CHECK-NEXT: movq $-9, %rax
; CHECK-NEXT: movb $1, %dl
; CHECK-NEXT: retq
;
; WIN32-LABEL: t3:
; WIN32: # %bb.0:
; WIN32-NEXT: movl $-9, %eax
; WIN32-NEXT: movl $-1, %edx
; WIN32-NEXT: movb $1, %cl
; WIN32-NEXT: retl
  %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 -1)
  ret {i64, i1} %1
}

; SMULO
; Signed i8 multiply: stores the product through %res and returns the overflow bit.
define zeroext i1 @smuloi8(i8 %v1, i8 %v2, ptr %res) {
; SDAG-LABEL: smuloi8:
; SDAG: # %bb.0:
; SDAG-NEXT: movl %edi, %eax
; SDAG-NEXT: # kill: def $al killed $al killed $eax
; SDAG-NEXT: imulb %sil
; SDAG-NEXT: seto %cl
; SDAG-NEXT: movb %al, (%rdx)
; SDAG-NEXT: movl %ecx, %eax
; SDAG-NEXT: retq
;
; FAST-LABEL: smuloi8:
; FAST: # %bb.0:
; FAST-NEXT: movl %edi, %eax
; FAST-NEXT: # kill: def $al killed $al killed $eax
; FAST-NEXT: imulb %sil
; FAST-NEXT: seto %cl
; FAST-NEXT: movb %al, (%rdx)
; FAST-NEXT: andb $1, %cl
; FAST-NEXT: movl %ecx, %eax
; FAST-NEXT: retq
;
; WIN64-LABEL: smuloi8:
; WIN64: # %bb.0:
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: imulb %dl
; WIN64-NEXT: seto %cl
; WIN64-NEXT: movb %al, (%r8)
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: smuloi8:
; WIN32: # %bb.0:
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: imulb {{[0-9]+}}(%esp)
; WIN32-NEXT: seto %cl
; WIN32-NEXT: movb %al, (%edx)
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: retl
  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
  %val = extractvalue {i8, i1} %t, 0
  %obit = extractvalue {i8, i1} %t, 1
  store i8 %val, ptr %res
  ret i1 %obit
}

; Signed i16 multiply: stores the product through %res and returns the overflow bit.
define zeroext i1 @smuloi16(i16 %v1, i16 %v2, ptr %res) {
; SDAG-LABEL: smuloi16:
; SDAG: # %bb.0:
; SDAG-NEXT: imulw %si, %di
; SDAG-NEXT: seto %al
; SDAG-NEXT: movw %di, (%rdx)
; SDAG-NEXT: retq
;
; FAST-LABEL: smuloi16:
; FAST: # %bb.0:
; FAST-NEXT: imulw %si, %di
; FAST-NEXT: seto %al
; FAST-NEXT: movw %di, (%rdx)
; FAST-NEXT: andb $1, %al
; FAST-NEXT: retq
;
; WIN64-LABEL: smuloi16:
; WIN64: # %bb.0:
; WIN64-NEXT: imulw %dx, %cx
; WIN64-NEXT: seto %al
; WIN64-NEXT: movw %cx, (%r8)
; WIN64-NEXT: retq
;
; WIN32-LABEL: smuloi16:
; WIN32: # %bb.0:
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: imulw {{[0-9]+}}(%esp), %dx
; WIN32-NEXT: seto %al
; WIN32-NEXT: movw %dx, (%ecx)
; WIN32-NEXT: retl
  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
  %val = extractvalue {i16, i1} %t, 0
  %obit = extractvalue {i16, i1} %t, 1
  store i16 %val, ptr %res
  ret i1 %obit
}

; Signed i32 multiply: stores the product through %res and returns the overflow bit.
define zeroext i1 @smuloi32(i32 %v1, i32 %v2, ptr %res) {
; SDAG-LABEL: smuloi32:
; SDAG: # %bb.0:
; SDAG-NEXT: imull %esi, %edi
; SDAG-NEXT: seto %al
; SDAG-NEXT: movl %edi, (%rdx)
; SDAG-NEXT: retq
;
; FAST-LABEL: smuloi32:
; FAST: # %bb.0:
; FAST-NEXT: imull %esi, %edi
; FAST-NEXT: seto %al
; FAST-NEXT: movl %edi, (%rdx)
; FAST-NEXT: andb $1, %al
; FAST-NEXT: retq
;
; WIN64-LABEL: smuloi32:
; WIN64: # %bb.0:
; WIN64-NEXT: imull %edx, %ecx
; WIN64-NEXT: seto %al
; WIN64-NEXT: movl %ecx, (%r8)
; WIN64-NEXT: retq
;
; WIN32-LABEL: smuloi32:
; WIN32: # %bb.0:
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: imull {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: seto %al
; WIN32-NEXT: movl %edx, (%ecx)
; WIN32-NEXT: retl
  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, ptr %res
  ret i1 %obit
}

; Signed i64 multiply: stores the product through %res and returns the overflow bit.
define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
; SDAG-LABEL: smuloi64:
; SDAG: # %bb.0:
; SDAG-NEXT: imulq %rsi, %rdi
; SDAG-NEXT: seto %al
; SDAG-NEXT: movq %rdi, (%rdx)
; SDAG-NEXT: retq
;
; FAST-LABEL: smuloi64:
; FAST: # %bb.0:
; FAST-NEXT: imulq %rsi, %rdi
; FAST-NEXT: seto %al
; FAST-NEXT: movq %rdi, (%rdx)
; FAST-NEXT: andb $1, %al
; FAST-NEXT: retq
;
; WIN64-LABEL: smuloi64:
; WIN64: # %bb.0:
; WIN64-NEXT: imulq %rdx, %rcx
; WIN64-NEXT: seto %al
; WIN64-NEXT: movq %rcx, (%r8)
; WIN64-NEXT: retq
;
; WIN32-LABEL: smuloi64:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: subl $8, %esp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
; WIN32-NEXT: movl %edi, %esi
; WIN32-NEXT: sarl $31, %esi
; WIN32-NEXT: imull %ebx, %esi
; WIN32-NEXT: mull %ebx
; WIN32-NEXT: movl %edx, %ecx
; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT: movl %edi, %eax
; WIN32-NEXT: mull %ebx
; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: movl %eax, %ebp
; WIN32-NEXT: addl %ecx, %ebp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: adcl %esi, %ebx
; WIN32-NEXT: movl %ebx, %edi
; WIN32-NEXT: sarl $31, %edi
; WIN32-NEXT: movl %ecx, %esi
; WIN32-NEXT: sarl $31, %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: imull %eax, %esi
; WIN32-NEXT: mull %ecx
; WIN32-NEXT: movl %edx, %ecx
; WIN32-NEXT: addl %ebp, %eax
; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT: adcl %esi, %ecx
; WIN32-NEXT: movl %ecx, %ebp
; WIN32-NEXT: sarl $31, %ebp
; WIN32-NEXT: addl %ebx, %ecx
; WIN32-NEXT: adcl %edi, %ebp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: imull {{[0-9]+}}(%esp)
; WIN32-NEXT: addl %ecx, %eax
; WIN32-NEXT: adcl %ebp, %edx
; WIN32-NEXT: movl (%esp), %esi # 4-byte Reload
; WIN32-NEXT: movl %esi, %ecx
; WIN32-NEXT: sarl $31, %ecx
; WIN32-NEXT: xorl %ecx, %edx
; WIN32-NEXT: xorl %eax, %ecx
; WIN32-NEXT: orl %edx, %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %esi, 4(%eax)
; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; WIN32-NEXT: movl %ecx, (%eax)
; WIN32-NEXT: setne %al
; WIN32-NEXT: addl $8, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  ret i1 %obit
}

; UMULO
; Unsigned i8 multiply: stores the product through %res and returns the overflow bit.
define zeroext i1 @umuloi8(i8 %v1, i8 %v2, ptr %res) {
; SDAG-LABEL: umuloi8:
; SDAG: # %bb.0:
; SDAG-NEXT: movl %edi, %eax
; SDAG-NEXT: # kill: def $al killed $al killed $eax
; SDAG-NEXT: mulb %sil
; SDAG-NEXT: seto %cl
; SDAG-NEXT: movb %al, (%rdx)
; SDAG-NEXT: movl %ecx, %eax
; SDAG-NEXT: retq
;
; FAST-LABEL: umuloi8:
; FAST: # %bb.0:
; FAST-NEXT: movl %edi, %eax
; FAST-NEXT: # kill: def $al killed $al killed $eax
; FAST-NEXT: mulb %sil
; FAST-NEXT: seto %cl
; FAST-NEXT: movb %al, (%rdx)
; FAST-NEXT: andb $1, %cl
; FAST-NEXT: movl %ecx, %eax
; FAST-NEXT: retq
;
; WIN64-LABEL: umuloi8:
; WIN64: # %bb.0:
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: mulb %dl
; WIN64-NEXT: seto %cl
; WIN64-NEXT: movb %al, (%r8)
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: umuloi8:
; WIN32: # %bb.0:
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mulb {{[0-9]+}}(%esp)
; WIN32-NEXT: seto %cl
; WIN32-NEXT: movb %al, (%edx)
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: retl
  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
  %val = extractvalue {i8, i1} %t, 0
  %obit = extractvalue {i8, i1} %t, 1
  store i8 %val, ptr %res
  ret i1 %obit
}

; Unsigned i16 multiply: stores the product through %res and returns the overflow bit.
define zeroext i1 @umuloi16(i16 %v1, i16 %v2, ptr %res) {
; SDAG-LABEL: umuloi16:
; SDAG: # %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
; SDAG-NEXT: movl %edi, %eax
; SDAG-NEXT: # kill: def $ax killed $ax killed $eax
; SDAG-NEXT: mulw %si
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movw %ax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
; SDAG-NEXT: retq
;
; FAST-LABEL: umuloi16:
; FAST: # %bb.0:
; FAST-NEXT: movq %rdx, %rcx
; FAST-NEXT: movl %edi, %eax
; FAST-NEXT: # kill: def $ax killed $ax killed $eax
; FAST-NEXT: mulw %si
; FAST-NEXT: seto %dl
; FAST-NEXT: movw %ax, (%rcx)
; FAST-NEXT: andb $1, %dl
; FAST-NEXT: movl %edx, %eax
; FAST-NEXT: retq
;
; WIN64-LABEL: umuloi16:
; WIN64: # %bb.0:
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: mulw %dx
; WIN64-NEXT: seto %cl
; WIN64-NEXT: movw %ax, (%r8)
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: umuloi16:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mulw {{[0-9]+}}(%esp)
; WIN32-NEXT: seto %cl
; WIN32-NEXT: movw %ax, (%esi)
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: popl %esi
; WIN32-NEXT: retl
  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
  %val = extractvalue {i16, i1} %t, 0
  %obit = extractvalue {i16, i1} %t, 1
  store i16 %val, ptr %res
  ret i1 %obit
}

; Unsigned i32 multiply: stores the product through %res and returns the overflow bit.
define zeroext i1 @umuloi32(i32 %v1, i32 %v2, ptr %res) {
; SDAG-LABEL: umuloi32:
; SDAG: # %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
; SDAG-NEXT: movl %edi, %eax
; SDAG-NEXT: mull %esi
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movl %eax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
; SDAG-NEXT: retq
;
; FAST-LABEL: umuloi32:
; FAST: # %bb.0:
; FAST-NEXT: movq %rdx, %rcx
; FAST-NEXT: movl %edi, %eax
; FAST-NEXT: mull %esi
; FAST-NEXT: seto %dl
; FAST-NEXT: movl %eax, (%rcx)
; FAST-NEXT: andb $1, %dl
; FAST-NEXT: movl %edx, %eax
; FAST-NEXT: retq
;
; WIN64-LABEL: umuloi32:
; WIN64: # %bb.0:
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: mull %edx
; WIN64-NEXT: seto %cl
; WIN64-NEXT: movl %eax, (%r8)
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: umuloi32:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
; WIN32-NEXT: seto %cl
; WIN32-NEXT: movl %eax, (%esi)
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: popl %esi
; WIN32-NEXT: retl
  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, ptr %res
  ret i1 %obit
}

; Unsigned i64 multiply: stores the product through %res and returns the overflow bit.
define zeroext i1 @umuloi64(i64 %v1, i64 %v2, ptr %res) {
; SDAG-LABEL: umuloi64:
; SDAG: # %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
; SDAG-NEXT: movq %rdi, %rax
; SDAG-NEXT: mulq %rsi
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movq %rax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
; SDAG-NEXT: retq
;
; FAST-LABEL: umuloi64:
; FAST: # %bb.0:
; FAST-NEXT: movq %rdx, %rcx
; FAST-NEXT: movq %rdi, %rax
; FAST-NEXT: mulq %rsi
; FAST-NEXT: seto %dl
; FAST-NEXT: movq %rax, (%rcx)
; FAST-NEXT: andb $1, %dl
; FAST-NEXT: movl %edx, %eax
; FAST-NEXT: retq
;
; WIN64-LABEL: umuloi64:
; WIN64: # %bb.0:
; WIN64-NEXT: movq %rcx, %rax
; WIN64-NEXT: mulq %rdx
; WIN64-NEXT: seto %cl
; WIN64-NEXT: movq %rax, (%r8)
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: umuloi64:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: testl %esi, %esi
; WIN32-NEXT: setne %dl
; WIN32-NEXT: testl %eax, %eax
; WIN32-NEXT: setne %cl
; WIN32-NEXT: andb %dl, %cl
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
; WIN32-NEXT: movl %eax, %edi
; WIN32-NEXT: seto %bl
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: mull %ebp
; WIN32-NEXT: seto %ch
; WIN32-NEXT: orb %bl, %ch
; WIN32-NEXT: orb %cl, %ch
; WIN32-NEXT: leal (%edi,%eax), %esi
; WIN32-NEXT: movl %ebp, %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
; WIN32-NEXT: addl %esi, %edx
; WIN32-NEXT: setb %cl
; WIN32-NEXT: orb %ch, %cl
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl %eax, (%esi)
; WIN32-NEXT: movl %edx, 4(%esi)
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  ret i1 %obit
}

;
; Check the use of the overflow bit in combination with a select instruction.
;
; Signed i32 multiply whose product is unused: returns %v1 on overflow, otherwise %v2.
define i32 @smuloselecti32(i32 %v1, i32 %v2) {
; LINUX-LABEL: smuloselecti32:
; LINUX: # %bb.0:
; LINUX-NEXT: movl %esi, %eax
; LINUX-NEXT: movl %edi, %ecx
; LINUX-NEXT: imull %esi, %ecx
; LINUX-NEXT: cmovol %edi, %eax
; LINUX-NEXT: retq
;
; WIN64-LABEL: smuloselecti32:
; WIN64: # %bb.0:
; WIN64-NEXT: movl %edx, %eax
; WIN64-NEXT: movl %ecx, %edx
; WIN64-NEXT: imull %eax, %edx
; WIN64-NEXT: cmovol %ecx, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: smuloselecti32:
; WIN32: # %bb.0:
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, %edx
; WIN32-NEXT: imull %ecx, %edx
; WIN32-NEXT: jo LBB11_2
; WIN32-NEXT: # %bb.1:
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: LBB11_2:
; WIN32-NEXT: retl
  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
  %obit = extractvalue {i32, i1} %t, 1
  %ret = select i1 %obit, i32 %v1, i32 %v2
  ret i32 %ret
}

; Signed i64 multiply whose product is unused: returns %v1 on overflow, otherwise %v2.
define i64 @smuloselecti64(i64 %v1, i64 %v2) {
; LINUX-LABEL: smuloselecti64:
; LINUX: # %bb.0:
; LINUX-NEXT: movq %rsi, %rax
; LINUX-NEXT: movq %rdi, %rcx
; LINUX-NEXT: imulq %rsi, %rcx
; LINUX-NEXT: cmovoq %rdi, %rax
; LINUX-NEXT: retq
;
; WIN64-LABEL: smuloselecti64:
; WIN64: # %bb.0:
; WIN64-NEXT: movq %rdx, %rax
; WIN64-NEXT: movq %rcx, %rdx
; WIN64-NEXT: imulq %rax, %rdx
; WIN64-NEXT: cmovoq %rcx, %rax
; WIN64-NEXT: retq
;
; WIN32-LABEL: smuloselecti64:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; WIN32-NEXT: movl %ebx, %esi
; WIN32-NEXT: sarl $31, %esi
; WIN32-NEXT: imull %edi, %esi
; WIN32-NEXT: mull %edi
; WIN32-NEXT: movl %edx, %ecx
; WIN32-NEXT: movl %ebx, %eax
; WIN32-NEXT: mull %edi
; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: movl %eax, %ebp
; WIN32-NEXT: addl %ecx, %ebp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: adcl %esi, %ebx
; WIN32-NEXT: movl %ebx, %eax
; WIN32-NEXT: sarl $31, %eax
; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT: movl %ecx, %esi
; WIN32-NEXT: sarl $31, %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: imull %eax, %esi
; WIN32-NEXT: mull %ecx
; WIN32-NEXT: movl %edx, %ecx
; WIN32-NEXT: movl %eax, %edi
; WIN32-NEXT: addl %ebp, %edi
; WIN32-NEXT: adcl %esi, %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl %ecx, %ebp
; WIN32-NEXT: sarl $31, %ebp
; WIN32-NEXT: addl %ebx, %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; WIN32-NEXT: adcl (%esp), %ebp # 4-byte Folded Reload
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: imull %ebx
; WIN32-NEXT: addl %ecx, %eax
; WIN32-NEXT: adcl %ebp, %edx
; WIN32-NEXT: sarl $31, %edi
; WIN32-NEXT: xorl %edi, %edx
; WIN32-NEXT: xorl %eax, %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: orl %edx, %edi
; WIN32-NEXT: jne LBB12_2
; WIN32-NEXT: # %bb.1:
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %ebx, %esi
; WIN32-NEXT: LBB12_2:
; WIN32-NEXT: movl %esi, %edx
; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
  %obit = extractvalue {i64, i1} %t, 1
  %ret = select i1 %obit, i64 %v1, i64 %v2
  ret i64 %ret
}

; Unsigned i32 multiply whose product is unused: returns %v1 on overflow, otherwise %v2.
define i32 @umuloselecti32(i32 %v1, i32 %v2) {
; LINUX-LABEL: umuloselecti32:
; LINUX: # %bb.0:
; LINUX-NEXT: movl %edi, %eax
; LINUX-NEXT: mull %esi
; LINUX-NEXT: cmovol %edi, %esi
; LINUX-NEXT: movl %esi, %eax
; LINUX-NEXT: retq
;
; WIN64-LABEL: umuloselecti32:
; WIN64: # %bb.0:
; WIN64-NEXT: movl %edx, %r8d
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: mull %edx
; WIN64-NEXT: cmovol %ecx, %r8d
; WIN64-NEXT: movl %r8d, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: umuloselecti32:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: mull %esi
; WIN32-NEXT: jo LBB13_2
; WIN32-NEXT: # %bb.1:
; WIN32-NEXT: movl %esi, %ecx
; WIN32-NEXT: LBB13_2:
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: popl %esi
; WIN32-NEXT: retl
  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
  %obit = extractvalue {i32, i1} %t, 1
  %ret = select i1 %obit, i32 %v1, i32 %v2
  ret i32 %ret
}

; Unsigned i64 multiply whose product is unused: returns %v1 on overflow, otherwise %v2.
define i64 @umuloselecti64(i64 %v1, i64 %v2) {
; LINUX-LABEL: umuloselecti64:
; LINUX: # %bb.0:
; LINUX-NEXT: movq %rdi, %rax
; LINUX-NEXT: mulq %rsi
; LINUX-NEXT: cmovoq %rdi, %rsi
; LINUX-NEXT: movq %rsi, %rax
; LINUX-NEXT: retq
;
; WIN64-LABEL: umuloselecti64:
; WIN64: # %bb.0:
; WIN64-NEXT: movq %rdx, %r8
; WIN64-NEXT: movq %rcx, %rax
; WIN64-NEXT: mulq %rdx
; WIN64-NEXT: cmovoq %rcx, %r8
; WIN64-NEXT: movq %r8, %rax
; WIN64-NEXT: retq
;
; WIN32-LABEL: umuloselecti64:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT: testl %ebp, %ebp
; WIN32-NEXT: setne %al
; WIN32-NEXT: testl %esi, %esi
; WIN32-NEXT: setne %bl
; WIN32-NEXT: andb %al, %bl
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: mull %edi
; WIN32-NEXT: movl %edi, %edx
; WIN32-NEXT: movl %eax, %edi
; WIN32-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
; WIN32-NEXT: movl %ebp, %eax
; WIN32-NEXT: movl %edx, %ebp
; WIN32-NEXT: mull %ecx
; WIN32-NEXT: seto %bh
; WIN32-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload
; WIN32-NEXT: orb %bl, %bh
; WIN32-NEXT: addl %eax, %edi
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: mull %ebp
; WIN32-NEXT: addl %edi, %edx
; WIN32-NEXT: setb %al
; WIN32-NEXT: orb %bh, %al
; WIN32-NEXT: testb %al, %al
; WIN32-NEXT: jne LBB14_2
; WIN32-NEXT: # %bb.1:
; WIN32-NEXT: movl %ebp, %ecx
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: LBB14_2:
; WIN32-NEXT: movl %ecx, %eax
; WIN32-NEXT: movl %esi, %edx
; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
  %obit = extractvalue {i64, i1} %t, 1
  %ret = select i1 %obit, i64 %v1, i64 %v2
  ret i64 %ret
}

;
; Check the use of the overflow bit in combination with a branch instruction.
;
; Signed i8 multiply feeding a conditional branch: returns false on overflow, true otherwise.
define zeroext i1 @smulobri8(i8 %v1, i8 %v2) {
; SDAG-LABEL: smulobri8:
; SDAG: # %bb.0:
; SDAG-NEXT: movl %edi, %eax
; SDAG-NEXT: # kill: def $al killed $al killed $eax
; SDAG-NEXT: imulb %sil
; SDAG-NEXT: jo .LBB15_1
; SDAG-NEXT: # %bb.2: # %continue
; SDAG-NEXT: movb $1, %al
; SDAG-NEXT: retq
; SDAG-NEXT: .LBB15_1: # %overflow
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: retq
;
; FAST-LABEL: smulobri8:
; FAST: # %bb.0:
; FAST-NEXT: movl %edi, %eax
; FAST-NEXT: # kill: def $al killed $al killed $eax
; FAST-NEXT: imulb %sil
; FAST-NEXT: seto %al
; FAST-NEXT: testb $1, %al
; FAST-NEXT: jne .LBB15_1
; FAST-NEXT: # %bb.2: # %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
; FAST-NEXT: retq
; FAST-NEXT: .LBB15_1: # %overflow
; FAST-NEXT: xorl %eax, %eax
; FAST-NEXT: andb $1, %al
; FAST-NEXT: # kill: def $al killed $al killed $eax
; FAST-NEXT: retq
;
; WIN64-LABEL: smulobri8:
; WIN64: # %bb.0:
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: imulb %dl
; WIN64-NEXT: jo .LBB15_1
; WIN64-NEXT: # %bb.2: # %continue
; WIN64-NEXT: movb $1, %al
; WIN64-NEXT: retq
; WIN64-NEXT: .LBB15_1: # %overflow
; WIN64-NEXT: xorl %eax, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: smulobri8:
; WIN32: # %bb.0:
; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: imulb {{[0-9]+}}(%esp)
; WIN32-NEXT: jo LBB15_1
; WIN32-NEXT: # %bb.2: # %continue
; WIN32-NEXT: movb $1, %al
; WIN32-NEXT: retl
; WIN32-NEXT: LBB15_1: # %overflow
; WIN32-NEXT: xorl %eax, %eax
; WIN32-NEXT: retl
  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
  %val = extractvalue {i8, i1} %t, 0
  %obit = extractvalue {i8, i1} %t, 1
  br i1 %obit, label %overflow, label %continue, !prof !0

overflow:
  ret i1 false

continue:
  ret i1 true
}

; Signed i16 multiply feeding a conditional branch: returns false on overflow, true otherwise.
define zeroext i1 @smulobri16(i16 %v1, i16 %v2) {
; SDAG-LABEL: smulobri16:
; SDAG: # %bb.0:
; SDAG-NEXT: imulw %si, %di
; SDAG-NEXT: jo .LBB16_1
; SDAG-NEXT: # %bb.2: # %continue
; SDAG-NEXT: movb $1, %al
; SDAG-NEXT: retq
; SDAG-NEXT: .LBB16_1: # %overflow
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: retq
;
; FAST-LABEL: smulobri16:
; FAST: # %bb.0:
; FAST-NEXT: imulw %si, %di
; FAST-NEXT: seto %al
; FAST-NEXT: testb $1, %al
; FAST-NEXT: jne .LBB16_1
; FAST-NEXT: # %bb.2: # %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
; FAST-NEXT: retq
; FAST-NEXT: .LBB16_1: # %overflow
; FAST-NEXT: xorl %eax, %eax
; FAST-NEXT: andb $1, %al
; FAST-NEXT: # kill: def $al killed $al killed $eax
; FAST-NEXT: retq
;
; WIN64-LABEL: smulobri16:
; WIN64: # %bb.0:
; WIN64-NEXT: imulw %dx, %cx
; WIN64-NEXT: jo .LBB16_1
; WIN64-NEXT: # %bb.2: # %continue
; WIN64-NEXT: movb $1, %al
; WIN64-NEXT: retq
; WIN64-NEXT: .LBB16_1: # %overflow
; WIN64-NEXT: xorl %eax, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: smulobri16:
; WIN32: # %bb.0:
; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: imulw {{[0-9]+}}(%esp), %ax
; WIN32-NEXT: jo LBB16_1
; WIN32-NEXT: # %bb.2: # %continue
; WIN32-NEXT: movb $1, %al
; WIN32-NEXT: retl
; WIN32-NEXT: LBB16_1: # %overflow
; WIN32-NEXT: xorl %eax, %eax
; WIN32-NEXT: retl
  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
  %val = extractvalue {i16, i1} %t, 0
  %obit = extractvalue {i16, i1} %t, 1
  br i1 %obit, label %overflow, label %continue, !prof !0

overflow:
  ret i1 false

continue:
  ret i1 true
}

; Signed i32 multiply feeding a conditional branch: returns false on overflow, true otherwise.
define zeroext i1 @smulobri32(i32 %v1, i32 %v2) {
; SDAG-LABEL: smulobri32:
; SDAG: # %bb.0:
; SDAG-NEXT: imull %esi, %edi
; SDAG-NEXT: jo .LBB17_1
; SDAG-NEXT: # %bb.2: # %continue
; SDAG-NEXT: movb $1, %al
; SDAG-NEXT: retq
; SDAG-NEXT: .LBB17_1: # %overflow
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: retq
;
; FAST-LABEL: smulobri32:
; FAST: # %bb.0:
; FAST-NEXT: imull %esi, %edi
; FAST-NEXT: jo .LBB17_1
; FAST-NEXT: # %bb.2: # %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
; FAST-NEXT: retq
; FAST-NEXT: .LBB17_1: # %overflow
; FAST-NEXT: xorl %eax, %eax
; FAST-NEXT: andb $1, %al
; FAST-NEXT: # kill: def $al killed $al killed $eax
; FAST-NEXT: retq
;
; WIN64-LABEL: smulobri32:
; WIN64: # %bb.0:
; WIN64-NEXT: imull %edx, %ecx
; WIN64-NEXT: jo .LBB17_1
; WIN64-NEXT: # %bb.2: # %continue
; WIN64-NEXT: movb $1, %al
; WIN64-NEXT: retq
; WIN64-NEXT: .LBB17_1: # %overflow
; WIN64-NEXT: xorl %eax, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: smulobri32:
; WIN32: # %bb.0:
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: imull {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: jo LBB17_1
; WIN32-NEXT: # %bb.2: # %continue
; WIN32-NEXT: movb $1, %al
; WIN32-NEXT: retl
; WIN32-NEXT: LBB17_1: # %overflow
; WIN32-NEXT: xorl %eax, %eax
; WIN32-NEXT: retl
  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  br i1 %obit, label %overflow, label %continue, !prof !0

overflow:
  ret i1 false

continue:
  ret i1 true
}

; Signed i64 multiply feeding a conditional branch: returns false on overflow, true otherwise.
define zeroext i1 @smulobri64(i64 %v1, i64 %v2) {
; SDAG-LABEL: smulobri64:
; SDAG: # %bb.0:
; SDAG-NEXT: imulq %rsi, %rdi
; SDAG-NEXT: jo .LBB18_1
; SDAG-NEXT: # %bb.2: # %continue
; SDAG-NEXT: movb $1, %al
; SDAG-NEXT: retq
; SDAG-NEXT: .LBB18_1: # %overflow
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: retq
;
; FAST-LABEL: smulobri64:
; FAST: # %bb.0:
; FAST-NEXT: imulq %rsi, %rdi
; FAST-NEXT: jo .LBB18_1
; FAST-NEXT: # %bb.2: # %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
; FAST-NEXT: retq
; FAST-NEXT: .LBB18_1: # %overflow
; FAST-NEXT: xorl %eax, %eax
; FAST-NEXT: andb $1, %al
; FAST-NEXT: # kill: def $al killed $al killed $eax
; FAST-NEXT: retq
;
; WIN64-LABEL: smulobri64:
; WIN64: # %bb.0:
; WIN64-NEXT: imulq %rdx, %rcx
; WIN64-NEXT: jo .LBB18_1
; WIN64-NEXT: # %bb.2: # %continue
; WIN64-NEXT: movb $1, %al
; WIN64-NEXT: retq
; WIN64-NEXT: .LBB18_1: # %overflow
; WIN64-NEXT: xorl %eax, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: smulobri64:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: pushl %eax
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT: movl %ebp, %ecx
; WIN32-NEXT: sarl $31, %ecx
; WIN32-NEXT: imull %edi, %ecx
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: mull %edi
; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: movl %ebp, %eax
; WIN32-NEXT: mull %edi
; WIN32-NEXT: movl %edx, %edi
; WIN32-NEXT: movl %eax, %ebp
; WIN32-NEXT: addl %ebx, %ebp
; WIN32-NEXT: adcl %ecx, %edi
; WIN32-NEXT: movl %edi, %eax
; WIN32-NEXT: sarl $31, %eax
; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT: movl %edx, %ecx
; WIN32-NEXT: sarl $31, %ecx
; WIN32-NEXT: imull %esi, %ecx
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: mull %edx
; WIN32-NEXT: movl %edx, %ebx
; WIN32-NEXT: movl %eax, %esi
; WIN32-NEXT: addl %ebp, %esi
; WIN32-NEXT: adcl %ecx, %ebx
; WIN32-NEXT: movl %ebx, %ebp
; WIN32-NEXT: sarl $31, %ebp
; WIN32-NEXT: addl %edi, %ebx
; WIN32-NEXT: adcl (%esp), %ebp # 4-byte Folded Reload
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: imull {{[0-9]+}}(%esp)
; WIN32-NEXT: addl %ebx, %eax
; WIN32-NEXT: adcl %ebp, %edx
; WIN32-NEXT: sarl $31, %esi
; WIN32-NEXT: xorl %esi, %edx
; WIN32-NEXT: xorl %eax, %esi
; WIN32-NEXT: orl %edx, %esi
; WIN32-NEXT: jne LBB18_1
; WIN32-NEXT: # %bb.3: # %continue
; WIN32-NEXT: movb $1, %al
; WIN32-NEXT: LBB18_2: # %overflow
; WIN32-NEXT: addl $4, %esp
; WIN32-NEXT: popl %esi
; WIN32-NEXT: popl %edi
; WIN32-NEXT: popl %ebx
; WIN32-NEXT: popl %ebp
; WIN32-NEXT: retl
; WIN32-NEXT: LBB18_1: # %overflow
; WIN32-NEXT: xorl %eax, %eax
; WIN32-NEXT: jmp LBB18_2
  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  br i1 %obit, label %overflow, label %continue, !prof !0

overflow:
  ret i1 false

continue:
  ret i1 true
}

; Unsigned i8 multiply feeding a conditional branch: returns false on overflow, true otherwise.
define zeroext i1 @umulobri8(i8 %v1, i8 %v2) {
; SDAG-LABEL: umulobri8:
; SDAG: # %bb.0:
; SDAG-NEXT: movl %edi, %eax
; SDAG-NEXT: # kill: def $al killed $al killed $eax
; SDAG-NEXT: mulb %sil
; SDAG-NEXT: jo .LBB19_1
; SDAG-NEXT: # %bb.2: # %continue
; SDAG-NEXT: movb $1, %al
; SDAG-NEXT: retq
; SDAG-NEXT: .LBB19_1: # %overflow
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: retq
;
; FAST-LABEL: umulobri8:
; FAST: # %bb.0:
; FAST-NEXT: movl %edi, %eax
; FAST-NEXT: # kill: def $al killed $al killed $eax
; FAST-NEXT: mulb %sil
; FAST-NEXT: seto %al
; FAST-NEXT: testb $1, %al
; FAST-NEXT: jne .LBB19_1
; FAST-NEXT: # %bb.2: # %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
; FAST-NEXT: retq
; FAST-NEXT: .LBB19_1: # %overflow
; FAST-NEXT: xorl %eax, %eax
; FAST-NEXT: andb $1, %al
; FAST-NEXT: # kill: def $al killed $al killed $eax
; FAST-NEXT: retq
;
; WIN64-LABEL: umulobri8:
; WIN64: # %bb.0:
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: mulb %dl
; WIN64-NEXT: jo .LBB19_1
; WIN64-NEXT: # %bb.2: # %continue
; WIN64-NEXT: movb $1, %al
; WIN64-NEXT: retq
; WIN64-NEXT: .LBB19_1: # %overflow
; WIN64-NEXT: xorl %eax, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: umulobri8:
; WIN32: # %bb.0:
; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mulb {{[0-9]+}}(%esp)
; WIN32-NEXT: jo LBB19_1
; WIN32-NEXT: # %bb.2: # %continue
; WIN32-NEXT: movb $1, %al
; WIN32-NEXT: retl
; WIN32-NEXT: LBB19_1: # %overflow
; WIN32-NEXT: xorl %eax, %eax
; WIN32-NEXT: retl
  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
  %val = extractvalue {i8, i1} %t, 0
  %obit = extractvalue {i8, i1} %t, 1
  br i1 %obit, label %overflow, label %continue, !prof !0

overflow:
  ret i1 false

continue:
  ret i1 true
}

; Unsigned i16 multiply feeding a conditional branch: returns false on overflow, true otherwise.
define zeroext i1 @umulobri16(i16 %v1, i16 %v2) {
; SDAG-LABEL: umulobri16:
; SDAG: # %bb.0:
; SDAG-NEXT: movl %edi, %eax
; SDAG-NEXT: # kill: def $ax killed $ax killed $eax
; SDAG-NEXT: mulw %si
; SDAG-NEXT: jo .LBB20_1
; SDAG-NEXT: # %bb.2: # %continue
; SDAG-NEXT: movb $1, %al
; SDAG-NEXT: retq
; SDAG-NEXT: .LBB20_1: # %overflow
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: retq
;
; FAST-LABEL: umulobri16:
; FAST: # %bb.0:
; FAST-NEXT: movl %edi, %eax
; FAST-NEXT: # kill: def $ax killed $ax killed $eax
; FAST-NEXT: mulw %si
; FAST-NEXT: seto %al
; FAST-NEXT: testb $1, %al
; FAST-NEXT: jne .LBB20_1
; FAST-NEXT: # %bb.2: # %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
; FAST-NEXT: retq
; FAST-NEXT: .LBB20_1: # %overflow
; FAST-NEXT: xorl %eax, %eax
; FAST-NEXT: andb $1, %al
; FAST-NEXT: # kill: def $al killed $al killed $eax
; FAST-NEXT: retq
;
; WIN64-LABEL: umulobri16:
; WIN64: # %bb.0:
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: mulw %dx
; WIN64-NEXT: jo .LBB20_1
; WIN64-NEXT: # %bb.2: # %continue
; WIN64-NEXT: movb $1, %al
; WIN64-NEXT: retq
; WIN64-NEXT: .LBB20_1: # %overflow
; WIN64-NEXT: xorl %eax, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: umulobri16:
; WIN32: # %bb.0:
; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mulw {{[0-9]+}}(%esp)
; WIN32-NEXT: jo LBB20_1
; WIN32-NEXT: # %bb.2: # %continue
; WIN32-NEXT: movb $1, %al
; WIN32-NEXT: retl
; WIN32-NEXT: LBB20_1: # %overflow
; WIN32-NEXT: xorl %eax, %eax
; WIN32-NEXT: retl
  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
  %val = extractvalue {i16, i1} %t, 0
  %obit = extractvalue {i16, i1} %t, 1
  br i1 %obit, label %overflow, label %continue, !prof !0

overflow:
  ret i1 false

continue:
  ret i1 true
}

; Unsigned i32 multiply feeding a conditional branch: returns false on overflow, true otherwise.
define zeroext i1 @umulobri32(i32 %v1, i32 %v2) {
; SDAG-LABEL: umulobri32:
; SDAG: # %bb.0:
; SDAG-NEXT: movl %edi, %eax
; SDAG-NEXT: mull %esi
; SDAG-NEXT: jo .LBB21_1
; SDAG-NEXT: # %bb.2: # %continue
; SDAG-NEXT: movb $1, %al
; SDAG-NEXT: retq
; SDAG-NEXT: .LBB21_1: # %overflow
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: retq
;
; FAST-LABEL: umulobri32:
; FAST: # %bb.0:
; FAST-NEXT: movl %edi, %eax
; FAST-NEXT: mull %esi
; FAST-NEXT: jo .LBB21_1
; FAST-NEXT: # %bb.2: # %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
; FAST-NEXT: retq
; FAST-NEXT: .LBB21_1: # %overflow
; FAST-NEXT: xorl %eax, %eax
; FAST-NEXT: andb $1, %al
; FAST-NEXT: # kill: def $al killed $al killed $eax
; FAST-NEXT: retq
;
; WIN64-LABEL: umulobri32:
; WIN64: # %bb.0:
; WIN64-NEXT: movl %ecx, %eax
; WIN64-NEXT: mull %edx
; WIN64-NEXT: jo .LBB21_1
; WIN64-NEXT: # %bb.2: # %continue
; WIN64-NEXT: movb $1, %al
; WIN64-NEXT: retq
; WIN64-NEXT: .LBB21_1: # %overflow
; WIN64-NEXT: xorl %eax, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: umulobri32:
; WIN32: # %bb.0:
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
; WIN32-NEXT: jo LBB21_1
; WIN32-NEXT: # %bb.2: # %continue
; WIN32-NEXT: movb $1, %al
; WIN32-NEXT: retl
; WIN32-NEXT: LBB21_1: # %overflow
; WIN32-NEXT: xorl %eax, %eax
; WIN32-NEXT: retl
  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  br i1 %obit, label %overflow, label %continue, !prof !0

overflow:
  ret i1 false

continue:
  ret i1 true
}

define zeroext i1 @umulobri64(i64 %v1, i64 %v2) {
; SDAG-LABEL: umulobri64:
; SDAG: # %bb.0:
; SDAG-NEXT: movq %rdi, %rax
; SDAG-NEXT: mulq %rsi
; SDAG-NEXT: jo .LBB22_1
; SDAG-NEXT: # %bb.2: # %continue
; SDAG-NEXT: movb $1, %al
; SDAG-NEXT: retq
; SDAG-NEXT: .LBB22_1: # %overflow
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: retq
;
; FAST-LABEL: umulobri64:
; FAST: # %bb.0:
; FAST-NEXT: movq %rdi, %rax
; FAST-NEXT: mulq %rsi
; FAST-NEXT: jo .LBB22_1
; FAST-NEXT: # %bb.2: # %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
; FAST-NEXT: retq
; FAST-NEXT: .LBB22_1: # %overflow
; FAST-NEXT: xorl %eax, %eax
; FAST-NEXT: andb $1, %al
; FAST-NEXT: # kill: def $al killed $al killed $eax
; FAST-NEXT: retq
;
; WIN64-LABEL: umulobri64:
; WIN64: # %bb.0:
; WIN64-NEXT: movq %rcx, %rax
; WIN64-NEXT: mulq %rdx
; WIN64-NEXT: jo .LBB22_1
; WIN64-NEXT: # %bb.2: # %continue
; WIN64-NEXT: movb $1, %al
; WIN64-NEXT: retq
; WIN64-NEXT: .LBB22_1: # %overflow
; WIN64-NEXT: xorl %eax, %eax
; WIN64-NEXT: retq
;
; WIN32-LABEL: umulobri64:
; WIN32: # %bb.0:
; WIN32-NEXT: pushl %ebp
; WIN32-NEXT: pushl %ebx
; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; WIN32-NEXT: movl
{{[0-9]+}}(%esp), %eax 1271; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 1272; WIN32-NEXT: testl %esi, %esi 1273; WIN32-NEXT: setne %dl 1274; WIN32-NEXT: testl %eax, %eax 1275; WIN32-NEXT: setne %cl 1276; WIN32-NEXT: andb %dl, %cl 1277; WIN32-NEXT: mull {{[0-9]+}}(%esp) 1278; WIN32-NEXT: movl %eax, %edi 1279; WIN32-NEXT: seto %bl 1280; WIN32-NEXT: movl %esi, %eax 1281; WIN32-NEXT: mull %ebp 1282; WIN32-NEXT: seto %ch 1283; WIN32-NEXT: orb %bl, %ch 1284; WIN32-NEXT: orb %cl, %ch 1285; WIN32-NEXT: leal (%edi,%eax), %esi 1286; WIN32-NEXT: movl %ebp, %eax 1287; WIN32-NEXT: mull {{[0-9]+}}(%esp) 1288; WIN32-NEXT: addl %esi, %edx 1289; WIN32-NEXT: setb %al 1290; WIN32-NEXT: orb %ch, %al 1291; WIN32-NEXT: subb $1, %al 1292; WIN32-NEXT: je LBB22_1 1293; WIN32-NEXT: # %bb.3: # %continue 1294; WIN32-NEXT: movb $1, %al 1295; WIN32-NEXT: LBB22_2: # %overflow 1296; WIN32-NEXT: popl %esi 1297; WIN32-NEXT: popl %edi 1298; WIN32-NEXT: popl %ebx 1299; WIN32-NEXT: popl %ebp 1300; WIN32-NEXT: retl 1301; WIN32-NEXT: LBB22_1: # %overflow 1302; WIN32-NEXT: xorl %eax, %eax 1303; WIN32-NEXT: jmp LBB22_2 1304 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) 1305 %val = extractvalue {i64, i1} %t, 0 1306 %obit = extractvalue {i64, i1} %t, 1 1307 br i1 %obit, label %overflow, label %continue, !prof !0 1308 1309overflow: 1310 ret i1 false 1311 1312continue: 1313 ret i1 true 1314} 1315 1316define i1 @bug27873(i64 %c1, i1 %c2) { 1317; LINUX-LABEL: bug27873: 1318; LINUX: # %bb.0: 1319; LINUX-NEXT: movq %rdi, %rax 1320; LINUX-NEXT: movl $160, %ecx 1321; LINUX-NEXT: mulq %rcx 1322; LINUX-NEXT: seto %al 1323; LINUX-NEXT: orb %sil, %al 1324; LINUX-NEXT: retq 1325; 1326; WIN64-LABEL: bug27873: 1327; WIN64: # %bb.0: 1328; WIN64-NEXT: movl %edx, %r8d 1329; WIN64-NEXT: movq %rcx, %rax 1330; WIN64-NEXT: movl $160, %ecx 1331; WIN64-NEXT: mulq %rcx 1332; WIN64-NEXT: seto %al 1333; WIN64-NEXT: orb %r8b, %al 1334; WIN64-NEXT: retq 1335; 1336; WIN32-LABEL: bug27873: 1337; WIN32: # %bb.0: 1338; 
WIN32-NEXT: pushl %ebx 1339; WIN32-NEXT: movl $160, %eax 1340; WIN32-NEXT: mull {{[0-9]+}}(%esp) 1341; WIN32-NEXT: movl %eax, %ecx 1342; WIN32-NEXT: seto %bl 1343; WIN32-NEXT: movl $160, %eax 1344; WIN32-NEXT: mull {{[0-9]+}}(%esp) 1345; WIN32-NEXT: addl %ecx, %edx 1346; WIN32-NEXT: setb %al 1347; WIN32-NEXT: orb %bl, %al 1348; WIN32-NEXT: orb {{[0-9]+}}(%esp), %al 1349; WIN32-NEXT: popl %ebx 1350; WIN32-NEXT: retl 1351 %mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %c1, i64 160) 1352 %mul.overflow = extractvalue { i64, i1 } %mul, 1 1353 %x1 = or i1 %c2, %mul.overflow 1354 ret i1 %x1 1355} 1356 1357define zeroext i1 @smuloi8_load(ptr %ptr1, i8 %v2, ptr %res) { 1358; SDAG-LABEL: smuloi8_load: 1359; SDAG: # %bb.0: 1360; SDAG-NEXT: movl %esi, %eax 1361; SDAG-NEXT: # kill: def $al killed $al killed $eax 1362; SDAG-NEXT: imulb (%rdi) 1363; SDAG-NEXT: seto %cl 1364; SDAG-NEXT: movb %al, (%rdx) 1365; SDAG-NEXT: movl %ecx, %eax 1366; SDAG-NEXT: retq 1367; 1368; FAST-LABEL: smuloi8_load: 1369; FAST: # %bb.0: 1370; FAST-NEXT: movzbl (%rdi), %eax 1371; FAST-NEXT: imulb %sil 1372; FAST-NEXT: seto %cl 1373; FAST-NEXT: movb %al, (%rdx) 1374; FAST-NEXT: andb $1, %cl 1375; FAST-NEXT: movl %ecx, %eax 1376; FAST-NEXT: retq 1377; 1378; WIN64-LABEL: smuloi8_load: 1379; WIN64: # %bb.0: 1380; WIN64-NEXT: movl %edx, %eax 1381; WIN64-NEXT: imulb (%rcx) 1382; WIN64-NEXT: seto %cl 1383; WIN64-NEXT: movb %al, (%r8) 1384; WIN64-NEXT: movl %ecx, %eax 1385; WIN64-NEXT: retq 1386; 1387; WIN32-LABEL: smuloi8_load: 1388; WIN32: # %bb.0: 1389; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1390; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1391; WIN32-NEXT: movzbl (%eax), %eax 1392; WIN32-NEXT: imulb {{[0-9]+}}(%esp) 1393; WIN32-NEXT: seto %cl 1394; WIN32-NEXT: movb %al, (%edx) 1395; WIN32-NEXT: movl %ecx, %eax 1396; WIN32-NEXT: retl 1397 %v1 = load i8, ptr %ptr1 1398 %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2) 1399 %val = extractvalue {i8, i1} %t, 0 1400 %obit = extractvalue {i8, 
i1} %t, 1 1401 store i8 %val, ptr %res 1402 ret i1 %obit 1403} 1404
; smuloi8_load2: signed i8 multiply-with-overflow whose second operand (%v2) is
; loaded from %ptr2. The product is stored through %res and the i1 overflow
; flag is returned. In each of the four check blocks the loaded operand shows
; up as the memory operand of imulb; only the fast-isel block has an extra
; "andb $1" applied to the flag register before it is returned.
1405define zeroext i1 @smuloi8_load2(i8 %v1, ptr %ptr2, ptr %res) { 1406; SDAG-LABEL: smuloi8_load2: 1407; SDAG: # %bb.0: 1408; SDAG-NEXT: movl %edi, %eax 1409; SDAG-NEXT: # kill: def $al killed $al killed $eax 1410; SDAG-NEXT: imulb (%rsi) 1411; SDAG-NEXT: seto %cl 1412; SDAG-NEXT: movb %al, (%rdx) 1413; SDAG-NEXT: movl %ecx, %eax 1414; SDAG-NEXT: retq 1415; 1416; FAST-LABEL: smuloi8_load2: 1417; FAST: # %bb.0: 1418; FAST-NEXT: movl %edi, %eax 1419; FAST-NEXT: # kill: def $al killed $al killed $eax 1420; FAST-NEXT: imulb (%rsi) 1421; FAST-NEXT: seto %cl 1422; FAST-NEXT: movb %al, (%rdx) 1423; FAST-NEXT: andb $1, %cl 1424; FAST-NEXT: movl %ecx, %eax 1425; FAST-NEXT: retq 1426; 1427; WIN64-LABEL: smuloi8_load2: 1428; WIN64: # %bb.0: 1429; WIN64-NEXT: movl %ecx, %eax 1430; WIN64-NEXT: imulb (%rdx) 1431; WIN64-NEXT: seto %cl 1432; WIN64-NEXT: movb %al, (%r8) 1433; WIN64-NEXT: movl %ecx, %eax 1434; WIN64-NEXT: retq 1435; 1436; WIN32-LABEL: smuloi8_load2: 1437; WIN32: # %bb.0: 1438; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1439; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1440; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1441; WIN32-NEXT: imulb (%ecx) 1442; WIN32-NEXT: seto %cl 1443; WIN32-NEXT: movb %al, (%edx) 1444; WIN32-NEXT: movl %ecx, %eax 1445; WIN32-NEXT: retl 1446 %v2 = load i8, ptr %ptr2 1447 %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2) 1448 %val = extractvalue {i8, i1} %t, 0 1449 %obit = extractvalue {i8, i1} %t, 1 1450 store i8 %val, ptr %res 1451 ret i1 %obit 1452} 1453 1454define zeroext i1 @smuloi16_load(ptr %ptr1, i16 %v2, ptr %res) { 1455; SDAG-LABEL: smuloi16_load: 1456; SDAG: # %bb.0: 1457; SDAG-NEXT: imulw (%rdi), %si 1458; SDAG-NEXT: seto %al 1459; SDAG-NEXT: movw %si, (%rdx) 1460; SDAG-NEXT: retq 1461; 1462; FAST-LABEL: smuloi16_load: 1463; FAST: # %bb.0: 1464; FAST-NEXT: imulw (%rdi), %si 1465; FAST-NEXT: seto %al 1466; FAST-NEXT: movw %si, (%rdx) 1467; FAST-NEXT: andb
$1, %al 1468; FAST-NEXT: retq 1469; 1470; WIN64-LABEL: smuloi16_load: 1471; WIN64: # %bb.0: 1472; WIN64-NEXT: imulw (%rcx), %dx 1473; WIN64-NEXT: seto %al 1474; WIN64-NEXT: movw %dx, (%r8) 1475; WIN64-NEXT: retq 1476; 1477; WIN32-LABEL: smuloi16_load: 1478; WIN32: # %bb.0: 1479; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1480; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1481; WIN32-NEXT: movzwl (%eax), %edx 1482; WIN32-NEXT: imulw {{[0-9]+}}(%esp), %dx 1483; WIN32-NEXT: seto %al 1484; WIN32-NEXT: movw %dx, (%ecx) 1485; WIN32-NEXT: retl 1486 %v1 = load i16, ptr %ptr1 1487 %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2) 1488 %val = extractvalue {i16, i1} %t, 0 1489 %obit = extractvalue {i16, i1} %t, 1 1490 store i16 %val, ptr %res 1491 ret i1 %obit 1492} 1493
; smuloi16_load2: signed i16 multiply-with-overflow whose second operand (%v2)
; is loaded from %ptr2. The product is stored through %res and the i1 overflow
; flag is returned. In each of the four check blocks the loaded operand is the
; memory operand of a two-operand imulw and the flag comes from seto; only the
; fast-isel block has an extra "andb $1" on the flag before returning.
1494define zeroext i1 @smuloi16_load2(i16 %v1, ptr %ptr2, ptr %res) { 1495; SDAG-LABEL: smuloi16_load2: 1496; SDAG: # %bb.0: 1497; SDAG-NEXT: imulw (%rsi), %di 1498; SDAG-NEXT: seto %al 1499; SDAG-NEXT: movw %di, (%rdx) 1500; SDAG-NEXT: retq 1501; 1502; FAST-LABEL: smuloi16_load2: 1503; FAST: # %bb.0: 1504; FAST-NEXT: imulw (%rsi), %di 1505; FAST-NEXT: seto %al 1506; FAST-NEXT: movw %di, (%rdx) 1507; FAST-NEXT: andb $1, %al 1508; FAST-NEXT: retq 1509; 1510; WIN64-LABEL: smuloi16_load2: 1511; WIN64: # %bb.0: 1512; WIN64-NEXT: imulw (%rdx), %cx 1513; WIN64-NEXT: seto %al 1514; WIN64-NEXT: movw %cx, (%r8) 1515; WIN64-NEXT: retq 1516; 1517; WIN32-LABEL: smuloi16_load2: 1518; WIN32: # %bb.0: 1519; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1520; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1521; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %edx 1522; WIN32-NEXT: imulw (%eax), %dx 1523; WIN32-NEXT: seto %al 1524; WIN32-NEXT: movw %dx, (%ecx) 1525; WIN32-NEXT: retl 1526 %v2 = load i16, ptr %ptr2 1527 %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2) 1528 %val = extractvalue {i16, i1} %t, 0 1529 %obit = extractvalue {i16, i1} %t, 1 1530 store i16 %val, ptr %res 1531 ret i1 %obit 1532} 1533 1534define
zeroext i1 @smuloi32_load(ptr %ptr1, i32 %v2, ptr %res) { 1535; SDAG-LABEL: smuloi32_load: 1536; SDAG: # %bb.0: 1537; SDAG-NEXT: imull (%rdi), %esi 1538; SDAG-NEXT: seto %al 1539; SDAG-NEXT: movl %esi, (%rdx) 1540; SDAG-NEXT: retq 1541; 1542; FAST-LABEL: smuloi32_load: 1543; FAST: # %bb.0: 1544; FAST-NEXT: imull (%rdi), %esi 1545; FAST-NEXT: seto %al 1546; FAST-NEXT: movl %esi, (%rdx) 1547; FAST-NEXT: andb $1, %al 1548; FAST-NEXT: retq 1549; 1550; WIN64-LABEL: smuloi32_load: 1551; WIN64: # %bb.0: 1552; WIN64-NEXT: imull (%rcx), %edx 1553; WIN64-NEXT: seto %al 1554; WIN64-NEXT: movl %edx, (%r8) 1555; WIN64-NEXT: retq 1556; 1557; WIN32-LABEL: smuloi32_load: 1558; WIN32: # %bb.0: 1559; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1560; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1561; WIN32-NEXT: movl (%eax), %edx 1562; WIN32-NEXT: imull {{[0-9]+}}(%esp), %edx 1563; WIN32-NEXT: seto %al 1564; WIN32-NEXT: movl %edx, (%ecx) 1565; WIN32-NEXT: retl 1566 %v1 = load i32, ptr %ptr1 1567 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) 1568 %val = extractvalue {i32, i1} %t, 0 1569 %obit = extractvalue {i32, i1} %t, 1 1570 store i32 %val, ptr %res 1571 ret i1 %obit 1572} 1573 1574define zeroext i1 @smuloi32_load2(i32 %v1, ptr %ptr2, ptr %res) { 1575; SDAG-LABEL: smuloi32_load2: 1576; SDAG: # %bb.0: 1577; SDAG-NEXT: imull (%rsi), %edi 1578; SDAG-NEXT: seto %al 1579; SDAG-NEXT: movl %edi, (%rdx) 1580; SDAG-NEXT: retq 1581; 1582; FAST-LABEL: smuloi32_load2: 1583; FAST: # %bb.0: 1584; FAST-NEXT: imull (%rsi), %edi 1585; FAST-NEXT: seto %al 1586; FAST-NEXT: movl %edi, (%rdx) 1587; FAST-NEXT: andb $1, %al 1588; FAST-NEXT: retq 1589; 1590; WIN64-LABEL: smuloi32_load2: 1591; WIN64: # %bb.0: 1592; WIN64-NEXT: imull (%rdx), %ecx 1593; WIN64-NEXT: seto %al 1594; WIN64-NEXT: movl %ecx, (%r8) 1595; WIN64-NEXT: retq 1596; 1597; WIN32-LABEL: smuloi32_load2: 1598; WIN32: # %bb.0: 1599; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1600; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1601; 
WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1602; WIN32-NEXT: imull (%eax), %edx 1603; WIN32-NEXT: seto %al 1604; WIN32-NEXT: movl %edx, (%ecx) 1605; WIN32-NEXT: retl 1606 %v2 = load i32, ptr %ptr2 1607 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) 1608 %val = extractvalue {i32, i1} %t, 0 1609 %obit = extractvalue {i32, i1} %t, 1 1610 store i32 %val, ptr %res 1611 ret i1 %obit 1612} 1613 1614define zeroext i1 @smuloi64_load(ptr %ptr1, i64 %v2, ptr %res) { 1615; SDAG-LABEL: smuloi64_load: 1616; SDAG: # %bb.0: 1617; SDAG-NEXT: imulq (%rdi), %rsi 1618; SDAG-NEXT: seto %al 1619; SDAG-NEXT: movq %rsi, (%rdx) 1620; SDAG-NEXT: retq 1621; 1622; FAST-LABEL: smuloi64_load: 1623; FAST: # %bb.0: 1624; FAST-NEXT: imulq (%rdi), %rsi 1625; FAST-NEXT: seto %al 1626; FAST-NEXT: movq %rsi, (%rdx) 1627; FAST-NEXT: andb $1, %al 1628; FAST-NEXT: retq 1629; 1630; WIN64-LABEL: smuloi64_load: 1631; WIN64: # %bb.0: 1632; WIN64-NEXT: imulq (%rcx), %rdx 1633; WIN64-NEXT: seto %al 1634; WIN64-NEXT: movq %rdx, (%r8) 1635; WIN64-NEXT: retq 1636; 1637; WIN32-LABEL: smuloi64_load: 1638; WIN32: # %bb.0: 1639; WIN32-NEXT: pushl %ebp 1640; WIN32-NEXT: pushl %ebx 1641; WIN32-NEXT: pushl %edi 1642; WIN32-NEXT: pushl %esi 1643; WIN32-NEXT: subl $12, %esp 1644; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx 1645; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1646; WIN32-NEXT: movl (%eax), %ecx 1647; WIN32-NEXT: movl 4(%eax), %ebp 1648; WIN32-NEXT: movl %ebp, %esi 1649; WIN32-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1650; WIN32-NEXT: sarl $31, %esi 1651; WIN32-NEXT: imull %ebx, %esi 1652; WIN32-NEXT: movl %ecx, %eax 1653; WIN32-NEXT: mull %ebx 1654; WIN32-NEXT: movl %edx, %edi 1655; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1656; WIN32-NEXT: movl %ebp, %eax 1657; WIN32-NEXT: mull %ebx 1658; WIN32-NEXT: movl %edx, %ebx 1659; WIN32-NEXT: movl %eax, %ebp 1660; WIN32-NEXT: addl %edi, %ebp 1661; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1662; WIN32-NEXT: adcl %esi, %ebx 
1663; WIN32-NEXT: movl %ebx, %edi 1664; WIN32-NEXT: sarl $31, %edi 1665; WIN32-NEXT: movl %eax, %esi 1666; WIN32-NEXT: sarl $31, %esi 1667; WIN32-NEXT: imull %ecx, %esi 1668; WIN32-NEXT: movl %ecx, %eax 1669; WIN32-NEXT: mull {{[0-9]+}}(%esp) 1670; WIN32-NEXT: movl %edx, %ecx 1671; WIN32-NEXT: addl %ebp, %eax 1672; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill 1673; WIN32-NEXT: adcl %esi, %ecx 1674; WIN32-NEXT: movl %ecx, %ebp 1675; WIN32-NEXT: sarl $31, %ebp 1676; WIN32-NEXT: addl %ebx, %ecx 1677; WIN32-NEXT: adcl %edi, %ebp 1678; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 1679; WIN32-NEXT: imull {{[0-9]+}}(%esp) 1680; WIN32-NEXT: addl %ecx, %eax 1681; WIN32-NEXT: adcl %ebp, %edx 1682; WIN32-NEXT: movl (%esp), %esi # 4-byte Reload 1683; WIN32-NEXT: movl %esi, %ecx 1684; WIN32-NEXT: sarl $31, %ecx 1685; WIN32-NEXT: xorl %ecx, %edx 1686; WIN32-NEXT: xorl %eax, %ecx 1687; WIN32-NEXT: orl %edx, %ecx 1688; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1689; WIN32-NEXT: movl %esi, 4(%eax) 1690; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1691; WIN32-NEXT: movl %ecx, (%eax) 1692; WIN32-NEXT: setne %al 1693; WIN32-NEXT: addl $12, %esp 1694; WIN32-NEXT: popl %esi 1695; WIN32-NEXT: popl %edi 1696; WIN32-NEXT: popl %ebx 1697; WIN32-NEXT: popl %ebp 1698; WIN32-NEXT: retl 1699 %v1 = load i64, ptr %ptr1 1700 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) 1701 %val = extractvalue {i64, i1} %t, 0 1702 %obit = extractvalue {i64, i1} %t, 1 1703 store i64 %val, ptr %res 1704 ret i1 %obit 1705} 1706 1707define zeroext i1 @smuloi64_load2(i64 %v1, ptr %ptr2, ptr %res) { 1708; SDAG-LABEL: smuloi64_load2: 1709; SDAG: # %bb.0: 1710; SDAG-NEXT: imulq (%rsi), %rdi 1711; SDAG-NEXT: seto %al 1712; SDAG-NEXT: movq %rdi, (%rdx) 1713; SDAG-NEXT: retq 1714; 1715; FAST-LABEL: smuloi64_load2: 1716; FAST: # %bb.0: 1717; FAST-NEXT: imulq (%rsi), %rdi 1718; FAST-NEXT: seto %al 1719; FAST-NEXT: movq %rdi, (%rdx) 1720; FAST-NEXT: andb $1, %al 1721; 
FAST-NEXT: retq 1722; 1723; WIN64-LABEL: smuloi64_load2: 1724; WIN64: # %bb.0: 1725; WIN64-NEXT: imulq (%rdx), %rcx 1726; WIN64-NEXT: seto %al 1727; WIN64-NEXT: movq %rcx, (%r8) 1728; WIN64-NEXT: retq 1729; 1730; WIN32-LABEL: smuloi64_load2: 1731; WIN32: # %bb.0: 1732; WIN32-NEXT: pushl %ebp 1733; WIN32-NEXT: pushl %ebx 1734; WIN32-NEXT: pushl %edi 1735; WIN32-NEXT: pushl %esi 1736; WIN32-NEXT: subl $12, %esp 1737; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1738; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi 1739; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1740; WIN32-NEXT: movl (%ecx), %ebx 1741; WIN32-NEXT: movl %edi, %esi 1742; WIN32-NEXT: sarl $31, %esi 1743; WIN32-NEXT: imull %ebx, %esi 1744; WIN32-NEXT: mull %ebx 1745; WIN32-NEXT: movl %edx, %ecx 1746; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1747; WIN32-NEXT: movl %edi, %eax 1748; WIN32-NEXT: mull %ebx 1749; WIN32-NEXT: movl %edx, %ebx 1750; WIN32-NEXT: movl %eax, %ebp 1751; WIN32-NEXT: addl %ecx, %ebp 1752; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1753; WIN32-NEXT: movl 4(%eax), %ecx 1754; WIN32-NEXT: movl %ecx, (%esp) # 4-byte Spill 1755; WIN32-NEXT: adcl %esi, %ebx 1756; WIN32-NEXT: movl %ebx, %edi 1757; WIN32-NEXT: sarl $31, %edi 1758; WIN32-NEXT: movl %ecx, %esi 1759; WIN32-NEXT: sarl $31, %esi 1760; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1761; WIN32-NEXT: imull %eax, %esi 1762; WIN32-NEXT: mull %ecx 1763; WIN32-NEXT: movl %edx, %ecx 1764; WIN32-NEXT: addl %ebp, %eax 1765; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1766; WIN32-NEXT: adcl %esi, %ecx 1767; WIN32-NEXT: movl %ecx, %ebp 1768; WIN32-NEXT: sarl $31, %ebp 1769; WIN32-NEXT: addl %ebx, %ecx 1770; WIN32-NEXT: adcl %edi, %ebp 1771; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1772; WIN32-NEXT: imull (%esp) # 4-byte Folded Reload 1773; WIN32-NEXT: addl %ecx, %eax 1774; WIN32-NEXT: adcl %ebp, %edx 1775; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload 1776; WIN32-NEXT: movl %esi, %ecx 1777; WIN32-NEXT: sarl 
$31, %ecx 1778; WIN32-NEXT: xorl %ecx, %edx 1779; WIN32-NEXT: xorl %eax, %ecx 1780; WIN32-NEXT: orl %edx, %ecx 1781; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1782; WIN32-NEXT: movl %esi, 4(%eax) 1783; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1784; WIN32-NEXT: movl %ecx, (%eax) 1785; WIN32-NEXT: setne %al 1786; WIN32-NEXT: addl $12, %esp 1787; WIN32-NEXT: popl %esi 1788; WIN32-NEXT: popl %edi 1789; WIN32-NEXT: popl %ebx 1790; WIN32-NEXT: popl %ebp 1791; WIN32-NEXT: retl 1792 %v2 = load i64, ptr %ptr2 1793 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) 1794 %val = extractvalue {i64, i1} %t, 0 1795 %obit = extractvalue {i64, i1} %t, 1 1796 store i64 %val, ptr %res 1797 ret i1 %obit 1798} 1799 1800define zeroext i1 @umuloi8_load(ptr %ptr1, i8 %v2, ptr %res) { 1801; SDAG-LABEL: umuloi8_load: 1802; SDAG: # %bb.0: 1803; SDAG-NEXT: movl %esi, %eax 1804; SDAG-NEXT: # kill: def $al killed $al killed $eax 1805; SDAG-NEXT: mulb (%rdi) 1806; SDAG-NEXT: seto %cl 1807; SDAG-NEXT: movb %al, (%rdx) 1808; SDAG-NEXT: movl %ecx, %eax 1809; SDAG-NEXT: retq 1810; 1811; FAST-LABEL: umuloi8_load: 1812; FAST: # %bb.0: 1813; FAST-NEXT: movzbl (%rdi), %eax 1814; FAST-NEXT: mulb %sil 1815; FAST-NEXT: seto %cl 1816; FAST-NEXT: movb %al, (%rdx) 1817; FAST-NEXT: andb $1, %cl 1818; FAST-NEXT: movl %ecx, %eax 1819; FAST-NEXT: retq 1820; 1821; WIN64-LABEL: umuloi8_load: 1822; WIN64: # %bb.0: 1823; WIN64-NEXT: movl %edx, %eax 1824; WIN64-NEXT: mulb (%rcx) 1825; WIN64-NEXT: seto %cl 1826; WIN64-NEXT: movb %al, (%r8) 1827; WIN64-NEXT: movl %ecx, %eax 1828; WIN64-NEXT: retq 1829; 1830; WIN32-LABEL: umuloi8_load: 1831; WIN32: # %bb.0: 1832; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1833; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1834; WIN32-NEXT: movzbl (%eax), %eax 1835; WIN32-NEXT: mulb {{[0-9]+}}(%esp) 1836; WIN32-NEXT: seto %cl 1837; WIN32-NEXT: movb %al, (%edx) 1838; WIN32-NEXT: movl %ecx, %eax 1839; WIN32-NEXT: retl 1840 %v1 = load i8, ptr %ptr1 1841 %t = call 
{i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2) 1842 %val = extractvalue {i8, i1} %t, 0 1843 %obit = extractvalue {i8, i1} %t, 1 1844 store i8 %val, ptr %res 1845 ret i1 %obit 1846} 1847
; umuloi8_load2: unsigned i8 multiply-with-overflow whose second operand (%v2)
; is loaded from %ptr2. The product is stored through %res and the i1 overflow
; flag is returned. In each of the four check blocks the loaded operand is the
; memory operand of mulb and the flag is read with seto; only the fast-isel
; block has an extra "andb $1" on the flag register before it is returned.
1848define zeroext i1 @umuloi8_load2(i8 %v1, ptr %ptr2, ptr %res) { 1849; SDAG-LABEL: umuloi8_load2: 1850; SDAG: # %bb.0: 1851; SDAG-NEXT: movl %edi, %eax 1852; SDAG-NEXT: # kill: def $al killed $al killed $eax 1853; SDAG-NEXT: mulb (%rsi) 1854; SDAG-NEXT: seto %cl 1855; SDAG-NEXT: movb %al, (%rdx) 1856; SDAG-NEXT: movl %ecx, %eax 1857; SDAG-NEXT: retq 1858; 1859; FAST-LABEL: umuloi8_load2: 1860; FAST: # %bb.0: 1861; FAST-NEXT: movl %edi, %eax 1862; FAST-NEXT: # kill: def $al killed $al killed $eax 1863; FAST-NEXT: mulb (%rsi) 1864; FAST-NEXT: seto %cl 1865; FAST-NEXT: movb %al, (%rdx) 1866; FAST-NEXT: andb $1, %cl 1867; FAST-NEXT: movl %ecx, %eax 1868; FAST-NEXT: retq 1869; 1870; WIN64-LABEL: umuloi8_load2: 1871; WIN64: # %bb.0: 1872; WIN64-NEXT: movl %ecx, %eax 1873; WIN64-NEXT: mulb (%rdx) 1874; WIN64-NEXT: seto %cl 1875; WIN64-NEXT: movb %al, (%r8) 1876; WIN64-NEXT: movl %ecx, %eax 1877; WIN64-NEXT: retq 1878; 1879; WIN32-LABEL: umuloi8_load2: 1880; WIN32: # %bb.0: 1881; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx 1882; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 1883; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1884; WIN32-NEXT: mulb (%ecx) 1885; WIN32-NEXT: seto %cl 1886; WIN32-NEXT: movb %al, (%edx) 1887; WIN32-NEXT: movl %ecx, %eax 1888; WIN32-NEXT: retl 1889 %v2 = load i8, ptr %ptr2 1890 %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2) 1891 %val = extractvalue {i8, i1} %t, 0 1892 %obit = extractvalue {i8, i1} %t, 1 1893 store i8 %val, ptr %res 1894 ret i1 %obit 1895} 1896 1897define zeroext i1 @umuloi16_load(ptr %ptr1, i16 %v2, ptr %res) { 1898; SDAG-LABEL: umuloi16_load: 1899; SDAG: # %bb.0: 1900; SDAG-NEXT: movq %rdx, %rcx 1901; SDAG-NEXT: movl %esi, %eax 1902; SDAG-NEXT: # kill: def $ax killed $ax killed $eax 1903; SDAG-NEXT: mulw (%rdi) 1904; SDAG-NEXT: seto
%dl 1905; SDAG-NEXT: movw %ax, (%rcx) 1906; SDAG-NEXT: movl %edx, %eax 1907; SDAG-NEXT: retq 1908; 1909; FAST-LABEL: umuloi16_load: 1910; FAST: # %bb.0: 1911; FAST-NEXT: movq %rdx, %rcx 1912; FAST-NEXT: movzwl (%rdi), %eax 1913; FAST-NEXT: mulw %si 1914; FAST-NEXT: seto %dl 1915; FAST-NEXT: movw %ax, (%rcx) 1916; FAST-NEXT: andb $1, %dl 1917; FAST-NEXT: movl %edx, %eax 1918; FAST-NEXT: retq 1919; 1920; WIN64-LABEL: umuloi16_load: 1921; WIN64: # %bb.0: 1922; WIN64-NEXT: movl %edx, %eax 1923; WIN64-NEXT: mulw (%rcx) 1924; WIN64-NEXT: seto %cl 1925; WIN64-NEXT: movw %ax, (%r8) 1926; WIN64-NEXT: movl %ecx, %eax 1927; WIN64-NEXT: retq 1928; 1929; WIN32-LABEL: umuloi16_load: 1930; WIN32: # %bb.0: 1931; WIN32-NEXT: pushl %esi 1932; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 1933; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 1934; WIN32-NEXT: movzwl (%eax), %eax 1935; WIN32-NEXT: mulw {{[0-9]+}}(%esp) 1936; WIN32-NEXT: seto %cl 1937; WIN32-NEXT: movw %ax, (%esi) 1938; WIN32-NEXT: movl %ecx, %eax 1939; WIN32-NEXT: popl %esi 1940; WIN32-NEXT: retl 1941 %v1 = load i16, ptr %ptr1 1942 %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2) 1943 %val = extractvalue {i16, i1} %t, 0 1944 %obit = extractvalue {i16, i1} %t, 1 1945 store i16 %val, ptr %res 1946 ret i1 %obit 1947} 1948 1949define zeroext i1 @umuloi16_load2(i16 %v1, ptr %ptr2, ptr %res) { 1950; SDAG-LABEL: umuloi16_load2: 1951; SDAG: # %bb.0: 1952; SDAG-NEXT: movq %rdx, %rcx 1953; SDAG-NEXT: movl %edi, %eax 1954; SDAG-NEXT: # kill: def $ax killed $ax killed $eax 1955; SDAG-NEXT: mulw (%rsi) 1956; SDAG-NEXT: seto %dl 1957; SDAG-NEXT: movw %ax, (%rcx) 1958; SDAG-NEXT: movl %edx, %eax 1959; SDAG-NEXT: retq 1960; 1961; FAST-LABEL: umuloi16_load2: 1962; FAST: # %bb.0: 1963; FAST-NEXT: movq %rdx, %rcx 1964; FAST-NEXT: movl %edi, %eax 1965; FAST-NEXT: # kill: def $ax killed $ax killed $eax 1966; FAST-NEXT: mulw (%rsi) 1967; FAST-NEXT: seto %dl 1968; FAST-NEXT: movw %ax, (%rcx) 1969; FAST-NEXT: andb $1, %dl 1970; 
FAST-NEXT: movl %edx, %eax 1971; FAST-NEXT: retq 1972; 1973; WIN64-LABEL: umuloi16_load2: 1974; WIN64: # %bb.0: 1975; WIN64-NEXT: movl %ecx, %eax 1976; WIN64-NEXT: mulw (%rdx) 1977; WIN64-NEXT: seto %cl 1978; WIN64-NEXT: movw %ax, (%r8) 1979; WIN64-NEXT: movl %ecx, %eax 1980; WIN64-NEXT: retq 1981; 1982; WIN32-LABEL: umuloi16_load2: 1983; WIN32: # %bb.0: 1984; WIN32-NEXT: pushl %esi 1985; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 1986; WIN32-NEXT: movzwl {{[0-9]+}}(%esp), %eax 1987; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1988; WIN32-NEXT: mulw (%ecx) 1989; WIN32-NEXT: seto %cl 1990; WIN32-NEXT: movw %ax, (%esi) 1991; WIN32-NEXT: movl %ecx, %eax 1992; WIN32-NEXT: popl %esi 1993; WIN32-NEXT: retl 1994 %v2 = load i16, ptr %ptr2 1995 %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2) 1996 %val = extractvalue {i16, i1} %t, 0 1997 %obit = extractvalue {i16, i1} %t, 1 1998 store i16 %val, ptr %res 1999 ret i1 %obit 2000} 2001 2002define zeroext i1 @umuloi32_load(ptr %ptr1, i32 %v2, ptr %res) { 2003; SDAG-LABEL: umuloi32_load: 2004; SDAG: # %bb.0: 2005; SDAG-NEXT: movq %rdx, %rcx 2006; SDAG-NEXT: movl %esi, %eax 2007; SDAG-NEXT: mull (%rdi) 2008; SDAG-NEXT: seto %dl 2009; SDAG-NEXT: movl %eax, (%rcx) 2010; SDAG-NEXT: movl %edx, %eax 2011; SDAG-NEXT: retq 2012; 2013; FAST-LABEL: umuloi32_load: 2014; FAST: # %bb.0: 2015; FAST-NEXT: movq %rdx, %rcx 2016; FAST-NEXT: movl (%rdi), %eax 2017; FAST-NEXT: mull %esi 2018; FAST-NEXT: seto %dl 2019; FAST-NEXT: movl %eax, (%rcx) 2020; FAST-NEXT: andb $1, %dl 2021; FAST-NEXT: movl %edx, %eax 2022; FAST-NEXT: retq 2023; 2024; WIN64-LABEL: umuloi32_load: 2025; WIN64: # %bb.0: 2026; WIN64-NEXT: movl %edx, %eax 2027; WIN64-NEXT: mull (%rcx) 2028; WIN64-NEXT: seto %cl 2029; WIN64-NEXT: movl %eax, (%r8) 2030; WIN64-NEXT: movl %ecx, %eax 2031; WIN64-NEXT: retq 2032; 2033; WIN32-LABEL: umuloi32_load: 2034; WIN32: # %bb.0: 2035; WIN32-NEXT: pushl %esi 2036; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 2037; WIN32-NEXT: movl 
{{[0-9]+}}(%esp), %eax 2038; WIN32-NEXT: movl (%eax), %eax 2039; WIN32-NEXT: mull {{[0-9]+}}(%esp) 2040; WIN32-NEXT: seto %cl 2041; WIN32-NEXT: movl %eax, (%esi) 2042; WIN32-NEXT: movl %ecx, %eax 2043; WIN32-NEXT: popl %esi 2044; WIN32-NEXT: retl 2045 %v1 = load i32, ptr %ptr1 2046 %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) 2047 %val = extractvalue {i32, i1} %t, 0 2048 %obit = extractvalue {i32, i1} %t, 1 2049 store i32 %val, ptr %res 2050 ret i1 %obit 2051} 2052 2053define zeroext i1 @umuloi32_load2(i32 %v1, ptr %ptr2, ptr %res) { 2054; SDAG-LABEL: umuloi32_load2: 2055; SDAG: # %bb.0: 2056; SDAG-NEXT: movq %rdx, %rcx 2057; SDAG-NEXT: movl %edi, %eax 2058; SDAG-NEXT: mull (%rsi) 2059; SDAG-NEXT: seto %dl 2060; SDAG-NEXT: movl %eax, (%rcx) 2061; SDAG-NEXT: movl %edx, %eax 2062; SDAG-NEXT: retq 2063; 2064; FAST-LABEL: umuloi32_load2: 2065; FAST: # %bb.0: 2066; FAST-NEXT: movq %rdx, %rcx 2067; FAST-NEXT: movl %edi, %eax 2068; FAST-NEXT: mull (%rsi) 2069; FAST-NEXT: seto %dl 2070; FAST-NEXT: movl %eax, (%rcx) 2071; FAST-NEXT: andb $1, %dl 2072; FAST-NEXT: movl %edx, %eax 2073; FAST-NEXT: retq 2074; 2075; WIN64-LABEL: umuloi32_load2: 2076; WIN64: # %bb.0: 2077; WIN64-NEXT: movl %ecx, %eax 2078; WIN64-NEXT: mull (%rdx) 2079; WIN64-NEXT: seto %cl 2080; WIN64-NEXT: movl %eax, (%r8) 2081; WIN64-NEXT: movl %ecx, %eax 2082; WIN64-NEXT: retq 2083; 2084; WIN32-LABEL: umuloi32_load2: 2085; WIN32: # %bb.0: 2086; WIN32-NEXT: pushl %esi 2087; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 2088; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 2089; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 2090; WIN32-NEXT: mull (%ecx) 2091; WIN32-NEXT: seto %cl 2092; WIN32-NEXT: movl %eax, (%esi) 2093; WIN32-NEXT: movl %ecx, %eax 2094; WIN32-NEXT: popl %esi 2095; WIN32-NEXT: retl 2096 %v2 = load i32, ptr %ptr2 2097 %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) 2098 %val = extractvalue {i32, i1} %t, 0 2099 %obit = extractvalue {i32, i1} %t, 1 2100 store i32 %val, ptr %res 
2101 ret i1 %obit 2102} 2103 2104define zeroext i1 @umuloi64_load(ptr %ptr1, i64 %v2, ptr %res) { 2105; SDAG-LABEL: umuloi64_load: 2106; SDAG: # %bb.0: 2107; SDAG-NEXT: movq %rdx, %rcx 2108; SDAG-NEXT: movq %rsi, %rax 2109; SDAG-NEXT: mulq (%rdi) 2110; SDAG-NEXT: seto %dl 2111; SDAG-NEXT: movq %rax, (%rcx) 2112; SDAG-NEXT: movl %edx, %eax 2113; SDAG-NEXT: retq 2114; 2115; FAST-LABEL: umuloi64_load: 2116; FAST: # %bb.0: 2117; FAST-NEXT: movq %rdx, %rcx 2118; FAST-NEXT: movq (%rdi), %rax 2119; FAST-NEXT: mulq %rsi 2120; FAST-NEXT: seto %dl 2121; FAST-NEXT: movq %rax, (%rcx) 2122; FAST-NEXT: andb $1, %dl 2123; FAST-NEXT: movl %edx, %eax 2124; FAST-NEXT: retq 2125; 2126; WIN64-LABEL: umuloi64_load: 2127; WIN64: # %bb.0: 2128; WIN64-NEXT: movq %rdx, %rax 2129; WIN64-NEXT: mulq (%rcx) 2130; WIN64-NEXT: seto %cl 2131; WIN64-NEXT: movq %rax, (%r8) 2132; WIN64-NEXT: movl %ecx, %eax 2133; WIN64-NEXT: retq 2134; 2135; WIN32-LABEL: umuloi64_load: 2136; WIN32: # %bb.0: 2137; WIN32-NEXT: pushl %ebp 2138; WIN32-NEXT: pushl %ebx 2139; WIN32-NEXT: pushl %edi 2140; WIN32-NEXT: pushl %esi 2141; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 2142; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 2143; WIN32-NEXT: movl (%eax), %ebp 2144; WIN32-NEXT: movl 4(%eax), %eax 2145; WIN32-NEXT: testl %esi, %esi 2146; WIN32-NEXT: setne %dl 2147; WIN32-NEXT: testl %eax, %eax 2148; WIN32-NEXT: setne %cl 2149; WIN32-NEXT: andb %dl, %cl 2150; WIN32-NEXT: mull {{[0-9]+}}(%esp) 2151; WIN32-NEXT: movl %eax, %edi 2152; WIN32-NEXT: seto %bl 2153; WIN32-NEXT: movl %esi, %eax 2154; WIN32-NEXT: mull %ebp 2155; WIN32-NEXT: seto %ch 2156; WIN32-NEXT: orb %bl, %ch 2157; WIN32-NEXT: orb %cl, %ch 2158; WIN32-NEXT: leal (%edi,%eax), %esi 2159; WIN32-NEXT: movl %ebp, %eax 2160; WIN32-NEXT: mull {{[0-9]+}}(%esp) 2161; WIN32-NEXT: addl %esi, %edx 2162; WIN32-NEXT: setb %cl 2163; WIN32-NEXT: orb %ch, %cl 2164; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 2165; WIN32-NEXT: movl %eax, (%esi) 2166; WIN32-NEXT: movl %edx, 4(%esi) 2167; 
WIN32-NEXT: movl %ecx, %eax 2168; WIN32-NEXT: popl %esi 2169; WIN32-NEXT: popl %edi 2170; WIN32-NEXT: popl %ebx 2171; WIN32-NEXT: popl %ebp 2172; WIN32-NEXT: retl 2173 %v1 = load i64, ptr %ptr1 2174 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) 2175 %val = extractvalue {i64, i1} %t, 0 2176 %obit = extractvalue {i64, i1} %t, 1 2177 store i64 %val, ptr %res 2178 ret i1 %obit 2179} 2180 2181define zeroext i1 @umuloi64_load2(i64 %v1, ptr %ptr2, ptr %res) { 2182; SDAG-LABEL: umuloi64_load2: 2183; SDAG: # %bb.0: 2184; SDAG-NEXT: movq %rdx, %rcx 2185; SDAG-NEXT: movq %rdi, %rax 2186; SDAG-NEXT: mulq (%rsi) 2187; SDAG-NEXT: seto %dl 2188; SDAG-NEXT: movq %rax, (%rcx) 2189; SDAG-NEXT: movl %edx, %eax 2190; SDAG-NEXT: retq 2191; 2192; FAST-LABEL: umuloi64_load2: 2193; FAST: # %bb.0: 2194; FAST-NEXT: movq %rdx, %rcx 2195; FAST-NEXT: movq %rdi, %rax 2196; FAST-NEXT: mulq (%rsi) 2197; FAST-NEXT: seto %dl 2198; FAST-NEXT: movq %rax, (%rcx) 2199; FAST-NEXT: andb $1, %dl 2200; FAST-NEXT: movl %edx, %eax 2201; FAST-NEXT: retq 2202; 2203; WIN64-LABEL: umuloi64_load2: 2204; WIN64: # %bb.0: 2205; WIN64-NEXT: movq %rcx, %rax 2206; WIN64-NEXT: mulq (%rdx) 2207; WIN64-NEXT: seto %cl 2208; WIN64-NEXT: movq %rax, (%r8) 2209; WIN64-NEXT: movl %ecx, %eax 2210; WIN64-NEXT: retq 2211; 2212; WIN32-LABEL: umuloi64_load2: 2213; WIN32: # %bb.0: 2214; WIN32-NEXT: pushl %ebp 2215; WIN32-NEXT: pushl %ebx 2216; WIN32-NEXT: pushl %edi 2217; WIN32-NEXT: pushl %esi 2218; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 2219; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx 2220; WIN32-NEXT: movl (%ecx), %ebp 2221; WIN32-NEXT: movl 4(%ecx), %esi 2222; WIN32-NEXT: testl %eax, %eax 2223; WIN32-NEXT: setne %dl 2224; WIN32-NEXT: testl %esi, %esi 2225; WIN32-NEXT: setne %cl 2226; WIN32-NEXT: andb %dl, %cl 2227; WIN32-NEXT: mull %ebp 2228; WIN32-NEXT: movl %eax, %edi 2229; WIN32-NEXT: seto %bl 2230; WIN32-NEXT: movl %esi, %eax 2231; WIN32-NEXT: mull {{[0-9]+}}(%esp) 2232; WIN32-NEXT: seto %ch 2233; 
WIN32-NEXT: orb %bl, %ch 2234; WIN32-NEXT: orb %cl, %ch 2235; WIN32-NEXT: leal (%edi,%eax), %esi 2236; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax 2237; WIN32-NEXT: mull %ebp 2238; WIN32-NEXT: addl %esi, %edx 2239; WIN32-NEXT: setb %cl 2240; WIN32-NEXT: orb %ch, %cl 2241; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi 2242; WIN32-NEXT: movl %eax, (%esi) 2243; WIN32-NEXT: movl %edx, 4(%esi) 2244; WIN32-NEXT: movl %ecx, %eax 2245; WIN32-NEXT: popl %esi 2246; WIN32-NEXT: popl %edi 2247; WIN32-NEXT: popl %ebx 2248; WIN32-NEXT: popl %ebp 2249; WIN32-NEXT: retl 2250 %v2 = load i64, ptr %ptr2 2251 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) 2252 %val = extractvalue {i64, i1} %t, 0 2253 %obit = extractvalue {i64, i1} %t, 1 2254 store i64 %val, ptr %res 2255 ret i1 %obit 2256} 2257
; Declarations of the multiply-with-overflow intrinsics called by the tests in
; this file: signed (smul) and unsigned (umul) variants at i8, i16, i32 and
; i64. Each takes two integers of the given width and returns a
; {product, i1 overflow-flag} pair.
2258declare {i8, i1} @llvm.smul.with.overflow.i8 (i8, i8 ) nounwind readnone 2259declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone 2260declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone 2261declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone 2262declare {i8, i1} @llvm.umul.with.overflow.i8 (i8, i8 ) nounwind readnone 2263declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone 2264declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone 2265declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone 2266
; !0 is the !prof payload attached to the "br i1 %obit, label %overflow,
; label %continue" instructions of the branch tests in this file. Under LLVM's
; branch_weights convention the weights follow successor order, so %overflow
; gets weight 0 and %continue gets weight 2147483647.
2267!0 = !{!"branch_weights", i32 0, i32 2147483647} 2268