; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86

; These test cases are inspired by C++20 (formerly C++2a) std::midpoint().
; See https://bugs.llvm.org/show_bug.cgi?id=40965
;
; Every function in this file evaluates the same expression at one bit width:
;   gt     = a1 > a2                     (icmp sgt or icmp ugt)
;   sign   = gt ? -1 : 1
;   lo, hi = gt ? (a2, a1) : (a1, a2)    (i.e. min and max of the operands)
;   result = a1 + sign * ((hi - lo) >> 1)    (logical shift right)
; The signed variants additionally carry nsw on the mul and the add.
; The variants differ only in bit width, in the signedness of the compare,
; and in whether each operand is an argument or is loaded through a pointer.

; ---------------------------------------------------------------------------- ;
; 32-bit width
; ---------------------------------------------------------------------------- ;

; Values come from regs

; Signed i32 variant (icmp sgt, nsw on mul/add); both operands are arguments.
define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind {
; X64-LABEL: scalar_i32_signed_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    setle %cl
; X64-NEXT:    leal -1(%rcx,%rcx), %ecx
; X64-NEXT:    subl %edi, %esi
; X64-NEXT:    cmovgel %esi, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %ecx, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i32_signed_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    cmpl %esi, %ecx
; X86-NEXT:    setle %al
; X86-NEXT:    leal -1(%eax,%eax), %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    jg .LBB0_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    subl %ecx, %esi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:  .LBB0_2:
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %t3 = icmp sgt i32 %a1, %a2 ; signed
  %t4 = select i1 %t3, i32 -1, i32 1
  %t5 = select i1 %t3, i32 %a2, i32 %a1
  %t6 = select i1 %t3, i32 %a1, i32 %a2
  %t7 = sub i32 %t6, %t5
  %t8 = lshr i32 %t7, 1
  %t9 = mul nsw i32 %t8, %t4 ; signed
  %a10 = add nsw i32 %t9, %a1 ; signed
  ret i32 %a10
}

; Unsigned i32 variant (icmp ugt, no nsw flags); both operands are arguments.
define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind {
; X64-LABEL: scalar_i32_unsigned_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    cmpl %edi, %esi
; X64-NEXT:    sbbl %ecx, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    subl %edi, %esi
; X64-NEXT:    cmovael %esi, %eax
; X64-NEXT:    orl $1, %ecx
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %ecx, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i32_unsigned_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    movl %edi, %esi
; X86-NEXT:    subl %ecx, %esi
; X86-NEXT:    sbbl %edx, %edx
; X86-NEXT:    orl $1, %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    subl %edi, %eax
; X86-NEXT:    ja .LBB1_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:  .LBB1_2:
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl
  %t3 = icmp ugt i32 %a1, %a2
  %t4 = select i1 %t3, i32 -1, i32 1
  %t5 = select i1 %t3, i32 %a2, i32 %a1
  %t6 = select i1 %t3, i32 %a1, i32 %a2
  %t7 = sub i32 %t6, %t5
  %t8 = lshr i32 %t7, 1
  %t9 = mul i32 %t8, %t4
  %a10 = add i32 %t9, %a1
  ret i32 %a10
}

; Values are loaded. Only check signed case.

; Each function in this group computes
;   a1 + ((a1 > a2) ? -1 : 1) * ((max(a1, a2) - min(a1, a2)) >> 1)
; with a logical shift; only the operand sources, width and signedness vary.

; Signed i32 variant; %a1 is loaded from %a1_addr, %a2 is an argument.
define i32 @scalar_i32_signed_mem_reg(ptr %a1_addr, i32 %a2) nounwind {
; X64-LABEL: scalar_i32_signed_mem_reg:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %ecx
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    setle %dl
; X64-NEXT:    leal -1(%rdx,%rdx), %edx
; X64-NEXT:    subl %ecx, %esi
; X64-NEXT:    cmovgel %esi, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %edx, %eax
; X64-NEXT:    addl %ecx, %eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i32_signed_mem_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %ecx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    cmpl %edx, %ecx
; X86-NEXT:    setle %al
; X86-NEXT:    leal -1(%eax,%eax), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    subl %edx, %eax
; X86-NEXT:    jg .LBB2_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    subl %ecx, %edx
; X86-NEXT:    movl %edx, %eax
; X86-NEXT:  .LBB2_2:
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %esi, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a1 = load i32, ptr %a1_addr
  %t3 = icmp sgt i32 %a1, %a2 ; signed
  %t4 = select i1 %t3, i32 -1, i32 1
  %t5 = select i1 %t3, i32 %a2, i32 %a1
  %t6 = select i1 %t3, i32 %a1, i32 %a2
  %t7 = sub i32 %t6, %t5
  %t8 = lshr i32 %t7, 1
  %t9 = mul nsw i32 %t8, %t4 ; signed
  %a10 = add nsw i32 %t9, %a1 ; signed
  ret i32 %a10
}

; Signed i32 variant; %a1 is an argument, %a2 is loaded from %a2_addr.
define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind {
; X64-LABEL: scalar_i32_signed_reg_mem:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    movl %edi, %edx
; X64-NEXT:    subl %eax, %edx
; X64-NEXT:    setle %cl
; X64-NEXT:    leal -1(%rcx,%rcx), %ecx
; X64-NEXT:    subl %edi, %eax
; X64-NEXT:    cmovll %edx, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %ecx, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i32_signed_reg_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %esi
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    cmpl %esi, %ecx
; X86-NEXT:    setle %al
; X86-NEXT:    leal -1(%eax,%eax), %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    jg .LBB3_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    subl %ecx, %esi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:  .LBB3_2:
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a2 = load i32, ptr %a2_addr
  %t3 = icmp sgt i32 %a1, %a2 ; signed
  %t4 = select i1 %t3, i32 -1, i32 1
  %t5 = select i1 %t3, i32 %a2, i32 %a1
  %t6 = select i1 %t3, i32 %a1, i32 %a2
  %t7 = sub i32 %t6, %t5
  %t8 = lshr i32 %t7, 1
  %t9 = mul nsw i32 %t8, %t4 ; signed
  %a10 = add nsw i32 %t9, %a1 ; signed
  ret i32 %a10
}

; Signed i32 variant; both operands are loaded through their pointers.
define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; X64-LABEL: scalar_i32_signed_mem_mem:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %ecx
; X64-NEXT:    movl (%rsi), %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    movl %ecx, %esi
; X64-NEXT:    subl %eax, %esi
; X64-NEXT:    setle %dl
; X64-NEXT:    leal -1(%rdx,%rdx), %edx
; X64-NEXT:    subl %ecx, %eax
; X64-NEXT:    cmovll %esi, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %edx, %eax
; X64-NEXT:    addl %ecx, %eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i32_signed_mem_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    movl (%eax), %esi
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    cmpl %esi, %ecx
; X86-NEXT:    setle %al
; X86-NEXT:    leal -1(%eax,%eax), %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    jg .LBB4_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    subl %ecx, %esi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:  .LBB4_2:
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %a1 = load i32, ptr %a1_addr
  %a2 = load i32, ptr %a2_addr
  %t3 = icmp sgt i32 %a1, %a2 ; signed
  %t4 = select i1 %t3, i32 -1, i32 1
  %t5 = select i1 %t3, i32 %a2, i32 %a1
  %t6 = select i1 %t3, i32 %a1, i32 %a2
  %t7 = sub i32 %t6, %t5
  %t8 = lshr i32 %t7, 1
  %t9 = mul nsw i32 %t8, %t4 ; signed
  %a10 = add nsw i32 %t9, %a1 ; signed
  ret i32 %a10
}

; ---------------------------------------------------------------------------- ;
; 64-bit width
; ---------------------------------------------------------------------------- ;

; Values come from regs

; Signed i64 variant (icmp sgt, nsw on mul/add); both operands are arguments.
define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind {
; X64-LABEL: scalar_i64_signed_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    subq %rsi, %rax
; X64-NEXT:    setle %cl
; X64-NEXT:    leaq -1(%rcx,%rcx), %rcx
; X64-NEXT:    subq %rdi, %rsi
; X64-NEXT:    cmovgeq %rsi, %rax
; X64-NEXT:    shrq %rax
; X64-NEXT:    imulq %rcx, %rax
; X64-NEXT:    addq %rdi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i64_signed_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    cmpl %esi, %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    setl %al
; X86-NEXT:    movzbl %al, %edi
; X86-NEXT:    negl %edi
; X86-NEXT:    movl %edi, %ebx
; X86-NEXT:    orl $1, %ebx
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    subl %edx, %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    sbbl %ecx, %ebp
; X86-NEXT:    subl %esi, %edx
; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    jl .LBB5_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %edx, %eax
; X86-NEXT:    movl %ecx, %ebp
; X86-NEXT:  .LBB5_2:
; X86-NEXT:    shrdl $1, %ebp, %eax
; X86-NEXT:    shrl %ebp
; X86-NEXT:    imull %eax, %edi
; X86-NEXT:    mull %ebx
; X86-NEXT:    addl %edi, %edx
; X86-NEXT:    imull %ebx, %ebp
; X86-NEXT:    addl %ebp, %edx
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %t3 = icmp sgt i64 %a1, %a2 ; signed
  %t4 = select i1 %t3, i64 -1, i64 1
  %t5 = select i1 %t3, i64 %a2, i64 %a1
  %t6 = select i1 %t3, i64 %a1, i64 %a2
  %t7 = sub i64 %t6, %t5
  %t8 = lshr i64 %t7, 1
  %t9 = mul nsw i64 %t8, %t4 ; signed
  %a10 = add nsw i64 %t9, %a1 ; signed
  ret i64 %a10
}

; Unsigned i64 variant (icmp ugt, no nsw flags); both operands are arguments.
define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind {
; X64-LABEL: scalar_i64_unsigned_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    cmpq %rdi, %rsi
; X64-NEXT:    sbbq %rcx, %rcx
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    subq %rsi, %rax
; X64-NEXT:    subq %rdi, %rsi
; X64-NEXT:    cmovaeq %rsi, %rax
; X64-NEXT:    orq $1, %rcx
; X64-NEXT:    shrq %rax
; X64-NEXT:    imulq %rcx, %rax
; X64-NEXT:    addq %rdi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i64_unsigned_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    cmpl %eax, %ebp
; X86-NEXT:    sbbl %ecx, %esi
; X86-NEXT:    movl $0, %ebx
; X86-NEXT:    sbbl %ebx, %ebx
; X86-NEXT:    movl %ebx, %edi
; X86-NEXT:    orl $1, %edi
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    subl %ebp, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    sbbl %edx, %edx
; X86-NEXT:    xorl %edx, %eax
; X86-NEXT:    xorl %edx, %esi
; X86-NEXT:    subl %edx, %esi
; X86-NEXT:    sbbl %edx, %eax
; X86-NEXT:    movl %eax, %ebp
; X86-NEXT:    shldl $31, %esi, %eax
; X86-NEXT:    imull %eax, %ebx
; X86-NEXT:    mull %edi
; X86-NEXT:    addl %ebx, %edx
; X86-NEXT:    shrl %ebp
; X86-NEXT:    imull %edi, %ebp
; X86-NEXT:    addl %ebp, %edx
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    adcl %ecx, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %t3 = icmp ugt i64 %a1, %a2
  %t4 = select i1 %t3, i64 -1, i64 1
  %t5 = select i1 %t3, i64 %a2, i64 %a1
  %t6 = select i1 %t3, i64 %a1, i64 %a2
  %t7 = sub i64 %t6, %t5
  %t8 = lshr i64 %t7, 1
  %t9 = mul i64 %t8, %t4
  %a10 = add i64 %t9, %a1
  ret i64 %a10
}

; Values are loaded. Only check signed case.

; Each function in this group computes
;   a1 + ((a1 > a2) ? -1 : 1) * ((max(a1, a2) - min(a1, a2)) >> 1)
; with a logical shift; only the operand sources, width and signedness vary.

; Signed i64 variant; %a1 is loaded from %a1_addr, %a2 is an argument.
define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind {
; X64-LABEL: scalar_i64_signed_mem_reg:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rcx
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    movq %rcx, %rax
; X64-NEXT:    subq %rsi, %rax
; X64-NEXT:    setle %dl
; X64-NEXT:    leaq -1(%rdx,%rdx), %rdx
; X64-NEXT:    subq %rcx, %rsi
; X64-NEXT:    cmovgeq %rsi, %rax
; X64-NEXT:    shrq %rax
; X64-NEXT:    imulq %rdx, %rax
; X64-NEXT:    addq %rcx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i64_signed_mem_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %ebx
; X86-NEXT:    movl 4(%eax), %esi
; X86-NEXT:    cmpl %ebx, %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    sbbl %esi, %eax
; X86-NEXT:    setl %al
; X86-NEXT:    movzbl %al, %edi
; X86-NEXT:    negl %edi
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    orl $1, %eax
; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    subl %edx, %eax
; X86-NEXT:    movl %esi, %ebp
; X86-NEXT:    sbbl %ecx, %ebp
; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    subl %ebx, %edx
; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    sbbl %esi, %ecx
; X86-NEXT:    jl .LBB7_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %edx, %eax
; X86-NEXT:    movl %ecx, %ebp
; X86-NEXT:  .LBB7_2:
; X86-NEXT:    shrdl $1, %ebp, %eax
; X86-NEXT:    shrl %ebp
; X86-NEXT:    imull %eax, %edi
; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
; X86-NEXT:    mull %ecx
; X86-NEXT:    addl %edi, %edx
; X86-NEXT:    imull %ecx, %ebp
; X86-NEXT:    addl %ebp, %edx
; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %a1 = load i64, ptr %a1_addr
  %t3 = icmp sgt i64 %a1, %a2 ; signed
  %t4 = select i1 %t3, i64 -1, i64 1
  %t5 = select i1 %t3, i64 %a2, i64 %a1
  %t6 = select i1 %t3, i64 %a1, i64 %a2
  %t7 = sub i64 %t6, %t5
  %t8 = lshr i64 %t7, 1
  %t9 = mul nsw i64 %t8, %t4 ; signed
  %a10 = add nsw i64 %t9, %a1 ; signed
  ret i64 %a10
}

; Signed i64 variant; %a1 is an argument, %a2 is loaded from %a2_addr.
define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind {
; X64-LABEL: scalar_i64_signed_reg_mem:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    movq %rdi, %rdx
; X64-NEXT:    subq %rax, %rdx
; X64-NEXT:    setle %cl
; X64-NEXT:    leaq -1(%rcx,%rcx), %rcx
; X64-NEXT:    subq %rdi, %rax
; X64-NEXT:    cmovlq %rdx, %rax
; X64-NEXT:    shrq %rax
; X64-NEXT:    imulq %rcx, %rax
; X64-NEXT:    addq %rdi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i64_signed_reg_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %edx
; X86-NEXT:    movl 4(%eax), %ecx
; X86-NEXT:    cmpl %esi, %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    setl %al
; X86-NEXT:    movzbl %al, %edi
; X86-NEXT:    negl %edi
; X86-NEXT:    movl %edi, %ebx
; X86-NEXT:    orl $1, %ebx
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    subl %edx, %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    sbbl %ecx, %ebp
; X86-NEXT:    subl %esi, %edx
; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    jl .LBB8_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %edx, %eax
; X86-NEXT:    movl %ecx, %ebp
; X86-NEXT:  .LBB8_2:
; X86-NEXT:    shrdl $1, %ebp, %eax
; X86-NEXT:    shrl %ebp
; X86-NEXT:    imull %eax, %edi
; X86-NEXT:    mull %ebx
; X86-NEXT:    addl %edi, %edx
; X86-NEXT:    imull %ebx, %ebp
; X86-NEXT:    addl %ebp, %edx
; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %a2 = load i64, ptr %a2_addr
  %t3 = icmp sgt i64 %a1, %a2 ; signed
  %t4 = select i1 %t3, i64 -1, i64 1
  %t5 = select i1 %t3, i64 %a2, i64 %a1
  %t6 = select i1 %t3, i64 %a1, i64 %a2
  %t7 = sub i64 %t6, %t5
  %t8 = lshr i64 %t7, 1
  %t9 = mul nsw i64 %t8, %t4 ; signed
  %a10 = add nsw i64 %t9, %a1 ; signed
  ret i64 %a10
}

; Signed i64 variant; both operands are loaded through their pointers.
define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; X64-LABEL: scalar_i64_signed_mem_mem:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rcx
; X64-NEXT:    movq (%rsi), %rax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    movq %rcx, %rsi
; X64-NEXT:    subq %rax, %rsi
; X64-NEXT:    setle %dl
; X64-NEXT:    leaq -1(%rdx,%rdx), %rdx
; X64-NEXT:    subq %rcx, %rax
; X64-NEXT:    cmovlq %rsi, %rax
; X64-NEXT:    shrq %rax
; X64-NEXT:    imulq %rdx, %rax
; X64-NEXT:    addq %rcx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i64_signed_mem_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ebx
; X86-NEXT:    movl 4(%ecx), %esi
; X86-NEXT:    movl (%eax), %edx
; X86-NEXT:    movl 4(%eax), %ecx
; X86-NEXT:    cmpl %ebx, %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    sbbl %esi, %eax
; X86-NEXT:    setl %al
; X86-NEXT:    movzbl %al, %edi
; X86-NEXT:    negl %edi
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    orl $1, %eax
; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    subl %edx, %eax
; X86-NEXT:    movl %esi, %ebp
; X86-NEXT:    sbbl %ecx, %ebp
; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    subl %ebx, %edx
; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    sbbl %esi, %ecx
; X86-NEXT:    jl .LBB9_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %edx, %eax
; X86-NEXT:    movl %ecx, %ebp
; X86-NEXT:  .LBB9_2:
; X86-NEXT:    shrdl $1, %ebp, %eax
; X86-NEXT:    shrl %ebp
; X86-NEXT:    imull %eax, %edi
; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
; X86-NEXT:    mull %ecx
; X86-NEXT:    addl %edi, %edx
; X86-NEXT:    imull %ecx, %ebp
; X86-NEXT:    addl %ebp, %edx
; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %a1 = load i64, ptr %a1_addr
  %a2 = load i64, ptr %a2_addr
  %t3 = icmp sgt i64 %a1, %a2 ; signed
  %t4 = select i1 %t3, i64 -1, i64 1
  %t5 = select i1 %t3, i64 %a2, i64 %a1
  %t6 = select i1 %t3, i64 %a1, i64 %a2
  %t7 = sub i64 %t6, %t5
  %t8 = lshr i64 %t7, 1
  %t9 = mul nsw i64 %t8, %t4 ; signed
  %a10 = add nsw i64 %t9, %a1 ; signed
  ret i64 %a10
}

; ---------------------------------------------------------------------------- ;
; 16-bit width
; ---------------------------------------------------------------------------- ;

; Values come from regs

; Signed i16 variant (icmp sgt, nsw on mul/add); both operands are arguments.
define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
; X64-LABEL: scalar_i16_signed_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpw %si, %di
; X64-NEXT:    setle %al
; X64-NEXT:    leal -1(%rax,%rax), %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    movswl %di, %edx
; X64-NEXT:    movswl %si, %esi
; X64-NEXT:    subl %edx, %esi
; X64-NEXT:    cmovll %eax, %esi
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %ecx, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i16_signed_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    subw %dx, %ax
; X86-NEXT:    jg .LBB10_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    negl %eax
; X86-NEXT:  .LBB10_2:
; X86-NEXT:    xorl %ebx, %ebx
; X86-NEXT:    cmpw %dx, %cx
; X86-NEXT:    setle %bl
; X86-NEXT:    leal -1(%ebx,%ebx), %edx
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
  %t3 = icmp sgt i16 %a1, %a2 ; signed
  %t4 = select i1 %t3, i16 -1, i16 1
  %t5 = select i1 %t3, i16 %a2, i16 %a1
  %t6 = select i1 %t3, i16 %a1, i16 %a2
  %t7 = sub i16 %t6, %t5
  %t8 = lshr i16 %t7, 1
  %t9 = mul nsw i16 %t8, %t4 ; signed
  %a10 = add nsw i16 %t9, %a1 ; signed
  ret i16 %a10
}

; Unsigned i16 variant (icmp ugt, no nsw flags); both operands are arguments.
define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
; X64-LABEL: scalar_i16_unsigned_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    cmpw %di, %si
; X64-NEXT:    sbbl %ecx, %ecx
; X64-NEXT:    orl $1, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    movzwl %di, %edx
; X64-NEXT:    movzwl %si, %esi
; X64-NEXT:    subl %edx, %esi
; X64-NEXT:    cmovbl %eax, %esi
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %ecx, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i16_unsigned_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    subw %dx, %ax
; X86-NEXT:    ja .LBB11_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    negl %eax
; X86-NEXT:  .LBB11_2:
; X86-NEXT:    xorl %esi, %esi
; X86-NEXT:    cmpw %cx, %dx
; X86-NEXT:    sbbl %esi, %esi
; X86-NEXT:    orl $1, %esi
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %esi, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %t3 = icmp ugt i16 %a1, %a2
  %t4 = select i1 %t3, i16 -1, i16 1
  %t5 = select i1 %t3, i16 %a2, i16 %a1
  %t6 = select i1 %t3, i16 %a1, i16 %a2
  %t7 = sub i16 %t6, %t5
  %t8 = lshr i16 %t7, 1
  %t9 = mul i16 %t8, %t4
  %a10 = add i16 %t9, %a1
  ret i16 %a10
}

; Values are loaded. Only check signed case.

; Signed i16 variant; %a1 is loaded from %a1_addr, %a2 is an argument.
define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind {
; X64-LABEL: scalar_i16_signed_mem_reg:
; X64:       # %bb.0:
; X64-NEXT:    movswl (%rdi), %ecx
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpw %si, %cx
; X64-NEXT:    setle %al
; X64-NEXT:    leal -1(%rax,%rax), %edx
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    movswl %si, %esi
; X64-NEXT:    subl %ecx, %esi
; X64-NEXT:    cmovll %eax, %esi
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %edx, %eax
; X64-NEXT:    addl %ecx, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i16_signed_mem_reg:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl (%eax), %ecx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    subw %dx, %ax
; X86-NEXT:    jg .LBB12_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    negl %eax
; X86-NEXT:  .LBB12_2:
; X86-NEXT:    xorl %ebx, %ebx
; X86-NEXT:    cmpw %dx, %cx
; X86-NEXT:    setle %bl
; X86-NEXT:    leal -1(%ebx,%ebx), %edx
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
  %a1 = load i16, ptr %a1_addr
  %t3 = icmp sgt i16 %a1, %a2 ; signed
  %t4 = select i1 %t3, i16 -1, i16 1
  %t5 = select i1 %t3, i16 %a2, i16 %a1
  %t6 = select i1 %t3, i16 %a1, i16 %a2
  %t7 = sub i16 %t6, %t5
  %t8 = lshr i16 %t7, 1
  %t9 = mul nsw i16 %t8, %t4 ; signed
  %a10 = add nsw i16 %t9, %a1 ; signed
  ret i16 %a10
}

; Signed i16 variant; %a1 is an argument, %a2 is loaded from %a2_addr.
define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind {
; X64-LABEL: scalar_i16_signed_reg_mem:
; X64:       # %bb.0:
; X64-NEXT:    movswl (%rsi), %eax
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    cmpw %ax, %di
; X64-NEXT:    setle %cl
; X64-NEXT:    leal -1(%rcx,%rcx), %ecx
; X64-NEXT:    movl %edi, %edx
; X64-NEXT:    subl %eax, %edx
; X64-NEXT:    movswl %di, %esi
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    cmovll %edx, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %ecx, %eax
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i16_signed_reg_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl (%eax), %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    subw %dx, %ax
; X86-NEXT:    jg .LBB13_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    negl %eax
; X86-NEXT:  .LBB13_2:
; X86-NEXT:    xorl %ebx, %ebx
; X86-NEXT:    cmpw %dx, %cx
; X86-NEXT:    setle %bl
; X86-NEXT:    leal -1(%ebx,%ebx), %edx
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
  %a2 = load i16, ptr %a2_addr
  %t3 = icmp sgt i16 %a1, %a2 ; signed
  %t4 = select i1 %t3, i16 -1, i16 1
  %t5 = select i1 %t3, i16 %a2, i16 %a1
  %t6 = select i1 %t3, i16 %a1, i16 %a2
  %t7 = sub i16 %t6, %t5
  %t8 = lshr i16 %t7, 1
  %t9 = mul nsw i16 %t8, %t4 ; signed
  %a10 = add nsw i16 %t9, %a1 ; signed
  ret i16 %a10
}

; Signed i16 variant; both operands are loaded through their pointers.
define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; X64-LABEL: scalar_i16_signed_mem_mem:
; X64:       # %bb.0:
; X64-NEXT:    movswl (%rdi), %ecx
; X64-NEXT:    movswl (%rsi), %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    cmpw %ax, %cx
; X64-NEXT:    setle %dl
; X64-NEXT:    leal -1(%rdx,%rdx), %edx
; X64-NEXT:    movl %ecx, %esi
; X64-NEXT:    subl %eax, %esi
; X64-NEXT:    subl %ecx, %eax
; X64-NEXT:    cmovll %esi, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    shrl %eax
; X64-NEXT:    imull %edx, %eax
; X64-NEXT:    addl %ecx, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i16_signed_mem_mem:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl (%ecx), %ecx
; X86-NEXT:    movzwl (%eax), %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    subw %dx, %ax
; X86-NEXT:    jg .LBB14_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    negl %eax
; X86-NEXT:  .LBB14_2:
; X86-NEXT:    xorl %ebx, %ebx
; X86-NEXT:    cmpw %dx, %cx
; X86-NEXT:    setle %bl
; X86-NEXT:    leal -1(%ebx,%ebx), %edx
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
  %a1 = load i16, ptr %a1_addr
  %a2 = load i16, ptr %a2_addr
  %t3 = icmp sgt i16 %a1, %a2 ; signed
  %t4 = select i1 %t3, i16 -1, i16 1
  %t5 = select i1 %t3, i16 %a2, i16 %a1
  %t6 = select i1 %t3, i16 %a1, i16 %a2
  %t7 = sub i16 %t6, %t5
  %t8 = lshr i16 %t7, 1
  %t9 = mul nsw i16 %t8, %t4 ; signed
  %a10 = add nsw i16 %t9, %a1 ; signed
  ret i16 %a10
}

; ---------------------------------------------------------------------------- ;
; 8-bit width
; ---------------------------------------------------------------------------- ;

; Values come from regs

; Signed i8 variant (icmp sgt, nsw on mul/add); both operands are arguments.
define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
; X64-LABEL: scalar_i8_signed_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    cmpb %sil, %dil
; X64-NEXT:    setg %cl
; X64-NEXT:    negb %cl
; X64-NEXT:    orb $1, %cl
; X64-NEXT:    movsbl %dil, %edx
; X64-NEXT:    subl %esi, %edi
; X64-NEXT:    movsbl %sil, %eax
; X64-NEXT:    subl %edx, %eax
; X64-NEXT:    cmovll %edi, %eax
; X64-NEXT:    shrb %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %cl
; X64-NEXT:    addb %dl, %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i8_signed_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
; X86-NEXT:    cmpb %ah, %cl
; X86-NEXT:    setg %dl
; X86-NEXT:    negb %dl
; X86-NEXT:    orb $1, %dl
; X86-NEXT:    movb %cl, %al
; X86-NEXT:    subb %ah, %al
; X86-NEXT:    jg .LBB15_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    subb %cl, %ah
; X86-NEXT:    movb %ah, %al
; X86-NEXT:  .LBB15_2:
; X86-NEXT:    shrb %al
; X86-NEXT:    mulb %dl
; X86-NEXT:    addb %cl, %al
; X86-NEXT:    retl
  %t3 = icmp sgt i8 %a1, %a2 ; signed
  %t4 = select i1 %t3, i8 -1, i8 1
  %t5 = select i1 %t3, i8 %a2, i8 %a1
  %t6 = select i1 %t3, i8 %a1, i8 %a2
  %t7 = sub i8 %t6, %t5
  %t8 = lshr i8 %t7, 1
  %t9 = mul nsw i8 %t8, %t4 ; signed
  %a10 = add nsw i8 %t9, %a1 ; signed
  ret i8 %a10
}

; Unsigned i8 variant (icmp ugt, no nsw flags); both operands are arguments.
define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
; X64-LABEL: scalar_i8_unsigned_reg_reg:
; X64:       # %bb.0:
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    cmpb %dil, %sil
; X64-NEXT:    sbbl %ecx, %ecx
; X64-NEXT:    orb $1, %cl
; X64-NEXT:    movzbl %dil, %edx
; X64-NEXT:    subl %esi, %edi
; X64-NEXT:    movzbl %sil, %eax
; X64-NEXT:    subl %edx, %eax
; X64-NEXT:    cmovbl %edi, %eax
; X64-NEXT:    shrb %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %cl
; X64-NEXT:    addb %dl, %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i8_unsigned_reg_reg:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    movb %ch, %ah
; X86-NEXT:    subb %cl, %ah
; X86-NEXT:    sbbl %edx, %edx
; X86-NEXT:    orb $1, %dl
; X86-NEXT:    movb %cl, %al
; X86-NEXT:    subb %ch, %al
; X86-NEXT:    ja .LBB16_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movb %ah, %al
; X86-NEXT:  .LBB16_2:
; X86-NEXT:    shrb %al
; X86-NEXT:    mulb %dl
; X86-NEXT:    addb %cl, %al
; X86-NEXT:    retl
  %t3 = icmp ugt i8 %a1, %a2
  %t4 = select i1 %t3, i8 -1, i8 1
  %t5 = select i1 %t3, i8 %a2, i8 %a1
  %t6 = select i1 %t3, i8 %a1, i8 %a2
  %t7 = sub i8 %t6, %t5
  %t8 = lshr i8 %t7, 1
  %t9 = mul i8 %t8, %t4
  %a10 = add i8 %t9, %a1
  ret i8 %a10
}

; Values are loaded. Only check signed case.

; Each function in this group computes, on i8 with a signed compare,
;   a1 + ((a1 > a2) ? -1 : 1) * ((max(a1, a2) - min(a1, a2)) >> 1)
; with a logical shift; only the operand sources vary.

; Signed i8 variant; %a1 is loaded from %a1_addr, %a2 is an argument.
define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind {
; X64-LABEL: scalar_i8_signed_mem_reg:
; X64:       # %bb.0:
; X64-NEXT:    movsbl (%rdi), %ecx
; X64-NEXT:    cmpb %sil, %cl
; X64-NEXT:    setg %dl
; X64-NEXT:    negb %dl
; X64-NEXT:    orb $1, %dl
; X64-NEXT:    movl %ecx, %edi
; X64-NEXT:    subl %esi, %edi
; X64-NEXT:    movsbl %sil, %eax
; X64-NEXT:    subl %ecx, %eax
; X64-NEXT:    cmovll %edi, %eax
; X64-NEXT:    shrb %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %dl
; X64-NEXT:    addb %cl, %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i8_signed_mem_reg:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl (%ecx), %ecx
; X86-NEXT:    cmpb %ah, %cl
; X86-NEXT:    setg %dl
; X86-NEXT:    negb %dl
; X86-NEXT:    orb $1, %dl
; X86-NEXT:    movb %cl, %al
; X86-NEXT:    subb %ah, %al
; X86-NEXT:    jg .LBB17_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    subb %cl, %ah
; X86-NEXT:    movb %ah, %al
; X86-NEXT:  .LBB17_2:
; X86-NEXT:    shrb %al
; X86-NEXT:    mulb %dl
; X86-NEXT:    addb %cl, %al
; X86-NEXT:    retl
  %a1 = load i8, ptr %a1_addr
  %t3 = icmp sgt i8 %a1, %a2 ; signed
  %t4 = select i1 %t3, i8 -1, i8 1
  %t5 = select i1 %t3, i8 %a2, i8 %a1
  %t6 = select i1 %t3, i8 %a1, i8 %a2
  %t7 = sub i8 %t6, %t5
  %t8 = lshr i8 %t7, 1
  %t9 = mul nsw i8 %t8, %t4 ; signed
  %a10 = add nsw i8 %t9, %a1 ; signed
  ret i8 %a10
}

; Signed i8 variant; %a1 is an argument, %a2 is loaded from %a2_addr.
define i8 @scalar_i8_signed_reg_mem(i8 %a1, ptr %a2_addr) nounwind {
; X64-LABEL: scalar_i8_signed_reg_mem:
; X64:       # %bb.0:
; X64-NEXT:    movsbl (%rsi), %eax
; X64-NEXT:    cmpb %al, %dil
; X64-NEXT:    setg %cl
; X64-NEXT:    negb %cl
; X64-NEXT:    orb $1, %cl
; X64-NEXT:    movsbl %dil, %edx
; X64-NEXT:    subl %eax, %edi
; X64-NEXT:    subl %edx, %eax
; X64-NEXT:    cmovll %edi, %eax
; X64-NEXT:    shrb %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %cl
; X64-NEXT:    addb %dl, %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i8_signed_reg_mem:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb (%eax), %ah
; X86-NEXT:    cmpb %ah, %cl
; X86-NEXT:    setg %dl
; X86-NEXT:    negb %dl
; X86-NEXT:    orb $1, %dl
; X86-NEXT:    movb %cl, %al
; X86-NEXT:    subb %ah, %al
; X86-NEXT:    jg .LBB18_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    subb %cl, %ah
; X86-NEXT:    movb %ah, %al
; X86-NEXT:  .LBB18_2:
; X86-NEXT:    shrb %al
; X86-NEXT:    mulb %dl
; X86-NEXT:    addb %cl, %al
; X86-NEXT:    retl
  %a2 = load i8, ptr %a2_addr
  %t3 = icmp sgt i8 %a1, %a2 ; signed
  %t4 = select i1 %t3, i8 -1, i8 1
  %t5 = select i1 %t3, i8 %a2, i8 %a1
  %t6 = select i1 %t3, i8 %a1, i8 %a2
  %t7 = sub i8 %t6, %t5
  %t8 = lshr i8 %t7, 1
  %t9 = mul nsw i8 %t8, %t4 ; signed
  %a10 = add nsw i8 %t9, %a1 ; signed
  ret i8 %a10
}

; Signed i8 variant; both operands are loaded through their pointers.
define i8 @scalar_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind {
; X64-LABEL: scalar_i8_signed_mem_mem:
; X64:       # %bb.0:
; X64-NEXT:    movsbl (%rdi), %ecx
; X64-NEXT:    movsbl (%rsi), %eax
; X64-NEXT:    cmpb %al, %cl
; X64-NEXT:    setg %dl
; X64-NEXT:    negb %dl
; X64-NEXT:    orb $1, %dl
; X64-NEXT:    movl %ecx, %esi
; X64-NEXT:    subl %eax, %esi
; X64-NEXT:    subl %ecx, %eax
; X64-NEXT:    cmovll %esi, %eax
; X64-NEXT:    shrb %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %dl
; X64-NEXT:    addb %cl, %al
; X64-NEXT:    retq
;
; X86-LABEL: scalar_i8_signed_mem_mem:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl (%ecx), %ecx
; X86-NEXT:    movb (%eax), %ah
; X86-NEXT:    cmpb %ah, %cl
; X86-NEXT:    setg %dl
; X86-NEXT:    negb %dl
; X86-NEXT:    orb $1, %dl
; X86-NEXT:    movb %cl, %al
; X86-NEXT:    subb %ah, %al
; X86-NEXT:    jg .LBB19_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    subb %cl, %ah
; X86-NEXT:    movb %ah, %al
; X86-NEXT:  .LBB19_2:
; X86-NEXT:    shrb %al
; X86-NEXT:    mulb %dl
; X86-NEXT:    addb %cl, %al
; X86-NEXT:    retl
  %a1 = load i8, ptr %a1_addr
  %a2 = load i8, ptr %a2_addr
  %t3 = icmp sgt i8 %a1, %a2 ; signed
  %t4 = select i1 %t3, i8 -1, i8 1
  %t5 = select i1 %t3, i8 %a2, i8 %a1
  %t6 = select i1 %t3, i8 %a1, i8 %a2
  %t7 = sub i8 %t6, %t5
  %t8 = lshr i8 %t7, 1
  %t9 = mul nsw i8 %t8, %t4 ; signed
  %a10 = add nsw i8 %t9, %a1 ; signed
  ret i8 %a10
}