; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86-NOBMI
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi2 | FileCheck %s --check-prefix=X86-BMI
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64-NOBMI
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2 | FileCheck %s --check-prefix=X64-BMI

; PR1198

define i64 @foo(i64 %x, i64 %y) nounwind {
; X86-NOBMI-LABEL: foo:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    pushl %ebp
; X86-NOBMI-NEXT:    pushl %ebx
; X86-NOBMI-NEXT:    pushl %edi
; X86-NOBMI-NEXT:    pushl %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT:    movl %edi, %eax
; X86-NOBMI-NEXT:    mull %esi
; X86-NOBMI-NEXT:    movl %edx, %ebp
; X86-NOBMI-NEXT:    movl %ebx, %eax
; X86-NOBMI-NEXT:    mull %esi
; X86-NOBMI-NEXT:    movl %edx, %esi
; X86-NOBMI-NEXT:    movl %eax, %ebx
; X86-NOBMI-NEXT:    addl %ebp, %ebx
; X86-NOBMI-NEXT:    adcl $0, %esi
; X86-NOBMI-NEXT:    movl %edi, %eax
; X86-NOBMI-NEXT:    mull %ecx
; X86-NOBMI-NEXT:    addl %ebx, %eax
; X86-NOBMI-NEXT:    adcl %edx, %esi
; X86-NOBMI-NEXT:    setb %al
; X86-NOBMI-NEXT:    movzbl %al, %edi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    mull %ecx
; X86-NOBMI-NEXT:    addl %esi, %eax
; X86-NOBMI-NEXT:    adcl %edi, %edx
; X86-NOBMI-NEXT:    popl %esi
; X86-NOBMI-NEXT:    popl %edi
; X86-NOBMI-NEXT:    popl %ebx
; X86-NOBMI-NEXT:    popl %ebp
; X86-NOBMI-NEXT:    retl
;
; X86-BMI-LABEL: foo:
; X86-BMI:       # %bb.0:
; X86-BMI-NEXT:    pushl %ebp
; X86-BMI-NEXT:    pushl %ebx
; X86-BMI-NEXT:    pushl %edi
; X86-BMI-NEXT:    pushl %esi
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-BMI-NEXT:    movl %ecx, %edx
; X86-BMI-NEXT:    mulxl %edi, %ebx, %ebx
; X86-BMI-NEXT:    movl %eax, %edx
; X86-BMI-NEXT:    mulxl %edi, %edi, %ebp
; X86-BMI-NEXT:    addl %ebx, %edi
; X86-BMI-NEXT:    adcl $0, %ebp
; X86-BMI-NEXT:    movl %ecx, %edx
; X86-BMI-NEXT:    mulxl %esi, %ecx, %ebx
; X86-BMI-NEXT:    addl %edi, %ecx
; X86-BMI-NEXT:    adcl %ebp, %ebx
; X86-BMI-NEXT:    setb %cl
; X86-BMI-NEXT:    movzbl %cl, %ecx
; X86-BMI-NEXT:    movl %eax, %edx
; X86-BMI-NEXT:    mulxl %esi, %eax, %edx
; X86-BMI-NEXT:    addl %ebx, %eax
; X86-BMI-NEXT:    adcl %ecx, %edx
; X86-BMI-NEXT:    popl %esi
; X86-BMI-NEXT:    popl %edi
; X86-BMI-NEXT:    popl %ebx
; X86-BMI-NEXT:    popl %ebp
; X86-BMI-NEXT:    retl
;
; X64-NOBMI-LABEL: foo:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movq %rdi, %rax
; X64-NOBMI-NEXT:    mulq %rsi
; X64-NOBMI-NEXT:    movq %rdx, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI-LABEL: foo:
; X64-BMI:       # %bb.0:
; X64-BMI-NEXT:    movq %rdi, %rdx
; X64-BMI-NEXT:    mulxq %rsi, %rax, %rax
; X64-BMI-NEXT:    retq
  %tmp0 = zext i64 %x to i128
  %tmp1 = zext i64 %y to i128
  %tmp2 = mul i128 %tmp0, %tmp1
  %tmp7 = zext i32 64 to i128
  %tmp3 = lshr i128 %tmp2, %tmp7
  %tmp4 = trunc i128 %tmp3 to i64
  ret i64 %tmp4
}

; <rdar://problem/14096009> superfluous multiply by high part of
; zero-extended value.
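; In other words: because %y is zero-extended from i64 to i128, the upper
; 64 bits of that operand are known to be zero, so each 64x64->128 partial
; product in the loop below should need only a single widening multiply.
; The X64 check lines reflect this: one mulq (or mulxq with BMI2) per
; iteration, with the carry forwarded across iterations in a register.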

define i64 @mul1(i64 %n, ptr nocapture %z, ptr nocapture %x, i64 %y) nounwind {
; X86-NOBMI-LABEL: mul1:
; X86-NOBMI:       # %bb.0: # %entry
; X86-NOBMI-NEXT:    pushl %ebp
; X86-NOBMI-NEXT:    pushl %ebx
; X86-NOBMI-NEXT:    pushl %edi
; X86-NOBMI-NEXT:    pushl %esi
; X86-NOBMI-NEXT:    subl $20, %esp
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    orl %ecx, %eax
; X86-NOBMI-NEXT:    je .LBB1_3
; X86-NOBMI-NEXT:  # %bb.1: # %for.body.preheader
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:    xorl %ecx, %ecx
; X86-NOBMI-NEXT:    movl $0, (%esp) # 4-byte Folded Spill
; X86-NOBMI-NEXT:    .p2align 4
; X86-NOBMI-NEXT:  .LBB1_2: # %for.body
; X86-NOBMI-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-NOBMI-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOBMI-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    movl (%eax,%ecx,8), %edi
; X86-NOBMI-NEXT:    movl 4(%eax,%ecx,8), %ebx
; X86-NOBMI-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOBMI-NEXT:    movl %edi, %eax
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT:    mull %esi
; X86-NOBMI-NEXT:    movl %edx, %ebp
; X86-NOBMI-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOBMI-NEXT:    movl %ebx, %eax
; X86-NOBMI-NEXT:    mull %esi
; X86-NOBMI-NEXT:    movl %edx, %ebx
; X86-NOBMI-NEXT:    movl %eax, %esi
; X86-NOBMI-NEXT:    addl %ebp, %esi
; X86-NOBMI-NEXT:    adcl $0, %ebx
; X86-NOBMI-NEXT:    movl %edi, %eax
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    mull %edx
; X86-NOBMI-NEXT:    movl %edx, %ebp
; X86-NOBMI-NEXT:    movl %eax, %edi
; X86-NOBMI-NEXT:    addl %esi, %edi
; X86-NOBMI-NEXT:    adcl %ebx, %ebp
; X86-NOBMI-NEXT:    setb %bl
; X86-NOBMI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOBMI-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NOBMI-NEXT:    addl %ebp, %eax
; X86-NOBMI-NEXT:    movzbl %bl, %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NOBMI-NEXT:    adcl %esi, %edx
; X86-NOBMI-NEXT:    movl %ecx, %ebx
; X86-NOBMI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NOBMI-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NOBMI-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NOBMI-NEXT:    adcl $0, %eax
; X86-NOBMI-NEXT:    adcl $0, %edx
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT:    movl %ecx, (%esi,%ebx,8)
; X86-NOBMI-NEXT:    movl %ebx, %ecx
; X86-NOBMI-NEXT:    movl %edi, 4(%esi,%ebx,8)
; X86-NOBMI-NEXT:    addl $1, %ecx
; X86-NOBMI-NEXT:    movl (%esp), %edi # 4-byte Reload
; X86-NOBMI-NEXT:    adcl $0, %edi
; X86-NOBMI-NEXT:    movl %ecx, %esi
; X86-NOBMI-NEXT:    xorl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT:    movl %edi, (%esp) # 4-byte Spill
; X86-NOBMI-NEXT:    xorl %ebp, %edi
; X86-NOBMI-NEXT:    orl %esi, %edi
; X86-NOBMI-NEXT:    jne .LBB1_2
; X86-NOBMI-NEXT:  .LBB1_3: # %for.end
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:    addl $20, %esp
; X86-NOBMI-NEXT:    popl %esi
; X86-NOBMI-NEXT:    popl %edi
; X86-NOBMI-NEXT:    popl %ebx
; X86-NOBMI-NEXT:    popl %ebp
; X86-NOBMI-NEXT:    retl
;
; X86-BMI-LABEL: mul1:
; X86-BMI:       # %bb.0: # %entry
; X86-BMI-NEXT:    pushl %ebp
; X86-BMI-NEXT:    pushl %ebx
; X86-BMI-NEXT:    pushl %edi
; X86-BMI-NEXT:    pushl %esi
; X86-BMI-NEXT:    subl $20, %esp
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT:    orl %ecx, %eax
; X86-BMI-NEXT:    je .LBB1_3
; X86-BMI-NEXT:  # %bb.1: # %for.body.preheader
; X86-BMI-NEXT:    xorl %ecx, %ecx
; X86-BMI-NEXT:    xorl %eax, %eax
; X86-BMI-NEXT:    xorl %ebx, %ebx
; X86-BMI-NEXT:    xorl %ebp, %ebp
; X86-BMI-NEXT:    .p2align 4
; X86-BMI-NEXT:  .LBB1_2: # %for.body
; X86-BMI-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-BMI-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-BMI-NEXT:    movl %eax, (%esp) # 4-byte Spill
; X86-BMI-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT:    movl (%eax,%ebx,8), %ecx
; X86-BMI-NEXT:    movl 4(%eax,%ebx,8), %esi
; X86-BMI-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-BMI-NEXT:    movl %ecx, %edx
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT:    mulxl %eax, %edx, %edi
; X86-BMI-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-BMI-NEXT:    movl %esi, %edx
; X86-BMI-NEXT:    mulxl %eax, %esi, %eax
; X86-BMI-NEXT:    addl %edi, %esi
; X86-BMI-NEXT:    adcl $0, %eax
; X86-BMI-NEXT:    movl %ecx, %edx
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-BMI-NEXT:    mulxl %ecx, %edi, %ebp
; X86-BMI-NEXT:    addl %esi, %edi
; X86-BMI-NEXT:    adcl %eax, %ebp
; X86-BMI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-BMI-NEXT:    mulxl %ecx, %ecx, %eax
; X86-BMI-NEXT:    setb %dl
; X86-BMI-NEXT:    addl %ebp, %ecx
; X86-BMI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-BMI-NEXT:    movzbl %dl, %edx
; X86-BMI-NEXT:    adcl %edx, %eax
; X86-BMI-NEXT:    movl %eax, %edx
; X86-BMI-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-BMI-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-BMI-NEXT:    adcl (%esp), %edi # 4-byte Folded Reload
; X86-BMI-NEXT:    adcl $0, %ecx
; X86-BMI-NEXT:    adcl $0, %edx
; X86-BMI-NEXT:    movl %edx, (%esp) # 4-byte Spill
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-BMI-NEXT:    movl %eax, (%edx,%ebx,8)
; X86-BMI-NEXT:    movl %edi, 4(%edx,%ebx,8)
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-BMI-NEXT:    addl $1, %ebx
; X86-BMI-NEXT:    adcl $0, %ebp
; X86-BMI-NEXT:    movl %ebx, %edx
; X86-BMI-NEXT:    xorl %esi, %edx
; X86-BMI-NEXT:    movl %ebp, %esi
; X86-BMI-NEXT:    xorl %edi, %esi
; X86-BMI-NEXT:    orl %edx, %esi
; X86-BMI-NEXT:    movl (%esp), %eax # 4-byte Reload
; X86-BMI-NEXT:    jne .LBB1_2
; X86-BMI-NEXT:  .LBB1_3: # %for.end
; X86-BMI-NEXT:    xorl %eax, %eax
; X86-BMI-NEXT:    xorl %edx, %edx
; X86-BMI-NEXT:    addl $20, %esp
; X86-BMI-NEXT:    popl %esi
; X86-BMI-NEXT:    popl %edi
; X86-BMI-NEXT:    popl %ebx
; X86-BMI-NEXT:    popl %ebp
; X86-BMI-NEXT:    retl
;
; X64-NOBMI-LABEL: mul1:
; X64-NOBMI:       # %bb.0: # %entry
; X64-NOBMI-NEXT:    testq %rdi, %rdi
; X64-NOBMI-NEXT:    je .LBB1_3
; X64-NOBMI-NEXT:  # %bb.1: # %for.body.preheader
; X64-NOBMI-NEXT:    movq %rdx, %r8
; X64-NOBMI-NEXT:    xorl %r10d, %r10d
; X64-NOBMI-NEXT:    xorl %r9d, %r9d
; X64-NOBMI-NEXT:    .p2align 4
; X64-NOBMI-NEXT:  .LBB1_2: # %for.body
; X64-NOBMI-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-NOBMI-NEXT:    movq %rcx, %rax
; X64-NOBMI-NEXT:    mulq (%r8,%r9,8)
; X64-NOBMI-NEXT:    addq %r10, %rax
; X64-NOBMI-NEXT:    adcq $0, %rdx
; X64-NOBMI-NEXT:    movq %rax, (%rsi,%r9,8)
; X64-NOBMI-NEXT:    incq %r9
; X64-NOBMI-NEXT:    cmpq %r9, %rdi
; X64-NOBMI-NEXT:    movq %rdx, %r10
; X64-NOBMI-NEXT:    jne .LBB1_2
; X64-NOBMI-NEXT:  .LBB1_3: # %for.end
; X64-NOBMI-NEXT:    xorl %eax, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI-LABEL: mul1:
; X64-BMI:       # %bb.0: # %entry
; X64-BMI-NEXT:    testq %rdi, %rdi
; X64-BMI-NEXT:    je .LBB1_3
; X64-BMI-NEXT:  # %bb.1: # %for.body.preheader
; X64-BMI-NEXT:    movq %rdx, %rax
; X64-BMI-NEXT:    xorl %r9d, %r9d
; X64-BMI-NEXT:    xorl %r8d, %r8d
; X64-BMI-NEXT:    .p2align 4
; X64-BMI-NEXT:  .LBB1_2: # %for.body
; X64-BMI-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-BMI-NEXT:    movq %rcx, %rdx
; X64-BMI-NEXT:    mulxq (%rax,%r8,8), %r10, %rdx
; X64-BMI-NEXT:    addq %r9, %r10
; X64-BMI-NEXT:    adcq $0, %rdx
; X64-BMI-NEXT:    movq %r10, (%rsi,%r8,8)
; X64-BMI-NEXT:    incq %r8
; X64-BMI-NEXT:    cmpq %r8, %rdi
; X64-BMI-NEXT:    movq %rdx, %r9
; X64-BMI-NEXT:    jne .LBB1_2
; X64-BMI-NEXT:  .LBB1_3: # %for.end
; X64-BMI-NEXT:    xorl %eax, %eax
; X64-BMI-NEXT:    retq
entry:
  %conv = zext i64 %y to i128
  %cmp11 = icmp eq i64 %n, 0
  br i1 %cmp11, label %for.end, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %carry.013 = phi i64 [ %conv6, %for.body ], [ 0, %entry ]
  %i.012 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i64, ptr %x, i64 %i.012
  %0 = load i64, ptr %arrayidx, align 8
  %conv2 = zext i64 %0 to i128
  %mul = mul i128 %conv2, %conv
  %conv3 = zext i64 %carry.013 to i128
  %add = add i128 %mul, %conv3
  %conv4 = trunc i128 %add to i64
  %arrayidx5 = getelementptr inbounds i64, ptr %z, i64 %i.012
  store i64 %conv4, ptr %arrayidx5, align 8
  %shr = lshr i128 %add, 64
  %conv6 = trunc i128 %shr to i64
  %inc = add i64 %i.012, 1
  %exitcond = icmp eq i64 %inc, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret i64 0
}