; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86

; Codegen tests for the unsigned fixed-point multiply intrinsic
; (umul.fix): full-width multiply followed by a right shift by the
; scale operand, across scalar, wide, narrow, and vector types.

declare i4 @llvm.umul.fix.i4 (i4, i4, i32)
declare i32 @llvm.umul.fix.i32 (i32, i32, i32)
declare i64 @llvm.umul.fix.i64 (i64, i64, i32)
declare <4 x i32> @llvm.umul.fix.v4i32(<4 x i32>, <4 x i32>, i32)

define i32 @func(i32 %x, i32 %y) nounwind {
; X64-LABEL: func:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    imulq %rax, %rcx
; X64-NEXT:    movq %rcx, %rax
; X64-NEXT:    shrq $32, %rax
; X64-NEXT:    shldl $30, %ecx, %eax
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    retq
;
; X86-LABEL: func:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    shrdl $2, %edx, %eax
; X86-NEXT:    retl
  %tmp = call i32 @llvm.umul.fix.i32(i32 %x, i32 %y, i32 2)
  ret i32 %tmp
}

define i64 @func2(i64 %x, i64 %y) nounwind {
; X64-LABEL: func2:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    mulq %rsi
; X64-NEXT:    shrdq $2, %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    addl %edx, %ebx
; X86-NEXT:    adcl $0, %edi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    addl %ebx, %eax
; X86-NEXT:    adcl %edi, %edx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl %ecx, %edx
; X86-NEXT:    shldl $30, %eax, %edx
; X86-NEXT:    shldl $30, %esi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 2)
  ret i64 %tmp
}

define i4 @func3(i4 %x, i4 %y) nounwind {
; X64-LABEL: func3:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andb $15, %al
; X64-NEXT:    andb $15, %sil
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %sil
; X64-NEXT:    shrb $2, %al
; X64-NEXT:    retq
;
; X86-LABEL: func3:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andb $15, %al
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andb $15, %cl
; X86-NEXT:    mulb %cl
; X86-NEXT:    shrb $2, %al
; X86-NEXT:    retl
  %tmp = call i4 @llvm.umul.fix.i4(i4 %x, i4 %y, i32 2)
  ret i4 %tmp
}

define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-LABEL: vec:
; X64:       # %bb.0:
; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X64-NEXT:    pmuludq %xmm2, %xmm1
; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X64-NEXT:    psrld $2, %xmm3
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    pslld $30, %xmm0
; X64-NEXT:    por %xmm3, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: vec:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %esi
; X86-NEXT:    shldl $30, %eax, %esi
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %ebx
; X86-NEXT:    shldl $30, %eax, %ebx
; X86-NEXT:    movl %ebp, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %ebp
; X86-NEXT:    shldl $30, %eax, %ebp
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    shldl $30, %eax, %edx
; X86-NEXT:    movl %edx, 12(%ecx)
; X86-NEXT:    movl %ebp, 8(%ecx)
; X86-NEXT:    movl %ebx, 4(%ecx)
; X86-NEXT:    movl %esi, (%ecx)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %tmp = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 2)
  ret <4 x i32> %tmp
}

; These result in regular integer multiplication
define i32 @func4(i32 %x, i32 %y) nounwind {
; X64-LABEL: func4:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: func4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %tmp = call i32 @llvm.umul.fix.i32(i32 %x, i32 %y, i32 0)
  ret i32 %tmp
}

define i64 @func5(i64 %x, i64 %y) nounwind {
; X64-LABEL: func5:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    imulq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func5:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull %esi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl %ecx, %edx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %esi
; X86-NEXT:    addl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 0)
  ret i64 %tmp
}

define i4 @func6(i4 %x, i4 %y) nounwind {
; X64-LABEL: func6:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andb $15, %al
; X64-NEXT:    andb $15, %sil
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %sil
; X64-NEXT:    retq
;
; X86-LABEL: func6:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andb $15, %al
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andb $15, %cl
; X86-NEXT:    mulb %cl
; X86-NEXT:    retl
  %tmp = call i4 @llvm.umul.fix.i4(i4 %x, i4 %y, i32 0)
  ret i4 %tmp
}

define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-LABEL: vec2:
; X64:       # %bb.0:
; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X64-NEXT:    pmuludq %xmm2, %xmm1
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    retq
;
; X86-LABEL: vec2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %edi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %esi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %edx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %edx, 8(%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %edi, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl $4
  %tmp = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 0)
  ret <4 x i32> %tmp
}

; Scale == 32 takes the result straight off the high/low halves of the
; 128-bit (or 64-bit) product, so no extra shift is needed on x86-64.
define i64 @func7(i64 %x, i64 %y) nounwind {
; X64-LABEL: func7:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    mulq %rsi
; X64-NEXT:    shrdq $32, %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func7:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %esi
; X86-NEXT:    movl %eax, %edi
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    addl %edx, %edi
; X86-NEXT:    adcl $0, %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    addl %edi, %eax
; X86-NEXT:    adcl %esi, %edx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl %ecx, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 32)
  ret i64 %tmp
}

define i64 @func8(i64 %x, i64 %y) nounwind {
; X64-LABEL: func8:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    mulq %rsi
; X64-NEXT:    shrdq $63, %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func8:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    movl %eax, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull %ebx
; X86-NEXT:    addl %edx, %edi
; X86-NEXT:    adcl $0, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    movl %edx, %esi
; X86-NEXT:    movl %eax, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull %ebx
; X86-NEXT:    addl %edi, %eax
; X86-NEXT:    adcl %edx, %ecx
; X86-NEXT:    adcl $0, %esi
; X86-NEXT:    addl %ebp, %ecx
; X86-NEXT:    adcl $0, %esi
; X86-NEXT:    shldl $1, %ecx, %esi
; X86-NEXT:    shrdl $31, %ecx, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 63)
  ret i64 %tmp
}

; Scale == bit width: the result is exactly the high half of the product.
define i64 @func9(i64 %x, i64 %y) nounwind {
; X64-LABEL: func9:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    mulq %rsi
; X64-NEXT:    movq %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func9:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    mull %esi
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    movl %eax, %ebp
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %ebx
; X86-NEXT:    addl %ebp, %ebx
; X86-NEXT:    adcl $0, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull %esi
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    movl %eax, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    addl %ebx, %eax
; X86-NEXT:    adcl %edx, %ecx
; X86-NEXT:    adcl $0, %edi
; X86-NEXT:    addl %ebp, %ecx
; X86-NEXT:    adcl $0, %edi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 64)
  ret i64 %tmp
}