; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64

define i32 @t(ptr %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind {
; X86-LABEL: t:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%eax,%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    imull %ecx, %esi
; X64-NEXT:    addl %edx, %esi
; X64-NEXT:    movslq %esi, %rax
; X64-NEXT:    movl (%rdi,%rax), %eax
; X64-NEXT:    retq
entry:
  %tmp7 = mul i32 %idxY, %ref_frame_stride  ; <i32> [#uses=2]
  %tmp9 = add i32 %tmp7, %idxX  ; <i32> [#uses=1]
  %tmp11 = getelementptr i8, ptr %ref_frame_ptr, i32 %tmp9  ; <ptr> [#uses=1]
  %tmp13 = load i32, ptr %tmp11, align 4  ; <i32> [#uses=1]
  %tmp18 = add i32 %idxX, 4  ; <i32> [#uses=1]
  %tmp20.sum = add i32 %tmp18, %tmp7  ; <i32> [#uses=1]
  %tmp21 = getelementptr i8, ptr %ref_frame_ptr, i32 %tmp20.sum  ; <ptr> [#uses=1]
  %tmp23 = load i16, ptr %tmp21, align 2  ; <i16> [#uses=1]
  %tmp2425 = zext i16 %tmp23 to i64  ; <i64> [#uses=1]
  %tmp26 = shl i64 %tmp2425, 32  ; <i64> [#uses=1]
  %tmp2728 = zext i32 %tmp13 to i64  ; <i64> [#uses=1]
  %tmp29 = or i64 %tmp26, %tmp2728  ; <i64> [#uses=1]
  %tmp3454 = bitcast i64 %tmp29 to double  ; <double> [#uses=1]
  %tmp35 = insertelement <2 x double> undef, double %tmp3454, i32 0  ; <<2 x double>> [#uses=1]
  %tmp36 = insertelement <2 x double> %tmp35, double 0.000000e+00, i32 1  ; <<2 x double>> [#uses=1]
  %tmp42 = bitcast <2 x double> %tmp36 to <8 x i16>  ; <<8 x i16>> [#uses=1]
  %tmp43 = shufflevector <8 x i16> %tmp42, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7 >  ; <<8 x i16>> [#uses=1]
  %tmp47 = bitcast <8 x i16> %tmp43 to <4 x i32>  ; <<4 x i32>> [#uses=1]
  %tmp48 = extractelement <4 x i32> %tmp47, i32 0  ; <i32> [#uses=1]
  ret i32 %tmp48
}

; Test CSE for SDAG nodes with multiple results (UMUL_LOHI).
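; The i96 square widens to a 192-bit multiply built from 64x64->128
; UMUL_LOHI pieces. The two cross products (lo*hi and hi*lo) are the
; same node, so after CSE the value should be computed once and added
; twice: the checks below expect only two mulq plus one imulq on x86-64
; rather than a third multiply for the duplicated cross product.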
define i96 @square_high(i96 %x) nounwind {
; X86-LABEL: square_high:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    mull %edi
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    mull %edi
; X86-NEXT:    addl %eax, %ecx
; X86-NEXT:    movl %edx, %ebp
; X86-NEXT:    adcl $0, %ebp
; X86-NEXT:    addl %eax, %ecx
; X86-NEXT:    adcl %edx, %ebp
; X86-NEXT:    setb %al
; X86-NEXT:    movzbl %al, %ecx
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    mull %ebx
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    addl %ebp, %ebx
; X86-NEXT:    adcl %edx, %ecx
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull %edi
; X86-NEXT:    movl %edx, (%esp) ## 4-byte Spill
; X86-NEXT:    movl %eax, %ebp
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %esi
; X86-NEXT:    movl %eax, %edi
; X86-NEXT:    addl (%esp), %edi ## 4-byte Folded Reload
; X86-NEXT:    adcl $0, %esi
; X86-NEXT:    addl %ebp, %ebx
; X86-NEXT:    adcl %edi, %ecx
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    adcl $0, %eax
; X86-NEXT:    setb %dl
; X86-NEXT:    addl %ebp, %ebx
; X86-NEXT:    adcl %ecx, %edi
; X86-NEXT:    movzbl %dl, %ecx
; X86-NEXT:    adcl %eax, %esi
; X86-NEXT:    adcl $0, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull %eax
; X86-NEXT:    addl %eax, %esi
; X86-NEXT:    adcl %edx, %ecx
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: square_high:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movq %rcx, %rax
; X64-NEXT:    mulq %rdi
; X64-NEXT:    movq %rdx, %rsi
; X64-NEXT:    movq %rax, %r8
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    mulq %rdi
; X64-NEXT:    addq %r8, %rdx
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    adcq $0, %rax
; X64-NEXT:    addq %rdx, %r8
; X64-NEXT:    adcq %rsi, %rax
; X64-NEXT:    imulq %rcx, %rcx
; X64-NEXT:    addq %rax, %rcx
; X64-NEXT:    shrdq $32, %rcx, %r8
; X64-NEXT:    shrq $32, %rcx
; X64-NEXT:    movq %r8, %rax
; X64-NEXT:    movq %rcx, %rdx
; X64-NEXT:    retq
entry:
  %conv = zext i96 %x to i192
  %mul = mul nuw i192 %conv, %conv
  %shr = lshr i192 %mul, 96
  %conv2 = trunc i192 %shr to i96
  ret i96 %conv2
}