xref: /llvm-project/llvm/test/CodeGen/X86/dagcombine-cse.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86
3; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64
4
; Regression test for DAG-combine CSE of an address computation: %tmp7
; (idxY * stride) feeds the addressing math of both loads below.  The
; checks confirm it is computed once — a single imull in both the X86 and
; X64 output.  Only lane 0 of the final vector is returned, so the second
; (i16) load, the shuffle, and the vector insert/extract chain are all
; expected to fold away: exactly one load appears in the generated code.
5define i32 @t(ptr %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind  {
6; X86-LABEL: t:
7; X86:       ## %bb.0: ## %entry
8; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
9; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
10; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
11; X86-NEXT:    addl {{[0-9]+}}(%esp), %ecx
12; X86-NEXT:    movl (%eax,%ecx), %eax
13; X86-NEXT:    retl
14;
15; X64-LABEL: t:
16; X64:       ## %bb.0: ## %entry
17; X64-NEXT:    imull %ecx, %esi
18; X64-NEXT:    addl %edx, %esi
19; X64-NEXT:    movslq %esi, %rax
20; X64-NEXT:    movl (%rdi,%rax), %eax
21; X64-NEXT:    retq
22entry:
	; %tmp7 has two uses (both GEP address computations) — the value the
	; combiner must CSE.
23	%tmp7 = mul i32 %idxY, %ref_frame_stride		; <i32> [#uses=2]
24	%tmp9 = add i32 %tmp7, %idxX		; <i32> [#uses=1]
25	%tmp11 = getelementptr i8, ptr %ref_frame_ptr, i32 %tmp9		; <ptr> [#uses=1]
26	%tmp13 = load i32, ptr %tmp11, align 4		; <i32> [#uses=1]
	; Second address reuses %tmp7 with the operands of the add commuted
	; ((idxX + 4) + tmp7 instead of tmp7 + idxX).
27	%tmp18 = add i32 %idxX, 4		; <i32> [#uses=1]
28	%tmp20.sum = add i32 %tmp18, %tmp7		; <i32> [#uses=1]
29	%tmp21 = getelementptr i8, ptr %ref_frame_ptr, i32 %tmp20.sum		; <ptr> [#uses=1]
30	%tmp23 = load i16, ptr %tmp21, align 2		; <i16> [#uses=1]
	; Pack the two loads into an i64 / <2 x double>, shuffle, and extract
	; lane 0.  Since only lane 0 (== %tmp13) survives, everything derived
	; from the i16 load is dead and must be combined away.
31	%tmp2425 = zext i16 %tmp23 to i64		; <i64> [#uses=1]
32	%tmp26 = shl i64 %tmp2425, 32		; <i64> [#uses=1]
33	%tmp2728 = zext i32 %tmp13 to i64		; <i64> [#uses=1]
34	%tmp29 = or i64 %tmp26, %tmp2728		; <i64> [#uses=1]
35	%tmp3454 = bitcast i64 %tmp29 to double		; <double> [#uses=1]
36	%tmp35 = insertelement <2 x double> undef, double %tmp3454, i32 0		; <<2 x double>> [#uses=1]
37	%tmp36 = insertelement <2 x double> %tmp35, double 0.000000e+00, i32 1		; <<2 x double>> [#uses=1]
38	%tmp42 = bitcast <2 x double> %tmp36 to <8 x i16>		; <<8 x i16>> [#uses=1]
39	%tmp43 = shufflevector <8 x i16> %tmp42, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7 >		; <<8 x i16>> [#uses=1]
40	%tmp47 = bitcast <8 x i16> %tmp43 to <4 x i32>		; <<4 x i32>> [#uses=1]
41	%tmp48 = extractelement <4 x i32> %tmp47, i32 0		; <i32> [#uses=1]
42	ret i32 %tmp48
43}
44
45; Test CSE for SDAG nodes with multiple results (UMUL_LOHI).
; square_high(x) returns the upper 96 bits of the 192-bit product x*x
; (a 96-bit widening square).  Legalizing the i192 multiply expands into
; several UMUL_LOHI nodes; because the multiplicand equals the multiplier,
; identical partial products arise and SelectionDAG CSE must unify the
; duplicated multi-result nodes instead of emitting each multiply twice.
; The checks below pin the expected (deduplicated) mull/mulq sequence.
46define i96 @square_high(i96 %x) nounwind {
47; X86-LABEL: square_high:
48; X86:       ## %bb.0: ## %entry
49; X86-NEXT:    pushl %ebp
50; X86-NEXT:    pushl %ebx
51; X86-NEXT:    pushl %edi
52; X86-NEXT:    pushl %esi
53; X86-NEXT:    pushl %eax
54; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
55; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
56; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
57; X86-NEXT:    movl %edi, %eax
58; X86-NEXT:    mull %edi
59; X86-NEXT:    movl %edx, %ecx
60; X86-NEXT:    movl %ebx, %eax
61; X86-NEXT:    mull %edi
62; X86-NEXT:    addl %eax, %ecx
63; X86-NEXT:    movl %edx, %ebp
64; X86-NEXT:    adcl $0, %ebp
65; X86-NEXT:    addl %eax, %ecx
66; X86-NEXT:    adcl %edx, %ebp
67; X86-NEXT:    setb %al
68; X86-NEXT:    movzbl %al, %ecx
69; X86-NEXT:    movl %ebx, %eax
70; X86-NEXT:    mull %ebx
71; X86-NEXT:    movl %eax, %ebx
72; X86-NEXT:    addl %ebp, %ebx
73; X86-NEXT:    adcl %edx, %ecx
74; X86-NEXT:    movl %esi, %eax
75; X86-NEXT:    mull %edi
76; X86-NEXT:    movl %edx, (%esp) ## 4-byte Spill
77; X86-NEXT:    movl %eax, %ebp
78; X86-NEXT:    movl %esi, %eax
79; X86-NEXT:    mull {{[0-9]+}}(%esp)
80; X86-NEXT:    movl %edx, %esi
81; X86-NEXT:    movl %eax, %edi
82; X86-NEXT:    addl (%esp), %edi ## 4-byte Folded Reload
83; X86-NEXT:    adcl $0, %esi
84; X86-NEXT:    addl %ebp, %ebx
85; X86-NEXT:    adcl %edi, %ecx
86; X86-NEXT:    movl %esi, %eax
87; X86-NEXT:    adcl $0, %eax
88; X86-NEXT:    setb %dl
89; X86-NEXT:    addl %ebp, %ebx
90; X86-NEXT:    adcl %ecx, %edi
91; X86-NEXT:    movzbl %dl, %ecx
92; X86-NEXT:    adcl %eax, %esi
93; X86-NEXT:    adcl $0, %ecx
94; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
95; X86-NEXT:    mull %eax
96; X86-NEXT:    addl %eax, %esi
97; X86-NEXT:    adcl %edx, %ecx
98; X86-NEXT:    movl %edi, %eax
99; X86-NEXT:    movl %esi, %edx
100; X86-NEXT:    addl $4, %esp
101; X86-NEXT:    popl %esi
102; X86-NEXT:    popl %edi
103; X86-NEXT:    popl %ebx
104; X86-NEXT:    popl %ebp
105; X86-NEXT:    retl
106;
107; X64-LABEL: square_high:
108; X64:       ## %bb.0: ## %entry
109; X64-NEXT:    movl %esi, %ecx
110; X64-NEXT:    movq %rcx, %rax
111; X64-NEXT:    mulq %rdi
112; X64-NEXT:    movq %rdx, %rsi
113; X64-NEXT:    movq %rax, %r8
114; X64-NEXT:    movq %rdi, %rax
115; X64-NEXT:    mulq %rdi
116; X64-NEXT:    addq %r8, %rdx
117; X64-NEXT:    movq %rsi, %rax
118; X64-NEXT:    adcq $0, %rax
119; X64-NEXT:    addq %rdx, %r8
120; X64-NEXT:    adcq %rsi, %rax
121; X64-NEXT:    imulq %rcx, %rcx
122; X64-NEXT:    shrdq $32, %rcx, %r8
123; X64-NEXT:    shrq $32, %rcx
124; X64-NEXT:    movq %r8, %rax
125; X64-NEXT:    movq %rcx, %rdx
126; X64-NEXT:    retq
127entry:
  ; Widen to i192 so the full 192-bit square is representable, square,
  ; then keep only the high 96 bits.
128  %conv = zext i96 %x to i192
129  %mul = mul nuw i192 %conv, %conv
130  %shr = lshr i192 %mul, 96
131  %conv2 = trunc i192 %shr to i96
132  ret i96 %conv2
133}
135