xref: /llvm-project/llvm/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 -mtriple=i686-- -mattr=+mmx | FileCheck %s
3; PR2082
4; Local register allocator was refusing to use ESI, EDI, and EBP so it ran out of
5; registers.
; transpose4x4(dst, src, dst_stride, src_stride)
;
; Test body: builds four byte-offset row pointers from %dst (dst + k*dst_stride,
; k = 0..3) and four from %src (src + k*src_stride, k = 0..3), then hands all
; eight to one inline-asm block as indirect memory operands. Every pointer is
; used only by the asm call, so all eight addresses must be available at that
; single instruction. The expected output below addresses them through
; %eax, %ecx, %edx, %esi, %edi and %ebx.
; Judging by the function name, the MMX sequence appears to transpose a 4x4
; block of bytes -- NOTE(review): inferred from the name and the punpck*
; pattern, not verified here.
6define void @transpose4x4(ptr %dst, ptr %src, i32 %dst_stride, i32 %src_stride) {
7; CHECK-LABEL: transpose4x4:
8; CHECK:       # %bb.0: # %entry
9; CHECK-NEXT:    pushl %ebx
10; CHECK-NEXT:    .cfi_def_cfa_offset 8
11; CHECK-NEXT:    pushl %edi
12; CHECK-NEXT:    .cfi_def_cfa_offset 12
13; CHECK-NEXT:    pushl %esi
14; CHECK-NEXT:    .cfi_def_cfa_offset 16
15; CHECK-NEXT:    .cfi_offset %esi, -16
16; CHECK-NEXT:    .cfi_offset %edi, -12
17; CHECK-NEXT:    .cfi_offset %ebx, -8
18; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
19; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
20; CHECK-NEXT:    leal (%ecx,%ecx,2), %edx
21; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
22; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
23; CHECK-NEXT:    leal (%edi,%edi,2), %ebx
24; CHECK-NEXT:    #APP
25; CHECK-NEXT:    movd (%esi), %mm0
26; CHECK-NEXT:    movd (%esi,%edi), %mm1
27; CHECK-NEXT:    movd (%esi,%edi,2), %mm2
28; CHECK-NEXT:    movd (%esi,%ebx), %mm3
29; CHECK-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
30; CHECK-NEXT:    punpcklbw %mm3, %mm2 # mm2 = mm2[0],mm3[0],mm2[1],mm3[1],mm2[2],mm3[2],mm2[3],mm3[3]
31; CHECK-NEXT:    movq %mm0, %mm1
32; CHECK-NEXT:    punpcklwd %mm2, %mm0 # mm0 = mm0[0],mm2[0],mm0[1],mm2[1]
33; CHECK-NEXT:    punpckhwd %mm2, %mm1 # mm1 = mm1[2],mm2[2],mm1[3],mm2[3]
34; CHECK-NEXT:    movd %mm0, (%eax)
35; CHECK-NEXT:    punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
36; CHECK-NEXT:    movd %mm0, (%eax,%ecx)
37; CHECK-NEXT:    movd %mm1, (%eax,%ecx,2)
38; CHECK-NEXT:    punpckhdq %mm1, %mm1 # mm1 = mm1[1,1]
39; CHECK-NEXT:    movd %mm1, (%eax,%edx)
40; CHECK-EMPTY:
41; CHECK-NEXT:    #NO_APP
42; CHECK-NEXT:    popl %esi
43; CHECK-NEXT:    .cfi_def_cfa_offset 12
44; CHECK-NEXT:    popl %edi
45; CHECK-NEXT:    .cfi_def_cfa_offset 8
46; CHECK-NEXT:    popl %ebx
47; CHECK-NEXT:    .cfi_def_cfa_offset 4
48; CHECK-NEXT:    retl
49entry:
	; One stack slot per incoming argument; each argument is stored once
	; and reloaded from its slot before every use below.
50	%dst_addr = alloca ptr		; <ptr> [#uses=5]
51	%src_addr = alloca ptr		; <ptr> [#uses=5]
52	%dst_stride_addr = alloca i32		; <ptr> [#uses=4]
53	%src_stride_addr = alloca i32		; <ptr> [#uses=4]
	; No-op bitcast with no users; presumably a front-end insertion marker
	; left in the reduced test case -- TODO confirm.
54	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
55	store ptr %dst, ptr %dst_addr
56	store ptr %src, ptr %src_addr
57	store i32 %dst_stride, ptr %dst_stride_addr
58	store i32 %src_stride, ptr %src_stride_addr
	; Destination row pointers (byte offsets via i8 GEPs):
	;   %tmp1  = dst + 0
	;   %tmp5  = dst + dst_stride
	;   %tmp10 = dst + 2 * dst_stride
	;   %tmp16 = dst + 3 * dst_stride
59	%tmp = load ptr, ptr %dst_addr, align 4		; <ptr> [#uses=1]
60	%tmp1 = getelementptr i8, ptr %tmp, i32 0		; <ptr> [#uses=1]
61	%tmp3 = load ptr, ptr %dst_addr, align 4		; <ptr> [#uses=1]
62	%tmp4 = load i32, ptr %dst_stride_addr, align 4		; <i32> [#uses=1]
63	%tmp5 = getelementptr i8, ptr %tmp3, i32 %tmp4		; <ptr> [#uses=1]
64	%tmp7 = load i32, ptr %dst_stride_addr, align 4		; <i32> [#uses=1]
65	%tmp8 = mul i32 %tmp7, 2		; <i32> [#uses=1]
66	%tmp9 = load ptr, ptr %dst_addr, align 4		; <ptr> [#uses=1]
67	%tmp10 = getelementptr i8, ptr %tmp9, i32 %tmp8		; <ptr> [#uses=1]
68	%tmp13 = load i32, ptr %dst_stride_addr, align 4		; <i32> [#uses=1]
69	%tmp14 = mul i32 %tmp13, 3		; <i32> [#uses=1]
70	%tmp15 = load ptr, ptr %dst_addr, align 4		; <ptr> [#uses=1]
71	%tmp16 = getelementptr i8, ptr %tmp15, i32 %tmp14		; <ptr> [#uses=1]
	; Source row pointers, built the same way from %src and %src_stride:
	;   %tmp19 = src + 0
	;   %tmp23 = src + src_stride
	;   %tmp28 = src + 2 * src_stride
	;   %tmp33 = src + 3 * src_stride
72	%tmp18 = load ptr, ptr %src_addr, align 4		; <ptr> [#uses=1]
73	%tmp19 = getelementptr i8, ptr %tmp18, i32 0		; <ptr> [#uses=1]
74	%tmp21 = load ptr, ptr %src_addr, align 4		; <ptr> [#uses=1]
75	%tmp22 = load i32, ptr %src_stride_addr, align 4		; <i32> [#uses=1]
76	%tmp23 = getelementptr i8, ptr %tmp21, i32 %tmp22		; <ptr> [#uses=1]
77	%tmp25 = load i32, ptr %src_stride_addr, align 4		; <i32> [#uses=1]
78	%tmp26 = mul i32 %tmp25, 2		; <i32> [#uses=1]
79	%tmp27 = load ptr, ptr %src_addr, align 4		; <ptr> [#uses=1]
80	%tmp28 = getelementptr i8, ptr %tmp27, i32 %tmp26		; <ptr> [#uses=1]
81	%tmp30 = load i32, ptr %src_stride_addr, align 4		; <i32> [#uses=1]
82	%tmp31 = mul i32 %tmp30, 3		; <i32> [#uses=1]
83	%tmp32 = load ptr, ptr %src_addr, align 4		; <ptr> [#uses=1]
84	%tmp33 = getelementptr i8, ptr %tmp32, i32 %tmp31		; <ptr> [#uses=1]
	; Inline asm operand map, following the constraint string order:
	;   $0..$3 ("=*m", indirect memory outputs) = %tmp1, %tmp5, %tmp10, %tmp16
	;   $4..$7 ("*m",  indirect memory inputs)  = %tmp19, %tmp23, %tmp28, %tmp33
	; The template loads $4..$7 with movd into mm0..mm3, interleaves them with
	; punpcklbw / punpcklwd / punpckhwd / punpckhdq, and stores the results to
	; $0..$3 with movd. It names mm0..mm3 directly in the template text; the
	; clobber list declares only dirflag, fpsr and flags.
	; The template ends with "\0A\09", i.e. a newline and a tab after the
	; last instruction.
85	call void asm sideeffect "movd  $4, %mm0                \0A\09movd  $5, %mm1                \0A\09movd  $6, %mm2                \0A\09movd  $7, %mm3                \0A\09punpcklbw %mm1, %mm0         \0A\09punpcklbw %mm3, %mm2         \0A\09movq %mm0, %mm1              \0A\09punpcklwd %mm2, %mm0         \0A\09punpckhwd %mm2, %mm1         \0A\09movd  %mm0, $0                \0A\09punpckhdq %mm0, %mm0         \0A\09movd  %mm0, $1                \0A\09movd  %mm1, $2                \0A\09punpckhdq %mm1, %mm1         \0A\09movd  %mm1, $3                \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( ptr elementtype( i32) %tmp1, ptr elementtype(i32) %tmp5, ptr elementtype(i32) %tmp10, ptr elementtype(i32) %tmp16, ptr elementtype(i32) %tmp19, ptr elementtype(i32) %tmp23, ptr elementtype(i32) %tmp28, ptr elementtype(i32) %tmp33 ) nounwind
86	br label %return
87
88return:		; preds = %entry
89	ret void
90}
91