; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 -mtriple=i686-- -mattr=+mmx | FileCheck %s
; PR2082
; Local register allocator was refusing to use ESI, EDI, and EBP so it ran out of
; registers.
;
; The function below forms four row addresses in each buffer
; (base + k * stride for k = 0..3) and passes all eight to a single inline asm
; statement as indirect memory operands: operands $0-$3 are the "=*m" outputs
; (destination rows) and $4-$7 are the "*m" inputs (source rows).
; The expected output addresses those operands through EAX, ECX, EDX, ESI, EDI
; and EBX.
define void @transpose4x4(ptr %dst, ptr %src, i32 %dst_stride, i32 %src_stride) {
; CHECK-LABEL: transpose4x4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %ebx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    .cfi_def_cfa_offset 12
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %esi, -16
; CHECK-NEXT:    .cfi_offset %edi, -12
; CHECK-NEXT:    .cfi_offset %ebx, -8
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    leal (%ecx,%ecx,2), %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    leal (%edi,%edi,2), %ebx
; CHECK-NEXT:    #APP
; CHECK-NEXT:    movd (%esi), %mm0
; CHECK-NEXT:    movd (%esi,%edi), %mm1
; CHECK-NEXT:    movd (%esi,%edi,2), %mm2
; CHECK-NEXT:    movd (%esi,%ebx), %mm3
; CHECK-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
; CHECK-NEXT:    punpcklbw %mm3, %mm2 # mm2 = mm2[0],mm3[0],mm2[1],mm3[1],mm2[2],mm3[2],mm2[3],mm3[3]
; CHECK-NEXT:    movq %mm0, %mm1
; CHECK-NEXT:    punpcklwd %mm2, %mm0 # mm0 = mm0[0],mm2[0],mm0[1],mm2[1]
; CHECK-NEXT:    punpckhwd %mm2, %mm1 # mm1 = mm1[2],mm2[2],mm1[3],mm2[3]
; CHECK-NEXT:    movd %mm0, (%eax)
; CHECK-NEXT:    punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
; CHECK-NEXT:    movd %mm0, (%eax,%ecx)
; CHECK-NEXT:    movd %mm1, (%eax,%ecx,2)
; CHECK-NEXT:    punpckhdq %mm1, %mm1 # mm1 = mm1[1,1]
; CHECK-NEXT:    movd %mm1, (%eax,%edx)
; CHECK-EMPTY:
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    .cfi_def_cfa_offset 12
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    popl %ebx
; CHECK-NEXT:    .cfi_def_cfa_offset 4
; CHECK-NEXT:    retl
entry:
  ; Each argument is stored to its own stack slot and reloaded at every use.
  %dst_addr = alloca ptr ; <ptr> [#uses=5]
  %src_addr = alloca ptr ; <ptr> [#uses=5]
  %dst_stride_addr = alloca i32 ; <ptr> [#uses=4]
  %src_stride_addr = alloca i32 ; <ptr> [#uses=4]
  %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
  store ptr %dst, ptr %dst_addr
  store ptr %src, ptr %src_addr
  store i32 %dst_stride, ptr %dst_stride_addr
  store i32 %src_stride, ptr %src_stride_addr
  ; Destination row addresses: dst + {0, 1, 2, 3} * dst_stride.
  %tmp = load ptr, ptr %dst_addr, align 4 ; <ptr> [#uses=1]
  %tmp1 = getelementptr i8, ptr %tmp, i32 0 ; <ptr> [#uses=1]
  %tmp3 = load ptr, ptr %dst_addr, align 4 ; <ptr> [#uses=1]
  %tmp4 = load i32, ptr %dst_stride_addr, align 4 ; <i32> [#uses=1]
  %tmp5 = getelementptr i8, ptr %tmp3, i32 %tmp4 ; <ptr> [#uses=1]
  %tmp7 = load i32, ptr %dst_stride_addr, align 4 ; <i32> [#uses=1]
  %tmp8 = mul i32 %tmp7, 2 ; <i32> [#uses=1]
  %tmp9 = load ptr, ptr %dst_addr, align 4 ; <ptr> [#uses=1]
  %tmp10 = getelementptr i8, ptr %tmp9, i32 %tmp8 ; <ptr> [#uses=1]
  %tmp13 = load i32, ptr %dst_stride_addr, align 4 ; <i32> [#uses=1]
  %tmp14 = mul i32 %tmp13, 3 ; <i32> [#uses=1]
  %tmp15 = load ptr, ptr %dst_addr, align 4 ; <ptr> [#uses=1]
  %tmp16 = getelementptr i8, ptr %tmp15, i32 %tmp14 ; <ptr> [#uses=1]
  ; Source row addresses: src + {0, 1, 2, 3} * src_stride.
  %tmp18 = load ptr, ptr %src_addr, align 4 ; <ptr> [#uses=1]
  %tmp19 = getelementptr i8, ptr %tmp18, i32 0 ; <ptr> [#uses=1]
  %tmp21 = load ptr, ptr %src_addr, align 4 ; <ptr> [#uses=1]
  %tmp22 = load i32, ptr %src_stride_addr, align 4 ; <i32> [#uses=1]
  %tmp23 = getelementptr i8, ptr %tmp21, i32 %tmp22 ; <ptr> [#uses=1]
  %tmp25 = load i32, ptr %src_stride_addr, align 4 ; <i32> [#uses=1]
  %tmp26 = mul i32 %tmp25, 2 ; <i32> [#uses=1]
  %tmp27 = load ptr, ptr %src_addr, align 4 ; <ptr> [#uses=1]
  %tmp28 = getelementptr i8, ptr %tmp27, i32 %tmp26 ; <ptr> [#uses=1]
  %tmp30 = load i32, ptr %src_stride_addr, align 4 ; <i32> [#uses=1]
  %tmp31 = mul i32 %tmp30, 3 ; <i32> [#uses=1]
  %tmp32 = load ptr, ptr %src_addr, align 4 ; <ptr> [#uses=1]
  %tmp33 = getelementptr i8, ptr %tmp32, i32 %tmp31 ; <ptr> [#uses=1]
  ; One asm statement takes all eight addresses: it loads the four source rows
  ; ($4-$7) into mm0-mm3, interleaves them, and stores to the four destination
  ; rows ($0-$3).
  call void asm sideeffect "movd $4, %mm0 \0A\09movd $5, %mm1 \0A\09movd $6, %mm2 \0A\09movd $7, %mm3 \0A\09punpcklbw %mm1, %mm0 \0A\09punpcklbw %mm3, %mm2 \0A\09movq %mm0, %mm1 \0A\09punpcklwd %mm2, %mm0 \0A\09punpckhwd %mm2, %mm1 \0A\09movd %mm0, $0 \0A\09punpckhdq %mm0, %mm0 \0A\09movd %mm0, $1 \0A\09movd %mm1, $2 \0A\09punpckhdq %mm1, %mm1 \0A\09movd %mm1, $3 \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( ptr elementtype( i32) %tmp1, ptr elementtype(i32) %tmp5, ptr elementtype(i32) %tmp10, ptr elementtype(i32) %tmp16, ptr elementtype(i32) %tmp19, ptr elementtype(i32) %tmp23, ptr elementtype(i32) %tmp28, ptr elementtype(i32) %tmp33 ) nounwind
  br label %return

return: ; preds = %entry
  ret void
}