xref: /llvm-project/llvm/test/CodeGen/X86/fixup-bw-inst.ll (revision 58be6fd1b4f6f0697148edda63e30ec0f74d412a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -fixup-byte-word-insts=1 < %s | FileCheck %s -check-prefix=BWON
3; RUN: llc -fixup-byte-word-insts=0 < %s | FileCheck %s -check-prefix=BWOFF
4
5target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
6target triple = "x86_64-apple-macosx10.8.0" ; the RUN lines pass no -mtriple, so the target for both llc runs comes from this line
7
8%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 } ; eight i8 fields: field 1 sits at byte offset 1, one whole struct is 8 bytes
9
10; Byte loads interspersed with byte stores in a single-basic-block loop.
11; The upper bits of the loaded register are dead, so the fixup pass (when
12; enabled) should turn each movb load into movzbl; the stores stay movb.
13define void @foo1(i32 %count, ptr noalias nocapture %q, ptr noalias nocapture %p) nounwind uwtable noinline ssp {
14; BWON-LABEL: foo1:
15; BWON:       ## %bb.0:
16; BWON-NEXT:    testl %edi, %edi
17; BWON-NEXT:    jle LBB0_2
18; BWON-NEXT:    .p2align 4
19; BWON-NEXT:  LBB0_1: ## %a4
20; BWON-NEXT:    ## =>This Inner Loop Header: Depth=1
21; BWON-NEXT:    movzbl (%rsi), %eax
22; BWON-NEXT:    movb %al, (%rdx)
23; BWON-NEXT:    movzbl 1(%rsi), %eax
24; BWON-NEXT:    movb %al, 1(%rdx)
25; BWON-NEXT:    addq $8, %rdx
26; BWON-NEXT:    decl %edi
27; BWON-NEXT:    jne LBB0_1
28; BWON-NEXT:  LBB0_2: ## %._crit_edge
29; BWON-NEXT:    retq
30;
31; BWOFF-LABEL: foo1:
32; BWOFF:       ## %bb.0:
33; BWOFF-NEXT:    testl %edi, %edi
34; BWOFF-NEXT:    jle LBB0_2
35; BWOFF-NEXT:    .p2align 4
36; BWOFF-NEXT:  LBB0_1: ## %a4
37; BWOFF-NEXT:    ## =>This Inner Loop Header: Depth=1
38; BWOFF-NEXT:    movb (%rsi), %al
39; BWOFF-NEXT:    movb %al, (%rdx)
40; BWOFF-NEXT:    movb 1(%rsi), %al
41; BWOFF-NEXT:    movb %al, 1(%rdx)
42; BWOFF-NEXT:    addq $8, %rdx
43; BWOFF-NEXT:    decl %edi
44; BWOFF-NEXT:    jne LBB0_1
45; BWOFF-NEXT:  LBB0_2: ## %._crit_edge
46; BWOFF-NEXT:    retq
47  %1 = icmp sgt i32 %count, 0 ; guard: the loop is skipped entirely when %count <= 0
48  br i1 %1, label %.lr.ph, label %._crit_edge
49
50.lr.ph:                                           ; preds = %0
51  %2 = getelementptr inbounds %struct.A, ptr %q, i64 0, i32 1 ; address of field 1 of the source, computed once before the loop
52  br label %a4
53
54a4:                                       ; preds = %a4, %.lr.ph
55  %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ] ; iteration counter, starts at 0
56  %.01 = phi ptr [ %p, %.lr.ph ], [ %a10, %a4 ] ; current destination struct, starts at %p
57  %a5 = load i8, ptr %q, align 1 ; first byte load; %q is never advanced, so the same source bytes are re-read every iteration
58  store i8 %a5, ptr %.01, align 1 ; byte store to field 0 of the destination
59  %a8 = getelementptr inbounds %struct.A, ptr %.01, i64 0, i32 1 ; address of field 1 of the destination
60  %a6 = load i8, ptr %2, align 1 ; second byte load (source field 1)
61  store i8 %a6, ptr %a8, align 1 ; byte store to field 1 of the destination
62  %a9 = add nsw i32 %i.02, 1
63  %a10 = getelementptr inbounds %struct.A, ptr %.01, i64 1 ; step the destination forward by one whole struct
64  %exitcond = icmp eq i32 %a9, %count ; exit once %count iterations have run
65  br i1 %exitcond, label %._crit_edge, label %a4
66
67._crit_edge:                                      ; preds = %a4, %0
68  ret void
69}
70
71%struct.B = type { i16, i16, i16, i16, i16, i16, i16, i16 } ; eight i16 fields: field 1 sits at byte offset 2, one whole struct is 16 bytes
72
73; Word loads interspersed with word stores in a single-basic-block loop.
74; The upper bits of the loaded register are dead, so the fixup pass (when
75; enabled) should turn each movw load into movzwl; the stores stay movw.
76define void @foo2(i32 %count, ptr noalias nocapture %q, ptr noalias nocapture %p) nounwind uwtable noinline ssp {
77; BWON-LABEL: foo2:
78; BWON:       ## %bb.0:
79; BWON-NEXT:    testl %edi, %edi
80; BWON-NEXT:    jle LBB1_2
81; BWON-NEXT:    .p2align 4
82; BWON-NEXT:  LBB1_1: ## %a4
83; BWON-NEXT:    ## =>This Inner Loop Header: Depth=1
84; BWON-NEXT:    movzwl (%rsi), %eax
85; BWON-NEXT:    movw %ax, (%rdx)
86; BWON-NEXT:    movzwl 2(%rsi), %eax
87; BWON-NEXT:    movw %ax, 2(%rdx)
88; BWON-NEXT:    addq $16, %rdx
89; BWON-NEXT:    decl %edi
90; BWON-NEXT:    jne LBB1_1
91; BWON-NEXT:  LBB1_2: ## %._crit_edge
92; BWON-NEXT:    retq
93;
94; BWOFF-LABEL: foo2:
95; BWOFF:       ## %bb.0:
96; BWOFF-NEXT:    testl %edi, %edi
97; BWOFF-NEXT:    jle LBB1_2
98; BWOFF-NEXT:    .p2align 4
99; BWOFF-NEXT:  LBB1_1: ## %a4
100; BWOFF-NEXT:    ## =>This Inner Loop Header: Depth=1
101; BWOFF-NEXT:    movw (%rsi), %ax
102; BWOFF-NEXT:    movw %ax, (%rdx)
103; BWOFF-NEXT:    movw 2(%rsi), %ax
104; BWOFF-NEXT:    movw %ax, 2(%rdx)
105; BWOFF-NEXT:    addq $16, %rdx
106; BWOFF-NEXT:    decl %edi
107; BWOFF-NEXT:    jne LBB1_1
108; BWOFF-NEXT:  LBB1_2: ## %._crit_edge
109; BWOFF-NEXT:    retq
110  %1 = icmp sgt i32 %count, 0 ; guard: the loop is skipped entirely when %count <= 0
111  br i1 %1, label %.lr.ph, label %._crit_edge
112
113.lr.ph:                                           ; preds = %0
114  %2 = getelementptr inbounds %struct.B, ptr %q, i64 0, i32 1 ; address of field 1 of the source, computed once before the loop
115  br label %a4
116
117a4:                                       ; preds = %a4, %.lr.ph
118  %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ] ; iteration counter, starts at 0
119  %.01 = phi ptr [ %p, %.lr.ph ], [ %a10, %a4 ] ; current destination struct, starts at %p
120  %a5 = load i16, ptr %q, align 2 ; first word load; %q is never advanced, so the same source words are re-read every iteration
121  store i16 %a5, ptr %.01, align 2 ; word store to field 0 of the destination
122  %a8 = getelementptr inbounds %struct.B, ptr %.01, i64 0, i32 1 ; address of field 1 of the destination
123  %a6 = load i16, ptr %2, align 2 ; second word load (source field 1)
124  store i16 %a6, ptr %a8, align 2 ; word store to field 1 of the destination
125  %a9 = add nsw i32 %i.02, 1
126  %a10 = getelementptr inbounds %struct.B, ptr %.01, i64 1 ; step the destination forward by one whole struct
127  %exitcond = icmp eq i32 %a9, %count ; exit once %count iterations have run
128  br i1 %exitcond, label %._crit_edge, label %a4
129
130._crit_edge:                                      ; preds = %a4, %0
131  ret void
132}
133
134; Nothing but a single byte load followed by a byte store, with no loop.
135; movb has the smaller encoding, but with the fixup pass enabled the load is still emitted as movzbl for better perf; with it disabled the load stays movb.
136define void @foo3(ptr%dst, ptr%src) {
137; BWON-LABEL: foo3:
138; BWON:       ## %bb.0:
139; BWON-NEXT:    movzbl (%rsi), %eax
140; BWON-NEXT:    movb %al, (%rdi)
141; BWON-NEXT:    retq
142;
143; BWOFF-LABEL: foo3:
144; BWOFF:       ## %bb.0:
145; BWOFF-NEXT:    movb (%rsi), %al
146; BWOFF-NEXT:    movb %al, (%rdi)
147; BWOFF-NEXT:    retq
148  %t0 = load i8, ptr%src, align 1 ; the load whose opcode differs between the two llc runs
149  store i8 %t0, ptr%dst, align 1 ; the store is expected to stay movb in both runs
150  ret void
151}
152
153; Nothing but a single word load followed by a word store, with no loop.
154; Since movw and movzwl encode to the same size, movzwl is always chosen
155; when the fixup pass is enabled; with it disabled the load stays movw.
156define void @foo4(ptr%dst, ptr%src) {
157; BWON-LABEL: foo4:
158; BWON:       ## %bb.0:
159; BWON-NEXT:    movzwl (%rsi), %eax
160; BWON-NEXT:    movw %ax, (%rdi)
161; BWON-NEXT:    retq
162;
163; BWOFF-LABEL: foo4:
164; BWOFF:       ## %bb.0:
165; BWOFF-NEXT:    movw (%rsi), %ax
166; BWOFF-NEXT:    movw %ax, (%rdi)
167; BWOFF-NEXT:    retq
168  %t0 = load i16, ptr%src, align 2 ; the load whose opcode differs between the two llc runs
169  store i16 %t0, ptr%dst, align 2 ; the store is expected to stay movw in both runs
170  ret void
171}
172