xref: /llvm-project/llvm/test/CodeGen/X86/x86-cmov-converter.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s
3; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -x86-cmov-converter-force-all=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=CHECK-FORCEALL
4
5;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6;; This test checks that the x86-cmov-converter optimization transforms CMOV
7;; instructions into branches when it is profitable.
8;; There are 5 cases below:
9;;   1. CmovInHotPath:
10;;        CMOV depends on the condition and it is in the hot path.
11;;        Thus, it is worth transforming.
12;;
13;;   2. CmovNotInHotPath:
14;;        Similar to (1), except that CMOV is not in the hot path.
15;;        Thus, it is not worth transforming.
16;;
17;;   3. MaxIndex:
18;;        Maximum calculation algorithm that is looking for the max index;
19;;        calculating the CMOV value is cheaper than calculating the CMOV condition.
20;;        Thus, it is worth transforming.
21;;
22;;   4. MaxValue:
23;;        Maximum calculation algorithm that is looking for the max value;
24;;        calculating the CMOV value is not cheaper than calculating the CMOV condition.
25;;        Thus, it is not worth transforming.
26;;
27;;   5. BinarySearch:
28;;        Usually, a binary search CMOV is hard to predict.
29;;        Thus, it is not worth transforming.
30;;
31;; Test was created using the following command line:
32;; > clang -S -O2 -m64 -fno-vectorize -fno-unroll-loops -emit-llvm foo.c -o -
33;; Where foo.c is:
34;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
35;;void CmovInHotPath(int n, int a, int b, int *c, int *d) {
36;;  for (int i = 0; i < n; i++) {
37;;    int t = c[i] + 1;
38;;    if (c[i] * a > b)
39;;      t = 10;
40;;    c[i] = (c[i] + 1) * t;
41;;  }
42;;}
43;;
44;;
45;;void CmovNotInHotPath(int n, int a, int b, int *c, int *d) {
46;;  for (int i = 0; i < n; i++) {
47;;    int t = c[i];
48;;    if (c[i] * a > b)
49;;      t = 10;
50;;    c[i] = t;
51;;    d[i] /= b;
52;;  }
53;;}
54;;
55;;
56;;int MaxIndex(int n, int *a) {
57;;  int t = 0;
58;;  for (int i = 1; i < n; i++) {
59;;    if (a[i] > a[t])
60;;      t = i;
61;;  }
62;;  return t;
63;;}
64;;
65;;
66;;int MaxValue(int n, int *a) {
67;;  int t = a[0];
68;;  for (int i = 1; i < n; i++) {
69;;    if (a[i] > t)
70;;      t = a[i];
71;;  }
72;;  return t;
73;;}
74;;
75;;typedef struct Node Node;
76;;struct Node {
77;;  unsigned Val;
78;;  Node *Right;
79;;  Node *Left;
80;;};
81;;
82;;unsigned BinarySearch(unsigned Mask, Node *Curr, Node *Next) {
83;;  while (Curr->Val > Next->Val) {
84;;    Curr = Next;
85;;    if (Mask & (0x1 << Curr->Val))
86;;      Next = Curr->Right;
87;;    else
88;;      Next = Curr->Left;
89;;  }
90;;  return Curr->Val;
91;;}
92;;
93;;
94;;void SmallGainPerLoop(int n, int a, int b, int *c, int *d) {
95;;  for (int i = 0; i < n; i++) {
96;;    int t = c[i];
97;;    if (c[i] * a > b)
98;;      t = 10;
99;;    c[i] = t;
100;;  }
101;;}
102;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
103
104%struct.Node = type { i32, ptr, ptr }
105
; CmovInHotPath: for each i in [0, n), computes t = (c[i] * a > b) ? 10 : c[i] + 1
; and stores (c[i] + 1) * t back to c[i]. The select result feeds the multiply
; whose product is stored. The expected assembly for both llc invocations
; replaces the cmov with a conditional branch (jg).
106define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, ptr nocapture %c, ptr nocapture readnone %d) #0 {
107; CHECK-LABEL: CmovInHotPath:
108; CHECK:       # %bb.0: # %entry
109; CHECK-NEXT:    testl %edi, %edi
110; CHECK-NEXT:    jle .LBB0_5
111; CHECK-NEXT:  # %bb.1: # %for.body.preheader
112; CHECK-NEXT:    movl %edi, %eax
113; CHECK-NEXT:    xorl %edi, %edi
114; CHECK-NEXT:  .LBB0_2: # %for.body
115; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
116; CHECK-NEXT:    movl (%rcx,%rdi,4), %r10d
117; CHECK-NEXT:    leal 1(%r10), %r8d
118; CHECK-NEXT:    imull %esi, %r10d
119; CHECK-NEXT:    movl $10, %r9d
120; CHECK-NEXT:    cmpl %edx, %r10d
121; CHECK-NEXT:    jg .LBB0_4
122; CHECK-NEXT:  # %bb.3: # %for.body
123; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
124; CHECK-NEXT:    movl %r8d, %r9d
125; CHECK-NEXT:  .LBB0_4: # %for.body
126; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
127; CHECK-NEXT:    imull %r8d, %r9d
128; CHECK-NEXT:    movl %r9d, (%rcx,%rdi,4)
129; CHECK-NEXT:    addq $1, %rdi
130; CHECK-NEXT:    cmpq %rdi, %rax
131; CHECK-NEXT:    jne .LBB0_2
132; CHECK-NEXT:  .LBB0_5: # %for.cond.cleanup
133; CHECK-NEXT:    retq
134;
135; CHECK-FORCEALL-LABEL: CmovInHotPath:
136; CHECK-FORCEALL:       # %bb.0: # %entry
137; CHECK-FORCEALL-NEXT:    testl %edi, %edi
138; CHECK-FORCEALL-NEXT:    jle .LBB0_5
139; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
140; CHECK-FORCEALL-NEXT:    movl %edi, %eax
141; CHECK-FORCEALL-NEXT:    xorl %edi, %edi
142; CHECK-FORCEALL-NEXT:  .LBB0_2: # %for.body
143; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
144; CHECK-FORCEALL-NEXT:    movl (%rcx,%rdi,4), %r10d
145; CHECK-FORCEALL-NEXT:    leal 1(%r10), %r8d
146; CHECK-FORCEALL-NEXT:    imull %esi, %r10d
147; CHECK-FORCEALL-NEXT:    movl $10, %r9d
148; CHECK-FORCEALL-NEXT:    cmpl %edx, %r10d
149; CHECK-FORCEALL-NEXT:    jg .LBB0_4
150; CHECK-FORCEALL-NEXT:  # %bb.3: # %for.body
151; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB0_2 Depth=1
152; CHECK-FORCEALL-NEXT:    movl %r8d, %r9d
153; CHECK-FORCEALL-NEXT:  .LBB0_4: # %for.body
154; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB0_2 Depth=1
155; CHECK-FORCEALL-NEXT:    imull %r8d, %r9d
156; CHECK-FORCEALL-NEXT:    movl %r9d, (%rcx,%rdi,4)
157; CHECK-FORCEALL-NEXT:    addq $1, %rdi
158; CHECK-FORCEALL-NEXT:    cmpq %rdi, %rax
159; CHECK-FORCEALL-NEXT:    jne .LBB0_2
160; CHECK-FORCEALL-NEXT:  .LBB0_5: # %for.cond.cleanup
161; CHECK-FORCEALL-NEXT:    retq
162entry:
163  %cmp14 = icmp sgt i32 %n, 0
164  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
165
166for.body.preheader:                               ; preds = %entry
167  %wide.trip.count = zext i32 %n to i64
168  br label %for.body
169
170for.cond.cleanup:                                 ; preds = %for.body, %entry
171  ret void
172
173for.body:                                         ; preds = %for.body.preheader, %for.body
174  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
175  %arrayidx = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
176  %0 = load i32, ptr %arrayidx, align 4
177  %add = add nsw i32 %0, 1
178  %mul = mul nsw i32 %0, %a
179  %cmp3 = icmp sgt i32 %mul, %b
  ; Select under test: 10 when c[i] * a > b, otherwise c[i] + 1.
180  %. = select i1 %cmp3, i32 10, i32 %add
  ; The select result is consumed by the multiply whose product is stored.
181  %mul7 = mul nsw i32 %., %add
182  store i32 %mul7, ptr %arrayidx, align 4
183  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
184  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
185  br i1 %exitcond, label %for.cond.cleanup, label %for.body
186}
187
; CmovNotInHotPath: for each i in [0, n), stores (c[i] * a > b) ? 10 : c[i]
; straight to c[i], then performs d[i] /= b with a signed divide in the same
; iteration. The default invocation is expected to keep the cmov (cmovgl);
; the force-all invocation is expected to emit a conditional branch (jg).
188define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, ptr nocapture %c, ptr nocapture %d) #0 {
189; CHECK-LABEL: CmovNotInHotPath:
190; CHECK:       # %bb.0: # %entry
191; CHECK-NEXT:    testl %edi, %edi
192; CHECK-NEXT:    jle .LBB1_3
193; CHECK-NEXT:  # %bb.1: # %for.body.preheader
194; CHECK-NEXT:    movl %edx, %r9d
195; CHECK-NEXT:    movl %edi, %edi
196; CHECK-NEXT:    xorl %r10d, %r10d
197; CHECK-NEXT:    movl $10, %r11d
198; CHECK-NEXT:  .LBB1_2: # %for.body
199; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
200; CHECK-NEXT:    movl (%rcx,%r10,4), %eax
201; CHECK-NEXT:    movl %eax, %edx
202; CHECK-NEXT:    imull %esi, %edx
203; CHECK-NEXT:    cmpl %r9d, %edx
204; CHECK-NEXT:    cmovgl %r11d, %eax
205; CHECK-NEXT:    movl %eax, (%rcx,%r10,4)
206; CHECK-NEXT:    movl (%r8,%r10,4), %eax
207; CHECK-NEXT:    cltd
208; CHECK-NEXT:    idivl %r9d
209; CHECK-NEXT:    movl %eax, (%r8,%r10,4)
210; CHECK-NEXT:    addq $1, %r10
211; CHECK-NEXT:    cmpq %r10, %rdi
212; CHECK-NEXT:    jne .LBB1_2
213; CHECK-NEXT:  .LBB1_3: # %for.cond.cleanup
214; CHECK-NEXT:    retq
215;
216; CHECK-FORCEALL-LABEL: CmovNotInHotPath:
217; CHECK-FORCEALL:       # %bb.0: # %entry
218; CHECK-FORCEALL-NEXT:    testl %edi, %edi
219; CHECK-FORCEALL-NEXT:    jle .LBB1_5
220; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
221; CHECK-FORCEALL-NEXT:    movl %edx, %r9d
222; CHECK-FORCEALL-NEXT:    movl %edi, %edi
223; CHECK-FORCEALL-NEXT:    xorl %r10d, %r10d
224; CHECK-FORCEALL-NEXT:  .LBB1_2: # %for.body
225; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
226; CHECK-FORCEALL-NEXT:    movl (%rcx,%r10,4), %eax
227; CHECK-FORCEALL-NEXT:    movl %eax, %r11d
228; CHECK-FORCEALL-NEXT:    imull %esi, %r11d
229; CHECK-FORCEALL-NEXT:    movl $10, %edx
230; CHECK-FORCEALL-NEXT:    cmpl %r9d, %r11d
231; CHECK-FORCEALL-NEXT:    jg .LBB1_4
232; CHECK-FORCEALL-NEXT:  # %bb.3: # %for.body
233; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB1_2 Depth=1
234; CHECK-FORCEALL-NEXT:    movl %eax, %edx
235; CHECK-FORCEALL-NEXT:  .LBB1_4: # %for.body
236; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB1_2 Depth=1
237; CHECK-FORCEALL-NEXT:    movl %edx, (%rcx,%r10,4)
238; CHECK-FORCEALL-NEXT:    movl (%r8,%r10,4), %eax
239; CHECK-FORCEALL-NEXT:    cltd
240; CHECK-FORCEALL-NEXT:    idivl %r9d
241; CHECK-FORCEALL-NEXT:    movl %eax, (%r8,%r10,4)
242; CHECK-FORCEALL-NEXT:    addq $1, %r10
243; CHECK-FORCEALL-NEXT:    cmpq %r10, %rdi
244; CHECK-FORCEALL-NEXT:    jne .LBB1_2
245; CHECK-FORCEALL-NEXT:  .LBB1_5: # %for.cond.cleanup
246; CHECK-FORCEALL-NEXT:    retq
247entry:
248  %cmp18 = icmp sgt i32 %n, 0
249  br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup
250
251for.body.preheader:                               ; preds = %entry
252  %wide.trip.count = zext i32 %n to i64
253  br label %for.body
254
255for.cond.cleanup:                                 ; preds = %for.body, %entry
256  ret void
257
258for.body:                                         ; preds = %for.body.preheader, %for.body
259  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
260  %arrayidx = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
261  %0 = load i32, ptr %arrayidx, align 4
262  %mul = mul nsw i32 %0, %a
263  %cmp3 = icmp sgt i32 %mul, %b
  ; Select under test: 10 when c[i] * a > b, otherwise the original c[i].
264  %. = select i1 %cmp3, i32 10, i32 %0
265  store i32 %., ptr %arrayidx, align 4
  ; Signed division d[i] / b in the same iteration; it does not use the select.
266  %arrayidx7 = getelementptr inbounds i32, ptr %d, i64 %indvars.iv
267  %1 = load i32, ptr %arrayidx7, align 4
268  %div = sdiv i32 %1, %b
269  store i32 %div, ptr %arrayidx7, align 4
270  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
271  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
272  br i1 %exitcond, label %for.cond.cleanup, label %for.body
273}
274
; MaxIndex: starting with t = 0, walks i over [1, n) and sets t = i whenever
; a[i] > a[t] (signed); returns t, or 0 when n <= 1. The select result is
; carried around the loop through the %t.015 phi and is used to address the
; next a[t] load. Both llc invocations expect a conditional branch (jg).
275define i32 @MaxIndex(i32 %n, ptr nocapture readonly %a) #0 {
276; CHECK-LABEL: MaxIndex:
277; CHECK:       # %bb.0: # %entry
278; CHECK-NEXT:    xorl %eax, %eax
279; CHECK-NEXT:    cmpl $2, %edi
280; CHECK-NEXT:    jl .LBB2_5
281; CHECK-NEXT:  # %bb.1: # %for.body.preheader
282; CHECK-NEXT:    movl %edi, %ecx
283; CHECK-NEXT:    xorl %edi, %edi
284; CHECK-NEXT:    movl $1, %edx
285; CHECK-NEXT:  .LBB2_2: # %for.body
286; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
287; CHECK-NEXT:    movl (%rsi,%rdx,4), %r8d
288; CHECK-NEXT:    movslq %edi, %r9
289; CHECK-NEXT:    movl %edx, %eax
290; CHECK-NEXT:    cmpl (%rsi,%r9,4), %r8d
291; CHECK-NEXT:    jg .LBB2_4
292; CHECK-NEXT:  # %bb.3: # %for.body
293; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
294; CHECK-NEXT:    movl %edi, %eax
295; CHECK-NEXT:  .LBB2_4: # %for.body
296; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
297; CHECK-NEXT:    addq $1, %rdx
298; CHECK-NEXT:    movl %eax, %edi
299; CHECK-NEXT:    cmpq %rdx, %rcx
300; CHECK-NEXT:    jne .LBB2_2
301; CHECK-NEXT:  .LBB2_5: # %for.cond.cleanup
302; CHECK-NEXT:    retq
303;
304; CHECK-FORCEALL-LABEL: MaxIndex:
305; CHECK-FORCEALL:       # %bb.0: # %entry
306; CHECK-FORCEALL-NEXT:    xorl %eax, %eax
307; CHECK-FORCEALL-NEXT:    cmpl $2, %edi
308; CHECK-FORCEALL-NEXT:    jl .LBB2_5
309; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
310; CHECK-FORCEALL-NEXT:    movl %edi, %ecx
311; CHECK-FORCEALL-NEXT:    xorl %edi, %edi
312; CHECK-FORCEALL-NEXT:    movl $1, %edx
313; CHECK-FORCEALL-NEXT:  .LBB2_2: # %for.body
314; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
315; CHECK-FORCEALL-NEXT:    movl (%rsi,%rdx,4), %r8d
316; CHECK-FORCEALL-NEXT:    movslq %edi, %r9
317; CHECK-FORCEALL-NEXT:    movl %edx, %eax
318; CHECK-FORCEALL-NEXT:    cmpl (%rsi,%r9,4), %r8d
319; CHECK-FORCEALL-NEXT:    jg .LBB2_4
320; CHECK-FORCEALL-NEXT:  # %bb.3: # %for.body
321; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB2_2 Depth=1
322; CHECK-FORCEALL-NEXT:    movl %edi, %eax
323; CHECK-FORCEALL-NEXT:  .LBB2_4: # %for.body
324; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB2_2 Depth=1
325; CHECK-FORCEALL-NEXT:    addq $1, %rdx
326; CHECK-FORCEALL-NEXT:    movl %eax, %edi
327; CHECK-FORCEALL-NEXT:    cmpq %rdx, %rcx
328; CHECK-FORCEALL-NEXT:    jne .LBB2_2
329; CHECK-FORCEALL-NEXT:  .LBB2_5: # %for.cond.cleanup
330; CHECK-FORCEALL-NEXT:    retq
331entry:
332  %cmp14 = icmp sgt i32 %n, 1
333  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
334
335for.body.preheader:                               ; preds = %entry
336  %wide.trip.count = zext i32 %n to i64
337  br label %for.body
338
339for.cond.cleanup:                                 ; preds = %for.body, %entry
340  %t.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.t.0, %for.body ]
341  ret i32 %t.0.lcssa
342
343for.body:                                         ; preds = %for.body.preheader, %for.body
344  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
  ; %t.015 is the current best index; it is the previous iteration's select.
345  %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ]
346  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
347  %0 = load i32, ptr %arrayidx, align 4
348  %idxprom1 = sext i32 %t.015 to i64
349  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %idxprom1
350  %1 = load i32, ptr %arrayidx2, align 4
351  %cmp3 = icmp sgt i32 %0, %1
352  %2 = trunc i64 %indvars.iv to i32
  ; Select under test: take i when a[i] > a[t], otherwise keep t.
353  %i.0.t.0 = select i1 %cmp3, i32 %2, i32 %t.015
354  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
355  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
356  br i1 %exitcond, label %for.cond.cleanup, label %for.body
357}
358
359; If a cmov instruction is marked as unpredictable, do not convert it to a branch.
; Same max-index loop as @MaxIndex, except that the select carries
; !unpredictable metadata. Both llc invocations, including the force-all one,
; are expected to keep the cmov (cmovgl) in the loop body.
360define i32 @MaxIndex_unpredictable(i32 %n, ptr nocapture readonly %a) #0 {
361; CHECK-LABEL: MaxIndex_unpredictable:
362; CHECK:       # %bb.0: # %entry
363; CHECK-NEXT:    xorl %eax, %eax
364; CHECK-NEXT:    cmpl $2, %edi
365; CHECK-NEXT:    jl .LBB3_3
366; CHECK-NEXT:  # %bb.1: # %for.body.preheader
367; CHECK-NEXT:    movl %edi, %ecx
368; CHECK-NEXT:    xorl %eax, %eax
369; CHECK-NEXT:    movl $1, %edx
370; CHECK-NEXT:  .LBB3_2: # %for.body
371; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
372; CHECK-NEXT:    movl (%rsi,%rdx,4), %edi
373; CHECK-NEXT:    cltq
374; CHECK-NEXT:    cmpl (%rsi,%rax,4), %edi
375; CHECK-NEXT:    cmovgl %edx, %eax
376; CHECK-NEXT:    addq $1, %rdx
377; CHECK-NEXT:    cmpq %rdx, %rcx
378; CHECK-NEXT:    jne .LBB3_2
379; CHECK-NEXT:  .LBB3_3: # %for.cond.cleanup
380; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
381; CHECK-NEXT:    retq
382;
383; CHECK-FORCEALL-LABEL: MaxIndex_unpredictable:
384; CHECK-FORCEALL:       # %bb.0: # %entry
385; CHECK-FORCEALL-NEXT:    xorl %eax, %eax
386; CHECK-FORCEALL-NEXT:    cmpl $2, %edi
387; CHECK-FORCEALL-NEXT:    jl .LBB3_3
388; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
389; CHECK-FORCEALL-NEXT:    movl %edi, %ecx
390; CHECK-FORCEALL-NEXT:    xorl %eax, %eax
391; CHECK-FORCEALL-NEXT:    movl $1, %edx
392; CHECK-FORCEALL-NEXT:  .LBB3_2: # %for.body
393; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
394; CHECK-FORCEALL-NEXT:    movl (%rsi,%rdx,4), %edi
395; CHECK-FORCEALL-NEXT:    cltq
396; CHECK-FORCEALL-NEXT:    cmpl (%rsi,%rax,4), %edi
397; CHECK-FORCEALL-NEXT:    cmovgl %edx, %eax
398; CHECK-FORCEALL-NEXT:    addq $1, %rdx
399; CHECK-FORCEALL-NEXT:    cmpq %rdx, %rcx
400; CHECK-FORCEALL-NEXT:    jne .LBB3_2
401; CHECK-FORCEALL-NEXT:  .LBB3_3: # %for.cond.cleanup
402; CHECK-FORCEALL-NEXT:    # kill: def $eax killed $eax killed $rax
403; CHECK-FORCEALL-NEXT:    retq
404entry:
405  %cmp14 = icmp sgt i32 %n, 1
406  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
407
408for.body.preheader:                               ; preds = %entry
409  %wide.trip.count = zext i32 %n to i64
410  br label %for.body
411
412for.cond.cleanup:                                 ; preds = %for.body, %entry
413  %t.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.t.0, %for.body ]
414  ret i32 %t.0.lcssa
415
416for.body:                                         ; preds = %for.body.preheader, %for.body
417  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
418  %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ]
419  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
420  %0 = load i32, ptr %arrayidx, align 4
421  %idxprom1 = sext i32 %t.015 to i64
422  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %idxprom1
423  %1 = load i32, ptr %arrayidx2, align 4
424  %cmp3 = icmp sgt i32 %0, %1
425  %2 = trunc i64 %indvars.iv to i32
  ; The !unpredictable annotation on this select is what this test exercises.
426  %i.0.t.0 = select i1 %cmp3, i32 %2, i32 %t.015, !unpredictable !0
427  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
428  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
429  br i1 %exitcond, label %for.cond.cleanup, label %for.body
430}
431
; MaxValue: loads a[0] as the initial maximum, then for i in [1, n) keeps the
; larger (signed) of the running maximum and a[i]; returns the result, which
; is a[0] when n <= 1. The default invocation is expected to keep the cmov
; (cmovgl); the force-all invocation is expected to emit a conditional
; branch (jg) instead.
432define i32 @MaxValue(i32 %n, ptr nocapture readonly %a) #0 {
433; CHECK-LABEL: MaxValue:
434; CHECK:       # %bb.0: # %entry
435; CHECK-NEXT:    movl (%rsi), %eax
436; CHECK-NEXT:    cmpl $2, %edi
437; CHECK-NEXT:    jl .LBB4_3
438; CHECK-NEXT:  # %bb.1: # %for.body.preheader
439; CHECK-NEXT:    movl %edi, %ecx
440; CHECK-NEXT:    movl $1, %edx
441; CHECK-NEXT:  .LBB4_2: # %for.body
442; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
443; CHECK-NEXT:    movl (%rsi,%rdx,4), %edi
444; CHECK-NEXT:    cmpl %eax, %edi
445; CHECK-NEXT:    cmovgl %edi, %eax
446; CHECK-NEXT:    addq $1, %rdx
447; CHECK-NEXT:    cmpq %rdx, %rcx
448; CHECK-NEXT:    jne .LBB4_2
449; CHECK-NEXT:  .LBB4_3: # %for.cond.cleanup
450; CHECK-NEXT:    retq
451;
452; CHECK-FORCEALL-LABEL: MaxValue:
453; CHECK-FORCEALL:       # %bb.0: # %entry
454; CHECK-FORCEALL-NEXT:    movl (%rsi), %r8d
455; CHECK-FORCEALL-NEXT:    cmpl $2, %edi
456; CHECK-FORCEALL-NEXT:    jge .LBB4_3
457; CHECK-FORCEALL-NEXT:  # %bb.1:
458; CHECK-FORCEALL-NEXT:    movl %r8d, %eax
459; CHECK-FORCEALL-NEXT:  .LBB4_2: # %for.cond.cleanup
460; CHECK-FORCEALL-NEXT:    retq
461; CHECK-FORCEALL-NEXT:  .LBB4_3: # %for.body.preheader
462; CHECK-FORCEALL-NEXT:    movl %edi, %ecx
463; CHECK-FORCEALL-NEXT:    movl $1, %edx
464; CHECK-FORCEALL-NEXT:  .LBB4_4: # %for.body
465; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
466; CHECK-FORCEALL-NEXT:    movl (%rsi,%rdx,4), %eax
467; CHECK-FORCEALL-NEXT:    cmpl %r8d, %eax
468; CHECK-FORCEALL-NEXT:    jg .LBB4_6
469; CHECK-FORCEALL-NEXT:  # %bb.5: # %for.body
470; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB4_4 Depth=1
471; CHECK-FORCEALL-NEXT:    movl %r8d, %eax
472; CHECK-FORCEALL-NEXT:  .LBB4_6: # %for.body
473; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB4_4 Depth=1
474; CHECK-FORCEALL-NEXT:    addq $1, %rdx
475; CHECK-FORCEALL-NEXT:    movl %eax, %r8d
476; CHECK-FORCEALL-NEXT:    cmpq %rdx, %rcx
477; CHECK-FORCEALL-NEXT:    je .LBB4_2
478; CHECK-FORCEALL-NEXT:    jmp .LBB4_4
479entry:
480  %0 = load i32, ptr %a, align 4
481  %cmp13 = icmp sgt i32 %n, 1
482  br i1 %cmp13, label %for.body.preheader, label %for.cond.cleanup
483
484for.body.preheader:                               ; preds = %entry
485  %wide.trip.count = zext i32 %n to i64
486  br label %for.body
487
488for.cond.cleanup:                                 ; preds = %for.body, %entry
489  %t.0.lcssa = phi i32 [ %0, %entry ], [ %.t.0, %for.body ]
490  ret i32 %t.0.lcssa
491
492for.body:                                         ; preds = %for.body.preheader, %for.body
493  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
  ; %t.014 is the running maximum carried around the loop.
494  %t.014 = phi i32 [ %.t.0, %for.body ], [ %0, %for.body.preheader ]
495  %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
496  %1 = load i32, ptr %arrayidx1, align 4
497  %cmp2 = icmp sgt i32 %1, %t.014
  ; Select under test: the loaded element when it is greater, else the old max.
498  %.t.0 = select i1 %cmp2, i32 %1, i32 %t.014
499  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
500  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
501  br i1 %exitcond, label %for.cond.cleanup, label %for.body
502}
503
; BinarySearch: follows Node links starting from %Next while the current
; node's Val is unsigned-greater than the next node's Val, and returns the
; last current Val. The select picks between the addresses of field 2 (%Left)
; and field 1 (%Right) depending on whether (1 << Val) & Mask is zero, and the
; chosen address is then loaded. The expected assembly is the same for both
; llc invocations and contains no cmov: the field is chosen with btl/setae
; feeding an indexed load.
504define i32 @BinarySearch(i32 %Mask, ptr nocapture readonly %Curr, ptr nocapture readonly %Next) #0 {
505; CHECK-LABEL: BinarySearch:
506; CHECK:       # %bb.0: # %entry
507; CHECK-NEXT:    movl (%rsi), %eax
508; CHECK-NEXT:    jmp .LBB5_2
509; CHECK-NEXT:  .LBB5_1: # %while.body
510; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
511; CHECK-NEXT:    movl %ecx, %eax
512; CHECK-NEXT:    xorl %ecx, %ecx
513; CHECK-NEXT:    btl %eax, %edi
514; CHECK-NEXT:    setae %cl
515; CHECK-NEXT:    movq 8(%rdx,%rcx,8), %rdx
516; CHECK-NEXT:  .LBB5_2: # %while.body
517; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
518; CHECK-NEXT:    movl (%rdx), %ecx
519; CHECK-NEXT:    cmpl %ecx, %eax
520; CHECK-NEXT:    ja .LBB5_1
521; CHECK-NEXT:  # %bb.3: # %while.end
522; CHECK-NEXT:    retq
523;
524; CHECK-FORCEALL-LABEL: BinarySearch:
525; CHECK-FORCEALL:       # %bb.0: # %entry
526; CHECK-FORCEALL-NEXT:    movl (%rsi), %eax
527; CHECK-FORCEALL-NEXT:    jmp .LBB5_2
528; CHECK-FORCEALL-NEXT:  .LBB5_1: # %while.body
529; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB5_2 Depth=1
530; CHECK-FORCEALL-NEXT:    movl %ecx, %eax
531; CHECK-FORCEALL-NEXT:    xorl %ecx, %ecx
532; CHECK-FORCEALL-NEXT:    btl %eax, %edi
533; CHECK-FORCEALL-NEXT:    setae %cl
534; CHECK-FORCEALL-NEXT:    movq 8(%rdx,%rcx,8), %rdx
535; CHECK-FORCEALL-NEXT:  .LBB5_2: # %while.body
536; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
537; CHECK-FORCEALL-NEXT:    movl (%rdx), %ecx
538; CHECK-FORCEALL-NEXT:    cmpl %ecx, %eax
539; CHECK-FORCEALL-NEXT:    ja .LBB5_1
540; CHECK-FORCEALL-NEXT:  # %bb.3: # %while.end
541; CHECK-FORCEALL-NEXT:    retq
542entry:
543  %0 = load i32, ptr %Curr, align 8
544  %1 = load i32, ptr %Next, align 8
545  %cmp10 = icmp ugt i32 %0, %1
546  br i1 %cmp10, label %while.body, label %while.end
547
548while.body:                                       ; preds = %entry, %while.body
  ; %2 is the Val of the node being visited; %Next.addr.011 points at that node.
549  %2 = phi i32 [ %4, %while.body ], [ %1, %entry ]
550  %Next.addr.011 = phi ptr [ %3, %while.body ], [ %Next, %entry ]
551  %shl = shl i32 1, %2
552  %and = and i32 %shl, %Mask
553  %tobool = icmp eq i32 %and, 0
554  %Left = getelementptr inbounds %struct.Node, ptr %Next.addr.011, i64 0, i32 2
555  %Right = getelementptr inbounds %struct.Node, ptr %Next.addr.011, i64 0, i32 1
  ; Select under test: chooses which link field to load, not a loaded value.
556  %Left.sink = select i1 %tobool, ptr %Left, ptr %Right
557  %3 = load ptr, ptr %Left.sink, align 8
558  %4 = load i32, ptr %3, align 8
559  %cmp = icmp ugt i32 %2, %4
560  br i1 %cmp, label %while.body, label %while.end
561
562while.end:                                        ; preds = %while.body, %entry
563  %.lcssa = phi i32 [ %0, %entry ], [ %2, %while.body ]
564  ret i32 %.lcssa
565}
566
567;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
568;; The following test checks that x86-cmov-converter optimization transforms
569;; CMOV instructions into branch correctly.
570;;
571;; MBB:
572;;   cond = cmp ...
573;;   v1 = CMOVgt t1, f1, cond
574;;   v2 = CMOVle v1, f2, cond
575;;
576;; Where: t1 = 11, f1 = 22, f2 = a
577;;
578;; After CMOV transformation
579;; -------------------------
580;; MBB:
581;;   cond = cmp ...
582;;   ja %SinkMBB
583;;
584;; FalseMBB:
585;;   jmp %SinkMBB
586;;
587;; SinkMBB:
588;;   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
589;;   %v2 = phi[%f1, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
590;;                                          ; true-value with false-value
591;;                                          ; Phi instruction cannot use
592;;                                          ; previous Phi instruction result
593;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
594
; Transform: the loop body holds two selects driven by opposite conditions
; (icmp ugt and icmp ule on the same operands), and the second select consumes
; the first one's result. Both llc invocations are expected to emit a single
; conditional branch (ja), with the values for both selects set up on each
; path, followed by the urem of the two results.
595define void @Transform(ptr%arr, ptr%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 {
596; CHECK-LABEL: Transform:
597; CHECK:       # %bb.0: # %entry
598; CHECK-NEXT:    movb $1, %al
599; CHECK-NEXT:    testb %al, %al
600; CHECK-NEXT:    jne .LBB6_5
601; CHECK-NEXT:  # %bb.1: # %while.body.preheader
602; CHECK-NEXT:    movl %edx, %ecx
603; CHECK-NEXT:    xorl %esi, %esi
604; CHECK-NEXT:  .LBB6_2: # %while.body
605; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
606; CHECK-NEXT:    movslq %esi, %rsi
607; CHECK-NEXT:    movl (%rdi,%rsi,4), %eax
608; CHECK-NEXT:    xorl %edx, %edx
609; CHECK-NEXT:    divl %ecx
610; CHECK-NEXT:    movl %eax, %edx
611; CHECK-NEXT:    movl $11, %eax
612; CHECK-NEXT:    movl %ecx, %r8d
613; CHECK-NEXT:    cmpl %ecx, %edx
614; CHECK-NEXT:    ja .LBB6_4
615; CHECK-NEXT:  # %bb.3: # %while.body
616; CHECK-NEXT:    # in Loop: Header=BB6_2 Depth=1
617; CHECK-NEXT:    movl $22, %eax
618; CHECK-NEXT:    movl $22, %r8d
619; CHECK-NEXT:  .LBB6_4: # %while.body
620; CHECK-NEXT:    # in Loop: Header=BB6_2 Depth=1
621; CHECK-NEXT:    xorl %edx, %edx
622; CHECK-NEXT:    divl %r8d
623; CHECK-NEXT:    movl %edx, (%rdi,%rsi,4)
624; CHECK-NEXT:    addl $1, %esi
625; CHECK-NEXT:    cmpl %r9d, %esi
626; CHECK-NEXT:    ja .LBB6_2
627; CHECK-NEXT:  .LBB6_5: # %while.end
628; CHECK-NEXT:    retq
629;
630; CHECK-FORCEALL-LABEL: Transform:
631; CHECK-FORCEALL:       # %bb.0: # %entry
632; CHECK-FORCEALL-NEXT:    movb $1, %al
633; CHECK-FORCEALL-NEXT:    testb %al, %al
634; CHECK-FORCEALL-NEXT:    jne .LBB6_5
635; CHECK-FORCEALL-NEXT:  # %bb.1: # %while.body.preheader
636; CHECK-FORCEALL-NEXT:    movl %edx, %ecx
637; CHECK-FORCEALL-NEXT:    xorl %esi, %esi
638; CHECK-FORCEALL-NEXT:  .LBB6_2: # %while.body
639; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
640; CHECK-FORCEALL-NEXT:    movslq %esi, %rsi
641; CHECK-FORCEALL-NEXT:    movl (%rdi,%rsi,4), %eax
642; CHECK-FORCEALL-NEXT:    xorl %edx, %edx
643; CHECK-FORCEALL-NEXT:    divl %ecx
644; CHECK-FORCEALL-NEXT:    movl %eax, %edx
645; CHECK-FORCEALL-NEXT:    movl $11, %eax
646; CHECK-FORCEALL-NEXT:    movl %ecx, %r8d
647; CHECK-FORCEALL-NEXT:    cmpl %ecx, %edx
648; CHECK-FORCEALL-NEXT:    ja .LBB6_4
649; CHECK-FORCEALL-NEXT:  # %bb.3: # %while.body
650; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB6_2 Depth=1
651; CHECK-FORCEALL-NEXT:    movl $22, %eax
652; CHECK-FORCEALL-NEXT:    movl $22, %r8d
653; CHECK-FORCEALL-NEXT:  .LBB6_4: # %while.body
654; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB6_2 Depth=1
655; CHECK-FORCEALL-NEXT:    xorl %edx, %edx
656; CHECK-FORCEALL-NEXT:    divl %r8d
657; CHECK-FORCEALL-NEXT:    movl %edx, (%rdi,%rsi,4)
658; CHECK-FORCEALL-NEXT:    addl $1, %esi
659; CHECK-FORCEALL-NEXT:    cmpl %r9d, %esi
660; CHECK-FORCEALL-NEXT:    ja .LBB6_2
661; CHECK-FORCEALL-NEXT:  .LBB6_5: # %while.end
662; CHECK-FORCEALL-NEXT:    retq
663entry:
  ; NOTE: an unsigned 0 > %n can never hold, so this guard always takes the
  ; %while.end edge; the loop body exists only to be compiled and checked.
664  %cmp10 = icmp ugt i32 0, %n
665  br i1 %cmp10, label %while.body, label %while.end
666
667while.body:                                       ; preds = %entry, %while.body
668  %i = phi i32 [ %i_inc, %while.body ], [ 0, %entry ]
669  %arr_i = getelementptr inbounds i32, ptr %arr, i32 %i
670  %x = load i32, ptr %arr_i, align 4
671  %div = udiv i32 %x, %a
  ; %cond and %condOpp are exact opposites of each other.
672  %cond = icmp ugt i32 %div, %a
673  %condOpp = icmp ule i32 %div, %a
  ; First select: 11 when %div > %a, otherwise 22.
674  %s1 = select i1 %cond, i32 11, i32 22
  ; Second select, on the opposite condition: %s1 when %div <= %a, otherwise %a.
675  %s2 = select i1 %condOpp, i32 %s1, i32 %a
676  %sum = urem i32 %s1, %s2
677  store i32 %sum, ptr %arr_i, align 4
678  %i_inc = add i32 %i, 1
679  %cmp = icmp ugt i32 %i_inc, %n
680  br i1 %cmp, label %while.body, label %while.end
681
682while.end:                                        ; preds = %while.body, %entry
683  ret void
684}
685
686; Test that we will always convert a cmov with a memory operand into a branch,
687; even outside of a loop.
; Returns %x when %a > %b (unsigned), otherwise the i32 loaded from %y.
; Both llc invocations expect a conditional branch (ja) with the load from
; (%rcx) placed on the fall-through path, and no cmov.
688define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, ptr %y) #0 {
689; CHECK-LABEL: test_cmov_memoperand:
690; CHECK:       # %bb.0: # %entry
691; CHECK-NEXT:    movl %edx, %eax
692; CHECK-NEXT:    cmpl %esi, %edi
693; CHECK-NEXT:    ja .LBB7_2
694; CHECK-NEXT:  # %bb.1: # %entry
695; CHECK-NEXT:    movl (%rcx), %eax
696; CHECK-NEXT:  .LBB7_2: # %entry
697; CHECK-NEXT:    retq
698;
699; CHECK-FORCEALL-LABEL: test_cmov_memoperand:
700; CHECK-FORCEALL:       # %bb.0: # %entry
701; CHECK-FORCEALL-NEXT:    movl %edx, %eax
702; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
703; CHECK-FORCEALL-NEXT:    ja .LBB7_2
704; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
705; CHECK-FORCEALL-NEXT:    movl (%rcx), %eax
706; CHECK-FORCEALL-NEXT:  .LBB7_2: # %entry
707; CHECK-FORCEALL-NEXT:    retq
708entry:
709  %cond = icmp ugt i32 %a, %b
710  %load = load i32, ptr %y
  ; The loaded value is the false operand of the select under test.
711  %z = select i1 %cond, i32 %x, i32 %load
712  ret i32 %z
713}
714
715; If a cmov instruction is marked as unpredictable, do not convert it to a branch.
; Same computation as @test_cmov_memoperand, except that the select carries
; !unpredictable metadata. Both llc invocations expect the cmov with a memory
; operand (cmovbel (%rcx), %eax) to be kept and no branch to be emitted.
716define i32 @test_cmov_memoperand_unpredictable(i32 %a, i32 %b, i32 %x, ptr %y) #0 {
717; CHECK-LABEL: test_cmov_memoperand_unpredictable:
718; CHECK:       # %bb.0: # %entry
719; CHECK-NEXT:    movl %edx, %eax
720; CHECK-NEXT:    cmpl %esi, %edi
721; CHECK-NEXT:    cmovbel (%rcx), %eax
722; CHECK-NEXT:    retq
723;
724; CHECK-FORCEALL-LABEL: test_cmov_memoperand_unpredictable:
725; CHECK-FORCEALL:       # %bb.0: # %entry
726; CHECK-FORCEALL-NEXT:    movl %edx, %eax
727; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
728; CHECK-FORCEALL-NEXT:    cmovbel (%rcx), %eax
729; CHECK-FORCEALL-NEXT:    retq
730entry:
731  %cond = icmp ugt i32 %a, %b
732  %load = load i32, ptr %y
  ; The !unpredictable annotation on this select is what this test exercises.
733  %z = select i1 %cond, i32 %x, i32 %load, !unpredictable !0
734  ret i32 %z
735}
736
737; Test that we can convert a group of cmovs where only one has a memory
738; operand.
; Three selects share the condition %a > %b (unsigned) and all take %x as the
; true value; their false values are %a, the i32 loaded from %y.ptr, and %b.
; The function returns the sum of the three results. Both llc invocations
; expect one conditional branch (ja) with the load and the two register moves
; on the fall-through path, and no cmov.
739define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, ptr %y.ptr) #0 {
740; CHECK-LABEL: test_cmov_memoperand_in_group:
741; CHECK:       # %bb.0: # %entry
742; CHECK-NEXT:    movl %edx, %eax
743; CHECK-NEXT:    movl %edx, %r8d
744; CHECK-NEXT:    cmpl %esi, %edi
745; CHECK-NEXT:    ja .LBB9_2
746; CHECK-NEXT:  # %bb.1: # %entry
747; CHECK-NEXT:    movl (%rcx), %edx
748; CHECK-NEXT:    movl %edi, %eax
749; CHECK-NEXT:    movl %esi, %r8d
750; CHECK-NEXT:  .LBB9_2: # %entry
751; CHECK-NEXT:    addl %r8d, %eax
752; CHECK-NEXT:    addl %edx, %eax
753; CHECK-NEXT:    retq
754;
755; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group:
756; CHECK-FORCEALL:       # %bb.0: # %entry
757; CHECK-FORCEALL-NEXT:    movl %edx, %eax
758; CHECK-FORCEALL-NEXT:    movl %edx, %r8d
759; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
760; CHECK-FORCEALL-NEXT:    ja .LBB9_2
761; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
762; CHECK-FORCEALL-NEXT:    movl (%rcx), %edx
763; CHECK-FORCEALL-NEXT:    movl %edi, %eax
764; CHECK-FORCEALL-NEXT:    movl %esi, %r8d
765; CHECK-FORCEALL-NEXT:  .LBB9_2: # %entry
766; CHECK-FORCEALL-NEXT:    addl %r8d, %eax
767; CHECK-FORCEALL-NEXT:    addl %edx, %eax
768; CHECK-FORCEALL-NEXT:    retq
769entry:
770  %cond = icmp ugt i32 %a, %b
771  %y = load i32, ptr %y.ptr
  ; Only the middle select (%z2) has the loaded value as an operand.
772  %z1 = select i1 %cond, i32 %x, i32 %a
773  %z2 = select i1 %cond, i32 %x, i32 %y
774  %z3 = select i1 %cond, i32 %x, i32 %b
775  %s1 = add i32 %z1, %z2
776  %s2 = add i32 %s1, %z3
777  ret i32 %s2
778}
779
780; Same as before but with operands reversed in the select with a load.
; Mirror of @test_cmov_memoperand_in_group with the select operands swapped:
; %a, the i32 loaded from %y.ptr, and %b are the true values, and %x is the
; false value of all three selects. Both llc invocations expect one conditional
; branch with the inverted condition (jbe), the load and the two register moves
; on the fall-through path, and no cmov.
781define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, ptr %y.ptr) #0 {
782; CHECK-LABEL: test_cmov_memoperand_in_group2:
783; CHECK:       # %bb.0: # %entry
784; CHECK-NEXT:    movl %edx, %eax
785; CHECK-NEXT:    movl %edx, %r8d
786; CHECK-NEXT:    cmpl %esi, %edi
787; CHECK-NEXT:    jbe .LBB10_2
788; CHECK-NEXT:  # %bb.1: # %entry
789; CHECK-NEXT:    movl (%rcx), %edx
790; CHECK-NEXT:    movl %edi, %eax
791; CHECK-NEXT:    movl %esi, %r8d
792; CHECK-NEXT:  .LBB10_2: # %entry
793; CHECK-NEXT:    addl %r8d, %eax
794; CHECK-NEXT:    addl %edx, %eax
795; CHECK-NEXT:    retq
796;
797; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group2:
798; CHECK-FORCEALL:       # %bb.0: # %entry
799; CHECK-FORCEALL-NEXT:    movl %edx, %eax
800; CHECK-FORCEALL-NEXT:    movl %edx, %r8d
801; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
802; CHECK-FORCEALL-NEXT:    jbe .LBB10_2
803; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
804; CHECK-FORCEALL-NEXT:    movl (%rcx), %edx
805; CHECK-FORCEALL-NEXT:    movl %edi, %eax
806; CHECK-FORCEALL-NEXT:    movl %esi, %r8d
807; CHECK-FORCEALL-NEXT:  .LBB10_2: # %entry
808; CHECK-FORCEALL-NEXT:    addl %r8d, %eax
809; CHECK-FORCEALL-NEXT:    addl %edx, %eax
810; CHECK-FORCEALL-NEXT:    retq
811entry:
812  %cond = icmp ugt i32 %a, %b
813  %y = load i32, ptr %y.ptr
  ; Here the loaded value is the true operand (of %z1) rather than the false one.
814  %z2 = select i1 %cond, i32 %a, i32 %x
815  %z1 = select i1 %cond, i32 %y, i32 %x
816  %z3 = select i1 %cond, i32 %b, i32 %x
817  %s1 = add i32 %z1, %z2
818  %s2 = add i32 %s1, %z3
819  ret i32 %s2
820}
821
822; Test that we don't convert a group of cmovs with conflicting directions of
823; loads.
;
; %z1 takes its loaded value (%y1) as the false operand, while %z2 takes its
; loaded value (%y2) as the true operand. Both run configurations expect no
; branch: a plain `movl` load, then two `cmoval` instructions, the second of
; which reads memory at (%r8).
824define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, ptr %y1.ptr, ptr %y2.ptr) #0 {
825; CHECK-LABEL: test_cmov_memoperand_conflicting_dir:
826; CHECK:       # %bb.0: # %entry
827; CHECK-NEXT:    cmpl %esi, %edi
828; CHECK-NEXT:    movl (%rcx), %eax
829; CHECK-NEXT:    cmoval %edx, %eax
830; CHECK-NEXT:    cmoval (%r8), %edx
831; CHECK-NEXT:    addl %edx, %eax
832; CHECK-NEXT:    retq
833;
834; CHECK-FORCEALL-LABEL: test_cmov_memoperand_conflicting_dir:
835; CHECK-FORCEALL:       # %bb.0: # %entry
836; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
837; CHECK-FORCEALL-NEXT:    movl (%rcx), %eax
838; CHECK-FORCEALL-NEXT:    cmoval %edx, %eax
839; CHECK-FORCEALL-NEXT:    cmoval (%r8), %edx
840; CHECK-FORCEALL-NEXT:    addl %edx, %eax
841; CHECK-FORCEALL-NEXT:    retq
842entry:
843  %cond = icmp ugt i32 %a, %b  ; unsigned a > b
844  %y1 = load i32, ptr %y1.ptr
845  %y2 = load i32, ptr %y2.ptr
846  %z1 = select i1 %cond, i32 %x, i32 %y1  ; loaded value in the false position
847  %z2 = select i1 %cond, i32 %y2, i32 %x  ; loaded value in the true position
848  %s1 = add i32 %z1, %z2
849  ret i32 %s1
850}
851
852; Test that we can convert a group of cmovs where only one has a memory
853; operand and where that memory operand's registers come from a prior cmov in
854; the group.
;
; %p selects between %x and %y on %cond, %load reads through %p, and %z selects
; between %a and %load on the same %cond. Both run configurations expect one
; `ja` over a fall-through block containing only `movl (%rcx), %eax`.
855define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, ptr %x, ptr %y) #0 {
856; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
857; CHECK:       # %bb.0: # %entry
858; CHECK-NEXT:    movl %edi, %eax
859; CHECK-NEXT:    cmpl %esi, %edi
860; CHECK-NEXT:    ja .LBB12_2
861; CHECK-NEXT:  # %bb.1: # %entry
862; CHECK-NEXT:    movl (%rcx), %eax
863; CHECK-NEXT:  .LBB12_2: # %entry
864; CHECK-NEXT:    retq
865;
866; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
867; CHECK-FORCEALL:       # %bb.0: # %entry
868; CHECK-FORCEALL-NEXT:    movl %edi, %eax
869; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
870; CHECK-FORCEALL-NEXT:    ja .LBB12_2
871; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
872; CHECK-FORCEALL-NEXT:    movl (%rcx), %eax
873; CHECK-FORCEALL-NEXT:  .LBB12_2: # %entry
874; CHECK-FORCEALL-NEXT:    retq
875entry:
876  %cond = icmp ugt i32 %a, %b  ; unsigned a > b
877  %p = select i1 %cond, ptr %x, ptr %y  ; address select, same condition as %z
878  %load = load i32, ptr %p  ; address comes from the select above
879  %z = select i1 %cond, i32 %a, i32 %load  ; false operand is the loaded value
880  ret i32 %z
881}
882
883; Test that we can convert a group of two cmovs with memory operands where one
884; uses the result of the other as part of the address.
;
; %load1 (read from %y) is the false operand of %p; %load2 reads through %p and
; is the false operand of %z. Both run configurations expect one `ja` over a
; fall-through block that chains the two loads: `movq (%rcx), %rax` followed by
; `movl (%rax), %eax`.
885define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, ptr %x, ptr %y) #0 {
886; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
887; CHECK:       # %bb.0: # %entry
888; CHECK-NEXT:    movl %edi, %eax
889; CHECK-NEXT:    cmpl %esi, %edi
890; CHECK-NEXT:    ja .LBB13_2
891; CHECK-NEXT:  # %bb.1: # %entry
892; CHECK-NEXT:    movq (%rcx), %rax
893; CHECK-NEXT:    movl (%rax), %eax
894; CHECK-NEXT:  .LBB13_2: # %entry
895; CHECK-NEXT:    retq
896;
897; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
898; CHECK-FORCEALL:       # %bb.0: # %entry
899; CHECK-FORCEALL-NEXT:    movl %edi, %eax
900; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
901; CHECK-FORCEALL-NEXT:    ja .LBB13_2
902; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
903; CHECK-FORCEALL-NEXT:    movq (%rcx), %rax
904; CHECK-FORCEALL-NEXT:    movl (%rax), %eax
905; CHECK-FORCEALL-NEXT:  .LBB13_2: # %entry
906; CHECK-FORCEALL-NEXT:    retq
907entry:
908  %cond = icmp ugt i32 %a, %b  ; unsigned a > b
909  %load1 = load ptr, ptr %y  ; first load: produces a pointer
910  %p = select i1 %cond, ptr %x, ptr %load1  ; false operand is the first load
911  %load2 = load i32, ptr %p  ; second load: address comes from the select above
912  %z = select i1 %cond, i32 %a, i32 %load2  ; false operand is the second load
913  ret i32 %z
914}
915
916; Test that we can convert a group of cmovs where only one has a memory
917; operand and where that memory operand's registers come from a prior cmov and
918; where that cmov gets *its* input from a prior cmov in the group.
;
; %p is the false operand of %p2, %load reads through %p2, and %load is the
; false operand of %r; all three selects use the same %cond. Both run
; configurations expect one `ja` over a fall-through block containing only
; `movl (%rcx), %eax`.
919define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, ptr %x, ptr %y, ptr %z) #0 {
920; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
921; CHECK:       # %bb.0: # %entry
922; CHECK-NEXT:    movl %edi, %eax
923; CHECK-NEXT:    cmpl %esi, %edi
924; CHECK-NEXT:    ja .LBB14_2
925; CHECK-NEXT:  # %bb.1: # %entry
926; CHECK-NEXT:    movl (%rcx), %eax
927; CHECK-NEXT:  .LBB14_2: # %entry
928; CHECK-NEXT:    retq
929;
930; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
931; CHECK-FORCEALL:       # %bb.0: # %entry
932; CHECK-FORCEALL-NEXT:    movl %edi, %eax
933; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
934; CHECK-FORCEALL-NEXT:    ja .LBB14_2
935; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
936; CHECK-FORCEALL-NEXT:    movl (%rcx), %eax
937; CHECK-FORCEALL-NEXT:  .LBB14_2: # %entry
938; CHECK-FORCEALL-NEXT:    retq
939entry:
940  %cond = icmp ugt i32 %a, %b  ; unsigned a > b
941  %p = select i1 %cond, ptr %x, ptr %y  ; first address select
942  %p2 = select i1 %cond, ptr %z, ptr %p  ; second address select; false operand is %p
943  %load = load i32, ptr %p2  ; address comes from the second select
944  %r = select i1 %cond, i32 %a, i32 %load  ; false operand is the loaded value
945  ret i32 %r
946}
947
948@begin = external global ptr
949@end = external global ptr
950
; Test two pointer selects inside a loop whose results are used as store
; addresses. %dst1 picks between %gep1 and a value reloaded from @begin inside
; the loop; %dst2 picks between %gep2 and %begin, which is loaded once in the
; entry block. Both run configurations expect each select to become a `ja` over
; a one-instruction block: `movq (%rax), %r8` for the first select and
; `movq %rcx, %r8` for the second.
951define void @test_memoperand_loop(i32 %data) #0 {
952; CHECK-LABEL: test_memoperand_loop:
953; CHECK:       # %bb.0: # %entry
954; CHECK-NEXT:    movq begin@GOTPCREL(%rip), %rax
955; CHECK-NEXT:    movq (%rax), %rcx
956; CHECK-NEXT:    movq end@GOTPCREL(%rip), %rdx
957; CHECK-NEXT:    movq (%rdx), %rdx
958; CHECK-NEXT:    xorl %esi, %esi
959; CHECK-NEXT:    movq %rcx, %r8
960; CHECK-NEXT:  .LBB15_1: # %loop.body
961; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
962; CHECK-NEXT:    addq $8, %r8
963; CHECK-NEXT:    cmpq %rdx, %r8
964; CHECK-NEXT:    ja .LBB15_3
965; CHECK-NEXT:  # %bb.2: # %loop.body
966; CHECK-NEXT:    # in Loop: Header=BB15_1 Depth=1
967; CHECK-NEXT:    movq (%rax), %r8
968; CHECK-NEXT:  .LBB15_3: # %loop.body
969; CHECK-NEXT:    # in Loop: Header=BB15_1 Depth=1
970; CHECK-NEXT:    movl %edi, (%r8)
971; CHECK-NEXT:    addq $8, %r8
972; CHECK-NEXT:    cmpq %rdx, %r8
973; CHECK-NEXT:    ja .LBB15_5
974; CHECK-NEXT:  # %bb.4: # %loop.body
975; CHECK-NEXT:    # in Loop: Header=BB15_1 Depth=1
976; CHECK-NEXT:    movq %rcx, %r8
977; CHECK-NEXT:  .LBB15_5: # %loop.body
978; CHECK-NEXT:    # in Loop: Header=BB15_1 Depth=1
979; CHECK-NEXT:    movl %edi, (%r8)
980; CHECK-NEXT:    addl $1, %esi
981; CHECK-NEXT:    cmpl $1024, %esi # imm = 0x400
982; CHECK-NEXT:    jl .LBB15_1
983; CHECK-NEXT:  # %bb.6: # %exit
984; CHECK-NEXT:    retq
985;
986; CHECK-FORCEALL-LABEL: test_memoperand_loop:
987; CHECK-FORCEALL:       # %bb.0: # %entry
988; CHECK-FORCEALL-NEXT:    movq begin@GOTPCREL(%rip), %rax
989; CHECK-FORCEALL-NEXT:    movq (%rax), %rcx
990; CHECK-FORCEALL-NEXT:    movq end@GOTPCREL(%rip), %rdx
991; CHECK-FORCEALL-NEXT:    movq (%rdx), %rdx
992; CHECK-FORCEALL-NEXT:    xorl %esi, %esi
993; CHECK-FORCEALL-NEXT:    movq %rcx, %r8
994; CHECK-FORCEALL-NEXT:  .LBB15_1: # %loop.body
995; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
996; CHECK-FORCEALL-NEXT:    addq $8, %r8
997; CHECK-FORCEALL-NEXT:    cmpq %rdx, %r8
998; CHECK-FORCEALL-NEXT:    ja .LBB15_3
999; CHECK-FORCEALL-NEXT:  # %bb.2: # %loop.body
1000; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB15_1 Depth=1
1001; CHECK-FORCEALL-NEXT:    movq (%rax), %r8
1002; CHECK-FORCEALL-NEXT:  .LBB15_3: # %loop.body
1003; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB15_1 Depth=1
1004; CHECK-FORCEALL-NEXT:    movl %edi, (%r8)
1005; CHECK-FORCEALL-NEXT:    addq $8, %r8
1006; CHECK-FORCEALL-NEXT:    cmpq %rdx, %r8
1007; CHECK-FORCEALL-NEXT:    ja .LBB15_5
1008; CHECK-FORCEALL-NEXT:  # %bb.4: # %loop.body
1009; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB15_1 Depth=1
1010; CHECK-FORCEALL-NEXT:    movq %rcx, %r8
1011; CHECK-FORCEALL-NEXT:  .LBB15_5: # %loop.body
1012; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB15_1 Depth=1
1013; CHECK-FORCEALL-NEXT:    movl %edi, (%r8)
1014; CHECK-FORCEALL-NEXT:    addl $1, %esi
1015; CHECK-FORCEALL-NEXT:    cmpl $1024, %esi # imm = 0x400
1016; CHECK-FORCEALL-NEXT:    jl .LBB15_1
1017; CHECK-FORCEALL-NEXT:  # %bb.6: # %exit
1018; CHECK-FORCEALL-NEXT:    retq
1019entry:
1020  %begin = load ptr, ptr @begin, align 8  ; loaded once, before the loop
1021  %end = load ptr, ptr @end, align 8  ; loaded once, before the loop
1022  br label %loop.body
1023loop.body:
1024  %phi.iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ]
1025  %phi.ptr = phi ptr [ %begin, %entry ], [ %dst2, %loop.body ]  ; starts at %begin, then carries %dst2 around the back edge
1026  %gep1 = getelementptr inbounds i32, ptr%phi.ptr, i64 2  ; advance by two i32 elements
1027  %cmp1 = icmp ugt ptr %gep1, %end  ; unsigned pointer compare against %end
1028  %begin_dup = load ptr, ptr @begin, align 8  ; reloads @begin inside the loop body
1029  %dst1 = select i1 %cmp1, ptr %gep1, ptr %begin_dup  ; false operand is the in-loop load
1030  store i32 %data, ptr%dst1, align 4
1031  %gep2 = getelementptr inbounds i32, ptr%dst1, i64 2  ; advance by two i32 elements from %dst1
1032  %cmp2 = icmp ugt ptr %gep2, %end  ; unsigned pointer compare against %end
1033  %dst2 = select i1 %cmp2, ptr %gep2, ptr %begin  ; false operand is the value loaded in entry
1034  store i32 %data, ptr%dst2, align 4
1035  %iv.next = add i32 %phi.iv, 1
1036  %cond = icmp slt i32 %iv.next, 1024  ; continue while the incremented counter is below 1024
1037  br i1 %cond, label %loop.body, label %exit
1038exit:
1039  ret void
1040}
1041
1042attributes #0 = {"target-cpu"="x86-64" "tune-cpu"="x86-64"}
1043!0 = !{}
1044