; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s
; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -x86-cmov-converter-force-all=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=CHECK-FORCEALL

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; This test checks that the x86-cmov-converter optimization transforms CMOV
;; instructions into branches when it is profitable.
;; There are 5 cases below:
;;   1. CmovInHotPath:
;;        CMOV depends on the condition and it is in the hot path.
;;        Thus, it is worth transforming.
;;
;;   2. CmovNotInHotPath:
;;        Similar to test (1), except that CMOV is not in the hot path.
;;        Thus, it is not worth transforming.
;;
;;   3. MaxIndex:
;;        Maximum calculation algorithm that is looking for the max index;
;;        calculating CMOV value is cheaper than calculating CMOV condition.
;;        Thus, it is worth transforming.
;;
;;   4. MaxValue:
;;        Maximum calculation algorithm that is looking for the max value;
;;        calculating CMOV value is not cheaper than calculating CMOV condition.
;;        Thus, it is not worth transforming.
;;
;;   5. BinarySearch:
;;        Usually, the CMOV condition in a binary search is not predictable.
;;        Thus, it is not worth transforming.
;;
;; Test was created using the following command line:
;; > clang -S -O2 -m64 -fno-vectorize -fno-unroll-loops -emit-llvm foo.c -o -
;; Where foo.c is:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;void CmovInHotPath(int n, int a, int b, int *c, int *d) {
;;  for (int i = 0; i < n; i++) {
;;    int t = c[i] + 1;
;;    if (c[i] * a > b)
;;      t = 10;
;;    c[i] = (c[i] + 1) * t;
;;  }
;;}
;;
;;
;;void CmovNotInHotPath(int n, int a, int b, int *c, int *d) {
;;  for (int i = 0; i < n; i++) {
;;    int t = c[i];
;;    if (c[i] * a > b)
;;      t = 10;
;;    c[i] = t;
;;    d[i] /= b;
;;  }
;;}
;;
;;
;;int MaxIndex(int n, int *a) {
;;  int t = 0;
;;  for (int i = 1; i < n; i++) {
;;    if (a[i] > a[t])
;;      t = i;
;;  }
;;  return t;
;;}
;;
;;
;;int MaxValue(int n, int *a) {
;;  int t = a[0];
;;  for (int i = 1; i < n; i++) {
;;    if (a[i] > t)
;;      t = a[i];
;;  }
;;  return t;
;;}
;;
;;typedef struct Node Node;
;;struct Node {
;;  unsigned Val;
;;  Node *Right;
;;  Node *Left;
;;};
;;
;;unsigned BinarySearch(unsigned Mask, Node *Curr, Node *Next) {
;;  while (Curr->Val > Next->Val) {
;;    Curr = Next;
;;    if (Mask & (0x1 << Curr->Val))
;;      Next = Curr->Right;
;;    else
;;      Next = Curr->Left;
;;  }
;;  return Curr->Val;
;;}
;;
;;
;;void SmallGainPerLoop(int n, int a, int b, int *c, int *d) {
;;  for (int i = 0; i < n; i++) {
;;    int t = c[i];
;;    if (c[i] * a > b)
;;      t = 10;
;;    c[i] = t;
;;  }
;;}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%struct.Node = type { i32, ptr, ptr }

; The select result feeds the multiply that is stored back to c[i]. Both the
; default run and the force-all run expect the select to be lowered as a jg
; branch around a register move instead of a cmov.
define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, ptr nocapture %c, ptr nocapture readnone %d) #0 {
; CHECK-LABEL: CmovInHotPath:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    jle .LBB0_5
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    xorl %edi, %edi
; CHECK-NEXT:  .LBB0_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movl (%rcx,%rdi,4), %r10d
; CHECK-NEXT:    leal 1(%r10), %r8d
; CHECK-NEXT:    imull %esi, %r10d
; CHECK-NEXT:    movl $10, %r9d
; CHECK-NEXT:    cmpl %edx, %r10d
; CHECK-NEXT:    jg .LBB0_4
; CHECK-NEXT:  # %bb.3: # %for.body
; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT:    movl %r8d, %r9d
; CHECK-NEXT:  .LBB0_4: # %for.body
; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT:    imull %r8d, %r9d
; CHECK-NEXT:    movl %r9d, (%rcx,%rdi,4)
; CHECK-NEXT:    addq $1, %rdi
; CHECK-NEXT:    cmpq %rdi, %rax
; CHECK-NEXT:    jne .LBB0_2
; CHECK-NEXT:  .LBB0_5: # %for.cond.cleanup
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: CmovInHotPath:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    testl %edi, %edi
; CHECK-FORCEALL-NEXT:    jle .LBB0_5
; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-FORCEALL-NEXT:    movl %edi, %eax
; CHECK-FORCEALL-NEXT:    xorl %edi, %edi
; CHECK-FORCEALL-NEXT:  .LBB0_2: # %for.body
; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-FORCEALL-NEXT:    movl (%rcx,%rdi,4), %r10d
; CHECK-FORCEALL-NEXT:    leal 1(%r10), %r8d
; CHECK-FORCEALL-NEXT:    imull %esi, %r10d
; CHECK-FORCEALL-NEXT:    movl $10, %r9d
; CHECK-FORCEALL-NEXT:    cmpl %edx, %r10d
; CHECK-FORCEALL-NEXT:    jg .LBB0_4
; CHECK-FORCEALL-NEXT:  # %bb.3: # %for.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB0_2 Depth=1
; CHECK-FORCEALL-NEXT:    movl %r8d, %r9d
; CHECK-FORCEALL-NEXT:  .LBB0_4: # %for.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB0_2 Depth=1
; CHECK-FORCEALL-NEXT:    imull %r8d, %r9d
; CHECK-FORCEALL-NEXT:    movl %r9d, (%rcx,%rdi,4)
; CHECK-FORCEALL-NEXT:    addq $1, %rdi
; CHECK-FORCEALL-NEXT:    cmpq %rdi, %rax
; CHECK-FORCEALL-NEXT:    jne .LBB0_2
; CHECK-FORCEALL-NEXT:  .LBB0_5: # %for.cond.cleanup
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cmp14 = icmp sgt i32 %n, 0
  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %add = add nsw i32 %0, 1
  %mul = mul nsw i32 %0, %a
  %cmp3 = icmp sgt i32 %mul, %b
  %. = select i1 %cmp3, i32 10, i32 %add
  %mul7 = mul nsw i32 %., %add
  store i32 %mul7, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Same select as above, but the loop body also contains an sdiv on d[i]. The
; default run expects the cmovgl to be kept; the force-all run expects it to be
; replaced by a jg branch.
define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, ptr nocapture %c, ptr nocapture %d) #0 {
; CHECK-LABEL: CmovNotInHotPath:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    jle .LBB1_3
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    movl %edx, %r9d
; CHECK-NEXT:    movl %edi, %edi
; CHECK-NEXT:    xorl %r10d, %r10d
; CHECK-NEXT:    movl $10, %r11d
; CHECK-NEXT:  .LBB1_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movl (%rcx,%r10,4), %eax
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    imull %esi, %edx
; CHECK-NEXT:    cmpl %r9d, %edx
; CHECK-NEXT:    cmovgl %r11d, %eax
; CHECK-NEXT:    movl %eax, (%rcx,%r10,4)
; CHECK-NEXT:    movl (%r8,%r10,4), %eax
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %r9d
; CHECK-NEXT:    movl %eax, (%r8,%r10,4)
; CHECK-NEXT:    addq $1, %r10
; CHECK-NEXT:    cmpq %r10, %rdi
; CHECK-NEXT:    jne .LBB1_2
; CHECK-NEXT:  .LBB1_3: # %for.cond.cleanup
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: CmovNotInHotPath:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    testl %edi, %edi
; CHECK-FORCEALL-NEXT:    jle .LBB1_5
; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-FORCEALL-NEXT:    movl %edx, %r9d
; CHECK-FORCEALL-NEXT:    movl %edi, %edi
; CHECK-FORCEALL-NEXT:    xorl %r10d, %r10d
; CHECK-FORCEALL-NEXT:  .LBB1_2: # %for.body
; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-FORCEALL-NEXT:    movl (%rcx,%r10,4), %eax
; CHECK-FORCEALL-NEXT:    movl %eax, %r11d
; CHECK-FORCEALL-NEXT:    imull %esi, %r11d
; CHECK-FORCEALL-NEXT:    movl $10, %edx
; CHECK-FORCEALL-NEXT:    cmpl %r9d, %r11d
; CHECK-FORCEALL-NEXT:    jg .LBB1_4
; CHECK-FORCEALL-NEXT:  # %bb.3: # %for.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB1_2 Depth=1
; CHECK-FORCEALL-NEXT:    movl %eax, %edx
; CHECK-FORCEALL-NEXT:  .LBB1_4: # %for.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB1_2 Depth=1
; CHECK-FORCEALL-NEXT:    movl %edx, (%rcx,%r10,4)
; CHECK-FORCEALL-NEXT:    movl (%r8,%r10,4), %eax
; CHECK-FORCEALL-NEXT:    cltd
; CHECK-FORCEALL-NEXT:    idivl %r9d
; CHECK-FORCEALL-NEXT:    movl %eax, (%r8,%r10,4)
; CHECK-FORCEALL-NEXT:    addq $1, %r10
; CHECK-FORCEALL-NEXT:    cmpq %r10, %rdi
; CHECK-FORCEALL-NEXT:    jne .LBB1_2
; CHECK-FORCEALL-NEXT:  .LBB1_5: # %for.cond.cleanup
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cmp18 = icmp sgt i32 %n, 0
  br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %mul = mul nsw i32 %0, %a
  %cmp3 = icmp sgt i32 %mul, %b
  %. = select i1 %cmp3, i32 10, i32 %0
  store i32 %., ptr %arrayidx, align 4
  %arrayidx7 = getelementptr inbounds i32, ptr %d, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx7, align 4
  %div = sdiv i32 %1, %b
  store i32 %div, ptr %arrayidx7, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; The select picks the running max index, and that index is used for the load
; compared in the next iteration. Both runs expect a jg branch instead of a
; cmov.
define i32 @MaxIndex(i32 %n, ptr nocapture readonly %a) #0 {
; CHECK-LABEL: MaxIndex:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    cmpl $2, %edi
; CHECK-NEXT:    jl .LBB2_5
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    xorl %edi, %edi
; CHECK-NEXT:    movl $1, %edx
; CHECK-NEXT:  .LBB2_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movl (%rsi,%rdx,4), %r8d
; CHECK-NEXT:    movslq %edi, %r9
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    cmpl (%rsi,%r9,4), %r8d
; CHECK-NEXT:    jg .LBB2_4
; CHECK-NEXT:  # %bb.3: # %for.body
; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:  .LBB2_4: # %for.body
; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
; CHECK-NEXT:    addq $1, %rdx
; CHECK-NEXT:    movl %eax, %edi
; CHECK-NEXT:    cmpq %rdx, %rcx
; CHECK-NEXT:    jne .LBB2_2
; CHECK-NEXT:  .LBB2_5: # %for.cond.cleanup
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: MaxIndex:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    xorl %eax, %eax
; CHECK-FORCEALL-NEXT:    cmpl $2, %edi
; CHECK-FORCEALL-NEXT:    jl .LBB2_5
; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-FORCEALL-NEXT:    movl %edi, %ecx
; CHECK-FORCEALL-NEXT:    xorl %edi, %edi
; CHECK-FORCEALL-NEXT:    movl $1, %edx
; CHECK-FORCEALL-NEXT:  .LBB2_2: # %for.body
; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-FORCEALL-NEXT:    movl (%rsi,%rdx,4), %r8d
; CHECK-FORCEALL-NEXT:    movslq %edi, %r9
; CHECK-FORCEALL-NEXT:    movl %edx, %eax
; CHECK-FORCEALL-NEXT:    cmpl (%rsi,%r9,4), %r8d
; CHECK-FORCEALL-NEXT:    jg .LBB2_4
; CHECK-FORCEALL-NEXT:  # %bb.3: # %for.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB2_2 Depth=1
; CHECK-FORCEALL-NEXT:    movl %edi, %eax
; CHECK-FORCEALL-NEXT:  .LBB2_4: # %for.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB2_2 Depth=1
; CHECK-FORCEALL-NEXT:    addq $1, %rdx
; CHECK-FORCEALL-NEXT:    movl %eax, %edi
; CHECK-FORCEALL-NEXT:    cmpq %rdx, %rcx
; CHECK-FORCEALL-NEXT:    jne .LBB2_2
; CHECK-FORCEALL-NEXT:  .LBB2_5: # %for.cond.cleanup
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cmp14 = icmp sgt i32 %n, 1
  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  %t.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.t.0, %for.body ]
  ret i32 %t.0.lcssa

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
  %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %idxprom1 = sext i32 %t.015 to i64
  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %idxprom1
  %1 = load i32, ptr %arrayidx2, align 4
  %cmp3 = icmp sgt i32 %0, %1
  %2 = trunc i64 %indvars.iv to i32
  %i.0.t.0 = select i1 %cmp3, i32 %2, i32 %t.015
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; If cmov instruction is marked as unpredictable, do not convert it to branch.
; Same loop as @MaxIndex, except that the select carries !unpredictable
; metadata. Both the default run and the force-all run expect the cmovgl to
; remain in the loop.
define i32 @MaxIndex_unpredictable(i32 %n, ptr nocapture readonly %a) #0 {
; CHECK-LABEL: MaxIndex_unpredictable:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    cmpl $2, %edi
; CHECK-NEXT:    jl .LBB3_3
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movl $1, %edx
; CHECK-NEXT:  .LBB3_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movl (%rsi,%rdx,4), %edi
; CHECK-NEXT:    cltq
; CHECK-NEXT:    cmpl (%rsi,%rax,4), %edi
; CHECK-NEXT:    cmovgl %edx, %eax
; CHECK-NEXT:    addq $1, %rdx
; CHECK-NEXT:    cmpq %rdx, %rcx
; CHECK-NEXT:    jne .LBB3_2
; CHECK-NEXT:  .LBB3_3: # %for.cond.cleanup
; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: MaxIndex_unpredictable:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    xorl %eax, %eax
; CHECK-FORCEALL-NEXT:    cmpl $2, %edi
; CHECK-FORCEALL-NEXT:    jl .LBB3_3
; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-FORCEALL-NEXT:    movl %edi, %ecx
; CHECK-FORCEALL-NEXT:    xorl %eax, %eax
; CHECK-FORCEALL-NEXT:    movl $1, %edx
; CHECK-FORCEALL-NEXT:  .LBB3_2: # %for.body
; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-FORCEALL-NEXT:    movl (%rsi,%rdx,4), %edi
; CHECK-FORCEALL-NEXT:    cltq
; CHECK-FORCEALL-NEXT:    cmpl (%rsi,%rax,4), %edi
; CHECK-FORCEALL-NEXT:    cmovgl %edx, %eax
; CHECK-FORCEALL-NEXT:    addq $1, %rdx
; CHECK-FORCEALL-NEXT:    cmpq %rdx, %rcx
; CHECK-FORCEALL-NEXT:    jne .LBB3_2
; CHECK-FORCEALL-NEXT:  .LBB3_3: # %for.cond.cleanup
; CHECK-FORCEALL-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cmp14 = icmp sgt i32 %n, 1
  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  %t.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.t.0, %for.body ]
  ret i32 %t.0.lcssa

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
  %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %idxprom1 = sext i32 %t.015 to i64
  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %idxprom1
  %1 = load i32, ptr %arrayidx2, align 4
  %cmp3 = icmp sgt i32 %0, %1
  %2 = trunc i64 %indvars.iv to i32
  %i.0.t.0 = select i1 %cmp3, i32 %2, i32 %t.015, !unpredictable !0
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; The select picks the running max value, which is also an operand of the
; compare. The default run expects the cmovgl to be kept; the force-all run
; expects a jg branch instead.
define i32 @MaxValue(i32 %n, ptr nocapture readonly %a) #0 {
; CHECK-LABEL: MaxValue:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl (%rsi), %eax
; CHECK-NEXT:    cmpl $2, %edi
; CHECK-NEXT:    jl .LBB4_3
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    movl $1, %edx
; CHECK-NEXT:  .LBB4_2: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movl (%rsi,%rdx,4), %edi
; CHECK-NEXT:    cmpl %eax, %edi
; CHECK-NEXT:    cmovgl %edi, %eax
; CHECK-NEXT:    addq $1, %rdx
; CHECK-NEXT:    cmpq %rdx, %rcx
; CHECK-NEXT:    jne .LBB4_2
; CHECK-NEXT:  .LBB4_3: # %for.cond.cleanup
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: MaxValue:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movl (%rsi), %r8d
; CHECK-FORCEALL-NEXT:    cmpl $2, %edi
; CHECK-FORCEALL-NEXT:    jge .LBB4_3
; CHECK-FORCEALL-NEXT:  # %bb.1:
; CHECK-FORCEALL-NEXT:    movl %r8d, %eax
; CHECK-FORCEALL-NEXT:  .LBB4_2: # %for.cond.cleanup
; CHECK-FORCEALL-NEXT:    retq
; CHECK-FORCEALL-NEXT:  .LBB4_3: # %for.body.preheader
; CHECK-FORCEALL-NEXT:    movl %edi, %ecx
; CHECK-FORCEALL-NEXT:    movl $1, %edx
; CHECK-FORCEALL-NEXT:  .LBB4_4: # %for.body
; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-FORCEALL-NEXT:    movl (%rsi,%rdx,4), %eax
; CHECK-FORCEALL-NEXT:    cmpl %r8d, %eax
; CHECK-FORCEALL-NEXT:    jg .LBB4_6
; CHECK-FORCEALL-NEXT:  # %bb.5: # %for.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB4_4 Depth=1
; CHECK-FORCEALL-NEXT:    movl %r8d, %eax
; CHECK-FORCEALL-NEXT:  .LBB4_6: # %for.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB4_4 Depth=1
; CHECK-FORCEALL-NEXT:    addq $1, %rdx
; CHECK-FORCEALL-NEXT:    movl %eax, %r8d
; CHECK-FORCEALL-NEXT:    cmpq %rdx, %rcx
; CHECK-FORCEALL-NEXT:    je .LBB4_2
; CHECK-FORCEALL-NEXT:    jmp .LBB4_4
entry:
  %0 = load i32, ptr %a, align 4
  %cmp13 = icmp sgt i32 %n, 1
  br i1 %cmp13, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  %t.0.lcssa = phi i32 [ %0, %entry ], [ %.t.0, %for.body ]
  ret i32 %t.0.lcssa

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
  %t.014 = phi i32 [ %.t.0, %for.body ], [ %0, %for.body.preheader ]
  %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx1, align 4
  %cmp2 = icmp sgt i32 %1, %t.014
  %.t.0 = select i1 %cmp2, i32 %1, i32 %t.014
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; The select chooses between the addresses of the Left and Right fields. In
; both runs the expected code contains neither a cmov nor an added branch: the
; choice is materialized with setae and folded into the indexed load.
define i32 @BinarySearch(i32 %Mask, ptr nocapture readonly %Curr, ptr nocapture readonly %Next) #0 {
; CHECK-LABEL: BinarySearch:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl (%rsi), %eax
; CHECK-NEXT:    jmp .LBB5_2
; CHECK-NEXT:  .LBB5_1: # %while.body
; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    btl %eax, %edi
; CHECK-NEXT:    setae %cl
; CHECK-NEXT:    movq 8(%rdx,%rcx,8), %rdx
; CHECK-NEXT:  .LBB5_2: # %while.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movl (%rdx), %ecx
; CHECK-NEXT:    cmpl %ecx, %eax
; CHECK-NEXT:    ja .LBB5_1
; CHECK-NEXT:  # %bb.3: # %while.end
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: BinarySearch:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movl (%rsi), %eax
; CHECK-FORCEALL-NEXT:    jmp .LBB5_2
; CHECK-FORCEALL-NEXT:  .LBB5_1: # %while.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB5_2 Depth=1
; CHECK-FORCEALL-NEXT:    movl %ecx, %eax
; CHECK-FORCEALL-NEXT:    xorl %ecx, %ecx
; CHECK-FORCEALL-NEXT:    btl %eax, %edi
; CHECK-FORCEALL-NEXT:    setae %cl
; CHECK-FORCEALL-NEXT:    movq 8(%rdx,%rcx,8), %rdx
; CHECK-FORCEALL-NEXT:  .LBB5_2: # %while.body
; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-FORCEALL-NEXT:    movl (%rdx), %ecx
; CHECK-FORCEALL-NEXT:    cmpl %ecx, %eax
; CHECK-FORCEALL-NEXT:    ja .LBB5_1
; CHECK-FORCEALL-NEXT:  # %bb.3: # %while.end
; CHECK-FORCEALL-NEXT:    retq
entry:
  %0 = load i32, ptr %Curr, align 8
  %1 = load i32, ptr %Next, align 8
  %cmp10 = icmp ugt i32 %0, %1
  br i1 %cmp10, label %while.body, label %while.end

while.body:                                       ; preds = %entry, %while.body
  %2 = phi i32 [ %4, %while.body ], [ %1, %entry ]
  %Next.addr.011 = phi ptr [ %3, %while.body ], [ %Next, %entry ]
  %shl = shl i32 1, %2
  %and = and i32 %shl, %Mask
  %tobool = icmp eq i32 %and, 0
  %Left = getelementptr inbounds %struct.Node, ptr %Next.addr.011, i64 0, i32 2
  %Right = getelementptr inbounds %struct.Node, ptr %Next.addr.011, i64 0, i32 1
  %Left.sink = select i1 %tobool, ptr %Left, ptr %Right
  %3 = load ptr, ptr %Left.sink, align 8
  %4 = load i32, ptr %3, align 8
  %cmp = icmp ugt i32 %2, %4
  br i1 %cmp, label %while.body, label %while.end

while.end:                                        ; preds = %while.body, %entry
  %.lcssa = phi i32 [ %0, %entry ], [ %2, %while.body ]
  ret i32 %.lcssa
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The following test checks that x86-cmov-converter optimization transforms
;; CMOV instructions into branches correctly.
;;
;; MBB:
;;   cond = cmp ...
;;   v1 = CMOVgt t1, f1, cond
;;   v2 = CMOVle s1, f2, cond
;;
;; Where: t1 = 11, f1 = 22, f2 = a
;;
;; After CMOV transformation
;; -------------------------
;; MBB:
;;   cond = cmp ...
;;   ja %SinkMBB
;;
;; FalseMBB:
;;   jmp %SinkMBB
;;
;; SinkMBB:
;;   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
;;   %v2 = phi[%f1, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
;;                                          ; true-value with false-value
;;                                          ; Phi instruction cannot use
;;                                          ; previous Phi instruction result
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

define void @Transform(ptr%arr, ptr%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 {
; CHECK-LABEL: Transform:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movb $1, %al
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    jne .LBB6_5
; CHECK-NEXT:  # %bb.1: # %while.body.preheader
; CHECK-NEXT:    movl %edx, %ecx
; CHECK-NEXT:    xorl %esi, %esi
; CHECK-NEXT:  .LBB6_2: # %while.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movslq %esi, %rsi
; CHECK-NEXT:    movl (%rdi,%rsi,4), %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    divl %ecx
; CHECK-NEXT:    movl %eax, %edx
; CHECK-NEXT:    movl $11, %eax
; CHECK-NEXT:    movl %ecx, %r8d
; CHECK-NEXT:    cmpl %ecx, %edx
; CHECK-NEXT:    ja .LBB6_4
; CHECK-NEXT:  # %bb.3: # %while.body
; CHECK-NEXT:    # in Loop: Header=BB6_2 Depth=1
; CHECK-NEXT:    movl $22, %eax
; CHECK-NEXT:    movl $22, %r8d
; CHECK-NEXT:  .LBB6_4: # %while.body
; CHECK-NEXT:    # in Loop: Header=BB6_2 Depth=1
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    divl %r8d
; CHECK-NEXT:    movl %edx, (%rdi,%rsi,4)
; CHECK-NEXT:    addl $1, %esi
; CHECK-NEXT:    cmpl %r9d, %esi
; CHECK-NEXT:    ja .LBB6_2
; CHECK-NEXT:  .LBB6_5: # %while.end
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: Transform:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movb $1, %al
; CHECK-FORCEALL-NEXT:    testb %al, %al
; CHECK-FORCEALL-NEXT:    jne .LBB6_5
; CHECK-FORCEALL-NEXT:  # %bb.1: # %while.body.preheader
; CHECK-FORCEALL-NEXT:    movl %edx, %ecx
; CHECK-FORCEALL-NEXT:    xorl %esi, %esi
; CHECK-FORCEALL-NEXT:  .LBB6_2: # %while.body
; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-FORCEALL-NEXT:    movslq %esi, %rsi
; CHECK-FORCEALL-NEXT:    movl (%rdi,%rsi,4), %eax
; CHECK-FORCEALL-NEXT:    xorl %edx, %edx
; CHECK-FORCEALL-NEXT:    divl %ecx
; CHECK-FORCEALL-NEXT:    movl %eax, %edx
; CHECK-FORCEALL-NEXT:    movl $11, %eax
; CHECK-FORCEALL-NEXT:    movl %ecx, %r8d
; CHECK-FORCEALL-NEXT:    cmpl %ecx, %edx
; CHECK-FORCEALL-NEXT:    ja .LBB6_4
; CHECK-FORCEALL-NEXT:  # %bb.3: # %while.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB6_2 Depth=1
; CHECK-FORCEALL-NEXT:    movl $22, %eax
; CHECK-FORCEALL-NEXT:    movl $22, %r8d
; CHECK-FORCEALL-NEXT:  .LBB6_4: # %while.body
; CHECK-FORCEALL-NEXT:    # in Loop: Header=BB6_2 Depth=1
; CHECK-FORCEALL-NEXT:    xorl %edx, %edx
; CHECK-FORCEALL-NEXT:    divl %r8d
; CHECK-FORCEALL-NEXT:    movl %edx, (%rdi,%rsi,4)
; CHECK-FORCEALL-NEXT:    addl $1, %esi
; CHECK-FORCEALL-NEXT:    cmpl %r9d, %esi
; CHECK-FORCEALL-NEXT:    ja .LBB6_2
; CHECK-FORCEALL-NEXT:  .LBB6_5: # %while.end
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cmp10 = icmp ugt i32 0, %n
  br i1 %cmp10, label %while.body, label %while.end

while.body:                                       ; preds = %entry, %while.body
  %i = phi i32 [ %i_inc, %while.body ], [ 0, %entry ]
  %arr_i = getelementptr inbounds i32, ptr %arr, i32 %i
  %x = load i32, ptr %arr_i, align 4
  %div = udiv i32 %x, %a
  %cond = icmp ugt i32 %div, %a
  %condOpp = icmp ule i32 %div, %a
  %s1 = select i1 %cond, i32 11, i32 22
  %s2 = select i1 %condOpp, i32 %s1, i32 %a
  %sum = urem i32 %s1, %s2
  store i32 %sum, ptr %arr_i, align 4
  %i_inc = add i32 %i, 1
  %cmp = icmp ugt i32 %i_inc, %n
  br i1 %cmp, label %while.body, label %while.end

while.end:                                        ; preds = %while.body, %entry
  ret void
}

; Test that we always will convert a cmov with a memory operand into a branch,
; even outside of a loop.
define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, ptr %y) #0 {
; CHECK-LABEL: test_cmov_memoperand:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    cmpl %esi, %edi
; CHECK-NEXT:    ja .LBB7_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movl (%rcx), %eax
; CHECK-NEXT:  .LBB7_2: # %entry
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movl %edx, %eax
; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
; CHECK-FORCEALL-NEXT:    ja .LBB7_2
; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
; CHECK-FORCEALL-NEXT:    movl (%rcx), %eax
; CHECK-FORCEALL-NEXT:  .LBB7_2: # %entry
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cond = icmp ugt i32 %a, %b
  %load = load i32, ptr %y
  %z = select i1 %cond, i32 %x, i32 %load
  ret i32 %z
}

; If cmov instruction is marked as unpredictable, do not convert it to branch.
; The select carries !unpredictable metadata; both the default run and the
; force-all run expect the load to stay folded into cmovbel.
define i32 @test_cmov_memoperand_unpredictable(i32 %a, i32 %b, i32 %x, ptr %y) #0 {
; CHECK-LABEL: test_cmov_memoperand_unpredictable:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    cmpl %esi, %edi
; CHECK-NEXT:    cmovbel (%rcx), %eax
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_unpredictable:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movl %edx, %eax
; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
; CHECK-FORCEALL-NEXT:    cmovbel (%rcx), %eax
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cond = icmp ugt i32 %a, %b
  %load = load i32, ptr %y
  %z = select i1 %cond, i32 %x, i32 %load, !unpredictable !0
  ret i32 %z
}

; Test that we can convert a group of cmovs where only one has a memory
; operand.
define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, ptr %y.ptr) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    movl %edx, %r8d
; CHECK-NEXT:    cmpl %esi, %edi
; CHECK-NEXT:    ja .LBB9_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movl (%rcx), %edx
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    movl %esi, %r8d
; CHECK-NEXT:  .LBB9_2: # %entry
; CHECK-NEXT:    addl %r8d, %eax
; CHECK-NEXT:    addl %edx, %eax
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movl %edx, %eax
; CHECK-FORCEALL-NEXT:    movl %edx, %r8d
; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
; CHECK-FORCEALL-NEXT:    ja .LBB9_2
; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
; CHECK-FORCEALL-NEXT:    movl (%rcx), %edx
; CHECK-FORCEALL-NEXT:    movl %edi, %eax
; CHECK-FORCEALL-NEXT:    movl %esi, %r8d
; CHECK-FORCEALL-NEXT:  .LBB9_2: # %entry
; CHECK-FORCEALL-NEXT:    addl %r8d, %eax
; CHECK-FORCEALL-NEXT:    addl %edx, %eax
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cond = icmp ugt i32 %a, %b
  %y = load i32, ptr %y.ptr
  %z1 = select i1 %cond, i32 %x, i32 %a
  %z2 = select i1 %cond, i32 %x, i32 %y
  %z3 = select i1 %cond, i32 %x, i32 %b
  %s1 = add i32 %z1, %z2
  %s2 = add i32 %s1, %z3
  ret i32 %s2
}

; Same as before but with operands reversed in the select with a load.
define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, ptr %y.ptr) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    movl %edx, %r8d
; CHECK-NEXT:    cmpl %esi, %edi
; CHECK-NEXT:    jbe .LBB10_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    movl (%rcx), %edx
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    movl %esi, %r8d
; CHECK-NEXT:  .LBB10_2: # %entry
; CHECK-NEXT:    addl %r8d, %eax
; CHECK-NEXT:    addl %edx, %eax
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group2:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    movl %edx, %eax
; CHECK-FORCEALL-NEXT:    movl %edx, %r8d
; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
; CHECK-FORCEALL-NEXT:    jbe .LBB10_2
; CHECK-FORCEALL-NEXT:  # %bb.1: # %entry
; CHECK-FORCEALL-NEXT:    movl (%rcx), %edx
; CHECK-FORCEALL-NEXT:    movl %edi, %eax
; CHECK-FORCEALL-NEXT:    movl %esi, %r8d
; CHECK-FORCEALL-NEXT:  .LBB10_2: # %entry
; CHECK-FORCEALL-NEXT:    addl %r8d, %eax
; CHECK-FORCEALL-NEXT:    addl %edx, %eax
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cond = icmp ugt i32 %a, %b
  %y = load i32, ptr %y.ptr
  %z2 = select i1 %cond, i32 %a, i32 %x
  %z1 = select i1 %cond, i32 %y, i32 %x
  %z3 = select i1 %cond, i32 %b, i32 %x
  %s1 = add i32 %z1, %z2
  %s2 = add i32 %s1, %z3
  ret i32 %s2
}

; Test that we don't convert a group of cmovs with conflicting directions of
; loads.
; %y1 is the false operand of the first select while %y2 is the true operand
; of the second. Both the default run and the force-all run expect the two
; cmoval instructions to remain, one of them with a folded load.
define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, ptr %y1.ptr, ptr %y2.ptr) #0 {
; CHECK-LABEL: test_cmov_memoperand_conflicting_dir:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cmpl %esi, %edi
; CHECK-NEXT:    movl (%rcx), %eax
; CHECK-NEXT:    cmoval %edx, %eax
; CHECK-NEXT:    cmoval (%r8), %edx
; CHECK-NEXT:    addl %edx, %eax
; CHECK-NEXT:    retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_conflicting_dir:
; CHECK-FORCEALL:       # %bb.0: # %entry
; CHECK-FORCEALL-NEXT:    cmpl %esi, %edi
; CHECK-FORCEALL-NEXT:    movl (%rcx), %eax
; CHECK-FORCEALL-NEXT:    cmoval %edx, %eax
; CHECK-FORCEALL-NEXT:    cmoval (%r8), %edx
; CHECK-FORCEALL-NEXT:    addl %edx, %eax
; CHECK-FORCEALL-NEXT:    retq
entry:
  %cond = icmp ugt i32 %a, %b
  %y1 = load i32, ptr %y1.ptr
  %y2 = load i32, ptr %y2.ptr
  %z1 = select i1 %cond, i32 %x, i32 %y1
  %z2 = select i1 %cond, i32 %y2, i32 %x
  %s1 = add i32 %z1, %z2
  ret i32 %s1
}

; Test that we can convert a group of cmovs where only one has a memory
; operand and where that memory operand's registers come from a prior cmov in
; the group.
;
; In this function %p (a select between two pointers) is the address of
; %load, and %load is the false operand of %z, which uses the same %cond.
; When %cond is true the result is %a; otherwise %p is %y. The assertions for
; both RUN lines expect one branch (ja) that skips a single load through
; %rcx (the fourth integer argument register in the SysV x86-64 ABI, i.e. %y).
define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, ptr %x, ptr %y) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: ja .LBB12_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movl (%rcx), %eax
; CHECK-NEXT: .LBB12_2: # %entry
; CHECK-NEXT: retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
; CHECK-FORCEALL: # %bb.0: # %entry
; CHECK-FORCEALL-NEXT: movl %edi, %eax
; CHECK-FORCEALL-NEXT: cmpl %esi, %edi
; CHECK-FORCEALL-NEXT: ja .LBB12_2
; CHECK-FORCEALL-NEXT: # %bb.1: # %entry
; CHECK-FORCEALL-NEXT: movl (%rcx), %eax
; CHECK-FORCEALL-NEXT: .LBB12_2: # %entry
; CHECK-FORCEALL-NEXT: retq
entry:
  ; Unsigned "greater than" compare shared by both selects.
  %cond = icmp ugt i32 %a, %b
  ; First select of the group: picks the address that is loaded from below.
  %p = select i1 %cond, ptr %x, ptr %y
  %load = load i32, ptr %p
  ; Second select of the group: its false operand is the loaded value.
  %z = select i1 %cond, i32 %a, i32 %load
  ret i32 %z
}

; Test that we can convert a group of two cmovs with memory operands where one
; uses the result of the other as part of the address.
;
; In this function %load1 (loaded from %y) is the false operand of %p, and
; %load2 (loaded through %p) is the false operand of %z; both selects use the
; same %cond. The assertions for both RUN lines expect one branch (ja) over a
; block holding the two dependent loads: movq (%rcx), %rax followed by
; movl (%rax), %eax.
define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, ptr %x, ptr %y) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: ja .LBB13_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movq (%rcx), %rax
; CHECK-NEXT: movl (%rax), %eax
; CHECK-NEXT: .LBB13_2: # %entry
; CHECK-NEXT: retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
; CHECK-FORCEALL: # %bb.0: # %entry
; CHECK-FORCEALL-NEXT: movl %edi, %eax
; CHECK-FORCEALL-NEXT: cmpl %esi, %edi
; CHECK-FORCEALL-NEXT: ja .LBB13_2
; CHECK-FORCEALL-NEXT: # %bb.1: # %entry
; CHECK-FORCEALL-NEXT: movq (%rcx), %rax
; CHECK-FORCEALL-NEXT: movl (%rax), %eax
; CHECK-FORCEALL-NEXT: .LBB13_2: # %entry
; CHECK-FORCEALL-NEXT: retq
entry:
  ; Unsigned "greater than" compare shared by both selects.
  %cond = icmp ugt i32 %a, %b
  ; First memory operand: a pointer loaded from %y.
  %load1 = load ptr, ptr %y
  %p = select i1 %cond, ptr %x, ptr %load1
  ; Second memory operand: its address is the result of the first select.
  %load2 = load i32, ptr %p
  %z = select i1 %cond, i32 %a, i32 %load2
  ret i32 %z
}

; Test that we can convert a group of cmovs where only one has a memory
; operand and where that memory operand's registers come from a prior cmov and
; where that cmov gets *its* input from a prior cmov in the group.
;
; In this function the selects form a chain on one %cond: %p feeds %p2 as its
; false operand, %p2 is the address of %load, and %load is the false operand
; of %r. The assertions for both RUN lines expect one branch (ja) that skips
; a single load through %rcx.
define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, ptr %x, ptr %y, ptr %z) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: ja .LBB14_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movl (%rcx), %eax
; CHECK-NEXT: .LBB14_2: # %entry
; CHECK-NEXT: retq
;
; CHECK-FORCEALL-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
; CHECK-FORCEALL: # %bb.0: # %entry
; CHECK-FORCEALL-NEXT: movl %edi, %eax
; CHECK-FORCEALL-NEXT: cmpl %esi, %edi
; CHECK-FORCEALL-NEXT: ja .LBB14_2
; CHECK-FORCEALL-NEXT: # %bb.1: # %entry
; CHECK-FORCEALL-NEXT: movl (%rcx), %eax
; CHECK-FORCEALL-NEXT: .LBB14_2: # %entry
; CHECK-FORCEALL-NEXT: retq
entry:
  ; Unsigned "greater than" compare shared by all three selects.
  %cond = icmp ugt i32 %a, %b
  ; First select of the chain.
  %p = select i1 %cond, ptr %x, ptr %y
  ; Second select: takes the first select's result as its false operand.
  %p2 = select i1 %cond, ptr %z, ptr %p
  %load = load i32, ptr %p2
  ; Third select: its false operand is the value loaded through %p2.
  %r = select i1 %cond, i32 %a, i32 %load
  ret i32 %r
}

; External pointer-typed globals read by @test_memoperand_loop below.
@begin = external global ptr
@end = external global ptr

; Loop test with two selects of pointers, each followed by a store of %data
; through the selected pointer:
;  - %dst1's false operand is %begin_dup, a load of @begin inside the loop;
;  - %dst2's false operand is %begin, which is loaded once in the entry block.
; The assertions for both RUN lines expect each select to become a branch
; (ja): the first one skips a load (movq (%rax), %r8) and the second one skips
; a register copy (movq %rcx, %r8).
define void @test_memoperand_loop(i32 %data) #0 {
; CHECK-LABEL: test_memoperand_loop:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq begin@GOTPCREL(%rip), %rax
; CHECK-NEXT: movq (%rax), %rcx
; CHECK-NEXT: movq end@GOTPCREL(%rip), %rdx
; CHECK-NEXT: movq (%rdx), %rdx
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: movq %rcx, %r8
; CHECK-NEXT: .LBB15_1: # %loop.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: addq $8, %r8
; CHECK-NEXT: cmpq %rdx, %r8
; CHECK-NEXT: ja .LBB15_3
; CHECK-NEXT: # %bb.2: # %loop.body
; CHECK-NEXT: # in Loop: Header=BB15_1 Depth=1
; CHECK-NEXT: movq (%rax), %r8
; CHECK-NEXT: .LBB15_3: # %loop.body
; CHECK-NEXT: # in Loop: Header=BB15_1 Depth=1
; CHECK-NEXT: movl %edi, (%r8)
; CHECK-NEXT: addq $8, %r8
; CHECK-NEXT: cmpq %rdx, %r8
; CHECK-NEXT: ja .LBB15_5
; CHECK-NEXT: # %bb.4: # %loop.body
; CHECK-NEXT: # in Loop: Header=BB15_1 Depth=1
; CHECK-NEXT: movq %rcx, %r8
; CHECK-NEXT: .LBB15_5: # %loop.body
; CHECK-NEXT: # in Loop: Header=BB15_1 Depth=1
; CHECK-NEXT: movl %edi, (%r8)
; CHECK-NEXT: addl $1, %esi
; CHECK-NEXT: cmpl $1024, %esi # imm = 0x400
; CHECK-NEXT: jl .LBB15_1
; CHECK-NEXT: # %bb.6: # %exit
; CHECK-NEXT: retq
;
; CHECK-FORCEALL-LABEL: test_memoperand_loop:
; CHECK-FORCEALL: # %bb.0: # %entry
; CHECK-FORCEALL-NEXT: movq begin@GOTPCREL(%rip), %rax
; CHECK-FORCEALL-NEXT: movq (%rax), %rcx
; CHECK-FORCEALL-NEXT: movq end@GOTPCREL(%rip), %rdx
; CHECK-FORCEALL-NEXT: movq (%rdx), %rdx
; CHECK-FORCEALL-NEXT: xorl %esi, %esi
; CHECK-FORCEALL-NEXT: movq %rcx, %r8
; CHECK-FORCEALL-NEXT: .LBB15_1: # %loop.body
; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-FORCEALL-NEXT: addq $8, %r8
; CHECK-FORCEALL-NEXT: cmpq %rdx, %r8
; CHECK-FORCEALL-NEXT: ja .LBB15_3
; CHECK-FORCEALL-NEXT: # %bb.2: # %loop.body
; CHECK-FORCEALL-NEXT: # in Loop: Header=BB15_1 Depth=1
; CHECK-FORCEALL-NEXT: movq (%rax), %r8
; CHECK-FORCEALL-NEXT: .LBB15_3: # %loop.body
; CHECK-FORCEALL-NEXT: # in Loop: Header=BB15_1 Depth=1
; CHECK-FORCEALL-NEXT: movl %edi, (%r8)
; CHECK-FORCEALL-NEXT: addq $8, %r8
; CHECK-FORCEALL-NEXT: cmpq %rdx, %r8
; CHECK-FORCEALL-NEXT: ja .LBB15_5
; CHECK-FORCEALL-NEXT: # %bb.4: # %loop.body
; CHECK-FORCEALL-NEXT: # in Loop: Header=BB15_1 Depth=1
; CHECK-FORCEALL-NEXT: movq %rcx, %r8
; CHECK-FORCEALL-NEXT: .LBB15_5: # %loop.body
; CHECK-FORCEALL-NEXT: # in Loop: Header=BB15_1 Depth=1
; CHECK-FORCEALL-NEXT: movl %edi, (%r8)
; CHECK-FORCEALL-NEXT: addl $1, %esi
; CHECK-FORCEALL-NEXT: cmpl $1024, %esi # imm = 0x400
; CHECK-FORCEALL-NEXT: jl .LBB15_1
; CHECK-FORCEALL-NEXT: # %bb.6: # %exit
; CHECK-FORCEALL-NEXT: retq
entry:
  ; Read both globals once before entering the loop.
  %begin = load ptr, ptr @begin, align 8
  %end = load ptr, ptr @end, align 8
  br label %loop.body
loop.body:
  ; %phi.iv counts iterations from 0; %phi.ptr carries the previous
  ; iteration's second destination pointer (%dst2), starting at %begin.
  %phi.iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ]
  %phi.ptr = phi ptr [ %begin, %entry ], [ %dst2, %loop.body ]
  ; Advance by two i32 elements (the expected assembly shows addq $8).
  %gep1 = getelementptr inbounds i32, ptr%phi.ptr, i64 2
  %cmp1 = icmp ugt ptr %gep1, %end
  ; Fresh load of @begin inside the loop; it is only used as the false
  ; operand of the select below.
  %begin_dup = load ptr, ptr @begin, align 8
  %dst1 = select i1 %cmp1, ptr %gep1, ptr %begin_dup
  store i32 %data, ptr%dst1, align 4
  ; Same pattern again, but the false operand is the %begin value that was
  ; loaded in the entry block rather than a load inside the loop.
  %gep2 = getelementptr inbounds i32, ptr%dst1, i64 2
  %cmp2 = icmp ugt ptr %gep2, %end
  %dst2 = select i1 %cmp2, ptr %gep2, ptr %begin
  store i32 %data, ptr%dst2, align 4
  ; Keep looping while the incremented counter is (signed) below 1024.
  %iv.next = add i32 %phi.iv, 1
  %cond = icmp slt i32 %iv.next, 1024
  br i1 %cond, label %loop.body, label %exit
exit:
  ret void
}

; Attribute group #0 sets both the target CPU and the tuning CPU to x86-64.
attributes #0 = {"target-cpu"="x86-64" "tune-cpu"="x86-64"}
!0 = !{}