; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs -ppc-formprep-chain-commoning \
; RUN: -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr9 < %s | FileCheck %s

; Test that chain commoning still works on 32-bit AIX without crashing.

; The loop reads four addresses; each one is the previous address plus
; `offset`:
;   1: base1 + offset
;   2: + offset
;   3: + offset
;   4: + offset
;
; These are expected to be grouped into two chains that share the same
; offsets, so one index register can serve both chain bases:
;   1: base: base1 + offset, offsets: (0, offset)
;   2: base: base1 + 3*offset, offsets: (0, offset)
;
; Reference C source that the IR below corresponds to:
;
; long long two_chain_same_offset_succ_i32(char *p, int offset, int base1, long long n) {
;   int o1 = base1 + offset;
;   int o2 = base1 + 2 * offset;
;   int o3 = base1 + 3 * offset;
;   int o4 = base1 + 4 * offset;
;   char *p1 = p + o1;
;   char *p2 = p + o2;
;   char *p3 = p + o3;
;   char *p4 = p + o4;
;   long long sum = 0;
;   for (long long i = 0; i < n; ++i) {
;     unsigned long x1 = *(unsigned long *)(p1 + i);
;     unsigned long x2 = *(unsigned long *)(p2 + i);
;     unsigned long x3 = *(unsigned long *)(p3 + i);
;     unsigned long x4 = *(unsigned long *)(p4 + i);
;     sum += x1 * x2 * x3 * x4;
;   }
;   return sum;
; }
;
; In the expected assembly, the loop keeps two pointer registers (r8 and r9),
; each used by one displacement-form load at 0 and one indexed load through
; r4, which matches the two chains described above. The 64-bit sum and loop
; counter are updated with carry-propagating pairs (addc/addze, addic/addze).
;
define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64 %n) {
; CHECK-LABEL: two_chain_same_offset_succ_i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmplwi r6, 0
; CHECK-NEXT: cmpwi cr1, r6, 0
; CHECK-NEXT: crandc 4*cr5+lt, 4*cr1+lt, eq
; CHECK-NEXT: cmpwi cr1, r7, 0
; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_6
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+eq
; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_6
; CHECK-NEXT: # %bb.2: # %for.body.preheader
; CHECK-NEXT: slwi r8, r4, 1
; CHECK-NEXT: li r10, 0
; CHECK-NEXT: li r11, 0
; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill
; CHECK-NEXT: add r8, r4, r8
; CHECK-NEXT: stw r31, -4(r1) # 4-byte Folded Spill
; CHECK-NEXT: add r9, r5, r8
; CHECK-NEXT: add r5, r5, r4
; CHECK-NEXT: add r8, r3, r5
; CHECK-NEXT: add r9, r3, r9
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: li r5, 0
; CHECK-NEXT: .align 4
; CHECK-NEXT: L..BB0_3: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: lwz r12, 0(r8)
; CHECK-NEXT: lwzx r0, r8, r4
; CHECK-NEXT: lwz r31, 0(r9)
; CHECK-NEXT: lwzx r30, r9, r4
; CHECK-NEXT: addi r8, r8, 1
; CHECK-NEXT: addi r9, r9, 1
; CHECK-NEXT: mullw r12, r0, r12
; CHECK-NEXT: mullw r12, r12, r31
; CHECK-NEXT: mullw r12, r12, r30
; CHECK-NEXT: addc r5, r5, r12
; CHECK-NEXT: addze r3, r3
; CHECK-NEXT: addic r11, r11, 1
; CHECK-NEXT: addze r10, r10
; CHECK-NEXT: cmplw r10, r6
; CHECK-NEXT: cmpw cr1, r10, r6
; CHECK-NEXT: crandc 4*cr5+lt, 4*cr1+lt, eq
; CHECK-NEXT: cmplw cr1, r11, r7
; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_3
; CHECK-NEXT: # %bb.4: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt
; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_3
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: lwz r31, -4(r1) # 4-byte Folded Reload
; CHECK-NEXT: lwz r30, -8(r1) # 4-byte Folded Reload
; CHECK-NEXT: mr r4, r5
; CHECK-NEXT: blr
; CHECK-NEXT: L..BB0_6:
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: li r4, 0
; CHECK-NEXT: blr
entry:
  ; Byte offsets base1 + k*offset for k = 1..4 (the shifts compute 2*offset
  ; and 4*offset).
  %add = add nsw i32 %base1, %offset
  %mul = shl nsw i32 %offset, 1
  %add1 = add nsw i32 %mul, %base1
  %mul2 = mul nsw i32 %offset, 3
  %add3 = add nsw i32 %mul2, %base1
  %mul4 = shl nsw i32 %offset, 2
  %add5 = add nsw i32 %mul4, %base1
  ; Loop-invariant start pointers p + (base1 + k*offset), computed as byte
  ; (i8) offsets from %p.
  %add.ptr = getelementptr inbounds i8, ptr %p, i32 %add
  %add.ptr6 = getelementptr inbounds i8, ptr %p, i32 %add1
  %add.ptr7 = getelementptr inbounds i8, ptr %p, i32 %add3
  %add.ptr8 = getelementptr inbounds i8, ptr %p, i32 %add5
  ; Skip the loop entirely unless n > 0 (signed 64-bit compare).
  %cmp49 = icmp sgt i64 %n, 0
  br i1 %cmp49, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.body, %entry
  ; Result is 0 when the loop never ran, otherwise the final accumulated sum.
  %sum.0.lcssa = phi i64 [ 0, %entry ], [ %add19, %for.body ]
  ret i64 %sum.0.lcssa

for.body: ; preds = %entry, %for.body
  %sum.051 = phi i64 [ %add19, %for.body ], [ 0, %entry ]
  %i.050 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  ; The induction variable is i64, but the address computations below use its
  ; low 32 bits as the i32 GEP index.
  %idx.ext = trunc i64 %i.050 to i32
  ; Four i32 loads at (start pointer k) + i; these are the accesses the two
  ; chains in the header comment describe.
  %add.ptr9 = getelementptr inbounds i8, ptr %add.ptr, i32 %idx.ext
  %0 = load i32, ptr %add.ptr9, align 4
  %add.ptr11 = getelementptr inbounds i8, ptr %add.ptr6, i32 %idx.ext
  %1 = load i32, ptr %add.ptr11, align 4
  %add.ptr13 = getelementptr inbounds i8, ptr %add.ptr7, i32 %idx.ext
  %2 = load i32, ptr %add.ptr13, align 4
  %add.ptr15 = getelementptr inbounds i8, ptr %add.ptr8, i32 %idx.ext
  %3 = load i32, ptr %add.ptr15, align 4
  ; The product is formed in 32 bits (no nsw/nuw flags), then zero-extended
  ; and added to the 64-bit running sum.
  %mul16 = mul i32 %1, %0
  %mul17 = mul i32 %mul16, %2
  %mul18 = mul i32 %mul17, %3
  %conv = zext i32 %mul18 to i64
  %add19 = add nuw nsw i64 %sum.051, %conv
  ; Advance i by one and keep looping while i < n (signed 64-bit compare).
  %inc = add nuw nsw i64 %i.050, 1
  %cmp = icmp slt i64 %inc, %n
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}
