; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -aa-pipeline=basic-aa -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

;; Test to make sure DependenceAnalysis (DA) outputs the correct direction
;; vector [< =], so that the loop nest is interchanged.
;;
;; The store (X) writes a[i1][i2+1] and the load (Y) reads a[i1][i2], so an
;; element stored in one iteration of the outer (i2) loop is read back in the
;; next outer iteration at the same inner (i1) iteration: [<] for i2, [=] for
;; i1. The assertions below expect loop2 (i1) to become the outer loop.
;;
;; In the IR, N1 == N2 == 64: rows are [64 x i32] and both induction
;; variables start at 1 and stop once their increment reaches 63.
;;
;; void test1(unsigned a[restrict N1][N2],
;;            unsigned b[restrict N1][N2],
;;            unsigned c[restrict N1][N2]) {
;;   for (unsigned long i2 = 1; i2 < N2-1; i2++) {
;;     for (unsigned long i1 = 1; i1 < N1-1; i1++) {
;;       a[i1][i2+1] = b[i1][i2];
;;       c[i1][i2] = a[i1][i2];
;;     }
;;   }
;; }

define void @test1(ptr noalias noundef %a, ptr noalias noundef %b, ptr noalias noundef %c) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP2_HEADER_PREHEADER:%.*]]
; CHECK:       loop1.header.preheader:
; CHECK-NEXT:    br label [[LOOP1_HEADER:%.*]]
; CHECK:       loop1.header:
; CHECK-NEXT:    [[I2:%.*]] = phi i64 [ [[I2_INC:%.*]], [[LOOP1_LATCH:%.*]] ], [ 1, [[LOOP1_HEADER_PREHEADER:%.*]] ]
; CHECK-NEXT:    [[I2_ST:%.*]] = add i64 [[I2]], 1
; CHECK-NEXT:    [[I2_LD:%.*]] = add i64 [[I2]], 0
; CHECK-NEXT:    br label [[LOOP2_HEADER_SPLIT1:%.*]]
; CHECK:       loop2.header.preheader:
; CHECK-NEXT:    br label [[LOOP2_HEADER:%.*]]
; CHECK:       loop2.header:
; CHECK-NEXT:    [[I1:%.*]] = phi i64 [ [[TMP0:%.*]], [[LOOP2_HEADER_SPLIT:%.*]] ], [ 1, [[LOOP2_HEADER_PREHEADER]] ]
; CHECK-NEXT:    br label [[LOOP1_HEADER_PREHEADER]]
; CHECK:       loop2.header.split1:
; CHECK-NEXT:    [[I1_ST:%.*]] = add i64 [[I1]], 0
; CHECK-NEXT:    [[I1_LD:%.*]] = add i64 [[I1]], 0
; CHECK-NEXT:    [[A_ST:%.*]] = getelementptr inbounds [64 x i32], ptr [[A:%.*]], i64 [[I1_ST]], i64 [[I2_ST]]
; CHECK-NEXT:    [[A_LD:%.*]] = getelementptr inbounds [64 x i32], ptr [[A]], i64 [[I1_LD]], i64 [[I2_LD]]
; CHECK-NEXT:    [[B_LD:%.*]] = getelementptr inbounds [64 x i32], ptr [[B:%.*]], i64 [[I1]], i64 [[I2]]
; CHECK-NEXT:    [[C_ST:%.*]] = getelementptr inbounds [64 x i32], ptr [[C:%.*]], i64 [[I1]], i64 [[I2]]
; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B_LD]], align 4
; CHECK-NEXT:    store i32 [[B_VAL]], ptr [[A_ST]], align 4
; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A_LD]], align 4
; CHECK-NEXT:    store i32 [[A_VAL]], ptr [[C_ST]], align 4
; CHECK-NEXT:    [[I1_INC:%.*]] = add nuw nsw i64 [[I1]], 1
; CHECK-NEXT:    [[LOOP2_EXITCOND_NOT:%.*]] = icmp eq i64 [[I1_INC]], 63
; CHECK-NEXT:    br label [[LOOP1_LATCH]]
; CHECK:       loop2.header.split:
; CHECK-NEXT:    [[TMP0]] = add nuw nsw i64 [[I1]], 1
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 63
; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT:%.*]], label [[LOOP2_HEADER]]
; CHECK:       loop1.latch:
; CHECK-NEXT:    [[I2_INC]] = add nuw nsw i64 [[I2]], 1
; CHECK-NEXT:    [[LOOP1_EXITCOND_NOT:%.*]] = icmp eq i64 [[I2_INC]], 63
; CHECK-NEXT:    br i1 [[LOOP1_EXITCOND_NOT]], label [[LOOP2_HEADER_SPLIT]], label [[LOOP1_HEADER]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop1.header

; Outer loop of the input nest: i2 is the second (column) subscript.
loop1.header:
  %i2 = phi i64 [ 1, %entry ], [ %i2.inc, %loop1.latch ]
  ; Column subscripts of %a: i2+1 for the store (X), i2 for the load (Y).
  %i2.st = add i64 %i2, 1
  %i2.ld = add i64 %i2, 0
  br label %loop2.header

; Inner loop of the input nest: i1 is the first (row) subscript. This single
; block is both header and latch of the inner loop.
loop2.header:
  %i1 = phi i64 [ 1, %loop1.header ], [ %i1.inc, %loop2.header ]
  ; Row subscripts of %a: the store and the load both use i1 unchanged.
  %i1.st = add i64 %i1, 0
  %i1.ld = add i64 %i1, 0
  %a.st = getelementptr inbounds [64 x i32], ptr %a, i64 %i1.st, i64 %i2.st
  %a.ld = getelementptr inbounds [64 x i32], ptr %a, i64 %i1.ld, i64 %i2.ld
  %b.ld = getelementptr inbounds [64 x i32], ptr %b, i64 %i1, i64 %i2
  %c.st = getelementptr inbounds [64 x i32], ptr %c, i64 %i1, i64 %i2
  %b.val = load i32, ptr %b.ld, align 4
  store i32 %b.val, ptr %a.st, align 4 ; (X) store to a[i1][i2+1]
  %a.val = load i32, ptr %a.ld, align 4 ; (Y) load from a[i1][i2]
  store i32 %a.val, ptr %c.st, align 4
  %i1.inc = add nuw nsw i64 %i1, 1
  %loop2.exitcond.not = icmp eq i64 %i1.inc, 63
  br i1 %loop2.exitcond.not, label %loop1.latch, label %loop2.header

; Outer-loop latch: advance i2 and leave once the increment reaches 63.
loop1.latch:
  %i2.inc = add nuw nsw i64 %i2, 1
  %loop1.exitcond.not = icmp eq i64 %i2.inc, 63
  br i1 %loop1.exitcond.not, label %exit, label %loop1.header

exit:
  ret void
}

;; Semantically equivalent to test1(); the only difference is the order of
;; the store (X) and the load (Y) in the inner loop body: here the load (Y)
;; is emitted before the store (X).
;;
;; Test to make sure DependenceAnalysis (DA) outputs the correct direction
;; vector [< =], so that the loop nest is interchanged.

define void @test2(ptr noalias noundef %a, ptr noalias noundef %b, ptr noalias noundef %c) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP2_HEADER_PREHEADER:%.*]]
; CHECK:       loop1.header.preheader:
; CHECK-NEXT:    br label [[LOOP1_HEADER:%.*]]
; CHECK:       loop1.header:
; CHECK-NEXT:    [[I2:%.*]] = phi i64 [ [[I2_INC:%.*]], [[LOOP1_LATCH:%.*]] ], [ 1, [[LOOP1_HEADER_PREHEADER:%.*]] ]
; CHECK-NEXT:    [[I2_ST:%.*]] = add i64 [[I2]], 1
; CHECK-NEXT:    [[I2_LD:%.*]] = add i64 [[I2]], 0
; CHECK-NEXT:    br label [[LOOP2_HEADER_SPLIT1:%.*]]
; CHECK:       loop2.header.preheader:
; CHECK-NEXT:    br label [[LOOP2_HEADER:%.*]]
; CHECK:       loop2.header:
; CHECK-NEXT:    [[I1:%.*]] = phi i64 [ [[TMP0:%.*]], [[LOOP2_HEADER_SPLIT:%.*]] ], [ 1, [[LOOP2_HEADER_PREHEADER]] ]
; CHECK-NEXT:    br label [[LOOP1_HEADER_PREHEADER]]
; CHECK:       loop2.header.split1:
; CHECK-NEXT:    [[I1_ST:%.*]] = add i64 [[I1]], 0
; CHECK-NEXT:    [[I1_LD:%.*]] = add i64 [[I1]], 0
; CHECK-NEXT:    [[A_ST:%.*]] = getelementptr inbounds [64 x i32], ptr [[A:%.*]], i64 [[I1_ST]], i64 [[I2_ST]]
; CHECK-NEXT:    [[A_LD:%.*]] = getelementptr inbounds [64 x i32], ptr [[A]], i64 [[I1_LD]], i64 [[I2_LD]]
; CHECK-NEXT:    [[B_LD:%.*]] = getelementptr inbounds [64 x i32], ptr [[B:%.*]], i64 [[I1]], i64 [[I2]]
; CHECK-NEXT:    [[C_ST:%.*]] = getelementptr inbounds [64 x i32], ptr [[C:%.*]], i64 [[I1]], i64 [[I2]]
; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B_LD]], align 4
; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A_LD]], align 4
; CHECK-NEXT:    store i32 [[B_VAL]], ptr [[A_ST]], align 4
; CHECK-NEXT:    store i32 [[A_VAL]], ptr [[C_ST]], align 4
; CHECK-NEXT:    [[I1_INC:%.*]] = add nuw nsw i64 [[I1]], 1
; CHECK-NEXT:    [[LOOP2_EXITCOND_NOT:%.*]] = icmp eq i64 [[I1_INC]], 63
; CHECK-NEXT:    br label [[LOOP1_LATCH]]
; CHECK:       loop2.header.split:
; CHECK-NEXT:    [[TMP0]] = add nuw nsw i64 [[I1]], 1
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 63
; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT:%.*]], label [[LOOP2_HEADER]]
; CHECK:       loop1.latch:
; CHECK-NEXT:    [[I2_INC]] = add nuw nsw i64 [[I2]], 1
; CHECK-NEXT:    [[LOOP1_EXITCOND_NOT:%.*]] = icmp eq i64 [[I2_INC]], 63
; CHECK-NEXT:    br i1 [[LOOP1_EXITCOND_NOT]], label [[LOOP2_HEADER_SPLIT]], label [[LOOP1_HEADER]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop1.header

; Outer loop of the input nest: i2 is the second (column) subscript.
loop1.header:
  %i2 = phi i64 [ 1, %entry ], [ %i2.inc, %loop1.latch ]
  ; Column subscripts of %a: i2+1 for the store (X), i2 for the load (Y).
  %i2.st = add i64 %i2, 1
  %i2.ld = add i64 %i2, 0
  br label %loop2.header

; Inner loop of the input nest: i1 is the first (row) subscript. This single
; block is both header and latch of the inner loop.
loop2.header:
  %i1 = phi i64 [ 1, %loop1.header ], [ %i1.inc, %loop2.header ]
  ; Row subscripts of %a: the store and the load both use i1 unchanged.
  %i1.st = add i64 %i1, 0
  %i1.ld = add i64 %i1, 0
  %a.st = getelementptr inbounds [64 x i32], ptr %a, i64 %i1.st, i64 %i2.st
  %a.ld = getelementptr inbounds [64 x i32], ptr %a, i64 %i1.ld, i64 %i2.ld
  %b.ld = getelementptr inbounds [64 x i32], ptr %b, i64 %i1, i64 %i2
  %c.st = getelementptr inbounds [64 x i32], ptr %c, i64 %i1, i64 %i2
  %b.val = load i32, ptr %b.ld, align 4
  ; Unlike test1, the load (Y) comes before the store (X).
  %a.val = load i32, ptr %a.ld, align 4 ; (Y) load from a[i1][i2]
  store i32 %b.val, ptr %a.st, align 4 ; (X) store to a[i1][i2+1]
  store i32 %a.val, ptr %c.st, align 4
  %i1.inc = add nuw nsw i64 %i1, 1
  %loop2.exitcond.not = icmp eq i64 %i1.inc, 63
  br i1 %loop2.exitcond.not, label %loop1.latch, label %loop2.header

; Outer-loop latch: advance i2 and leave once the increment reaches 63.
loop1.latch:
  %i2.inc = add nuw nsw i64 %i2, 1
  %loop1.exitcond.not = icmp eq i64 %i2.inc, 63
  br i1 %loop1.exitcond.not, label %exit, label %loop1.header

exit:
  ret void
}