; RUN: opt %loadPolly -polly-print-flatten-schedule -disable-output < %s | FileCheck %s
;
; dgemm kernel
; C := alpha*A*B + beta*C
; C[ni][nj]
; A[ni][nk]
; B[nk][nj]
;
; The loop nest below uses constant trip counts (i < 3, j < 7, k < 3); the
; %ni, %nj and %nk arguments appear only in the signature and in comments.

target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"

define void @gemm(i32 %ni, i32 %nj, i32 %nk, double %alpha, double %beta, ptr noalias nonnull %C, ptr noalias nonnull %A, ptr noalias nonnull %B) {
entry:
  br label %ni.for

; Outer loop over i, 0 <= i < 3.
ni.for:
  %i = phi i32 [0, %entry], [%i.inc, %ni.inc]
  %i.cmp = icmp slt i32 %i, 3
  br i1 %i.cmp, label %nj.for, label %ni.exit

  ; Middle loop over j, 0 <= j < 7.
  nj.for:
    %j = phi i32 [0, %ni.for], [%j.inc, %nj.inc]
    %j.cmp = icmp slt i32 %j, 7
    br i1 %j.cmp, label %nj_beta, label %nj.exit

    ; First statement of the (i, j) iteration: scale the C element by beta.
    nj_beta:
      ; NOTE(review): the row stride is the literal 3 although the j loop above
      ; runs to 7 -- presumably harmless because only the schedule is checked
      ; here; confirm before reusing this kernel elsewhere.
      %c_stride = mul nsw i32 %i, 3; %nj
      %c_idx_i = getelementptr inbounds double, ptr %C, i32 %c_stride
      %c_idx_ij = getelementptr inbounds double, ptr %c_idx_i, i32 %j

      ; C[i][j] *= beta
      %c = load double, ptr %c_idx_ij
      %c_beta = fmul double %c, %beta
      store double %c_beta, ptr %c_idx_ij

      br label %nk.for

    ; Inner loop over k, 0 <= k < 3.
    nk.for:
      %k = phi i32 [0, %nj_beta], [%k.inc, %nk.inc]
      %k.cmp = icmp slt i32 %k, 3 ; %nk
      br i1 %k.cmp, label %nk_alpha, label %nk.exit

      ; Second statement: accumulate alpha * A * B into the same C element.
      nk_alpha:
        %a_stride = mul nsw i32 %i, 3; %nk
        %a_idx_i = getelementptr inbounds double, ptr %A, i32 %a_stride
        %a_idx_ik = getelementptr inbounds double, ptr %a_idx_i, i32 %k

        %b_stride = mul nsw i32 %k, 3; %nj
        %b_idx_k = getelementptr inbounds double, ptr %B, i32 %b_stride
        %b_idx_kj = getelementptr inbounds double, ptr %b_idx_k, i32 %j

        ; C[i][j] += alpha * A[i][k] * B[k][j]
        %a = load double, ptr %a_idx_ik
        %b = load double, ptr %b_idx_kj
        %beta_c = load double, ptr %c_idx_ij

        %alpha_a = fmul double %a, %alpha
        %alpha_a_b = fmul double %alpha_a, %b
        %beta_c_alpha_a_b = fadd double %beta_c, %alpha_a_b

        store double %beta_c_alpha_a_b, ptr %c_idx_ij

        br label %nk.inc

    nk.inc:
      %k.inc = add nuw nsw i32 %k, 1
      br label %nk.for

    nk.exit:
      ; store double %c, ptr %c_idx_ij
      br label %nj.inc

  nj.inc:
    %j.inc = add nuw nsw i32 %j, 1
    br label %nj.for

  nj.exit:
    br label %ni.inc

ni.inc:
  %i.inc = add nuw nsw i32 %i, 1
  br label %ni.for

ni.exit:
  br label %return

return:
  ret void
}


; Expected output. Before flattening, the third schedule dimension orders the
; beta statement (0) ahead of the alpha statement (1) within one (i, j) pair.
; After flattening, each (i, j) pair occupies 4 consecutive time points (one
; for the beta statement plus three k iterations), and the 7 j iterations give
; 7 * 4 = 28 time points per i iteration.
; CHECK: Schedule before flattening {
; CHECK-NEXT: { Stmt_nk_alpha[i0, i1, i2] -> [i0, i1, 1, i2] }
; CHECK-NEXT: { Stmt_nj_beta[i0, i1] -> [i0, i1, 0, 0] }
; CHECK-NEXT: }
; CHECK: Schedule after flattening {
; CHECK-NEXT: { Stmt_nj_beta[i0, i1] -> [28i0 + 4i1] }
; CHECK-NEXT: { Stmt_nk_alpha[i0, i1, i2] -> [1 + 28i0 + 4i1 + i2] }
; CHECK-NEXT: }