xref: /llvm-project/polly/test/FlattenSchedule/gemm.ll (revision b332499a94df11870dfc7598645c59656deb933d)
1; RUN: opt %loadPolly -polly-print-flatten-schedule -disable-output < %s | FileCheck %s
2;
3; dgemm kernel
4; C := alpha*A*B + beta*C
5; C[ni][nj]
6; A[ni][nk]
7; B[nk][nj]
8
9target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" ; little-endian ("e"), 32-bit pointers ("p:32:32"); the GEPs below index with i32
10
11define void @gemm(i32 %ni, i32 %nj, i32 %nk, double %alpha, double %beta, ptr noalias nonnull %C, ptr noalias nonnull %A, ptr noalias nonnull %B) { ; three nested loops with constant bounds (i < 3, j < 7, k < 3); %ni, %nj and %nk are never read
12entry:
13  br label %ni.for
14
15ni.for:
16  %i = phi i32 [0, %entry], [%i.inc, %ni.inc] ; outer induction variable, starts at 0
17  %i.cmp = icmp slt i32 %i, 3 ; constant bound: i runs 0..2
18  br i1 %i.cmp, label %nj.for, label %ni.exit
19
20  nj.for:
21    %j = phi i32 [0, %ni.for], [%j.inc, %nj.inc] ; middle induction variable, restarts at 0 for every i
22    %j.cmp = icmp slt i32 %j, 7 ; constant bound: j runs 0..6
23    br i1 %j.cmp, label %nj_beta, label %nj.exit
24
25    nj_beta: ; this block is the statement the CHECK lines call Stmt_nj_beta
26     %c_stride = mul nsw i32 %i, 3; %nj (row stride is the constant 3 even though j runs to 7)
27     %c_idx_i = getelementptr inbounds double, ptr %C, i32 %c_stride ; &C[3*i]
28     %c_idx_ij = getelementptr inbounds double, ptr %c_idx_i, i32 %j ; &C[3*i + j]
29
30     ; C[i][j] *= beta
31     %c = load double, ptr %c_idx_ij
32     %c_beta = fmul double %c, %beta
33     store double %c_beta, ptr %c_idx_ij
34
35     br label %nk.for
36
37    nk.for:
38      %k = phi i32 [0, %nj_beta], [%k.inc, %nk.inc] ; inner induction variable, restarts at 0 for every (i, j)
39      %k.cmp = icmp slt i32 %k, 3 ; %nk
40      br i1 %k.cmp, label %nk_alpha, label %nk.exit
41
42      nk_alpha: ; this block is the statement the CHECK lines call Stmt_nk_alpha
43        %a_stride = mul nsw i32 %i, 3; %nk
44        %a_idx_i = getelementptr inbounds double, ptr %A, i32 %a_stride ; &A[3*i]
45        %a_idx_ik = getelementptr inbounds double, ptr %a_idx_i, i32 %k ; &A[3*i + k]
46
47        %b_stride = mul nsw i32 %k, 3; %nj
48        %b_idx_k = getelementptr inbounds double, ptr %B, i32 %b_stride ; &B[3*k]
49        %b_idx_kj = getelementptr inbounds double, ptr %b_idx_k, i32 %j ; &B[3*k + j]
50
51        ; C[i][j] += alpha * A[i][k] * B[k][j]
52        %a = load double, ptr %a_idx_ik
53        %b = load double, ptr %b_idx_kj
54        %beta_c = load double, ptr %c_idx_ij ; current C[i][j], reloaded from memory on every k iteration
55
56        %alpha_a = fmul double %a, %alpha
57        %alpha_a_b = fmul double %alpha_a, %b
58        %beta_c_alpha_a_b = fadd double %beta_c, %alpha_a_b
59
60        store double %beta_c_alpha_a_b, ptr %c_idx_ij ; write the accumulated value back to C[i][j]
61
62        br label %nk.inc
63
64    nk.inc:
65      %k.inc = add nuw nsw i32 %k, 1
66      br label %nk.for
67
68    nk.exit:
69      ; store double %c, ptr %c_idx_ij
70      br label %nj.inc
71
72  nj.inc:
73    %j.inc = add nuw nsw i32 %j, 1
74    br label %nj.for
75
76  nj.exit:
77    br label %ni.inc
78
79ni.inc:
80  %i.inc = add nuw nsw i32 %i, 1
81  br label %ni.for
82
83ni.exit:
84  br label %return
85
86return:
87  ret void
88}
89
90
91; CHECK:      Schedule before flattening {
92; CHECK-NEXT:     { Stmt_nk_alpha[i0, i1, i2] -> [i0, i1, 1, i2] }
93; CHECK-NEXT:     { Stmt_nj_beta[i0, i1] -> [i0, i1, 0, 0] }
94; CHECK-NEXT: }
95; CHECK:      Schedule after flattening {
96; CHECK-NEXT:     { Stmt_nj_beta[i0, i1] -> [28i0 + 4i1] }
97; CHECK-NEXT:     { Stmt_nk_alpha[i0, i1, i2] -> [1 + 28i0 + 4i1 + i2] }
98; CHECK-NEXT: }
99