xref: /llvm-project/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-multiple-iterations.ll (revision 256d5ad3e8db42e125e4954fadeba2b37421d91f)
1ccf24225SFlorian Hahn; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
287c99d2bSFlorian Hahn; RUN: opt -matrix-allow-contract=false -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s
3ccf24225SFlorian Hahn
4ccf24225SFlorian Hahn
5ccf24225SFlorian Hahn; Make sure we propagate in multiple iterations. First, we back-propagate the
6ccf24225SFlorian Hahn; shape information from the transpose to %A; in the next iteration we
7ccf24225SFlorian Hahn; forward-propagate it to %Mul, and then back to %B.
; Test function: loads two <16 x double> values through %A.Ptr and %B.Ptr,
; transposes %A as a 4 x 4 matrix (result unused), and returns the elementwise
; fmul of %A and %B.
;
; What the CHECK lines below expect from the pass:
;   * %A is loaded as four <4 x double> vectors at double offsets 0, 4, 8 and
;     12 from %A.Ptr (COL_LOAD, COL_LOAD1, COL_LOAD3, COL_LOAD5).
;   * The transpose is expanded into extractelement/insertelement chains that
;     gather element i of each of those four vectors into a new vector
;     (TMP8, TMP16, TMP24, TMP32); no later CHECK line uses these results.
;   * %B is loaded the same way from %B.Ptr (COL_LOAD6 .. COL_LOAD12).
;   * The fmul is done as four <4 x double> fmuls, and the results are
;     concatenated with shufflevector back into the returned <16 x double>.
8*256d5ad3SMatt Arsenaultdefine <16 x double> @backpropagation_iterations(ptr %A.Ptr, ptr %B.Ptr) {
9ccf24225SFlorian Hahn; CHECK-LABEL: @backpropagation_iterations(
10*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <4 x double>, ptr [[A_PTR:%.*]], align 8
11*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[A_PTR]], i64 4
12*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <4 x double>, ptr [[VEC_GEP]], align 8
13*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr [[A_PTR]], i64 8
14*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[COL_LOAD3:%.*]] = load <4 x double>, ptr [[VEC_GEP2]], align 8
15*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[VEC_GEP4:%.*]] = getelementptr double, ptr [[A_PTR]], i64 12
16*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load <4 x double>, ptr [[VEC_GEP4]], align 8
17*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x double> [[COL_LOAD]], i64 0
18*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> poison, double [[TMP1]], i64 0
19*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[COL_LOAD1]], i64 0
20*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x double> [[TMP2]], double [[TMP3]], i64 1
21*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x double> [[COL_LOAD3]], i64 0
22*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x double> [[TMP4]], double [[TMP5]], i64 2
23*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x double> [[COL_LOAD5]], i64 0
24*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x double> [[TMP6]], double [[TMP7]], i64 3
25*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x double> [[COL_LOAD]], i64 1
26*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x double> poison, double [[TMP9]], i64 0
27*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x double> [[COL_LOAD1]], i64 1
28*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x double> [[TMP10]], double [[TMP11]], i64 1
29*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x double> [[COL_LOAD3]], i64 1
30*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x double> [[TMP12]], double [[TMP13]], i64 2
31*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x double> [[COL_LOAD5]], i64 1
32*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x double> [[TMP14]], double [[TMP15]], i64 3
33*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x double> [[COL_LOAD]], i64 2
34*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x double> poison, double [[TMP17]], i64 0
35*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x double> [[COL_LOAD1]], i64 2
36*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x double> [[TMP18]], double [[TMP19]], i64 1
37*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x double> [[COL_LOAD3]], i64 2
38*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x double> [[TMP20]], double [[TMP21]], i64 2
39*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x double> [[COL_LOAD5]], i64 2
40*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x double> [[TMP22]], double [[TMP23]], i64 3
41*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x double> [[COL_LOAD]], i64 3
42*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <4 x double> poison, double [[TMP25]], i64 0
43*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x double> [[COL_LOAD1]], i64 3
44*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <4 x double> [[TMP26]], double [[TMP27]], i64 1
45*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x double> [[COL_LOAD3]], i64 3
46*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <4 x double> [[TMP28]], double [[TMP29]], i64 2
47*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <4 x double> [[COL_LOAD5]], i64 3
48*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x double> [[TMP30]], double [[TMP31]], i64 3
49*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[COL_LOAD6:%.*]] = load <4 x double>, ptr [[B_PTR:%.*]], align 8
50*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[VEC_GEP7:%.*]] = getelementptr double, ptr [[B_PTR]], i64 4
51*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[COL_LOAD8:%.*]] = load <4 x double>, ptr [[VEC_GEP7]], align 8
52*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[VEC_GEP9:%.*]] = getelementptr double, ptr [[B_PTR]], i64 8
53*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[COL_LOAD10:%.*]] = load <4 x double>, ptr [[VEC_GEP9]], align 8
54*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[VEC_GEP11:%.*]] = getelementptr double, ptr [[B_PTR]], i64 12
55*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[COL_LOAD12:%.*]] = load <4 x double>, ptr [[VEC_GEP11]], align 8
56*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP33:%.*]] = fmul <4 x double> [[COL_LOAD]], [[COL_LOAD6]]
57*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP34:%.*]] = fmul <4 x double> [[COL_LOAD1]], [[COL_LOAD8]]
58*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP35:%.*]] = fmul <4 x double> [[COL_LOAD3]], [[COL_LOAD10]]
59*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP36:%.*]] = fmul <4 x double> [[COL_LOAD5]], [[COL_LOAD12]]
60*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP37:%.*]] = shufflevector <4 x double> [[TMP33]], <4 x double> [[TMP34]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
61*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP38:%.*]] = shufflevector <4 x double> [[TMP35]], <4 x double> [[TMP36]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
62*256d5ad3SMatt Arsenault; CHECK-NEXT:    [[TMP39:%.*]] = shufflevector <8 x double> [[TMP37]], <8 x double> [[TMP38]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
63*256d5ad3SMatt Arsenault; CHECK-NEXT:    ret <16 x double> [[TMP39]]
64ccf24225SFlorian Hahn;
65*256d5ad3SMatt Arsenault  %A = load <16 x double>, ptr %A.Ptr, align 8
; %A.trans is never used below. It is the only instruction here that carries
; explicit dimensions (i32 4, i32 4); per the test description, this is where
; the shape information for %A is back-propagated from.
66ccf24225SFlorian Hahn  %A.trans = tail call <16 x double> @llvm.matrix.transpose.v16f64(<16 x double> %A, i32 4, i32 4)
67*256d5ad3SMatt Arsenault  %B = load <16 x double>, ptr %B.Ptr, align 8
; Plain fmul with no shape operands of its own; the CHECK lines expect it to be
; emitted as four <4 x double> fmuls, and %B to be loaded as four vectors too.
68ccf24225SFlorian Hahn  %Mul = fmul <16 x double> %A, %B
69ccf24225SFlorian Hahn  ret <16 x double> %Mul
70ccf24225SFlorian Hahn}
71ccf24225SFlorian Hahn
; Matrix intrinsic declarations. @backpropagation_iterations calls only
; llvm.matrix.transpose; llvm.matrix.multiply is declared but has no call site
; in that function.
72ccf24225SFlorian Hahndeclare <16 x double> @llvm.matrix.multiply.v16f64.v16f64.v16f64(<16 x double>, <16 x double>, i32 immarg, i32 immarg, i32 immarg)
73ccf24225SFlorian Hahndeclare <16 x double> @llvm.matrix.transpose.v16f64(<16 x double>, i32 immarg, i32 immarg)
74