Lines Matching +full:matrix +full:- +full:matrix +full:- +full:transpose
1 ; REQUIRES: aarch64-registered-target
5 ; RUN: opt -passes=lower-matrix-intrinsics -pass-remarks=lower-matrix-intrinsics -mtriple=arm64-app…
7 ; CHECK-LABEL: remark: test.h:40:20: Lowered with 6 stores, 6 loads, 24 compute ops
8 ; CHECK-NEXT: store(
9 ; CHECK-NEXT: transpose.2x6.double(load(addr %A)),
10 ; CHECK-NEXT: addr %B)
11 define void @transpose(ptr %A, ptr %B) !dbg !23 {
13 …%t = call <12 x double> @llvm.matrix.transpose.v12f64.v12f64(<12 x double> %load, i32 2, i32 6), !…
18 ; CHECK-LABEL: remark: test.h:50:20: Lowered with 2 stores, 12 loads, 22 compute ops
19 ; CHECK-NEXT: store(
20 ; CHECK-NEXT: multiply.2x6.6x2.double(
21 ; CHECK-NEXT: load(addr %A),
22 ; CHECK-NEXT: load(addr %B)),
23 ; CHECK-NEXT: addr %C)
25 %A.matrix = load <12 x double>, ptr %A, !dbg !26
26 %B.matrix = load <12 x double>, ptr %B, !dbg !26
27 …%t = call <4 x double> @llvm.matrix.multiply(<12 x double> %A.matrix, <12 x double> %B.matrix, i32…
32 ; CHECK-LABEL: remark: test.h:60:20: Lowered with 6 stores, 6 loads, 0 compute ops
33 ; CHECK-NEXT: store(
34 ; CHECK-NEXT: column.major.load.3x3.double(addr %A, 5),
35 ; CHECK-NEXT: addr %B)
37 …%A.matrix = call <9 x double> @llvm.matrix.column.major.load(ptr %A, i64 5, i1 false, i32 3, i32 3…
38 store <9 x double> %A.matrix, ptr %B, !dbg !28
42 ; CHECK-LABEL: remark: test.h:70:20: Lowered with 6 stores, 6 loads, 0 compute ops
43 ; CHECK-NEXT: column.major.store.3x3.double(
44 ; CHECK-NEXT: column.major.load.3x3.double(addr %A, 5),
45 ; CHECK-NEXT: addr %B,
46 ; CHECK-NEXT: 10)
48 …%A.matrix = call <9 x double> @llvm.matrix.column.major.load(ptr %A, i64 5, i1 false, i32 3, i32 3…
49 …call void @llvm.matrix.column.major.store(<9 x double> %A.matrix, ptr %B, i64 10, i1 false, i32 3,…
53 ; CHECK-LABEL: remark: test.h:80:20: Lowered with 6 stores, 6 loads, 12 compute ops
54 ; CHECK-NEXT: column.major.store.3x3.double(
55 ; CHECK-NEXT: fmul(
56 ; CHECK-NEXT: fadd(
57 ; CHECK-NEXT: column.major.load.3x3.double(addr %A, 5)
58 ; CHECK-NEXT: (reused) column.major.load.3x3.double(addr %A, 5)),
59 ; CHECK-NEXT: (reused) column.major.load.3x3.double(addr %A, 5)),
60 ; CHECK-NEXT: addr %B,
61 ; CHECK-NEXT: 10)
64 …%A.matrix = call <9 x double> @llvm.matrix.column.major.load(ptr %A, i64 5, i1 false, i32 3, i32 3…
65 %R1.matrix = fadd <9 x double> %A.matrix, %A.matrix, !dbg !32
66 %R2.matrix = fmul <9 x double> %R1.matrix, %A.matrix, !dbg !32
67 …call void @llvm.matrix.column.major.store(<9 x double> %R2.matrix, ptr %B, i64 10, i1 false, i32 3…
71 ; CHECK-LABEL: remark: test.h:90:20: Lowered with 6 stores, 6 loads, 12 compute ops
72 ; CHECK-NEXT: column.major.store.3x3.double(
73 ; CHECK-NEXT: fmul(
74 ; CHECK-NEXT: fadd(
75 ; CHECK-NEXT: column.major.load.3x3.double(addr %A, 5)
76 ; CHECK-NEXT: (reused) column.major.load.3x3.double(addr %A, 5)),
77 ; CHECK-NEXT: (reused) column.major.load.3x3.double(addr %A, 5)),
78 ; CHECK-NEXT: addr %B,
79 ; CHECK-NEXT: 10)
80 ; CHECK-NEXT: remark: test.h:90:20: Lowered with 2 stores, 12 loads, 22 compute ops
81 ; CHECK-NEXT: store(
82 ; CHECK-NEXT: multiply.2x6.6x2.double(
83 ; CHECK-NEXT: load(addr %C),
84 ; CHECK-NEXT: load(addr %D)),
85 ; CHECK-NEXT: addr %E)
88 …%A.matrix = call <9 x double> @llvm.matrix.column.major.load(ptr %A, i64 5, i1 false, i32 3, i32 3…
89 %R1.matrix = fadd <9 x double> %A.matrix, %A.matrix, !dbg !34
90 %R2.matrix = fmul <9 x double> %R1.matrix, %A.matrix, !dbg !34
91 …call void @llvm.matrix.column.major.store(<9 x double> %R2.matrix, ptr %B, i64 10, i1 false, i32 3…
93 %C.matrix = load <12 x double>, ptr %C, !dbg !34
94 %D.matrix = load <12 x double>, ptr %D, !dbg !34
95 …%Mult.matrix = call <4 x double> @llvm.matrix.multiply(<12 x double> %C.matrix, <12 x double> %D.m…
96 store <4 x double> %Mult.matrix, ptr %E, !dbg !34
101 ; CHECK-LABEL: remark: test.h:100:20: Lowered with 6 stores, 6 loads, 12 compute ops
102 ; CHECK-NEXT: column.major.store.3x3.double(
103 ; CHECK-NEXT: fmul(
104 ; CHECK-NEXT: fadd(
105 ; CHECK-NEXT: column.major.load.3x3.double(addr %A, 5)
106 ; CHECK-NEXT: (reused) column.major.load.3x3.double(addr %A, 5)),
107 ; CHECK-NEXT: (reused) column.major.load.3x3.double(addr %A, 5)),
108 ; CHECK-NEXT: addr %B,
109 ; CHECK-NEXT: 10)
111 …%A.matrix = call <9 x double> @llvm.matrix.column.major.load(ptr %A, i64 5, i1 false, i32 3, i32 3…
112 %R1.matrix = fadd <9 x double> %A.matrix, %A.matrix, !dbg !36
113 %R2.matrix = fmul <9 x double> %R1.matrix, %A.matrix, !dbg !36
114 …call void @llvm.matrix.column.major.store(<9 x double> %R2.matrix, ptr %B, i64 10, i1 false, i32 3…
118 ; CHECK-LABEL: remark: test.h:30:20: Lowered with 10 stores, 9 loads, 30 compute ops
119 ; CHECK-NEXT: store(
120 ; CHECK-NEXT: transpose.5x3.double(load(addr %A)),
121 ; CHECK-NEXT: stack addr %s1)
130 …%t = call <15 x double> @llvm.matrix.transpose.v15f64.v15f64(<15 x double> %av, i32 5, i32 3), !db…
136 declare <12 x double> @llvm.matrix.transpose.v12f64.v12f64(<12 x double>, i32, i32)
137 declare <4 x double> @llvm.matrix.multiply(<12 x double>, <12 x double>, i32, i32, i32)
138 declare <9 x double> @llvm.matrix.column.major.load(ptr, i64, i1, i32, i32)
139 declare <15 x double> @llvm.matrix.transpose.v15f64.v15f64(<15 x double>, i32, i32)
140 declare void @llvm.matrix.column.major.store(<9 x double>, ptr, i64, i1, i32, i32)