Lines Matching +full:vector +full:- +full:matrix
2 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
4 ; The result has the fewest vector elements between the result and the two operands so the negation can be moved there
6 ; CHECK-LABEL: @test_negation_move_to_result(
7 ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
8 ; CHECK-NEXT: [[RES:%.*]] = fneg <2 x double> [[TMP1]]
9 ; CHECK-NEXT: ret <2 x double> [[RES]]
12 %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
16 ; The result has the fewest vector elements between the result and the two operands so the negation can be moved there
19 ; CHECK-LABEL: @test_negation_move_to_result_with_fastflags(
20 ; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
21 ; CHECK-NEXT: [[RES:%.*]] = fneg fast <2 x double> [[TMP1]]
22 ; CHECK-NEXT: ret <2 x double> [[RES]]
25 %res = tail call fast <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
30 ; CHECK-LABEL: @test_negation_move_to_result_with_nnan_flag(
31 ; CHECK-NEXT: [[TMP1:%.*]] = call nnan <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
32 ; CHECK-NEXT: [[RES:%.*]] = fneg nnan <2 x double> [[TMP1]]
33 ; CHECK-NEXT: ret <2 x double> [[RES]]
36 %res = tail call nnan <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
41 ; CHECK-LABEL: @test_negation_move_to_result_with_nsz_flag(
42 ; CHECK-NEXT: [[TMP1:%.*]] = call nsz <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
43 ; CHECK-NEXT: [[RES:%.*]] = fneg nsz <2 x double> [[TMP1]]
44 ; CHECK-NEXT: ret <2 x double> [[RES]]
47 %res = tail call nsz <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
52 ; CHECK-LABEL: @test_negation_move_to_result_with_fastflag_on_negation(
53 ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
54 ; CHECK-NEXT: [[RES:%.*]] = fneg <2 x double> [[TMP1]]
55 ; CHECK-NEXT: ret <2 x double> [[RES]]
58 %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b, i32 2, i32 3, i32 1)
62 ; %b has the fewest vector elements between the result and the two operands so the negation can be moved there
64 ; CHECK-LABEL: @test_move_negation_to_second_operand(
65 ; CHECK-NEXT: [[TMP1:%.*]] = fneg <3 x double> [[B:%.*]]
66 ; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[TMP1]], i32 9, i32 3, i32 1)
67 ; CHECK-NEXT: ret <9 x double> [[RES]]
70 %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b, i32 9, i32 3, i32 1)
74 ; %b has the fewest vector elements between the result and the two operands so the negation can be moved there
77 ; CHECK-LABEL: @test_move_negation_to_second_operand_with_fast_flags(
78 ; CHECK-NEXT: [[TMP1:%.*]] = fneg <3 x double> [[B:%.*]]
79 ; CHECK-NEXT: [[RES:%.*]] = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[TMP1]], i32 9, i32 3, i32 1)
80 ; CHECK-NEXT: ret <9 x double> [[RES]]
83 %res = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b, i32 9, i32 3, i32 1)
87 ; The result has the fewest vector elements between the result and the two operands so the negation can be moved there
89 ; CHECK-LABEL: @test_negation_move_to_result_from_second_operand(
90 ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double> [[A:%.*]], <6 x double> [[B:%.*]], i32 1, i32 3, i32 2)
91 ; CHECK-NEXT: [[RES:%.*]] = fneg <2 x double> [[TMP1]]
92 ; CHECK-NEXT: ret <2 x double> [[RES]]
95 %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double> %a, <6 x double> %b.neg, i32 1, i32 3, i32 2)
99 ; %a has the fewest vector elements between the result and the two operands so the negation can be moved there
101 ; CHECK-LABEL: @test_move_negation_to_first_operand(
102 ; CHECK-NEXT: [[TMP1:%.*]] = fneg <3 x double> [[A:%.*]]
103 ; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[TMP1]], <27 x double> [[B:%.*]], i32 1, i32 3, i32 9)
104 ; CHECK-NEXT: ret <9 x double> [[RES]]
107 %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> %a, <27 x double> %b.neg, i32 1, i32 3, i32 9)
111 ; %a has the fewest vector elements between the result and the two operands so the negation is not moved
113 ; CHECK-LABEL: @test_negation_not_moved(
114 ; CHECK-NEXT: [[A_NEG:%.*]] = fneg <3 x double> [[A:%.*]]
115 ; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[A_NEG]], <5 x double> [[B:%.*]], i32 3, i32 1, i32 5)
116 ; CHECK-NEXT: ret <15 x double> [[RES]]
119 %res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> %a.neg, <5 x double> %b, i32 3, i32 1, i32 5)
123 ; %b has the fewest vector elements between the result and the two operands so the negation is not moved
125 ; CHECK-LABEL: @test_negation_not_moved_second_operand(
126 ; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
127 ; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v5f64.v3f64(<5 x double> [[A:%.*]], <3 x double> [[B_NEG]], i32 5, i32 1, i32 3)
128 ; CHECK-NEXT: ret <15 x double> [[RES]]
131 %res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v5f64.v3f64(<5 x double> %a, <3 x double> %b.neg, i32 5, i32 1, i32 3)
135 ; ideally the negation would be moved from the result to operand %a because it has the smallest vector element count; the expected output currently keeps the fneg on the result
137 ; CHECK-LABEL: @test_negation_on_result(
138 ; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[A:%.*]], <5 x double> [[B:%.*]], i32 3, i32 1, i32 5)
139 ; CHECK-NEXT: [[RES_2:%.*]] = fneg <15 x double> [[RES]]
140 ; CHECK-NEXT: ret <15 x double> [[RES_2]]
142 %res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> %a, <5 x double> %b, i32 3, i32 1, i32 5)
149 ; CHECK-LABEL: @test_with_two_operands_negated1(
150 ; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
151 ; CHECK-NEXT: ret <2 x double> [[RES]]
155 %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b.neg, i32 2, i32 3, i32 1)
161 ; CHECK-LABEL: @test_with_two_operands_negated2(
162 ; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1)
163 ; CHECK-NEXT: ret <9 x double> [[RES]]
167 %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b.neg, i32 9, i32 3, i32 1)
173 ; CHECK-LABEL: @test_with_two_operands_negated_with_fastflags(
174 ; CHECK-NEXT: [[RES:%.*]] = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 9, i32 3, i32 1)
175 ; CHECK-NEXT: ret <9 x double> [[RES]]
179 %res = tail call fast <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b.neg, i32 9, i32 3, i32 1)
185 ; CHECK-LABEL: @test_with_two_operands_negated2_commute(
186 ; CHECK-NEXT: [[RES:%.*]] = call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> [[A:%.*]], <27 x double> [[B:%.*]], i32 1, i32 3, i32 9)
187 ; CHECK-NEXT: ret <9 x double> [[RES]]
191 %res = call <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double> %a.neg, <27 x double> %b.neg, i32 1, i32 3, i32 9)
196 ; CHECK-LABEL: @matrix_multiply_two_operands_negated_with_same_size(
197 ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 2, i32 1, i32 2)
198 ; CHECK-NEXT: ret <4 x double> [[RES]]
202 %res = call <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double> %a.neg, <2 x double> %b.neg, i32 2, i32 1, i32 2)
207 ; CHECK-LABEL: @matrix_multiply_two_operands_with_multiple_uses(
208 ; CHECK-NEXT: [[RES:%.*]] = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> [[A:%.*]], <3 x double> [[B:%.*]], i32 2, i32 3, i32 1)
209 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <2 x i32> <i32 0, i32 1>
210 ; CHECK-NEXT: [[RES_3:%.*]] = fsub <2 x double> [[RES]], [[TMP1]]
211 ; CHECK-NEXT: ret <2 x double> [[RES_3]]
215 %res = tail call <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double> %a.neg, <3 x double> %b.neg, i32 2, i32 3, i32 1)
223 ; CHECK-LABEL: @matrix_multiply_two_operands_with_multiple_uses2(
224 ; CHECK-NEXT: [[A_NEG:%.*]] = fneg <27 x double> [[A:%.*]]
225 ; CHECK-NEXT: [[B_NEG:%.*]] = fneg <3 x double> [[B:%.*]]
226 ; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A]], <3 x double> [[B]], i32 9, i32 3, i32 1)
227 ; CHECK-NEXT: store <27 x double> [[A_NEG]], ptr [[A_LOC:%.*]], align 256
228 ; CHECK-NEXT: store <3 x double> [[B_NEG]], ptr [[B_LOC:%.*]], align 32
229 ; CHECK-NEXT: ret <9 x double> [[RES]]
233 %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b.neg, i32 9, i32 3, i32 1)
240 ; CHECK-LABEL: @fneg_with_multiple_uses(
241 ; CHECK-NEXT: [[A_NEG:%.*]] = fneg <15 x double> [[A:%.*]]
242 ; CHECK-NEXT: [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A_NEG]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
243 ; CHECK-NEXT: [[RES_2:%.*]] = shufflevector <15 x double> [[A_NEG]], <15 x double> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
244 ; CHECK-NEXT: [[RES_3:%.*]] = fadd <12 x double> [[RES_2]], [[RES]]
245 ; CHECK-NEXT: ret <12 x double> [[RES_3]]
248 %res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4)
256 ; CHECK-LABEL: @fneg_with_multiple_uses_2(
257 ; CHECK-NEXT: [[A_NEG:%.*]] = fneg <15 x double> [[A:%.*]]
258 ; CHECK-NEXT: [[RES:%.*]] = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A_NEG]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
259 ; CHECK-NEXT: store <15 x double> [[A_NEG]], ptr [[A_LOC:%.*]], align 128
260 ; CHECK-NEXT: ret <12 x double> [[RES]]
263 %res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4)
269 ; CHECK-LABEL: @chain_of_matrix_multiplies(
270 ; CHECK-NEXT: [[TMP1:%.*]] = fneg <3 x double> [[B:%.*]]
271 ; CHECK-NEXT: [[RES:%.*]] = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> [[A:%.*]], <3 x double> [[TMP1]], i32 9, i32 3, i32 1)
272 ; CHECK-NEXT: [[RES_2:%.*]] = tail call <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double> [[RES]], <8 x double> [[C:%.*]], i32 9, i32 1, i32 8)
273 ; CHECK-NEXT: ret <72 x double> [[RES_2]]
276 %res = tail call <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double> %a.neg, <3 x double> %b, i32 9, i32 3, i32 1)
277 %res.2 = tail call <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double> %res, <8 x double> %c, i32 9, i32 1, i32 8)
284 ; CHECK-LABEL: @chain_of_matrix_multiplies_with_two_negations(
285 ; CHECK-NEXT: [[TMP1:%.*]] = fneg <3 x double> [[A:%.*]]
286 ; CHECK-NEXT: [[RES:%.*]] = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> [[TMP1]], <5 x double> [[B:%.*]], i32 3, i32 1, i32 5)
287 ; CHECK-NEXT: [[TMP2:%.*]] = call <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double> [[RES]], <10 x double> [[C:%.*]], i32 3, i32 5, i32 2)
288 ; CHECK-NEXT: [[RES_2:%.*]] = fneg <6 x double> [[TMP2]]
289 ; CHECK-NEXT: ret <6 x double> [[RES_2]]
292 %res = tail call <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double> %a, <5 x double> %b.neg, i32 3, i32 1, i32 5)
294 %res.2 = tail call <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double> %res.neg, <10 x double> %c, i32 3, i32 5, i32 2)
298 ; negation should be propagated to the result of the second matrix multiplication
300 ; CHECK-LABEL: @chain_of_matrix_multiplies_propagation(
301 ; CHECK-NEXT: [[TMP1:%.*]] = call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> [[A:%.*]], <20 x double> [[B:%.*]], i32 3, i32 5, i32 4)
302 ; CHECK-NEXT: [[TMP2:%.*]] = call <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double> [[TMP1]], <8 x double> [[C:%.*]], i32 3, i32 4, i32 2)
303 ; CHECK-NEXT: [[RES_2:%.*]] = fneg <6 x double> [[TMP2]]
304 ; CHECK-NEXT: ret <6 x double> [[RES_2]]
307 %res = tail call <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double> %a.neg, <20 x double> %b, i32 3, i32 5, i32 4)
308 %res.2 = tail call <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double> %res, <8 x double> %c, i32 3, i32 4, i32 2)
312 declare <2 x double> @llvm.matrix.multiply.v2f64.v6f64.v3f64(<6 x double>, <3 x double>, i32 immarg, i32 immarg, i32 immarg) #1
313 declare <4 x double> @llvm.matrix.multiply.v4f64.v2f64.v2f64(<2 x double>, <2 x double>, i32 immarg, i32 immarg, i32 immarg) #1
314 declare <2 x double> @llvm.matrix.multiply.v2f64.v3f64.v6f64(<3 x double>, <6 x double>, i32 immarg, i32 immarg, i32 immarg) #1
315 declare <9 x double> @llvm.matrix.multiply.v9f64.v27f64.v3f64(<27 x double>, <3 x double>, i32 immarg, i32 immarg, i32 immarg) #1
316 declare <9 x double> @llvm.matrix.multiply.v9f64.v3f64.v27f64(<3 x double>, <27 x double>, i32 immarg, i32 immarg, i32 immarg)
317 declare <15 x double> @llvm.matrix.multiply.v15f64.v3f64.v5f64(<3 x double>, <5 x double>, i32 immarg, i32 immarg, i32 immarg) #1
318 declare <15 x double> @llvm.matrix.multiply.v15f64.v5f64.v3f64(<5 x double>, <3 x double>, i32 immarg, i32 immarg, i32 immarg) #1
319 declare <72 x double> @llvm.matrix.multiply.v72f64.v9f64.v8f64(<9 x double>, <8 x double>, i32 immarg, i32 immarg, i32 immarg) #1
320 declare <12 x double> @llvm.matrix.multiply.v12f64.v15f64.v20f64(<15 x double>, <20 x double>, i32 immarg, i32 immarg, i32 immarg) #1
321 declare <21 x double> @llvm.matrix.multiply.v21f64.v15f64.v35f64(<15 x double>, <35 x double>, i32 immarg, i32 immarg, i32 immarg) #1
322 declare <6 x double> @llvm.matrix.multiply.v6f64.v15f64.v10f64(<15 x double>, <10 x double>, i32 immarg, i32 immarg, i32 immarg) #1
323 declare <6 x double> @llvm.matrix.multiply.v6f64.v12f64.v8f64(<12 x double>, <8 x double>, i32 immarg, i32 immarg, i32 immarg) #1