//===-- PPCIntrinsicCall.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Helper routines for constructing the FIR dialect of MLIR for PowerPC
// intrinsics. This module makes extensive use of MLIR interfaces and follows
// MLIR's coding style (https://mlir.llvm.org/getting_started/DeveloperGuide/).
//
//===----------------------------------------------------------------------===//

#include "flang/Optimizer/Builder/PPCIntrinsicCall.h"
#include "flang/Evaluate/common.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/MutableBox.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"

namespace fir {

using PI = PPCIntrinsicLibrary;

// PPC specific intrinsic handlers.
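// The table below is searched with a binary search (llvm::lower_bound) in
// findPPCIntrinsicHandler, so entries must be kept sorted by name.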
static constexpr IntrinsicHandler ppcHandlers[]{
    {"__ppc_mma_assemble_acc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::AssembleAcc, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"arg1", asValue},
       {"arg2", asValue},
       {"arg3", asValue},
       {"arg4", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_assemble_pair",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::AssemblePair, MMAHandlerOp::SubToFunc>),
     {{{"pair", asAddr}, {"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_build_acc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::AssembleAcc,
                         MMAHandlerOp::SubToFuncReverseArgOnLE>),
     {{{"acc", asAddr},
       {"arg1", asValue},
       {"arg2", asValue},
       {"arg3", asValue},
       {"arg4", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_disassemble_acc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::DisassembleAcc, MMAHandlerOp::SubToFunc>),
     {{{"data", asAddr}, {"acc", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_disassemble_pair",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::DisassemblePair, MMAHandlerOp::SubToFunc>),
     {{{"data", asAddr}, {"pair", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2nn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2nn,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2np",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2np,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2pn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2pn,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvbf16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvbf16ger2pp,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2nn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2nn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2np",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2np, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2pn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2pn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32ger",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32ger, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32gernn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32gernn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32gernp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32gernp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32gerpn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32gerpn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf32gerpp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf32gerpp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64ger",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64ger, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64gernn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64gernn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64gernp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64gernp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64gerpn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64gerpn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvf64gerpp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvf64gerpp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi16ger2s",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi16ger2s, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi16ger2spp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi16ger2spp,
                         MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi4ger8_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi4ger8, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi4ger8pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi4ger8pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi8ger4_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi8ger4, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi8ger4pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi8ger4pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_pmxvi8ger4spp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Pmxvi8ger4spp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr},
       {"a", asValue},
       {"b", asValue},
       {"xmask", asValue},
       {"ymask", asValue},
       {"pmask", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2nn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2nn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2np",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2np, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2pn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2pn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvbf16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvbf16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2nn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2nn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2np",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2np, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2pn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2pn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32ger",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32ger, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32gernn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32gernn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32gernp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32gernp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32gerpn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32gerpn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf32gerpp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf32gerpp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64ger",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64ger, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64gernn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64gernn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64gernp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64gernp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64gerpn",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64gerpn, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvf64gerpp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvf64gerpp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi16ger2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi16ger2, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi16ger2pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi16ger2pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi16ger2s",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi16ger2s, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi16ger2spp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi16ger2spp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi4ger8_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi4ger8, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi4ger8pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi4ger8pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi8ger4_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi8ger4, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi8ger4pp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi8ger4pp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xvi8ger4spp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xvi8ger4spp, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}},
     /*isElemental=*/true},
    {"__ppc_mma_xxmfacc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xxmfacc, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}}},
     /*isElemental=*/true},
    {"__ppc_mma_xxmtacc",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xxmtacc, MMAHandlerOp::FirstArgIsResult>),
     {{{"acc", asAddr}}},
     /*isElemental=*/true},
    {"__ppc_mma_xxsetaccz",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genMmaIntr<MMAOp::Xxsetaccz, MMAHandlerOp::SubToFunc>),
     {{{"acc", asAddr}}},
     /*isElemental=*/true},
    {"__ppc_mtfsf",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(&PI::genMtfsf<false>),
     {{{"mask", asValue}, {"r", asValue}}},
     /*isElemental=*/false},
    {"__ppc_mtfsfi",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(&PI::genMtfsf<true>),
     {{{"bf", asValue}, {"i", asValue}}},
     /*isElemental=*/false},
    {"__ppc_vec_abs",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecAbs),
     {{{"arg1", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_add",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::Add>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_and",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::And>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_any_ge",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAnyCompare<VecOp::Anyge>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cmpge",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecCmp<VecOp::Cmpge>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cmpgt",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecCmp<VecOp::Cmpgt>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cmple",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecCmp<VecOp::Cmple>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cmplt",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecCmp<VecOp::Cmplt>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_convert",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecConvert<VecOp::Convert>),
     {{{"v", asValue}, {"mold", asValue}}},
     /*isElemental=*/false},
    {"__ppc_vec_ctf",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecConvert<VecOp::Ctf>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_cvf",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecConvert<VecOp::Cvf>),
     {{{"arg1", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_extract",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecExtract),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_insert",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecInsert),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_ld",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Ld>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lde",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Lde>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_ldl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Ldl>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lvsl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLvsGrp<VecOp::Lvsl>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lvsr",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLvsGrp<VecOp::Lvsr>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lxv",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdNoCallGrp<VecOp::Lxv>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_lxvp",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Lxvp>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_mergeh",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecMerge<VecOp::Mergeh>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_mergel",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecMerge<VecOp::Mergel>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_msub",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecNmaddMsub<VecOp::Msub>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_mul",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::Mul>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_nmadd",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecNmaddMsub<VecOp::Nmadd>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_perm",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecPerm<VecOp::Perm>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_permi",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecPerm<VecOp::Permi>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sel",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecSel),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sl>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sld",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sld>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sldw",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sldw>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sll",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sll>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_slo",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Slo>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_splat",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecSplat<VecOp::Splat>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_splat_s32_",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecSplat<VecOp::Splat_s32>),
     {{{"arg1", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_splats",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecSplat<VecOp::Splats>),
     {{{"arg1", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sr",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sr>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_srl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Srl>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_sro",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecShift<VecOp::Sro>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_st",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecStore<VecOp::St>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_ste",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecStore<VecOp::Ste>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_stxv",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Stxv>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_stxvp",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecStore<VecOp::Stxvp>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_sub",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::Sub>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_xl",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlGrp),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xl_be",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdNoCallGrp<VecOp::Xlbe>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xld2_",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Xld2>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xlds",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlds),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xlw4_",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecLdCallGrp<VecOp::Xlw4>),
     {{{"arg1", asValue}, {"arg2", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xor",
     static_cast<IntrinsicLibrary::ExtendedGenerator>(
         &PI::genVecAddAndMulSubXor<VecOp::Xor>),
     {{{"arg1", asValue}, {"arg2", asValue}}},
     /*isElemental=*/true},
    {"__ppc_vec_xst",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Xst>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xst_be",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Xst_be>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xstd2_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Xstd2>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
    {"__ppc_vec_xstw4_",
     static_cast<IntrinsicLibrary::SubroutineGenerator>(
         &PI::genVecXStore<VecOp::Xstw4>),
     {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
     /*isElemental=*/false},
};

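// PPC-specific math operations. A name may appear more than once to provide
// overloads for different type signatures (e.g. __ppc_vec_max). The table is
// consumed through a StaticMultimapView below, so entries must stay sorted
// by name; the static_assert after ppcMathOps verifies this.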
static constexpr MathOperation ppcMathOperations[] = {
    // fcfi is just another name for fcfid; there is no llvm.ppc.fcfi.
    {"__ppc_fcfi", "llvm.ppc.fcfid", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fcfid", "llvm.ppc.fcfid", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fcfud", "llvm.ppc.fcfud", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctid", "llvm.ppc.fctid", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctidz", "llvm.ppc.fctidz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctiw", "llvm.ppc.fctiw", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctiwz", "llvm.ppc.fctiwz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctudz", "llvm.ppc.fctudz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fctuwz", "llvm.ppc.fctuwz", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fmadd", "llvm.fma.f32",
     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
     genMathOp<mlir::math::FmaOp>},
    {"__ppc_fmadd", "llvm.fma.f64",
     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
     genMathOp<mlir::math::FmaOp>},
    {"__ppc_fmsub", "llvm.ppc.fmsubs",
     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_fmsub", "llvm.ppc.fmsub",
     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fnabs", "llvm.ppc.fnabss", genFuncType<Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_fnabs", "llvm.ppc.fnabs", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fnmadd", "llvm.ppc.fnmadds",
     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_fnmadd", "llvm.ppc.fnmadd",
     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fnmsub", "llvm.ppc.fnmsub.f32",
     genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_fnmsub", "llvm.ppc.fnmsub.f64",
     genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fre", "llvm.ppc.fre", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_fres", "llvm.ppc.fres", genFuncType<Ty::Real<4>, Ty::Real<4>>,
     genLibCall},
    {"__ppc_frsqrte", "llvm.ppc.frsqrte", genFuncType<Ty::Real<8>, Ty::Real<8>>,
     genLibCall},
    {"__ppc_frsqrtes", "llvm.ppc.frsqrtes",
     genFuncType<Ty::Real<4>, Ty::Real<4>>, genLibCall},
    {"__ppc_vec_cvbf16spn", "llvm.ppc.vsx.xvcvbf16spn",
     genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>, genLibCall},
    {"__ppc_vec_cvspbf16_", "llvm.ppc.vsx.xvcvspbf16",
     genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>, genLibCall},
    {"__ppc_vec_madd", "llvm.fma.v4f32",
     genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>,
                 Ty::RealVector<4>>,
     genLibCall},
    {"__ppc_vec_madd", "llvm.fma.v2f64",
     genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>,
                 Ty::RealVector<8>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsb",
     genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
                 Ty::IntegerVector<1>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsh",
     genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
                 Ty::IntegerVector<2>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsw",
     genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                 Ty::IntegerVector<4>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsd",
     genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
                 Ty::IntegerVector<8>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxub",
     genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
                 Ty::UnsignedVector<1>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxuh",
     genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
                 Ty::UnsignedVector<2>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxuw",
     genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
                 Ty::UnsignedVector<4>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.altivec.vmaxud",
     genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
                 Ty::UnsignedVector<8>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.vsx.xvmaxsp",
     genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>,
     genLibCall},
    {"__ppc_vec_max", "llvm.ppc.vsx.xvmaxdp",
     genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminsb",
     genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
                 Ty::IntegerVector<1>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminsh",
     genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
                 Ty::IntegerVector<2>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminsw",
     genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                 Ty::IntegerVector<4>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminsd",
     genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
                 Ty::IntegerVector<8>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminub",
     genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
                 Ty::UnsignedVector<1>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminuh",
     genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
                 Ty::UnsignedVector<2>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminuw",
     genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
                 Ty::UnsignedVector<4>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.altivec.vminud",
     genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
                 Ty::UnsignedVector<8>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.vsx.xvminsp",
     genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>,
     genLibCall},
    {"__ppc_vec_min", "llvm.ppc.vsx.xvmindp",
     genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>,
     genLibCall},
    {"__ppc_vec_nmsub", "llvm.ppc.fnmsub.v4f32",
     genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>,
                 Ty::RealVector<4>>,
     genLibCall},
    {"__ppc_vec_nmsub", "llvm.ppc.fnmsub.v2f64",
     genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>,
                 Ty::RealVector<8>>,
     genLibCall},
};

const IntrinsicHandler *findPPCIntrinsicHandler(llvm::StringRef name) {
  auto compare = [](const IntrinsicHandler &ppcHandler, llvm::StringRef name) {
    return name.compare(ppcHandler.name) > 0;
  };
  auto result = llvm::lower_bound(ppcHandlers, name, compare);
  return result != std::end(ppcHandlers) && result->name == name ? result
                                                                 : nullptr;
}

using RtMap = Fortran::common::StaticMultimapView<MathOperation>;
static constexpr RtMap ppcMathOps(ppcMathOperations);
static_assert(ppcMathOps.Verify() && "map must be sorted");

std::pair<const MathOperation *, const MathOperation *>
checkPPCMathOperationsRange(llvm::StringRef name) {
  return ppcMathOps.equal_range(name);
}

// Helper functions for vector element ordering.
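// isBEVecElemOrderOnLE: the target is little-endian and the non-native
// (big-endian) vector element order was requested.
// isNativeVecElemOrderOnLE: the target is little-endian and the native
// element order is in effect.
// changeVecElemOrder: the target endianness and the no-native-element-order
// option disagree (little-endian with native order, or big-endian with
// non-native order).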
bool PPCIntrinsicLibrary::isBEVecElemOrderOnLE() {
  const auto triple{fir::getTargetTriple(builder.getModule())};
  return (triple.isLittleEndian() &&
          converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}
bool PPCIntrinsicLibrary::isNativeVecElemOrderOnLE() {
  const auto triple{fir::getTargetTriple(builder.getModule())};
  return (triple.isLittleEndian() &&
          !converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}
bool PPCIntrinsicLibrary::changeVecElemOrder() {
  const auto triple{fir::getTargetTriple(builder.getModule())};
  return (triple.isLittleEndian() !=
          converter->getLoweringOptions().getNoPPCNativeVecElemOrder());
}

static mlir::FunctionType genMmaVpFuncType(mlir::MLIRContext *context,
                                           int quadCnt, int pairCnt, int vecCnt,
                                           int intCnt = 0,
                                           int vecElemBitSize = 8,
                                           int intBitSize = 32) {
  // Constructs a function type with the following signature:
  // Result type: __vector_pair
  // Arguments:
  //   quadCnt: number of arguments that have __vector_quad type, followed by
  //   pairCnt: number of arguments that have __vector_pair type, followed by
  //   vecCnt: number of arguments that have vector(integer) type, followed by
  //   intCnt: number of arguments that have integer type
  //   vecElemBitSize: specifies the size of vector elements in bits
  //   intBitSize: specifies the size of integer arguments in bits
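  // For example, genMmaVpFuncType(context, /*quadCnt=*/0, /*pairCnt=*/1,
  // /*vecCnt=*/2, /*intCnt=*/1) produces
  //   (__vector_pair, vector<16xi8>, vector<16xi8>, i32) -> __vector_pair
  // with the default 8-bit vector elements and 32-bit integers.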
  auto vType{mlir::VectorType::get(
      128 / vecElemBitSize, mlir::IntegerType::get(context, vecElemBitSize))};
  auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
  auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
  auto iType{mlir::IntegerType::get(context, intBitSize)};
  llvm::SmallVector<mlir::Type> argTypes;
  for (int i = 0; i < quadCnt; ++i) {
    argTypes.push_back(vqType);
  }
  for (int i = 0; i < pairCnt; ++i) {
    argTypes.push_back(vpType);
  }
  for (int i = 0; i < vecCnt; ++i) {
    argTypes.push_back(vType);
  }
  for (int i = 0; i < intCnt; ++i) {
    argTypes.push_back(iType);
  }

  return mlir::FunctionType::get(context, argTypes, {vpType});
}

static mlir::FunctionType genMmaVqFuncType(mlir::MLIRContext *context,
                                           int quadCnt, int pairCnt, int vecCnt,
                                           int intCnt = 0,
                                           int vecElemBitSize = 8,
                                           int intBitSize = 32) {
  // Constructs a function type with the following signature:
  // Result type: __vector_quad
  // Arguments:
  //   quadCnt: number of arguments that have __vector_quad type, followed by
  //   pairCnt: number of arguments that have __vector_pair type, followed by
  //   vecCnt: number of arguments that have vector(integer) type, followed by
  //   intCnt: number of arguments that have integer type
  //   vecElemBitSize: specifies the size of vector elements in bits
  //   intBitSize: specifies the size of integer arguments in bits
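  // For example, genMmaVqFuncType(context, /*quadCnt=*/1, /*pairCnt=*/0,
  // /*vecCnt=*/2, /*intCnt=*/2) produces
  //   (__vector_quad, vector<16xi8>, vector<16xi8>, i32, i32) -> __vector_quad
  // with the default 8-bit vector elements and 32-bit integers.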
  auto vType{mlir::VectorType::get(
      128 / vecElemBitSize, mlir::IntegerType::get(context, vecElemBitSize))};
  auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
  auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
  auto iType{mlir::IntegerType::get(context, intBitSize)};
  llvm::SmallVector<mlir::Type> argTypes;
  for (int i = 0; i < quadCnt; ++i) {
    argTypes.push_back(vqType);
  }
  for (int i = 0; i < pairCnt; ++i) {
    argTypes.push_back(vpType);
  }
  for (int i = 0; i < vecCnt; ++i) {
    argTypes.push_back(vType);
  }
  for (int i = 0; i < intCnt; ++i) {
    argTypes.push_back(iType);
  }

  return mlir::FunctionType::get(context, argTypes, {vqType});
}

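// Signature for the MMA disassemble intrinsics: a single __vector_quad or
// __vector_pair argument, returning an LLVM struct of four (DisassembleAcc)
// or two (DisassemblePair) vector<16xi8> members.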
mlir::FunctionType genMmaDisassembleFuncType(mlir::MLIRContext *context,
                                             MMAOp mmaOp) {
  auto vType{mlir::VectorType::get(16, mlir::IntegerType::get(context, 8))};
  llvm::SmallVector<mlir::Type> members;

  if (mmaOp == MMAOp::DisassembleAcc) {
    auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))};
    members.push_back(vType);
    members.push_back(vType);
    members.push_back(vType);
    members.push_back(vType);
    auto resType{mlir::LLVM::LLVMStructType::getLiteral(context, members)};
    return mlir::FunctionType::get(context, {vqType}, {resType});
  } else if (mmaOp == MMAOp::DisassemblePair) {
    auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))};
    members.push_back(vType);
    members.push_back(vType);
    auto resType{mlir::LLVM::LLVMStructType::getLiteral(context, members)};
    return mlir::FunctionType::get(context, {vpType}, {resType});
  } else {
    llvm_unreachable(
        "Unsupported intrinsic code for function signature generator");
  }
}

//===----------------------------------------------------------------------===//
// PowerPC specific intrinsic handlers.
//===----------------------------------------------------------------------===//

// MTFSF, MTFSFI
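// Lowers the MTFSF (Move To FPSCR Fields) and MTFSFI (immediate operand)
// subroutines to the llvm.ppc.mtfsf / llvm.ppc.mtfsfi intrinsics; the isImm
// template parameter selects the immediate form.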
template <bool isImm>
void PPCIntrinsicLibrary::genMtfsf(llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  llvm::SmallVector<mlir::Value> scalarArgs;
  for (const fir::ExtendedValue &arg : args)
    if (arg.getUnboxed())
      scalarArgs.emplace_back(fir::getBase(arg));
    else
      mlir::emitError(loc, "nonscalar intrinsic argument");

  mlir::FunctionType libFuncType;
  mlir::func::FuncOp funcOp;
  if (isImm) {
    libFuncType = genFuncType<Ty::Void, Ty::Integer<4>, Ty::Integer<4>>(
        builder.getContext(), builder);
    funcOp = builder.createFunction(loc, "llvm.ppc.mtfsfi", libFuncType);
  } else {
    libFuncType = genFuncType<Ty::Void, Ty::Integer<4>, Ty::Real<8>>(
        builder.getContext(), builder);
    funcOp = builder.createFunction(loc, "llvm.ppc.mtfsf", libFuncType);
  }
  builder.create<fir::CallOp>(loc, funcOp, scalarArgs);
}

// VEC_ABS
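// Lowers VEC_ABS. Floating-point vectors are handled with llvm.fabs.*;
// integer vectors compute max(0 - arg1, arg1) using the signed Altivec
// vmax intrinsic for the element width.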
fir::ExtendedValue
PPCIntrinsicLibrary::genVecAbs(mlir::Type resultType,
                               llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 1);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto vTypeInfo{getVecTypeFromFir(argBases[0])};

  mlir::func::FuncOp funcOp{nullptr};
  mlir::FunctionType ftype;
  llvm::StringRef fname{};
  if (vTypeInfo.isFloat()) {
    if (vTypeInfo.isFloat32()) {
      fname = "llvm.fabs.v4f32";
      ftype =
          genFuncType<Ty::RealVector<4>, Ty::RealVector<4>>(context, builder);
    } else if (vTypeInfo.isFloat64()) {
      fname = "llvm.fabs.v2f64";
      ftype =
          genFuncType<Ty::RealVector<8>, Ty::RealVector<8>>(context, builder);
    }

    funcOp = builder.createFunction(loc, fname, ftype);
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, argBases[0])};
    return callOp.getResult(0);
  } else if (auto eleTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy)) {
    // vec_abs(arg1) = max(0 - arg1, arg1)

    auto newVecTy{mlir::VectorType::get(vTypeInfo.len, eleTy)};
    auto varg1{builder.createConvert(loc, newVecTy, argBases[0])};
    // construct a vector of zeros
    auto zeroVal{builder.createIntegerConstant(loc, eleTy, 0)};
    auto vZero{
        builder.create<mlir::vector::BroadcastOp>(loc, newVecTy, zeroVal)};
    auto zeroSubVarg1{builder.create<mlir::arith::SubIOp>(loc, vZero, varg1)};

    mlir::func::FuncOp funcOp{nullptr};
    switch (eleTy.getWidth()) {
    case 8:
      fname = "llvm.ppc.altivec.vmaxsb";
      ftype = genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>,
                          Ty::IntegerVector<1>>(context, builder);
      break;
    case 16:
      fname = "llvm.ppc.altivec.vmaxsh";
      ftype = genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>,
                          Ty::IntegerVector<2>>(context, builder);
      break;
    case 32:
      fname = "llvm.ppc.altivec.vmaxsw";
      ftype = genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
                          Ty::IntegerVector<4>>(context, builder);
      break;
    case 64:
      fname = "llvm.ppc.altivec.vmaxsd";
      ftype = genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>,
                          Ty::IntegerVector<8>>(context, builder);
      break;
    default:
      llvm_unreachable("invalid integer size");
    }
    funcOp = builder.createFunction(loc, fname, ftype);

    mlir::Value args[] = {zeroSubVarg1, varg1};
    auto callOp{builder.create<fir::CallOp>(loc, funcOp, args)};
    return builder.createConvert(loc, argBases[0].getType(),
                                 callOp.getResult(0));
  }

  llvm_unreachable("unknown vector type");
}

// VEC_ADD, VEC_AND, VEC_SUB, VEC_MUL, VEC_XOR
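// Lowers VEC_ADD, VEC_MUL, and VEC_SUB to the corresponding arith dialect
// operation for the element type. VEC_AND and VEC_XOR operate on integer
// lanes; floating-point vectors are bitcast to integer vectors first and the
// result is bitcast back.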
template <VecOp vop>
fir::ExtendedValue PPCIntrinsicLibrary::genVecAddAndMulSubXor(
    mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  auto argBases{getBasesForArgs(args)};
  auto argsTy{getTypesForArgs(argBases)};
  assert(mlir::isa<fir::VectorType>(argsTy[0]) &&
         mlir::isa<fir::VectorType>(argsTy[1]));

  auto vecTyInfo{getVecTypeFromFir(argBases[0])};

  const auto isInteger{mlir::isa<mlir::IntegerType>(vecTyInfo.eleTy)};
  const auto isFloat{mlir::isa<mlir::FloatType>(vecTyInfo.eleTy)};
  assert((isInteger || isFloat) && "unknown vector type");

  auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)};

  mlir::Value r{nullptr};
  switch (vop) {
  case VecOp::Add:
    if (isInteger)
      r = builder.create<mlir::arith::AddIOp>(loc, vargs[0], vargs[1]);
    else if (isFloat)
      r = builder.create<mlir::arith::AddFOp>(loc, vargs[0], vargs[1]);
    break;
  case VecOp::Mul:
    if (isInteger)
      r = builder.create<mlir::arith::MulIOp>(loc, vargs[0], vargs[1]);
    else if (isFloat)
      r = builder.create<mlir::arith::MulFOp>(loc, vargs[0], vargs[1]);
    break;
  case VecOp::Sub:
    if (isInteger)
      r = builder.create<mlir::arith::SubIOp>(loc, vargs[0], vargs[1]);
    else if (isFloat)
      r = builder.create<mlir::arith::SubFOp>(loc, vargs[0], vargs[1]);
    break;
  case VecOp::And:
  case VecOp::Xor: {
    mlir::Value arg1{nullptr};
    mlir::Value arg2{nullptr};
    if (isInteger) {
      arg1 = vargs[0];
      arg2 = vargs[1];
    } else if (isFloat) {
      // bitcast the arguments to integer
      auto wd{mlir::dyn_cast<mlir::FloatType>(vecTyInfo.eleTy).getWidth()};
      auto ftype{builder.getIntegerType(wd)};
      auto bcVecTy{mlir::VectorType::get(vecTyInfo.len, ftype)};
      arg1 = builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0]);
      arg2 = builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1]);
    }
    if (vop == VecOp::And)
      r = builder.create<mlir::arith::AndIOp>(loc, arg1, arg2);
    else if (vop == VecOp::Xor)
      r = builder.create<mlir::arith::XOrIOp>(loc, arg1, arg2);

    if (isFloat)
      r = builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), r);

    break;
  }
  }

  return builder.createConvert(loc, argsTy[0], r);
}

// VEC_ANY_GE
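// Lowers VEC_ANY_GE to a PowerPC vector-compare predicate intrinsic. For
// integer vectors, any(a >= b) is computed as "not all(b > a)" by calling
// the vcmpgt*.p builtin with the operands swapped and CR6_LT_REV; for
// floating-point vectors, the xvcmpge*.p builtin is used with CR6_EQ_REV.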
template <VecOp vop>
fir::ExtendedValue
PPCIntrinsicLibrary::genVecAnyCompare(mlir::Type resultType,
                                      llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 2);
  assert(vop == VecOp::Anyge && "unknown vector compare operation");
  auto argBases{getBasesForArgs(args)};
  VecTypeInfo vTypeInfo{getVecTypeFromFir(argBases[0])};
  [[maybe_unused]] const auto isSupportedTy{
      mlir::isa<mlir::Float32Type, mlir::Float64Type, mlir::IntegerType>(
          vTypeInfo.eleTy)};
  assert(isSupportedTy && "unsupported vector type");

  // Constants for mapping CR6 bits to predicate result
  enum { CR6_EQ_REV = 1, CR6_LT_REV = 3 };

  auto context{builder.getContext()};

  static std::map<std::pair<ParamTypeId, unsigned>,
                  std::pair<llvm::StringRef, mlir::FunctionType>>
      uiBuiltin{
          {std::make_pair(ParamTypeId::IntegerVector, 8),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsb.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<1>,
                           Ty::IntegerVector<1>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 16),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsh.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<2>,
                           Ty::IntegerVector<2>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 32),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsw.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<4>,
                           Ty::IntegerVector<4>>(context, builder))},
          {std::make_pair(ParamTypeId::IntegerVector, 64),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtsd.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<8>,
                           Ty::IntegerVector<8>>(context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 8),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtub.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                           Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>(
                   context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 16),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtuh.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                           Ty::UnsignedVector<2>, Ty::UnsignedVector<2>>(
                   context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 32),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtuw.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                           Ty::UnsignedVector<4>, Ty::UnsignedVector<4>>(
                   context, builder))},
          {std::make_pair(ParamTypeId::UnsignedVector, 64),
           std::make_pair(
               "llvm.ppc.altivec.vcmpgtud.p",
               genFuncType<Ty::Integer<4>, Ty::Integer<4>,
                           Ty::UnsignedVector<8>, Ty::UnsignedVector<8>>(
                   context, builder))},
      };

  mlir::FunctionType ftype{nullptr};
  llvm::StringRef fname;
  const auto i32Ty{mlir::IntegerType::get(context, 32)};
  llvm::SmallVector<mlir::Value> cmpArgs;
  mlir::Value op{nullptr};
  const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};

  if (auto elementTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy)) {
    std::pair<llvm::StringRef, mlir::FunctionType> bi;
    bi = (elementTy.isUnsignedInteger())
             ? uiBuiltin[std::pair(ParamTypeId::UnsignedVector, width)]
             : uiBuiltin[std::pair(ParamTypeId::IntegerVector, width)];

    fname = std::get<0>(bi);
    ftype = std::get<1>(bi);

    op = builder.createIntegerConstant(loc, i32Ty, CR6_LT_REV);
    cmpArgs.emplace_back(op);
    // reverse the argument order
    cmpArgs.emplace_back(argBases[1]);
    cmpArgs.emplace_back(argBases[0]);
  } else if (vTypeInfo.isFloat()) {
    if (vTypeInfo.isFloat32()) {
      fname = "llvm.ppc.vsx.xvcmpgesp.p";
      ftype = genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::RealVector<4>,
                          Ty::RealVector<4>>(context, builder);
    } else {
      fname = "llvm.ppc.vsx.xvcmpgedp.p";
      ftype = genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::RealVector<8>,
                          Ty::RealVector<8>>(context, builder);
    }
    op = builder.createIntegerConstant(loc, i32Ty, CR6_EQ_REV);
    cmpArgs.emplace_back(op);
    cmpArgs.emplace_back(argBases[0]);
1339     cmpArgs.emplace_back(argBases[1]);
1340   }
1341   assert((!fname.empty() && ftype) && "invalid type");
1342 
1343   mlir::func::FuncOp funcOp{builder.createFunction(loc, fname, ftype)};
1344   auto callOp{builder.create<fir::CallOp>(loc, funcOp, cmpArgs)};
1345   return callOp.getResult(0);
1346 }
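
// For illustration (a sketch of the mapping above): for vector(integer(4))
// arguments, vec_any_ge(a, b) lowers to the predicate builtin with the
// arguments reversed and CR6_LT_REV selecting the CR6 bit:
//   %r = fir.call @llvm.ppc.altivec.vcmpgtsw.p(%c3_i32, %b, %a)
// Real vectors instead call llvm.ppc.vsx.xvcmpge[sd]p.p with CR6_EQ_REV and
// the original argument order.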
1347 
1348 static std::pair<llvm::StringRef, mlir::FunctionType>
1349 getVecCmpFuncTypeAndName(VecTypeInfo &vTypeInfo, VecOp vop,
1350                          fir::FirOpBuilder &builder) {
1351   auto context{builder.getContext()};
1352   static std::map<std::pair<ParamTypeId, unsigned>,
1353                   std::pair<llvm::StringRef, mlir::FunctionType>>
1354       iuBuiltinName{
1355           {std::make_pair(ParamTypeId::IntegerVector, 8),
1356            std::make_pair(
1357                "llvm.ppc.altivec.vcmpgtsb",
1358                genFuncType<Ty::UnsignedVector<1>, Ty::IntegerVector<1>,
1359                            Ty::IntegerVector<1>>(context, builder))},
1360           {std::make_pair(ParamTypeId::IntegerVector, 16),
1361            std::make_pair(
1362                "llvm.ppc.altivec.vcmpgtsh",
1363                genFuncType<Ty::UnsignedVector<2>, Ty::IntegerVector<2>,
1364                            Ty::IntegerVector<2>>(context, builder))},
1365           {std::make_pair(ParamTypeId::IntegerVector, 32),
1366            std::make_pair(
1367                "llvm.ppc.altivec.vcmpgtsw",
1368                genFuncType<Ty::UnsignedVector<4>, Ty::IntegerVector<4>,
1369                            Ty::IntegerVector<4>>(context, builder))},
1370           {std::make_pair(ParamTypeId::IntegerVector, 64),
1371            std::make_pair(
1372                "llvm.ppc.altivec.vcmpgtsd",
1373                genFuncType<Ty::UnsignedVector<8>, Ty::IntegerVector<8>,
1374                            Ty::IntegerVector<8>>(context, builder))},
1375           {std::make_pair(ParamTypeId::UnsignedVector, 8),
1376            std::make_pair(
1377                "llvm.ppc.altivec.vcmpgtub",
1378                genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>,
1379                            Ty::UnsignedVector<1>>(context, builder))},
1380           {std::make_pair(ParamTypeId::UnsignedVector, 16),
1381            std::make_pair(
1382                "llvm.ppc.altivec.vcmpgtuh",
1383                genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>,
1384                            Ty::UnsignedVector<2>>(context, builder))},
1385           {std::make_pair(ParamTypeId::UnsignedVector, 32),
1386            std::make_pair(
1387                "llvm.ppc.altivec.vcmpgtuw",
1388                genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>,
1389                            Ty::UnsignedVector<4>>(context, builder))},
1390           {std::make_pair(ParamTypeId::UnsignedVector, 64),
1391            std::make_pair(
1392                "llvm.ppc.altivec.vcmpgtud",
1393                genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>,
1394                            Ty::UnsignedVector<8>>(context, builder))}};
1395 
1396   // VSX only defines GE and GT builtins. Cmple and Cmplt use GE and GT with
1397   // arguments reversed.
1398   enum class Cmp { gtOrLt, geOrLe };
1399   static std::map<std::pair<Cmp, int>,
1400                   std::pair<llvm::StringRef, mlir::FunctionType>>
1401       rGBI{{std::make_pair(Cmp::geOrLe, 32),
1402             std::make_pair("llvm.ppc.vsx.xvcmpgesp",
1403                            genFuncType<Ty::UnsignedVector<4>, Ty::RealVector<4>,
1404                                        Ty::RealVector<4>>(context, builder))},
1405            {std::make_pair(Cmp::geOrLe, 64),
1406             std::make_pair("llvm.ppc.vsx.xvcmpgedp",
1407                            genFuncType<Ty::UnsignedVector<8>, Ty::RealVector<8>,
1408                                        Ty::RealVector<8>>(context, builder))},
1409            {std::make_pair(Cmp::gtOrLt, 32),
1410             std::make_pair("llvm.ppc.vsx.xvcmpgtsp",
1411                            genFuncType<Ty::UnsignedVector<4>, Ty::RealVector<4>,
1412                                        Ty::RealVector<4>>(context, builder))},
1413            {std::make_pair(Cmp::gtOrLt, 64),
1414             std::make_pair("llvm.ppc.vsx.xvcmpgtdp",
1415                            genFuncType<Ty::UnsignedVector<8>, Ty::RealVector<8>,
1416                                        Ty::RealVector<8>>(context, builder))}};
1417 
1418   const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};
1419   std::pair<llvm::StringRef, mlir::FunctionType> specFunc;
1420   if (auto elementTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy))
1421     specFunc =
1422         (elementTy.isUnsignedInteger())
1423             ? iuBuiltinName[std::make_pair(ParamTypeId::UnsignedVector, width)]
1424             : iuBuiltinName[std::make_pair(ParamTypeId::IntegerVector, width)];
1425   else if (vTypeInfo.isFloat())
1426     specFunc = (vop == VecOp::Cmpge || vop == VecOp::Cmple)
1427                    ? rGBI[std::make_pair(Cmp::geOrLe, width)]
1428                    : rGBI[std::make_pair(Cmp::gtOrLt, width)];
1429 
1430   assert(!std::get<0>(specFunc).empty() && "unknown builtin name");
1431   assert(std::get<1>(specFunc) && "unknown function type");
1432   return specFunc;
1433 }
1434 
1435 // VEC_CMPGE, VEC_CMPGT, VEC_CMPLE, VEC_CMPLT
1436 template <VecOp vop>
1437 fir::ExtendedValue
1438 PPCIntrinsicLibrary::genVecCmp(mlir::Type resultType,
1439                                llvm::ArrayRef<fir::ExtendedValue> args) {
1440   assert(args.size() == 2);
1441   auto context{builder.getContext()};
1442   auto argBases{getBasesForArgs(args)};
1443   VecTypeInfo vecTyInfo{getVecTypeFromFir(argBases[0])};
1444   auto varg{convertVecArgs(builder, loc, vecTyInfo, argBases)};
1445 
1446   std::pair<llvm::StringRef, mlir::FunctionType> funcTyNam{
1447       getVecCmpFuncTypeAndName(vecTyInfo, vop, builder)};
1448 
1449   mlir::func::FuncOp funcOp = builder.createFunction(
1450       loc, std::get<0>(funcTyNam), std::get<1>(funcTyNam));
1451 
1452   mlir::Value res{nullptr};
1453 
1454   if (auto eTy = mlir::dyn_cast<mlir::IntegerType>(vecTyInfo.eleTy)) {
1455     constexpr int firstArg{0};
1456     constexpr int secondArg{1};
1457     std::map<VecOp, std::array<int, 2>> argOrder{
1458         {VecOp::Cmpge, {secondArg, firstArg}},
1459         {VecOp::Cmple, {firstArg, secondArg}},
1460         {VecOp::Cmpgt, {firstArg, secondArg}},
1461         {VecOp::Cmplt, {secondArg, firstArg}}};
1462 
1463     // Construct the function return type (an unsigned vector) for conversion.
1464     auto itype = mlir::IntegerType::get(context, eTy.getWidth(),
1465                                         mlir::IntegerType::Unsigned);
1466     auto returnType = fir::VectorType::get(vecTyInfo.len, itype);
1467 
1468     switch (vop) {
1469     case VecOp::Cmpgt:
1470     case VecOp::Cmplt: {
1471       // arg1 > arg2 --> vcmpgt(arg1, arg2)
1472       // arg1 < arg2 --> vcmpgt(arg2, arg1)
1473       mlir::Value vargs[]{argBases[argOrder[vop][0]],
1474                           argBases[argOrder[vop][1]]};
1475       auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
1476       res = callOp.getResult(0);
1477       break;
1478     }
1479     case VecOp::Cmpge:
1480     case VecOp::Cmple: {
1481       // arg1 >= arg2 --> vcmpgt(arg2, arg1) xor vector(-1)
1482       // arg1 <= arg2 --> vcmpgt(arg1, arg2) xor vector(-1)
1483       mlir::Value vargs[]{argBases[argOrder[vop][0]],
1484                           argBases[argOrder[vop][1]]};
1485 
1486       // Construct a constant vector(-1)
1487       auto negOneVal{builder.createIntegerConstant(
1488           loc, getConvertedElementType(context, eTy), -1)};
1489       auto vNegOne{builder.create<mlir::vector::BroadcastOp>(
1490           loc, vecTyInfo.toMlirVectorType(context), negOneVal)};
1491 
1492       auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
1493       mlir::Value callRes{callOp.getResult(0)};
1494       auto vargs2{
1495           convertVecArgs(builder, loc, vecTyInfo, mlir::ValueRange{callRes})};
1496       auto xorRes{builder.create<mlir::arith::XOrIOp>(loc, vargs2[0], vNegOne)};
1497 
1498       res = builder.createConvert(loc, returnType, xorRes);
1499       break;
1500     }
1501     default:
1502       llvm_unreachable("Invalid vector operation for generator");
1503     }
1504   } else if (vecTyInfo.isFloat()) {
1505     mlir::Value vargs[2];
1506     switch (vop) {
1507     case VecOp::Cmpge:
1508     case VecOp::Cmpgt:
1509       vargs[0] = argBases[0];
1510       vargs[1] = argBases[1];
1511       break;
1512     case VecOp::Cmple:
1513     case VecOp::Cmplt:
1514       // Swap the arguments as xvcmpg[et] is used
1515       vargs[0] = argBases[1];
1516       vargs[1] = argBases[0];
1517       break;
1518     default:
1519       llvm_unreachable("Invalid vector operation for generator");
1520     }
1521     auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)};
1522     res = callOp.getResult(0);
1523   } else
1524     llvm_unreachable("invalid vector type");
1525 
1526   return res;
1527 }
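
// For illustration (assumed lowering, per the argument-order table above),
// with vector(integer(4)) arguments a and b:
//   vec_cmplt(a, b) ~ vcmpgtsw(b, a)
//   vec_cmpge(a, b) ~ vcmpgtsw(b, a) xor vector(-1)
// i.e. GE/LE are formed as the complement of the reversed GT comparison.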
1528 
1529 static inline mlir::Value swapVectorWordPairs(fir::FirOpBuilder &builder,
1530                                               mlir::Location loc,
1531                                               mlir::Value arg) {
1532   auto ty = arg.getType();
1533   auto context{builder.getContext()};
1534   auto vtype{mlir::VectorType::get(16, mlir::IntegerType::get(context, 8))};
1535 
1536   if (ty != vtype)
1537     arg = builder.create<mlir::LLVM::BitcastOp>(loc, vtype, arg).getResult();
1538 
1539   llvm::SmallVector<int64_t, 16> mask{4,  5,  6,  7,  0, 1, 2,  3,
1540                                       12, 13, 14, 15, 8, 9, 10, 11};
1541   arg = builder.create<mlir::vector::ShuffleOp>(loc, arg, arg, mask);
1542   if (ty != vtype)
1543     arg = builder.create<mlir::LLVM::BitcastOp>(loc, ty, arg);
1544   return arg;
1545 }
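
// For illustration: the byte mask above swaps adjacent 32-bit words, so a
// <4 x i32> value [w0, w1, w2, w3] becomes [w1, w0, w3, w2].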
1546 
1547 // VEC_CONVERT, VEC_CTF, VEC_CVF
1548 template <VecOp vop>
1549 fir::ExtendedValue
1550 PPCIntrinsicLibrary::genVecConvert(mlir::Type resultType,
1551                                    llvm::ArrayRef<fir::ExtendedValue> args) {
1552   auto context{builder.getContext()};
1553   auto argBases{getBasesForArgs(args)};
1554   auto vecTyInfo{getVecTypeFromFir(argBases[0])};
1555   auto mlirTy{vecTyInfo.toMlirVectorType(context)};
1556   auto vArg1{builder.createConvert(loc, mlirTy, argBases[0])};
1557   const auto i32Ty{mlir::IntegerType::get(context, 32)};
1558 
1559   switch (vop) {
1560   case VecOp::Ctf: {
1561     assert(args.size() == 2);
1562     auto convArg{builder.createConvert(loc, i32Ty, argBases[1])};
1563     auto eTy{mlir::dyn_cast<mlir::IntegerType>(vecTyInfo.eleTy)};
1564     assert(eTy && "Unsupported vector type");
1565     const auto isUnsigned{eTy.isUnsignedInteger()};
1566     const auto width{eTy.getWidth()};
1567 
1568     if (width == 32) {
1569       auto ftype{(isUnsigned)
1570                      ? genFuncType<Ty::RealVector<4>, Ty::UnsignedVector<4>,
1571                                    Ty::Integer<4>>(context, builder)
1572                      : genFuncType<Ty::RealVector<4>, Ty::IntegerVector<4>,
1573                                    Ty::Integer<4>>(context, builder)};
1574       const llvm::StringRef fname{(isUnsigned) ? "llvm.ppc.altivec.vcfux"
1575                                                : "llvm.ppc.altivec.vcfsx"};
1576       auto funcOp{builder.createFunction(loc, fname, ftype)};
1577       mlir::Value newArgs[] = {argBases[0], convArg};
1578       auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};
1579 
1580       return callOp.getResult(0);
1581     } else if (width == 64) {
1582       auto fTy{mlir::Float64Type::get(context)};
1583       auto ty{mlir::VectorType::get(2, fTy)};
1584 
1585       // vec_ctf(arg1, arg2) = fmul(1.0 / (1 << arg2), llvm.sitofp(arg1))
1586       auto convOp{(isUnsigned)
1587                       ? builder.create<mlir::LLVM::UIToFPOp>(loc, ty, vArg1)
1588                       : builder.create<mlir::LLVM::SIToFPOp>(loc, ty, vArg1)};
1589 
1590       // construct vector<1.0/(1<<arg2), 1.0/(1<<arg2)>
1591       auto constInt{mlir::dyn_cast_or_null<mlir::IntegerAttr>(
1592           mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[1].getDefiningOp())
1593               .getValue())};
1594       assert(constInt && "expected integer constant argument");
1595       double f{1.0 / (1 << constInt.getInt())};
1596       llvm::SmallVector<double> vals{f, f};
1597       auto constOp{builder.create<mlir::arith::ConstantOp>(
1598           loc, ty, builder.getF64VectorAttr(vals))};
1599 
1600       auto mulOp{builder.create<mlir::LLVM::FMulOp>(
1601           loc, ty, convOp->getResult(0), constOp)};
1602 
1603       return builder.createConvert(loc, fir::VectorType::get(2, fTy), mulOp);
1604     }
1605     llvm_unreachable("invalid element integer kind");
1606   }
1607   case VecOp::Convert: {
1608     assert(args.size() == 2);
1609     // resultType has the mold's type (if scalar) or element type (if array)
1610     auto resTyInfo{getVecTypeFromFirType(resultType)};
1611     auto moldTy{resTyInfo.toMlirVectorType(context)};
1612     auto firTy{resTyInfo.toFirVectorType()};
1613 
1614     // vec_convert(v, mold) = bitcast v to "type of mold"
1615     auto conv{builder.create<mlir::LLVM::BitcastOp>(loc, moldTy, vArg1)};
1616 
1617     return builder.createConvert(loc, firTy, conv);
1618   }
1619   case VecOp::Cvf: {
1620     assert(args.size() == 1);
1621 
1622     mlir::Value newArgs[]{vArg1};
1623     if (vecTyInfo.isFloat32()) {
1624       if (changeVecElemOrder())
1625         newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
1626 
1627       const llvm::StringRef fname{"llvm.ppc.vsx.xvcvspdp"};
1628       auto ftype{
1629           genFuncType<Ty::RealVector<8>, Ty::RealVector<4>>(context, builder)};
1630       auto funcOp{builder.createFunction(loc, fname, ftype)};
1631       auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};
1632 
1633       return callOp.getResult(0);
1634     } else if (vecTyInfo.isFloat64()) {
1635       const llvm::StringRef fname{"llvm.ppc.vsx.xvcvdpsp"};
1636       auto ftype{
1637           genFuncType<Ty::RealVector<4>, Ty::RealVector<8>>(context, builder)};
1638       auto funcOp{builder.createFunction(loc, fname, ftype)};
1639       newArgs[0] =
1640           builder.create<fir::CallOp>(loc, funcOp, newArgs).getResult(0);
1641       auto fvf32Ty{newArgs[0].getType()};
1642       auto f32type{mlir::Float32Type::get(context)};
1643       auto mvf32Ty{mlir::VectorType::get(4, f32type)};
1644       newArgs[0] = builder.createConvert(loc, mvf32Ty, newArgs[0]);
1645 
1646       if (changeVecElemOrder())
1647         newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]);
1648 
1649       return builder.createConvert(loc, fvf32Ty, newArgs[0]);
1650     }
1651     llvm_unreachable("invalid element float kind");
1652   }
1653   default:
1654     llvm_unreachable("Invalid vector operation for generator");
1655   }
1656 }
1657 
1658 static mlir::Value convertVectorElementOrder(fir::FirOpBuilder &builder,
1659                                              mlir::Location loc,
1660                                              VecTypeInfo vecInfo,
1661                                              mlir::Value idx) {
1662   mlir::Value numSub1{
1663       builder.createIntegerConstant(loc, idx.getType(), vecInfo.len - 1)};
1664   return builder.create<mlir::LLVM::SubOp>(loc, idx.getType(), numSub1, idx);
1665 }
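
// For illustration: with a 4-element vector, convertVectorElementOrder maps
// index i to 3 - i, so indices 0, 1, 2, 3 become 3, 2, 1, 0.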
1666 
1667 // VEC_EXTRACT
1668 fir::ExtendedValue
1669 PPCIntrinsicLibrary::genVecExtract(mlir::Type resultType,
1670                                    llvm::ArrayRef<fir::ExtendedValue> args) {
1671   assert(args.size() == 2);
1672   auto argBases{getBasesForArgs(args)};
1673   auto argTypes{getTypesForArgs(argBases)};
1674   auto vecTyInfo{getVecTypeFromFir(argBases[0])};
1675 
1676   auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
1677   auto varg0{builder.createConvert(loc, mlirTy, argBases[0])};
1678 
1679   // Take arg2 modulo the number of elements in arg1 to determine the
1680   // element position.
1681   auto numEle{builder.createIntegerConstant(loc, argTypes[1], vecTyInfo.len)};
1682   mlir::Value uremOp{
1683       builder.create<mlir::LLVM::URemOp>(loc, argBases[1], numEle)};
1684 
1685   if (!isNativeVecElemOrderOnLE())
1686     uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp);
1687 
1688   return builder.create<mlir::vector::ExtractElementOp>(loc, varg0, uremOp);
1689 }
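
// For illustration: the index is taken modulo the vector length, so for a
// vector(integer(4)) argument v, vec_extract(v, 5) reads element
// 5 mod 4 = 1 (or the mirrored position 2 when BE element order is in
// effect on a little-endian target).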
1690 
1691 // VEC_INSERT
1692 fir::ExtendedValue
1693 PPCIntrinsicLibrary::genVecInsert(mlir::Type resultType,
1694                                   llvm::ArrayRef<fir::ExtendedValue> args) {
1695   assert(args.size() == 3);
1696   auto argBases{getBasesForArgs(args)};
1697   auto argTypes{getTypesForArgs(argBases)};
1698   auto vecTyInfo{getVecTypeFromFir(argBases[1])};
1699   auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
1700   auto varg1{builder.createConvert(loc, mlirTy, argBases[1])};
1701 
1702   auto numEle{builder.createIntegerConstant(loc, argTypes[2], vecTyInfo.len)};
1703   mlir::Value uremOp{
1704       builder.create<mlir::LLVM::URemOp>(loc, argBases[2], numEle)};
1705 
1706   if (!isNativeVecElemOrderOnLE())
1707     uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp);
1708 
1709   auto res{builder.create<mlir::vector::InsertElementOp>(loc, argBases[0],
1710                                                          varg1, uremOp)};
1711   return builder.create<fir::ConvertOp>(loc, vecTyInfo.toFirVectorType(), res);
1712 }
1713 
1714 // VEC_MERGEH, VEC_MERGEL
1715 template <VecOp vop>
1716 fir::ExtendedValue
1717 PPCIntrinsicLibrary::genVecMerge(mlir::Type resultType,
1718                                  llvm::ArrayRef<fir::ExtendedValue> args) {
1719   assert(args.size() == 2);
1720   auto argBases{getBasesForArgs(args)};
1721   auto vecTyInfo{getVecTypeFromFir(argBases[0])};
1722   llvm::SmallVector<int64_t, 16> mMask; // native vector element order mask
1723   llvm::SmallVector<int64_t, 16> rMask; // non-native vector element order mask
1724 
1725   switch (vop) {
1726   case VecOp::Mergeh: {
1727     switch (vecTyInfo.len) {
1728     case 2: {
1729       enum { V1 = 0, V2 = 2 };
1730       mMask = {V1 + 0, V2 + 0};
1731       rMask = {V2 + 1, V1 + 1};
1732       break;
1733     }
1734     case 4: {
1735       enum { V1 = 0, V2 = 4 };
1736       mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1};
1737       rMask = {V2 + 2, V1 + 2, V2 + 3, V1 + 3};
1738       break;
1739     }
1740     case 8: {
1741       enum { V1 = 0, V2 = 8 };
1742       mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1, V1 + 2, V2 + 2, V1 + 3, V2 + 3};
1743       rMask = {V2 + 4, V1 + 4, V2 + 5, V1 + 5, V2 + 6, V1 + 6, V2 + 7, V1 + 7};
1744       break;
1745     }
1746     case 16:
1747       mMask = {0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13,
1748                0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17};
1749       rMask = {0x18, 0x08, 0x19, 0x09, 0x1A, 0x0A, 0x1B, 0x0B,
1750                0x1C, 0x0C, 0x1D, 0x0D, 0x1E, 0x0E, 0x1F, 0x0F};
1751       break;
1752     default:
1753       llvm_unreachable("unexpected vector length");
1754     }
1755     break;
1756   }
1757   case VecOp::Mergel: {
1758     switch (vecTyInfo.len) {
1759     case 2: {
1760       enum { V1 = 0, V2 = 2 };
1761       mMask = {V1 + 1, V2 + 1};
1762       rMask = {V2 + 0, V1 + 0};
1763       break;
1764     }
1765     case 4: {
1766       enum { V1 = 0, V2 = 4 };
1767       mMask = {V1 + 2, V2 + 2, V1 + 3, V2 + 3};
1768       rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1};
1769       break;
1770     }
1771     case 8: {
1772       enum { V1 = 0, V2 = 8 };
1773       mMask = {V1 + 4, V2 + 4, V1 + 5, V2 + 5, V1 + 6, V2 + 6, V1 + 7, V2 + 7};
1774       rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1, V2 + 2, V1 + 2, V2 + 3, V1 + 3};
1775       break;
1776     }
1777     case 16:
1778       mMask = {0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B,
1779                0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F};
1780       rMask = {0x10, 0x00, 0x11, 0x01, 0x12, 0x02, 0x13, 0x03,
1781                0x14, 0x04, 0x15, 0x05, 0x16, 0x06, 0x17, 0x07};
1782       break;
1783     default:
1784       llvm_unreachable("unexpected vector length");
1785     }
1786     break;
1787   }
1788   default:
1789     llvm_unreachable("invalid vector operation for generator");
1790   }
1791 
1792   auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)};
1793 
1794   llvm::SmallVector<int64_t, 16> &mergeMask =
1795       (isBEVecElemOrderOnLE()) ? rMask : mMask;
1796 
1797   auto callOp{builder.create<mlir::vector::ShuffleOp>(loc, vargs[0], vargs[1],
1798                                                       mergeMask)};
1799   return builder.createConvert(loc, resultType, callOp);
1800 }
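
// For illustration, with 4-element vectors a = [a0,a1,a2,a3] and
// b = [b0,b1,b2,b3] in native element order, the masks above give
//   vec_mergeh(a, b) = [a0, b0, a1, b1]
//   vec_mergel(a, b) = [a2, b2, a3, b3]
// (rMask implements the same operation under reversed element numbering).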
1801 
1802 static mlir::Value addOffsetToAddress(fir::FirOpBuilder &builder,
1803                                       mlir::Location loc, mlir::Value baseAddr,
1804                                       mlir::Value offset) {
1805   auto typeExtent{fir::SequenceType::getUnknownExtent()};
1806   // Construct an !fir.ref<!fir.array<?xi8>> type
1807   auto arrRefTy{builder.getRefType(fir::SequenceType::get(
1808       {typeExtent}, mlir::IntegerType::get(builder.getContext(), 8)))};
1809   // Convert arg to !fir.ref<!fir.array<?xi8>>
1810   auto resAddr{builder.create<fir::ConvertOp>(loc, arrRefTy, baseAddr)};
1811 
1812   return builder.create<fir::CoordinateOp>(loc, arrRefTy, resAddr, offset);
1813 }
1814 
1815 static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder,
1816                                          mlir::Location loc, mlir::Value v,
1817                                          int64_t len) {
1818   assert(mlir::isa<mlir::VectorType>(v.getType()));
1819   assert(len > 0);
1820   llvm::SmallVector<int64_t, 16> mask;
1821   for (int64_t i = 0; i < len; ++i) {
1822     mask.push_back(len - 1 - i);
1823   }
1824   auto undefVec{builder.create<fir::UndefOp>(loc, v.getType())};
1825   return builder.create<mlir::vector::ShuffleOp>(loc, v, undefVec, mask);
1826 }
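
// For illustration: for len = 4 the mask is [3, 2, 1, 0], turning
// [e0, e1, e2, e3] into [e3, e2, e1, e0].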
1827 
1828 static mlir::NamedAttribute getAlignmentAttr(fir::FirOpBuilder &builder,
1829                                              const int val) {
1830   auto i64ty{mlir::IntegerType::get(builder.getContext(), 64)};
1831   auto alignAttr{mlir::IntegerAttr::get(i64ty, val)};
1832   return builder.getNamedAttr("alignment", alignAttr);
1833 }
1834 
1835 fir::ExtendedValue
1836 PPCIntrinsicLibrary::genVecXlGrp(mlir::Type resultType,
1837                                  llvm::ArrayRef<fir::ExtendedValue> args) {
1838   VecTypeInfo vecTyInfo{getVecTypeFromFirType(resultType)};
1839   switch (vecTyInfo.eleTy.getIntOrFloatBitWidth()) {
1840   case 8:
1841     // vec_xlb1
1842     return genVecLdNoCallGrp<VecOp::Xl>(resultType, args);
1843   case 16:
1844     // vec_xlh8
1845     return genVecLdNoCallGrp<VecOp::Xl>(resultType, args);
1846   case 32:
1847     // vec_xlw4
1848     return genVecLdCallGrp<VecOp::Xlw4>(resultType, args);
1849   case 64:
1850     // vec_xld2
1851     return genVecLdCallGrp<VecOp::Xld2>(resultType, args);
1852   default:
1853     llvm_unreachable("invalid kind");
1854   }
1855   llvm_unreachable("invalid vector operation for generator");
1856 }
1857 
1858 template <VecOp vop>
1859 fir::ExtendedValue PPCIntrinsicLibrary::genVecLdNoCallGrp(
1860     mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
1861   assert(args.size() == 2);
1862   auto arg0{getBase(args[0])};
1863   auto arg1{getBase(args[1])};
1864 
1865   auto vecTyInfo{getVecTypeFromFirType(resultType)};
1866   auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
1867   auto firTy{vecTyInfo.toFirVectorType()};
1868 
1869   // Add the %val of arg0 to %addr of arg1
1870   auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};
1871 
1872   const auto triple{fir::getTargetTriple(builder.getModule())};
1873   // The load must be generated with alignment 1; the address may be unaligned.
1874   auto result{builder.create<fir::LoadOp>(loc, mlirTy, addr,
1875                                           getAlignmentAttr(builder, 1))};
1876   if ((vop == VecOp::Xl && isBEVecElemOrderOnLE()) ||
1877       (vop == VecOp::Xlbe && triple.isLittleEndian()))
1878     return builder.createConvert(
1879         loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len));
1880 
1881   return builder.createConvert(loc, firTy, result);
1882 }
1883 
1884 // VEC_LD, VEC_LDE, VEC_LDL, VEC_LXVP, VEC_XLD2, VEC_XLW4
1885 template <VecOp vop>
1886 fir::ExtendedValue
1887 PPCIntrinsicLibrary::genVecLdCallGrp(mlir::Type resultType,
1888                                      llvm::ArrayRef<fir::ExtendedValue> args) {
1889   assert(args.size() == 2);
1890   auto context{builder.getContext()};
1891   auto arg0{getBase(args[0])};
1892   auto arg1{getBase(args[1])};
1893 
1894   // Prepare the return type in FIR.
1895   auto vecResTyInfo{getVecTypeFromFirType(resultType)};
1896   auto mlirTy{vecResTyInfo.toMlirVectorType(context)};
1897   auto firTy{vecResTyInfo.toFirVectorType()};
1898 
1899   // llvm.ppc.altivec.lvx* returns <4 x i32>. Others, such as
1900   // llvm.ppc.altivec.lvewx, do too when the element type is not an integer.
1901   const auto i32Ty{mlir::IntegerType::get(builder.getContext(), 32)};
1902   const auto mVecI32Ty{mlir::VectorType::get(4, i32Ty)};
1903 
1904   // For vec_ld, need to convert arg0 from i64 to i32
1905   if (vop == VecOp::Ld && arg0.getType().getIntOrFloatBitWidth() == 64)
1906     arg0 = builder.createConvert(loc, i32Ty, arg0);
1907 
1908   // Add the %val of arg0 to %addr of arg1
1909   auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};
1910   llvm::SmallVector<mlir::Value, 4> parsedArgs{addr};
1911 
1912   mlir::Type intrinResTy{nullptr};
1913   llvm::StringRef fname{};
1914   switch (vop) {
1915   case VecOp::Ld:
1916     fname = "llvm.ppc.altivec.lvx";
1917     intrinResTy = mVecI32Ty;
1918     break;
1919   case VecOp::Lde:
1920     switch (vecResTyInfo.eleTy.getIntOrFloatBitWidth()) {
1921     case 8:
1922       fname = "llvm.ppc.altivec.lvebx";
1923       intrinResTy = mlirTy;
1924       break;
1925     case 16:
1926       fname = "llvm.ppc.altivec.lvehx";
1927       intrinResTy = mlirTy;
1928       break;
1929     case 32:
1930       fname = "llvm.ppc.altivec.lvewx";
1931       if (mlir::isa<mlir::IntegerType>(vecResTyInfo.eleTy))
1932         intrinResTy = mlirTy;
1933       else
1934         intrinResTy = mVecI32Ty;
1935       break;
1936     default:
1937       llvm_unreachable("invalid vector for vec_lde");
1938     }
1939     break;
1940   case VecOp::Ldl:
1941     fname = "llvm.ppc.altivec.lvxl";
1942     intrinResTy = mVecI32Ty;
1943     break;
1944   case VecOp::Lxvp:
1945     fname = "llvm.ppc.vsx.lxvp";
1946     intrinResTy = fir::VectorType::get(256, mlir::IntegerType::get(context, 1));
1947     break;
1948   case VecOp::Xld2: {
1949     fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvd2x.be"
1950                                    : "llvm.ppc.vsx.lxvd2x";
1951     // llvm.ppc.vsx.lxvd2x* returns <2 x double>
1952     intrinResTy = mlir::VectorType::get(2, mlir::Float64Type::get(context));
1953   } break;
1954   case VecOp::Xlw4:
1955     fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvw4x.be"
1956                                    : "llvm.ppc.vsx.lxvw4x";
1957     // llvm.ppc.vsx.lxvw4x* returns <4 x i32>
1958     intrinResTy = mVecI32Ty;
1959     break;
1960   default:
1961     llvm_unreachable("invalid vector operation for generator");
1962   }
1963 
1964   auto funcType{
1965       mlir::FunctionType::get(context, {addr.getType()}, {intrinResTy})};
1966   auto funcOp{builder.createFunction(loc, fname, funcType)};
1967   auto result{
1968       builder.create<fir::CallOp>(loc, funcOp, parsedArgs).getResult(0)};
1969 
1970   if (vop == VecOp::Lxvp)
1971     return result;
1972 
1973   if (intrinResTy != mlirTy)
1974     result = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, result);
1975 
1976   if (vop != VecOp::Xld2 && vop != VecOp::Xlw4 && isBEVecElemOrderOnLE())
1977     return builder.createConvert(
1978         loc, firTy,
1979         reverseVectorElements(builder, loc, result, vecResTyInfo.len));
1980 
1981   return builder.createConvert(loc, firTy, result);
1982 }
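
// For illustration (a rough sketch of the lowering above): vec_ld(i, v)
// with a vector(integer(4)) result computes the byte address %v + %i and
// then issues
//   %raw = fir.call @llvm.ppc.altivec.lvx(%addr) -> vector<4xi32>
// bitcasting and/or reversing elements afterwards as required.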
1983 
1984 // VEC_LVSL, VEC_LVSR
1985 template <VecOp vop>
1986 fir::ExtendedValue
1987 PPCIntrinsicLibrary::genVecLvsGrp(mlir::Type resultType,
1988                                   llvm::ArrayRef<fir::ExtendedValue> args) {
1989   assert(args.size() == 2);
1990   auto context{builder.getContext()};
1991   auto arg0{getBase(args[0])};
1992   auto arg1{getBase(args[1])};
1993 
1994   auto vecTyInfo{getVecTypeFromFirType(resultType)};
1995   auto mlirTy{vecTyInfo.toMlirVectorType(context)};
1996   auto firTy{vecTyInfo.toFirVectorType()};
1997 
1998   // Convert arg0 to i64 type if needed
1999   auto i64ty{mlir::IntegerType::get(context, 64)};
2000   if (arg0.getType() != i64ty)
2001     arg0 = builder.create<fir::ConvertOp>(loc, i64ty, arg0);
2002 
2003   // The offset is modulo 16, so shift left 56 bits and then right 56 bits to
2004   //   clear the upper 56 bits while preserving the sign.
2005   auto shiftVal{builder.createIntegerConstant(loc, i64ty, 56)};
2006   auto offset{builder.create<mlir::arith::ShLIOp>(loc, arg0, shiftVal)};
2007   auto offset2{builder.create<mlir::arith::ShRSIOp>(loc, offset, shiftVal)};
2008 
2009   // Add the offsetArg to %addr of arg1
2010   auto addr{addOffsetToAddress(builder, loc, arg1, offset2)};
2011   llvm::SmallVector<mlir::Value, 4> parsedArgs{addr};
2012 
2013   llvm::StringRef fname{};
2014   switch (vop) {
2015   case VecOp::Lvsl:
2016     fname = "llvm.ppc.altivec.lvsl";
2017     break;
2018   case VecOp::Lvsr:
2019     fname = "llvm.ppc.altivec.lvsr";
2020     break;
2021   default:
2022     llvm_unreachable("invalid vector operation for generator");
2023   }
2024   auto funcType{mlir::FunctionType::get(context, {addr.getType()}, {mlirTy})};
2025   auto funcOp{builder.createFunction(loc, fname, funcType)};
2026   auto result{
2027       builder.create<fir::CallOp>(loc, funcOp, parsedArgs).getResult(0)};
2028 
2029   if (isNativeVecElemOrderOnLE())
2030     return builder.createConvert(
2031         loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len));
2032 
2033   return builder.createConvert(loc, firTy, result);
2034 }
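
// For illustration (assumed semantics): vec_lvsl / vec_lvsr produce the
// permute control vector used to align unaligned data; the code above
// sign-extends the low byte of the first argument, adds it to the address,
// and calls llvm.ppc.altivec.lvsl or llvm.ppc.altivec.lvsr on the result.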
2035 
2036 // VEC_NMADD, VEC_MSUB
2037 template <VecOp vop>
2038 fir::ExtendedValue
2039 PPCIntrinsicLibrary::genVecNmaddMsub(mlir::Type resultType,
2040                                      llvm::ArrayRef<fir::ExtendedValue> args) {
2041   assert(args.size() == 3);
2042   auto context{builder.getContext()};
2043   auto argBases{getBasesForArgs(args)};
2044   auto vTypeInfo{getVecTypeFromFir(argBases[0])};
2045   auto newArgs{convertVecArgs(builder, loc, vTypeInfo, argBases)};
2046   const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()};
2047 
2048   static std::map<int, std::pair<llvm::StringRef, mlir::FunctionType>> fmaMap{
2049       {32,
2050        std::make_pair(
2051            "llvm.fma.v4f32",
2052            genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>(
2053                context, builder))},
2054       {64,
2055        std::make_pair(
2056            "llvm.fma.v2f64",
2057            genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>(
2058                context, builder))}};
2059 
2060   auto funcOp{builder.createFunction(loc, std::get<0>(fmaMap[width]),
2061                                      std::get<1>(fmaMap[width]))};
2062   if (vop == VecOp::Nmadd) {
2063     // vec_nmadd(arg1, arg2, arg3) = -fma(arg1, arg2, arg3)
2064     auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};
2065 
2066     // We need to convert fir.vector to MLIR vector to use fneg and then back
2067     // to fir.vector to store.
2068     auto vCall{builder.createConvert(loc, vTypeInfo.toMlirVectorType(context),
2069                                      callOp.getResult(0))};
2070     auto neg{builder.create<mlir::arith::NegFOp>(loc, vCall)};
2071     return builder.createConvert(loc, vTypeInfo.toFirVectorType(), neg);
2072   } else if (vop == VecOp::Msub) {
2073     // vec_msub(arg1, arg2, arg3) = fma(arg1, arg2, -arg3)
2074     newArgs[2] = builder.create<mlir::arith::NegFOp>(loc, newArgs[2]);
2075 
2076     auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)};
2077     return callOp.getResult(0);
2078   }
2079   llvm_unreachable("Invalid vector operation for generator");
2080 }
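
// For illustration, both variants reuse llvm.fma:
//   vec_nmadd(a, b, c) = -(a * b + c) = negf(fma(a, b, c))
//   vec_msub(a, b, c)  =   a * b - c  = fma(a, b, negf(c))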
2081 
2082 // VEC_PERM, VEC_PERMI
2083 template <VecOp vop>
2084 fir::ExtendedValue
2085 PPCIntrinsicLibrary::genVecPerm(mlir::Type resultType,
2086                                 llvm::ArrayRef<fir::ExtendedValue> args) {
2087   assert(args.size() == 3);
2088   auto context{builder.getContext()};
2089   auto argBases{getBasesForArgs(args)};
2090   auto argTypes{getTypesForArgs(argBases)};
2091   auto vecTyInfo{getVecTypeFromFir(argBases[0])};
2092   auto mlirTy{vecTyInfo.toMlirVectorType(context)};
2093 
2094   auto vi32Ty{mlir::VectorType::get(4, mlir::IntegerType::get(context, 32))};
2095   auto vf64Ty{mlir::VectorType::get(2, mlir::Float64Type::get(context))};
2096 
2097   auto mArg0{builder.createConvert(loc, mlirTy, argBases[0])};
2098   auto mArg1{builder.createConvert(loc, mlirTy, argBases[1])};
2099 
2100   switch (vop) {
2101   case VecOp::Perm: {
2102     VecTypeInfo maskVecTyInfo{getVecTypeFromFir(argBases[2])};
2103     auto mlirMaskTy{maskVecTyInfo.toMlirVectorType(context)};
2104     auto mMask{builder.createConvert(loc, mlirMaskTy, argBases[2])};
2105 
2106     if (mlirTy != vi32Ty) {
2107       mArg0 =
2108           builder.create<mlir::LLVM::BitcastOp>(loc, vi32Ty, mArg0).getResult();
2109       mArg1 =
2110           builder.create<mlir::LLVM::BitcastOp>(loc, vi32Ty, mArg1).getResult();
2111     }
2112 
2113     auto funcOp{builder.createFunction(
2114         loc, "llvm.ppc.altivec.vperm",
2115         genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
2116                     Ty::IntegerVector<4>, Ty::IntegerVector<1>>(context,
2117                                                                 builder))};
2118 
2119     llvm::SmallVector<mlir::Value> newArgs;
2120     if (isNativeVecElemOrderOnLE()) {
2121       auto i8Ty{mlir::IntegerType::get(context, 8)};
2122       auto v8Ty{mlir::VectorType::get(16, i8Ty)};
2123       auto negOne{builder.createMinusOneInteger(loc, i8Ty)};
2124       auto vNegOne{
2125           builder.create<mlir::vector::BroadcastOp>(loc, v8Ty, negOne)};
2126 
2127       mMask = builder.create<mlir::arith::XOrIOp>(loc, mMask, vNegOne);
2128       newArgs = {mArg1, mArg0, mMask};
2129     } else {
2130       newArgs = {mArg0, mArg1, mMask};
2131     }
2132 
2133     auto res{builder.create<fir::CallOp>(loc, funcOp, newArgs).getResult(0)};
2134 
2135     if (res.getType() != argTypes[0]) {
2136       // fir.call llvm.ppc.altivec.vperm returns !fir.vector<4:i32>
2137       // convert the result back to the original type
2138       res = builder.createConvert(loc, vi32Ty, res);
2139       if (mlirTy != vi32Ty)
2140         res =
2141             builder.create<mlir::LLVM::BitcastOp>(loc, mlirTy, res).getResult();
2142     }
2143     return builder.createConvert(loc, resultType, res);
2144   }
2145   case VecOp::Permi: {
2146     // arg3 is a constant
2147     auto constIntOp{mlir::dyn_cast_or_null<mlir::IntegerAttr>(
2148         mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[2].getDefiningOp())
2149             .getValue())};
2150     assert(constIntOp && "expected integer constant argument");
2151     auto constInt{constIntOp.getInt()};
2152     // arg1, arg2, and the result type share the same VecTypeInfo
2153     if (vecTyInfo.isFloat()) {
2154       mArg0 =
2155           builder.create<mlir::LLVM::BitcastOp>(loc, vf64Ty, mArg0).getResult();
2156       mArg1 =
2157           builder.create<mlir::LLVM::BitcastOp>(loc, vf64Ty, mArg1).getResult();
2158     }
2159 
2160     llvm::SmallVector<int64_t, 2> nMask; // native vector element order mask
2161     llvm::SmallVector<int64_t, 2> rMask; // non-native vector element order mask
2162     enum { V1 = 0, V2 = 2 };
2163     switch (constInt) {
2164     case 0:
2165       nMask = {V1 + 0, V2 + 0};
2166       rMask = {V2 + 1, V1 + 1};
2167       break;
2168     case 1:
2169       nMask = {V1 + 0, V2 + 1};
2170       rMask = {V2 + 0, V1 + 1};
2171       break;
2172     case 2:
2173       nMask = {V1 + 1, V2 + 0};
2174       rMask = {V2 + 1, V1 + 0};
2175       break;
2176     case 3:
2177       nMask = {V1 + 1, V2 + 1};
2178       rMask = {V2 + 0, V1 + 0};
2179       break;
2180     default:
2181       llvm_unreachable("unexpected arg3 value for vec_permi");
2182     }
2183 
2184     llvm::SmallVector<int64_t, 2> mask =
2185         (isBEVecElemOrderOnLE()) ? rMask : nMask;
2186     auto res{builder.create<mlir::vector::ShuffleOp>(loc, mArg0, mArg1, mask)};
2187     if (res.getType() != mlirTy) {
2188       auto cast{builder.create<mlir::LLVM::BitcastOp>(loc, mlirTy, res)};
2189       return builder.createConvert(loc, resultType, cast);
2190     }
2191     return builder.createConvert(loc, resultType, res);
2192   }
2193   default:
2194     llvm_unreachable("invalid vector operation for generator");
2195   }
2196 }
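
// For illustration: vec_permi picks one doubleword from each argument
// according to arg3. With x = [x0, x1] and y = [y0, y1] in native element
// order, vec_permi(x, y, 2) yields [x1, y0], per the nMask table above.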
2197 
2198 // VEC_SEL
2199 fir::ExtendedValue
2200 PPCIntrinsicLibrary::genVecSel(mlir::Type resultType,
2201                                llvm::ArrayRef<fir::ExtendedValue> args) {
2202   assert(args.size() == 3);
2203   auto argBases{getBasesForArgs(args)};
2204   llvm::SmallVector<VecTypeInfo, 4> vecTyInfos;
2205   for (size_t i = 0; i < argBases.size(); i++) {
2206     vecTyInfos.push_back(getVecTypeFromFir(argBases[i]));
2207   }
2208   auto vargs{convertVecArgs(builder, loc, vecTyInfos, argBases)};
2209 
2210   auto i8Ty{mlir::IntegerType::get(builder.getContext(), 8)};
2211   auto negOne{builder.createMinusOneInteger(loc, i8Ty)};
2212 
2213   // construct a constant <16 x i8> vector of -1s, used to complement arg3
2214   auto bcVecTy{mlir::VectorType::get(16, i8Ty)};
2215   auto vNegOne{builder.create<mlir::vector::BroadcastOp>(loc, bcVecTy, negOne)};
2216 
2217   // bitcast arguments to bcVecTy
2218   auto arg1{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0])};
2219   auto arg2{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1])};
2220   auto arg3{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[2])};
2221 
2222   // vec_sel(arg1, arg2, arg3) =
2223   //   (arg2 and arg3) or (arg1 and (arg3 xor vector(-1,...)))
2224   auto comp{builder.create<mlir::arith::XOrIOp>(loc, arg3, vNegOne)};
2225   auto a1AndComp{builder.create<mlir::arith::AndIOp>(loc, arg1, comp)};
2226   auto a2AndA3{builder.create<mlir::arith::AndIOp>(loc, arg2, arg3)};
2227   auto res{builder.create<mlir::arith::OrIOp>(loc, a1AndComp, a2AndA3)};
2228 
2229   auto bcRes{
2230       builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), res)};
2231 
2232   return builder.createConvert(loc, vecTyInfos[0].toFirVectorType(), bcRes);
2233 }
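
// For illustration: each result bit is taken from arg2 where the
// corresponding bit of arg3 is 1 and from arg1 where it is 0, i.e.
//   res = (arg2 & arg3) | (arg1 & ~arg3)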
2234 
2235 // VEC_SL, VEC_SLD, VEC_SLDW, VEC_SLL, VEC_SLO, VEC_SR, VEC_SRL, VEC_SRO
2236 template <VecOp vop>
2237 fir::ExtendedValue
2238 PPCIntrinsicLibrary::genVecShift(mlir::Type resultType,
2239                                  llvm::ArrayRef<fir::ExtendedValue> args) {
2240   auto context{builder.getContext()};
2241   auto argBases{getBasesForArgs(args)};
2242   auto argTypes{getTypesForArgs(argBases)};
2243 
2244   llvm::SmallVector<VecTypeInfo, 2> vecTyInfoArgs;
2245   vecTyInfoArgs.push_back(getVecTypeFromFir(argBases[0]));
2246   vecTyInfoArgs.push_back(getVecTypeFromFir(argBases[1]));
2247 
2248   // Convert the first two arguments to MLIR vectors
2249   llvm::SmallVector<mlir::Type, 2> mlirTyArgs;
2250   mlirTyArgs.push_back(vecTyInfoArgs[0].toMlirVectorType(context));
2251   mlirTyArgs.push_back(vecTyInfoArgs[1].toMlirVectorType(context));
2252 
2253   llvm::SmallVector<mlir::Value, 2> mlirVecArgs;
2254   mlirVecArgs.push_back(builder.createConvert(loc, mlirTyArgs[0], argBases[0]));
2255   mlirVecArgs.push_back(builder.createConvert(loc, mlirTyArgs[1], argBases[1]));
2256 
2257   mlir::Value shftRes{nullptr};
2258 
2259   if (vop == VecOp::Sl || vop == VecOp::Sr) {
2260     assert(args.size() == 2);
2261     // Construct the mask
2262     auto width{
2263         mlir::dyn_cast<mlir::IntegerType>(vecTyInfoArgs[1].eleTy).getWidth()};
2264     auto vecVal{builder.createIntegerConstant(
2265         loc, getConvertedElementType(context, vecTyInfoArgs[0].eleTy), width)};
2266     auto mask{
2267         builder.create<mlir::vector::BroadcastOp>(loc, mlirTyArgs[1], vecVal)};
2268     auto shft{builder.create<mlir::arith::RemUIOp>(loc, mlirVecArgs[1], mask)};
2269 
2270     mlir::Value res{nullptr};
2271     if (vop == VecOp::Sr)
2272       res = builder.create<mlir::arith::ShRUIOp>(loc, mlirVecArgs[0], shft);
2273     else if (vop == VecOp::Sl)
2274       res = builder.create<mlir::arith::ShLIOp>(loc, mlirVecArgs[0], shft);
2275 
2276     shftRes = builder.createConvert(loc, argTypes[0], res);
2277   } else if (vop == VecOp::Sll || vop == VecOp::Slo || vop == VecOp::Srl ||
2278              vop == VecOp::Sro) {
2279     assert(args.size() == 2);
2280 
2281     // Bitcast to vector<4xi32>
2282     auto bcVecTy{mlir::VectorType::get(4, builder.getIntegerType(32))};
2283     if (mlirTyArgs[0] != bcVecTy)
2284       mlirVecArgs[0] =
2285           builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, mlirVecArgs[0]);
2286     if (mlirTyArgs[1] != bcVecTy)
2287       mlirVecArgs[1] =
2288           builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, mlirVecArgs[1]);
2289 
2290     llvm::StringRef funcName;
2291     switch (vop) {
2292     case VecOp::Srl:
2293       funcName = "llvm.ppc.altivec.vsr";
2294       break;
2295     case VecOp::Sro:
2296       funcName = "llvm.ppc.altivec.vsro";
2297       break;
2298     case VecOp::Sll:
2299       funcName = "llvm.ppc.altivec.vsl";
2300       break;
2301     case VecOp::Slo:
2302       funcName = "llvm.ppc.altivec.vslo";
2303       break;
2304     default:
2305       llvm_unreachable("unknown vector shift operation");
2306     }
2307     auto funcTy{genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>,
2308                             Ty::IntegerVector<4>>(context, builder)};
2309     mlir::func::FuncOp funcOp{builder.createFunction(loc, funcName, funcTy)};
2310     auto callOp{builder.create<fir::CallOp>(loc, funcOp, mlirVecArgs)};
2311 
2312     // If the result vector type differs from the original type, convert to an
2313     // MLIR vector, bitcast, and then convert back to a FIR vector.
2314     if (callOp.getResult(0).getType() != argTypes[0]) {
2315       auto res = builder.createConvert(loc, bcVecTy, callOp.getResult(0));
2316       res = builder.create<mlir::vector::BitCastOp>(loc, mlirTyArgs[0], res);
2317       shftRes = builder.createConvert(loc, argTypes[0], res);
2318     } else {
2319       shftRes = callOp.getResult(0);
2320     }
2321   } else if (vop == VecOp::Sld || vop == VecOp::Sldw) {
2322     assert(args.size() == 3);
2323     auto constIntOp = mlir::dyn_cast_or_null<mlir::IntegerAttr>(
2324         mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[2].getDefiningOp())
2325             .getValue());
2326     assert(constIntOp && "expected integer constant argument");
2327 
2328     // Bitcast to vector<16xi8>
2329     auto vi8Ty{mlir::VectorType::get(16, builder.getIntegerType(8))};
2330     if (mlirTyArgs[0] != vi8Ty) {
2331       mlirVecArgs[0] =
2332           builder.create<mlir::LLVM::BitcastOp>(loc, vi8Ty, mlirVecArgs[0])
2333               .getResult();
2334       mlirVecArgs[1] =
2335           builder.create<mlir::LLVM::BitcastOp>(loc, vi8Ty, mlirVecArgs[1])
2336               .getResult();
2337     }
2338 
2339     // Construct the mask for shuffling
2340     auto shiftVal{constIntOp.getInt()};
2341     if (vop == VecOp::Sldw)
2342       shiftVal = shiftVal << 2;
2343     shiftVal &= 0xF;
2344     llvm::SmallVector<int64_t, 16> mask;
2345     // Shuffle with mask based on the endianness
2346     const auto triple{fir::getTargetTriple(builder.getModule())};
2347     if (triple.isLittleEndian()) {
2348       for (int i = 16; i < 32; ++i)
2349         mask.push_back(i - shiftVal);
2350       shftRes = builder.create<mlir::vector::ShuffleOp>(loc, mlirVecArgs[1],
2351                                                         mlirVecArgs[0], mask);
2352     } else {
2353       for (int i = 0; i < 16; ++i)
2354         mask.push_back(i + shiftVal);
2355       shftRes = builder.create<mlir::vector::ShuffleOp>(loc, mlirVecArgs[0],
2356                                                         mlirVecArgs[1], mask);
2357     }
2358 
2359     // Bitcast to the original type
2360     if (shftRes.getType() != mlirTyArgs[0])
2361       shftRes =
2362           builder.create<mlir::LLVM::BitcastOp>(loc, mlirTyArgs[0], shftRes);
2363 
2364     return builder.createConvert(loc, resultType, shftRes);
2365   } else
2366     llvm_unreachable("Invalid vector operation for generator");
2367 
2368   return shftRes;
2369 }
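
// For illustration (assumed semantics of the masks above): vec_sld(a, b, c)
// concatenates a and b and extracts 16 bytes starting at byte offset
// c & 0xF (in big-endian element order); vec_sldw shifts by whole 32-bit
// words, i.e. by (c << 2) & 0xF bytes.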
2370 
2371 // VEC_SPLAT, VEC_SPLATS, VEC_SPLAT_S32
2372 template <VecOp vop>
2373 fir::ExtendedValue
2374 PPCIntrinsicLibrary::genVecSplat(mlir::Type resultType,
2375                                  llvm::ArrayRef<fir::ExtendedValue> args) {
2376   auto context{builder.getContext()};
2377   auto argBases{getBasesForArgs(args)};
2378 
2379   mlir::vector::SplatOp splatOp{nullptr};
2380   mlir::Type retTy{nullptr};
2381   switch (vop) {
2382   case VecOp::Splat: {
2383     assert(args.size() == 2);
2384     auto vecTyInfo{getVecTypeFromFir(argBases[0])};
2385 
2386     auto extractOp{genVecExtract(resultType, args)};
2387     splatOp = builder.create<mlir::vector::SplatOp>(
2388         loc, *(extractOp.getUnboxed()), vecTyInfo.toMlirVectorType(context));
2389     retTy = vecTyInfo.toFirVectorType();
2390     break;
2391   }
2392   case VecOp::Splats: {
2393     assert(args.size() == 1);
2394     auto vecTyInfo{getVecTypeFromEle(argBases[0])};
2395 
2396     splatOp = builder.create<mlir::vector::SplatOp>(
2397         loc, argBases[0], vecTyInfo.toMlirVectorType(context));
2398     retTy = vecTyInfo.toFirVectorType();
2399     break;
2400   }
2401   case VecOp::Splat_s32: {
2402     assert(args.size() == 1);
2403     auto eleTy{builder.getIntegerType(32)};
2404     auto intOp{builder.createConvert(loc, eleTy, argBases[0])};
2405 
2406     // the intrinsic always returns vector(integer(4))
2407     splatOp = builder.create<mlir::vector::SplatOp>(
2408         loc, intOp, mlir::VectorType::get(4, eleTy));
2409     retTy = fir::VectorType::get(4, eleTy);
2410     break;
2411   }
2412   default:
2413     llvm_unreachable("invalid vector operation for generator");
2414   }
2415   return builder.createConvert(loc, retTy, splatOp);
2416 }
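
// For illustration: vec_splats(1_4) broadcasts the integer(4) scalar into
// all elements of a vector(integer(4)), while vec_splat(v, i) first
// extracts element i mod len of v (via genVecExtract above) and then
// broadcasts it; vec_splat_s32 always produces a vector(integer(4)).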
2417 
2418 fir::ExtendedValue
2419 PPCIntrinsicLibrary::genVecXlds(mlir::Type resultType,
2420                                 llvm::ArrayRef<fir::ExtendedValue> args) {
2421   assert(args.size() == 2);
2422   auto arg0{getBase(args[0])};
2423   auto arg1{getBase(args[1])};
2424 
2425   // Prepare the return type in FIR.
2426   auto vecTyInfo{getVecTypeFromFirType(resultType)};
2427   auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
2428   auto firTy{vecTyInfo.toFirVectorType()};
2429 
2430   // Add the %val of arg0 to %addr of arg1
2431   auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};
2432 
2433   auto i64Ty{mlir::IntegerType::get(builder.getContext(), 64)};
2434   auto i64VecTy{mlir::VectorType::get(2, i64Ty)};
2435   auto i64RefTy{builder.getRefType(i64Ty)};
2436   auto addrConv{builder.create<fir::ConvertOp>(loc, i64RefTy, addr)};
2437 
2438   auto addrVal{builder.create<fir::LoadOp>(loc, addrConv)};
2439   auto splatRes{builder.create<mlir::vector::SplatOp>(loc, addrVal, i64VecTy)};
2440 
2441   mlir::Value result{nullptr};
2442   if (mlirTy != splatRes.getType()) {
2443     result = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, splatRes);
2444   } else
2445     result = splatRes;
2446 
2447   return builder.createConvert(loc, firTy, result);
2448 }
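
// For illustration (a sketch of the code above): vec_xlds(i, v) loads one
// 64-bit value from the byte address %v + %i and splats it into both
// elements of a <2 x i64> vector, bitcasting when the requested element
// type differs.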
2449 
2450 const char *getMmaIrIntrName(MMAOp mmaOp) {
2451   switch (mmaOp) {
2452   case MMAOp::AssembleAcc:
2453     return "llvm.ppc.mma.assemble.acc";
2454   case MMAOp::AssemblePair:
2455     return "llvm.ppc.vsx.assemble.pair";
2456   case MMAOp::DisassembleAcc:
2457     return "llvm.ppc.mma.disassemble.acc";
2458   case MMAOp::DisassemblePair:
2459     return "llvm.ppc.vsx.disassemble.pair";
2460   case MMAOp::Xxmfacc:
2461     return "llvm.ppc.mma.xxmfacc";
2462   case MMAOp::Xxmtacc:
2463     return "llvm.ppc.mma.xxmtacc";
2464   case MMAOp::Xxsetaccz:
2465     return "llvm.ppc.mma.xxsetaccz";
2466   case MMAOp::Pmxvbf16ger2:
2467     return "llvm.ppc.mma.pmxvbf16ger2";
2468   case MMAOp::Pmxvbf16ger2nn:
2469     return "llvm.ppc.mma.pmxvbf16ger2nn";
2470   case MMAOp::Pmxvbf16ger2np:
2471     return "llvm.ppc.mma.pmxvbf16ger2np";
2472   case MMAOp::Pmxvbf16ger2pn:
2473     return "llvm.ppc.mma.pmxvbf16ger2pn";
2474   case MMAOp::Pmxvbf16ger2pp:
2475     return "llvm.ppc.mma.pmxvbf16ger2pp";
2476   case MMAOp::Pmxvf16ger2:
2477     return "llvm.ppc.mma.pmxvf16ger2";
2478   case MMAOp::Pmxvf16ger2nn:
2479     return "llvm.ppc.mma.pmxvf16ger2nn";
2480   case MMAOp::Pmxvf16ger2np:
2481     return "llvm.ppc.mma.pmxvf16ger2np";
  case MMAOp::Pmxvf16ger2pn:
    return "llvm.ppc.mma.pmxvf16ger2pn";
  case MMAOp::Pmxvf16ger2pp:
    return "llvm.ppc.mma.pmxvf16ger2pp";
  case MMAOp::Pmxvf32ger:
    return "llvm.ppc.mma.pmxvf32ger";
  case MMAOp::Pmxvf32gernn:
    return "llvm.ppc.mma.pmxvf32gernn";
  case MMAOp::Pmxvf32gernp:
    return "llvm.ppc.mma.pmxvf32gernp";
  case MMAOp::Pmxvf32gerpn:
    return "llvm.ppc.mma.pmxvf32gerpn";
  case MMAOp::Pmxvf32gerpp:
    return "llvm.ppc.mma.pmxvf32gerpp";
  case MMAOp::Pmxvf64ger:
    return "llvm.ppc.mma.pmxvf64ger";
  case MMAOp::Pmxvf64gernn:
    return "llvm.ppc.mma.pmxvf64gernn";
  case MMAOp::Pmxvf64gernp:
    return "llvm.ppc.mma.pmxvf64gernp";
  case MMAOp::Pmxvf64gerpn:
    return "llvm.ppc.mma.pmxvf64gerpn";
  case MMAOp::Pmxvf64gerpp:
    return "llvm.ppc.mma.pmxvf64gerpp";
  case MMAOp::Pmxvi16ger2:
    return "llvm.ppc.mma.pmxvi16ger2";
  case MMAOp::Pmxvi16ger2pp:
    return "llvm.ppc.mma.pmxvi16ger2pp";
  case MMAOp::Pmxvi16ger2s:
    return "llvm.ppc.mma.pmxvi16ger2s";
  case MMAOp::Pmxvi16ger2spp:
    return "llvm.ppc.mma.pmxvi16ger2spp";
  case MMAOp::Pmxvi4ger8:
    return "llvm.ppc.mma.pmxvi4ger8";
  case MMAOp::Pmxvi4ger8pp:
    return "llvm.ppc.mma.pmxvi4ger8pp";
  case MMAOp::Pmxvi8ger4:
    return "llvm.ppc.mma.pmxvi8ger4";
  case MMAOp::Pmxvi8ger4pp:
    return "llvm.ppc.mma.pmxvi8ger4pp";
  case MMAOp::Pmxvi8ger4spp:
    return "llvm.ppc.mma.pmxvi8ger4spp";
  case MMAOp::Xvbf16ger2:
    return "llvm.ppc.mma.xvbf16ger2";
  case MMAOp::Xvbf16ger2nn:
    return "llvm.ppc.mma.xvbf16ger2nn";
  case MMAOp::Xvbf16ger2np:
    return "llvm.ppc.mma.xvbf16ger2np";
  case MMAOp::Xvbf16ger2pn:
    return "llvm.ppc.mma.xvbf16ger2pn";
  case MMAOp::Xvbf16ger2pp:
    return "llvm.ppc.mma.xvbf16ger2pp";
  case MMAOp::Xvf16ger2:
    return "llvm.ppc.mma.xvf16ger2";
  case MMAOp::Xvf16ger2nn:
    return "llvm.ppc.mma.xvf16ger2nn";
  case MMAOp::Xvf16ger2np:
    return "llvm.ppc.mma.xvf16ger2np";
  case MMAOp::Xvf16ger2pn:
    return "llvm.ppc.mma.xvf16ger2pn";
  case MMAOp::Xvf16ger2pp:
    return "llvm.ppc.mma.xvf16ger2pp";
  case MMAOp::Xvf32ger:
    return "llvm.ppc.mma.xvf32ger";
  case MMAOp::Xvf32gernn:
    return "llvm.ppc.mma.xvf32gernn";
  case MMAOp::Xvf32gernp:
    return "llvm.ppc.mma.xvf32gernp";
  case MMAOp::Xvf32gerpn:
    return "llvm.ppc.mma.xvf32gerpn";
  case MMAOp::Xvf32gerpp:
    return "llvm.ppc.mma.xvf32gerpp";
  case MMAOp::Xvf64ger:
    return "llvm.ppc.mma.xvf64ger";
  case MMAOp::Xvf64gernn:
    return "llvm.ppc.mma.xvf64gernn";
  case MMAOp::Xvf64gernp:
    return "llvm.ppc.mma.xvf64gernp";
  case MMAOp::Xvf64gerpn:
    return "llvm.ppc.mma.xvf64gerpn";
  case MMAOp::Xvf64gerpp:
    return "llvm.ppc.mma.xvf64gerpp";
  case MMAOp::Xvi16ger2:
    return "llvm.ppc.mma.xvi16ger2";
  case MMAOp::Xvi16ger2pp:
    return "llvm.ppc.mma.xvi16ger2pp";
  case MMAOp::Xvi16ger2s:
    return "llvm.ppc.mma.xvi16ger2s";
  case MMAOp::Xvi16ger2spp:
    return "llvm.ppc.mma.xvi16ger2spp";
  case MMAOp::Xvi4ger8:
    return "llvm.ppc.mma.xvi4ger8";
  case MMAOp::Xvi4ger8pp:
    return "llvm.ppc.mma.xvi4ger8pp";
  case MMAOp::Xvi8ger4:
    return "llvm.ppc.mma.xvi8ger4";
  case MMAOp::Xvi8ger4pp:
    return "llvm.ppc.mma.xvi8ger4pp";
  case MMAOp::Xvi8ger4spp:
    return "llvm.ppc.mma.xvi8ger4spp";
  }
  llvm_unreachable("getMmaIrIntrName");
}
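
// Added note (illustration): each MMAOp enumerator above maps onto the LLVM
// intrinsic whose name is the enumerator spelled in lower case, e.g.
// MMAOp::Xvf32gerpp -> "llvm.ppc.mma.xvf32gerpp". The returned string is fed
// to builder.createFunction() in genMmaIntr below.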

mlir::FunctionType getMmaIrFuncType(mlir::MLIRContext *context, MMAOp mmaOp) {
  switch (mmaOp) {
  case MMAOp::AssembleAcc:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 4);
  case MMAOp::AssemblePair:
    return genMmaVpFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::DisassembleAcc:
  case MMAOp::DisassemblePair:
    return genMmaDisassembleFuncType(context, mmaOp);
  case MMAOp::Xxmfacc:
  case MMAOp::Xxmtacc:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 0);
  case MMAOp::Xxsetaccz:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 0);
  case MMAOp::Pmxvbf16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvbf16ger2nn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvbf16ger2np:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvbf16ger2pn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvbf16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2nn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2np:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2pn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvf32ger:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf32gernn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf32gernp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf32gerpn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf32gerpp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64ger:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64gernn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64gernp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64gerpn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvf64gerpp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
                            /*Integer*/ 2);
  case MMAOp::Pmxvi16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi16ger2s:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi16ger2spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi4ger8:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi4ger8pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi8ger4:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi8ger4pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Pmxvi8ger4spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2,
                            /*Integer*/ 3);
  case MMAOp::Xvbf16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvbf16ger2nn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvbf16ger2np:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvbf16ger2pn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvbf16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2nn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2np:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2pn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32ger:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32gernn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32gernp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32gerpn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf32gerpp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvf64ger:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvf64gernn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvf64gernp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvf64gerpn:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvf64gerpp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1);
  case MMAOp::Xvi16ger2:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi16ger2pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi16ger2s:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi16ger2spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi4ger8:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi4ger8pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi8ger4:
    return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi8ger4pp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  case MMAOp::Xvi8ger4spp:
    return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2);
  }
  llvm_unreachable("getMmaIrFuncType");
}
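
// Worked example (added sketch; argument meanings inferred from the
// genMmaVqFuncType parameter names): for MMAOp::Pmxvf64gerpp the call
//   genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1,
//                    /*Integer*/ 2)
// is expected to build a function type of the shape
//   (vector<512xi1>, vector<256xi1>, vector<16xi8>, i32, i32)
//       -> vector<512xi1>
// i.e. one accumulator quad, one __vector_pair, one plain vector operand,
// and two i32 mask operands, returning the updated accumulator.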

template <MMAOp IntrId, MMAHandlerOp HandlerOp>
void PPCIntrinsicLibrary::genMmaIntr(llvm::ArrayRef<fir::ExtendedValue> args) {
  auto context{builder.getContext()};
  mlir::FunctionType intrFuncType{getMmaIrFuncType(context, IntrId)};
  mlir::func::FuncOp funcOp{
      builder.createFunction(loc, getMmaIrIntrName(IntrId), intrFuncType)};
  llvm::SmallVector<mlir::Value> intrArgs;

  // Depending on HandlerOp, lower the subroutine call to a function call:
  // the first argument receives the result, and the remaining arguments are
  // shifted one position to form the actual argument list.
  size_t argStart{0};
  size_t argStep{1};
  size_t e{args.size()};
  if (HandlerOp == MMAHandlerOp::SubToFunc) {
    // The first argument becomes the function result. Start from the second
    // argument.
    argStart = 1;
  } else if (HandlerOp == MMAHandlerOp::SubToFuncReverseArgOnLE) {
    // Reverse the argument order on little-endian targets only. The reversal
    // does not depend on the setting of the non-native-order option.
    const auto triple{fir::getTargetTriple(builder.getModule())};
    if (triple.isLittleEndian()) {
      // Load the arguments in reverse order, down to and including the
      // second argument; the first argument becomes the function result.
      argStart = args.size() - 1;
      e = 0;
      // Assigning -1 to the unsigned argStep wraps to SIZE_MAX, so
      // i += argStep below decrements i by one.
      argStep = -1;
    } else {
      // Load the arguments in natural order. The first argument becomes the
      // function result. Start from the second argument.
      argStart = 1;
    }
  }
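
  // Added illustration of the traversal set up above: with five arguments
  // under SubToFuncReverseArgOnLE on a little-endian target, the loop below
  // visits i = 4, 3, 2, 1 and stops once i reaches 0; under SubToFunc it
  // visits i = 1, 2, 3, 4. Slot 0 holds the argument that receives the
  // function result.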

  for (size_t i = argStart, j = 0; i != e; i += argStep, ++j) {
    auto v{fir::getBase(args[i])};
    if (i == 0 && HandlerOp == MMAHandlerOp::FirstArgIsResult) {
      // The first argument is passed in as an address. Load its content to
      // match the LLVM interface.
      v = builder.create<fir::LoadOp>(loc, v);
    }
    auto vType{v.getType()};
    mlir::Type targetType{intrFuncType.getInput(j)};
    if (vType != targetType) {
      if (mlir::isa<mlir::VectorType>(targetType)) {
        // Perform vector type conversion for arguments passed by value.
        auto eleTy{mlir::dyn_cast<fir::VectorType>(vType).getElementType()};
        auto len{mlir::dyn_cast<fir::VectorType>(vType).getLen()};
        mlir::VectorType mlirType = mlir::VectorType::get(len, eleTy);
        auto v0{builder.createConvert(loc, mlirType, v)};
        auto v1{builder.create<mlir::vector::BitCastOp>(loc, targetType, v0)};
        intrArgs.push_back(v1);
      } else if (mlir::isa<mlir::IntegerType>(targetType) &&
                 mlir::isa<mlir::IntegerType>(vType)) {
        auto v0{builder.createConvert(loc, targetType, v)};
        intrArgs.push_back(v0);
      } else {
        llvm::errs() << "\nUnexpected type conversion requested: from "
                     << vType << " to " << targetType << "\n";
        llvm_unreachable("Unsupported type conversion for argument to PowerPC "
                         "MMA intrinsic");
      }
    } else {
      intrArgs.push_back(v);
    }
  }
  auto callSt{builder.create<fir::CallOp>(loc, funcOp, intrArgs)};
  if (HandlerOp == MMAHandlerOp::SubToFunc ||
      HandlerOp == MMAHandlerOp::SubToFuncReverseArgOnLE ||
      HandlerOp == MMAHandlerOp::FirstArgIsResult) {
    // Convert the pointer type if needed.
    mlir::Value callResult{callSt.getResult(0)};
    mlir::Value destPtr{fir::getBase(args[0])};
    mlir::Type callResultPtrType{builder.getRefType(callResult.getType())};
    if (destPtr.getType() != callResultPtrType) {
      destPtr = builder.create<fir::ConvertOp>(loc, callResultPtrType, destPtr);
    }
    // Copy the result.
    builder.create<fir::StoreOp>(loc, callResult, destPtr);
  }
}
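
// Added sketch (illustrative) of the lowering performed above for the
// SubToFunc-style handlers: a subroutine reference such as
//   call sub(acc, a, b)
// becomes, in FIR,
//   %r = fir.call @llvm.ppc.mma.<op>(%a, %b)
//   fir.store %r to %acc
// where %acc is the address passed as the first subroutine argument.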

// VEC_ST, VEC_STE
template <VecOp vop>
void PPCIntrinsicLibrary::genVecStore(llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);

  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  auto arg1TyInfo{getVecTypeFromFir(argBases[0])};

  auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])};

  llvm::StringRef fname{};
  mlir::VectorType stTy{nullptr};
  auto i32ty{mlir::IntegerType::get(context, 32)};
  switch (vop) {
  case VecOp::St:
    stTy = mlir::VectorType::get(4, i32ty);
    fname = "llvm.ppc.altivec.stvx";
    break;
  case VecOp::Ste: {
    const auto width{arg1TyInfo.eleTy.getIntOrFloatBitWidth()};
    const auto len{arg1TyInfo.len};

    if (arg1TyInfo.isFloat32()) {
      stTy = mlir::VectorType::get(len, i32ty);
      fname = "llvm.ppc.altivec.stvewx";
    } else if (mlir::isa<mlir::IntegerType>(arg1TyInfo.eleTy)) {
      stTy = mlir::VectorType::get(len, mlir::IntegerType::get(context, width));

      switch (width) {
      case 8:
        fname = "llvm.ppc.altivec.stvebx";
        break;
      case 16:
        fname = "llvm.ppc.altivec.stvehx";
        break;
      case 32:
        fname = "llvm.ppc.altivec.stvewx";
        break;
      default:
        llvm_unreachable("invalid element size");
      }
    } else
      llvm_unreachable("unknown type");
    break;
  }
  case VecOp::Stxvp:
    // __vector_pair type
    stTy = mlir::VectorType::get(256, mlir::IntegerType::get(context, 1));
    fname = "llvm.ppc.vsx.stxvp";
    break;
  default:
    llvm_unreachable("invalid vector operation for generator");
  }

  auto funcType{
      mlir::FunctionType::get(context, {stTy, addr.getType()}, std::nullopt)};
  mlir::func::FuncOp funcOp = builder.createFunction(loc, fname, funcType);

  llvm::SmallVector<mlir::Value, 4> biArgs;

  if (vop == VecOp::Stxvp) {
    biArgs.push_back(argBases[0]);
    biArgs.push_back(addr);
    builder.create<fir::CallOp>(loc, funcOp, biArgs);
    return;
  }

  auto vecTyInfo{getVecTypeFromFirType(argBases[0].getType())};
  auto cnv{builder.createConvert(loc, vecTyInfo.toMlirVectorType(context),
                                 argBases[0])};

  mlir::Value newArg1{nullptr};
  if (stTy != arg1TyInfo.toMlirVectorType(context))
    newArg1 = builder.create<mlir::vector::BitCastOp>(loc, stTy, cnv);
  else
    newArg1 = cnv;

  if (isBEVecElemOrderOnLE())
    newArg1 = builder.createConvert(
        loc, stTy, reverseVectorElements(builder, loc, newArg1, 4));

  biArgs.push_back(newArg1);
  biArgs.push_back(addr);

  builder.create<fir::CallOp>(loc, funcOp, biArgs);
}
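
// Added example (illustrative): for vec_st of a vector(integer(4)) value,
// the code above converts the value to vector<4xi32> (bitcasting when the
// element type differs) and emits
//   fir.call @llvm.ppc.altivec.stvx(%v, %addr)
// where %addr is argBases[2] advanced by the byte offset argBases[1].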

// VEC_XST, VEC_XST_BE, VEC_STXV, VEC_XSTD2, VEC_XSTW4
template <VecOp vop>
void PPCIntrinsicLibrary::genVecXStore(
    llvm::ArrayRef<fir::ExtendedValue> args) {
  assert(args.size() == 3);
  auto context{builder.getContext()};
  auto argBases{getBasesForArgs(args)};
  VecTypeInfo arg1TyInfo{getVecTypeFromFir(argBases[0])};

  auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])};

  mlir::Value trg{nullptr};
  mlir::Value src{nullptr};

  switch (vop) {
  case VecOp::Xst:
  case VecOp::Xst_be: {
    src = argBases[0];
    trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()),
                                addr);

    if (vop == VecOp::Xst_be || isBEVecElemOrderOnLE()) {
      auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context),
                                     argBases[0])};
      auto shf{reverseVectorElements(builder, loc, cnv, arg1TyInfo.len)};

      src = builder.createConvert(loc, arg1TyInfo.toFirVectorType(), shf);
    }
    break;
  }
  case VecOp::Xstd2:
  case VecOp::Xstw4: {
    // A 16-byte vector arg1 is treated as two 8-byte elements (xstd2) or
    // four 4-byte elements (xstw4).
    const uint64_t numElem = (vop == VecOp::Xstd2) ? 2 : 4;
    mlir::IntegerType elemTy{builder.getIntegerType(128 / numElem)};

    mlir::VectorType mlirVecTy{mlir::VectorType::get(numElem, elemTy)};
    fir::VectorType firVecTy{fir::VectorType::get(numElem, elemTy)};

    auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context),
                                   argBases[0])};

    mlir::Type srcTy{nullptr};
    if (numElem != arg1TyInfo.len) {
      cnv = builder.create<mlir::vector::BitCastOp>(loc, mlirVecTy, cnv);
      srcTy = firVecTy;
    } else {
      srcTy = arg1TyInfo.toFirVectorType();
    }

    trg = builder.createConvert(loc, builder.getRefType(srcTy), addr);

    if (isBEVecElemOrderOnLE()) {
      cnv = reverseVectorElements(builder, loc, cnv, numElem);
    }

    src = builder.createConvert(loc, srcTy, cnv);
    break;
  }
  case VecOp::Stxv:
    src = argBases[0];
    trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()),
                                addr);
    break;
  default:
    llvm_unreachable("Invalid vector operation for generator");
  }
  // Emit the store with byte alignment 1 (unaligned access).
  builder.create<fir::StoreOp>(loc, mlir::TypeRange{},
                               mlir::ValueRange{src, trg},
                               getAlignmentAttr(builder, 1));
}
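
// Added example (illustrative): vec_xstd2 of a vector(real(4)) value (four
// f32 elements) bitcasts the value to vector<2xi64> before the unaligned
// fir.store above, while vec_xst stores the value with its original vector
// type unchanged.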

} // namespace fir