xref: /llvm-project/llvm/lib/IR/AutoUpgrade.cpp (revision 547bfda56b2e3f3a4c6d2357d3566dcd3fa996ad)
//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
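// For example, a declaration of llvm.x86.sse41.ptestc that still takes the
// old v4f32 arguments is rewritten below to the current v2i64 signature.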
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include "llvm/TargetParser/Triple.h"
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

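// Move a deprecated intrinsic declaration out of the way by giving it a
// ".old" suffix, freeing its name for the upgraded declaration.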
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's an old version; replace it with the new one.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
     F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map calls down to the new intrinsic.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of FP compare intrinsics whose return type changed
// from a scalar to a vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

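// Upgrade the declarations of AVX512 BF16 intrinsics whose return type
// changed from an integer vector to a bfloat vector.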
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

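// Upgrade the declarations of AVX512 BF16 dot product intrinsics whose second
// operand's type changed from an integer vector to a bfloat vector.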
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with the LLVM version
  // that started auto-upgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") ||        // Added in 3.7
            Name == "cvt.ps2.pd.256" ||           // Added in 3.9
            Name == "cvtdq2.pd.256" ||            // Added in 3.9
            Name == "cvtdq2.ps.256" ||            // Added in 7.0
            Name.starts_with("movnt.") ||         // Added in 3.2
            Name.starts_with("sqrt.p") ||         // Added in 7.0
            Name.starts_with("storeu.") ||        // Added in 3.9
            Name.starts_with("vbroadcast.s") ||   // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") ||  // Added in 3.7
            Name.starts_with("vinsertf128.") ||   // Added in 3.7
            Name.starts_with("vperm2f128.") ||    // Added in 6.0
            Name.starts_with("vpermil."));        // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" ||             // Added in 5.0
            Name.starts_with("pabs.") ||      // Added in 6.0
            Name.starts_with("padds.") ||     // Added in 8.0
            Name.starts_with("paddus.") ||    // Added in 8.0
            Name.starts_with("pblendd.") ||   // Added in 3.7
            Name == "pblendw" ||              // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") ||    // Added in 3.1
            Name.starts_with("pcmpgt.") ||    // Added in 3.1
            Name.starts_with("pmax") ||       // Added in 3.9
            Name.starts_with("pmin") ||       // Added in 3.9
            Name.starts_with("pmovsx") ||     // Added in 3.9
            Name.starts_with("pmovzx") ||     // Added in 3.9
            Name == "pmul.dq" ||              // Added in 7.0
            Name == "pmulu.dq" ||             // Added in 7.0
            Name.starts_with("psll.dq") ||    // Added in 3.7
            Name.starts_with("psrl.dq") ||    // Added in 3.7
            Name.starts_with("psubs.") ||     // Added in 8.0
            Name.starts_with("psubus.") ||    // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" ||       // Added in 3.7
            Name == "vextracti128" ||         // Added in 3.7
            Name == "vinserti128" ||          // Added in 3.7
            Name == "vperm2i128");            // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") ||        // Added in 3.9
              Name.starts_with("andn.") ||       // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") ||           // Added in 5.0
              Name.starts_with("cmp.d") ||           // Added in 5.0
              Name.starts_with("cmp.q") ||           // Added in 5.0
              Name.starts_with("cmp.w") ||           // Added in 5.0
              Name.starts_with("compress.b") ||      // Added in 9.0
              Name.starts_with("compress.d") ||      // Added in 9.0
              Name.starts_with("compress.p") ||      // Added in 9.0
              Name.starts_with("compress.q") ||      // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") ||      // Added in 9.0
              Name.starts_with("conflict.") ||       // Added in 9.0
              Name.starts_with("cvtdq2pd.") ||       // Added in 4.0
              Name.starts_with("cvtdq2ps.") ||       // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" ||              // Added in 7.0
              Name == "cvtpd2ps.256" ||              // Added in 7.0
              Name == "cvtps2pd.128" ||              // Added in 7.0
              Name == "cvtps2pd.256" ||              // Added in 7.0
              Name.starts_with("cvtqq2pd.") ||       // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" ||              // Added in 9.0
              Name == "cvtqq2ps.512" ||              // Added in 9.0
              Name == "cvttpd2dq.256" ||             // Added in 7.0
              Name == "cvttps2dq.128" ||             // Added in 7.0
              Name == "cvttps2dq.256" ||             // Added in 7.0
              Name.starts_with("cvtudq2pd.") ||      // Added in 4.0
              Name.starts_with("cvtudq2ps.") ||      // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") ||      // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" ||             // Added in 9.0
              Name == "cvtuqq2ps.512" ||             // Added in 9.0
              Name.starts_with("dbpsadbw.") ||       // Added in 7.0
              Name.starts_with("div.p") ||    // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") ||     // Added in 9.0
              Name.starts_with("expand.q") ||     // Added in 9.0
              Name.starts_with("expand.w") ||     // Added in 9.0
              Name.starts_with("fpclass.p") ||    // Added in 7.0
              Name.starts_with("insert") ||       // Added in 4.0
              Name.starts_with("load.") ||        // Added in 3.9
              Name.starts_with("loadu.") ||       // Added in 3.9
              Name.starts_with("lzcnt.") ||       // Added in 5.0
              Name.starts_with("max.p") ||       // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") ||       // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") ||     // Added in 3.9
              Name.starts_with("move.s") ||      // Added in 4.0
              Name.starts_with("movshdup") ||    // Added in 3.9
              Name.starts_with("movsldup") ||    // Added in 3.9
              Name.starts_with("mul.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") ||         // Added in 3.9
              Name.starts_with("pabs.") ||       // Added in 6.0
              Name.starts_with("packssdw.") ||   // Added in 5.0
              Name.starts_with("packsswb.") ||   // Added in 5.0
              Name.starts_with("packusdw.") ||   // Added in 5.0
              Name.starts_with("packuswb.") ||   // Added in 5.0
              Name.starts_with("padd.") ||       // Added in 4.0
              Name.starts_with("padds.") ||      // Added in 8.0
              Name.starts_with("paddus.") ||     // Added in 8.0
              Name.starts_with("palignr.") ||    // Added in 3.9
              Name.starts_with("pand.") ||       // Added in 3.9
              Name.starts_with("pandn.") ||      // Added in 3.9
              Name.starts_with("pavg") ||        // Added in 6.0
              Name.starts_with("pbroadcast") ||  // Added in 6.0
              Name.starts_with("pcmpeq.") ||     // Added in 3.9
              Name.starts_with("pcmpgt.") ||     // Added in 3.9
              Name.starts_with("perm.df.") ||    // Added in 3.9
              Name.starts_with("perm.di.") ||    // Added in 3.9
              Name.starts_with("permvar.") ||    // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") ||   // Added in 7.0
              Name.starts_with("pmax") ||        // Added in 4.0
              Name.starts_with("pmin") ||        // Added in 4.0
              Name == "pmov.qd.256" ||           // Added in 9.0
              Name == "pmov.qd.512" ||           // Added in 9.0
              Name == "pmov.wb.256" ||           // Added in 9.0
              Name == "pmov.wb.512" ||           // Added in 9.0
              Name.starts_with("pmovsx") ||      // Added in 4.0
              Name.starts_with("pmovzx") ||      // Added in 4.0
              Name.starts_with("pmul.dq.") ||    // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") ||    // Added in 7.0
              Name.starts_with("pmulhu.w.") ||   // Added in 7.0
              Name.starts_with("pmull.") ||      // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") ||       // Added in 4.0
              Name.starts_with("por.") ||            // Added in 3.9
              Name.starts_with("prol.") ||           // Added in 8.0
              Name.starts_with("prolv.") ||          // Added in 8.0
              Name.starts_with("pror.") ||           // Added in 8.0
              Name.starts_with("prorv.") ||          // Added in 8.0
              Name.starts_with("pshuf.b.") ||        // Added in 4.0
              Name.starts_with("pshuf.d.") ||        // Added in 3.9
              Name.starts_with("pshufh.w.") ||       // Added in 3.9
              Name.starts_with("pshufl.w.") ||       // Added in 3.9
              Name.starts_with("psll.d") ||          // Added in 4.0
              Name.starts_with("psll.q") ||          // Added in 4.0
              Name.starts_with("psll.w") ||          // Added in 4.0
              Name.starts_with("pslli") ||           // Added in 4.0
              Name.starts_with("psllv") ||           // Added in 4.0
              Name.starts_with("psra.d") ||          // Added in 4.0
              Name.starts_with("psra.q") ||          // Added in 4.0
              Name.starts_with("psra.w") ||          // Added in 4.0
              Name.starts_with("psrai") ||           // Added in 4.0
              Name.starts_with("psrav") ||           // Added in 4.0
              Name.starts_with("psrl.d") ||          // Added in 4.0
              Name.starts_with("psrl.q") ||          // Added in 4.0
              Name.starts_with("psrl.w") ||          // Added in 4.0
              Name.starts_with("psrli") ||           // Added in 4.0
              Name.starts_with("psrlv") ||           // Added in 4.0
              Name.starts_with("psub.") ||           // Added in 4.0
              Name.starts_with("psubs.") ||          // Added in 8.0
              Name.starts_with("psubus.") ||         // Added in 8.0
              Name.starts_with("pternlog.") ||       // Added in 7.0
              Name.starts_with("punpckh") ||         // Added in 3.9
              Name.starts_with("punpckl") ||         // Added in 3.9
              Name.starts_with("pxor.") ||           // Added in 3.9
              Name.starts_with("shuf.f") ||          // Added in 6.0
              Name.starts_with("shuf.i") ||          // Added in 6.0
              Name.starts_with("shuf.p") ||          // Added in 4.0
              Name.starts_with("sqrt.p") ||          // Added in 7.0
              Name.starts_with("store.b.") ||        // Added in 3.9
              Name.starts_with("store.d.") ||        // Added in 3.9
              Name.starts_with("store.p") ||         // Added in 3.9
              Name.starts_with("store.q.") ||        // Added in 3.9
              Name.starts_with("store.w.") ||        // Added in 3.9
              Name == "store.ss" ||                  // Added in 7.0
              Name.starts_with("storeu.") ||         // Added in 3.9
              Name.starts_with("sub.p") ||       // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") ||       // Added in 5.0
              Name.starts_with("unpckh.") ||     // Added in 3.9
              Name.starts_with("unpckl.") ||     // Added in 3.9
              Name.starts_with("valign.") ||     // Added in 4.0
              Name == "vcvtph2ps.128" ||         // Added in 11.0
              Name == "vcvtph2ps.256" ||         // Added in 11.0
              Name.starts_with("vextract") ||    // Added in 4.0
              Name.starts_with("vfmadd.") ||     // Added in 7.0
              Name.starts_with("vfmaddsub.") ||  // Added in 7.0
              Name.starts_with("vfnmadd.") ||    // Added in 7.0
              Name.starts_with("vfnmsub.") ||    // Added in 7.0
              Name.starts_with("vpdpbusd.") ||   // Added in 7.0
              Name.starts_with("vpdpbusds.") ||  // Added in 7.0
              Name.starts_with("vpdpwssd.") ||   // Added in 7.0
              Name.starts_with("vpdpwssds.") ||  // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") ||   // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") ||    // Added in 7.0
              Name.starts_with("vpshld.") ||     // Added in 7.0
              Name.starts_with("vpshldv.") ||    // Added in 8.0
              Name.starts_with("vpshrd.") ||     // Added in 7.0
              Name.starts_with("vpshrdv.") ||    // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor."));           // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") ||    // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") ||    // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub."));    // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") ||   // Added in 7.0
              Name.starts_with("vfmadd.") ||     // Added in 7.0
              Name.starts_with("vfmaddsub.") ||  // Added in 7.0
              Name.starts_with("vpdpbusd.") ||   // Added in 7.0
              Name.starts_with("vpdpbusds.") ||  // Added in 7.0
              Name.starts_with("vpdpwssd.") ||   // Added in 7.0
              Name.starts_with("vpdpwssds.") ||  // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") ||    // Added in 7.0
              Name.starts_with("vpshldv.") ||    // Added in 8.0
              Name.starts_with("vpshrdv."));     // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" ||               // Added in 5.0
            Name == "pmul.dq.512" ||            // Added in 7.0
            Name == "pmulu.dq.512" ||           // Added in 7.0
            Name.starts_with("broadcastm") ||   // Added in 6.0
            Name.starts_with("cmp.p") ||        // Added in 12.0
            Name.starts_with("cvtb2mask.") ||   // Added in 7.0
            Name.starts_with("cvtd2mask.") ||   // Added in 7.0
            Name.starts_with("cvtmask2") ||     // Added in 5.0
            Name.starts_with("cvtq2mask.") ||   // Added in 7.0
            Name == "cvtusi2sd" ||              // Added in 7.0
            Name.starts_with("cvtw2mask.") ||   // Added in 7.0
            Name == "kand.w" ||                 // Added in 7.0
            Name == "kandn.w" ||                // Added in 7.0
            Name == "knot.w" ||                 // Added in 7.0
            Name == "kor.w" ||                  // Added in 7.0
            Name == "kortestc.w" ||             // Added in 7.0
            Name == "kortestz.w" ||             // Added in 7.0
            Name.starts_with("kunpck") ||       // Added in 6.0
            Name == "kxnor.w" ||                // Added in 7.0
            Name == "kxor.w" ||                 // Added in 7.0
            Name.starts_with("padds.") ||       // Added in 8.0
            Name.starts_with("pbroadcast") ||   // Added in 3.9
            Name.starts_with("prol") ||         // Added in 8.0
            Name.starts_with("pror") ||         // Added in 8.0
            Name.starts_with("psll.dq") ||      // Added in 3.9
            Name.starts_with("psrl.dq") ||      // Added in 3.9
            Name.starts_with("psubs.") ||       // Added in 8.0
            Name.starts_with("ptestm") ||       // Added in 6.0
            Name.starts_with("ptestnm") ||      // Added in 6.0
            Name.starts_with("storent.") ||     // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") ||      // Added in 8.0
            Name.starts_with("vpshrd."));       // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") ||    // Added in 7.0
            Name.starts_with("vfmsub.") ||    // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") ||   // Added in 7.0
            Name.starts_with("vfnmsub."));    // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" ||            // Added in 4.0
            Name == "cvtsi2ss" ||          // Added in 7.0
            Name == "cvtsi642ss" ||        // Added in 7.0
            Name == "div.ss" ||            // Added in 4.0
            Name == "mul.ss" ||            // Added in 4.0
            Name.starts_with("sqrt.p") ||  // Added in 7.0
            Name == "sqrt.ss" ||           // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss");             // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" ||            // Added in 4.0
            Name == "cvtdq2pd" ||          // Added in 3.9
            Name == "cvtdq2ps" ||          // Added in 7.0
            Name == "cvtps2pd" ||          // Added in 3.9
            Name == "cvtsi2sd" ||          // Added in 7.0
            Name == "cvtsi642sd" ||        // Added in 7.0
            Name == "cvtss2sd" ||          // Added in 7.0
            Name == "div.sd" ||            // Added in 4.0
            Name == "mul.sd" ||            // Added in 4.0
            Name.starts_with("padds.") ||  // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" ||           // Added in 3.9
            Name == "pmaxu.b" ||           // Added in 3.9
            Name == "pmins.w" ||           // Added in 3.9
            Name == "pminu.b" ||           // Added in 3.9
            Name == "pmulu.dq" ||          // Added in 7.0
            Name.starts_with("pshuf") ||   // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") ||  // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") ||  // Added in 7.0
            Name == "sqrt.sd" ||           // Added in 7.0
            Name == "storel.dq" ||         // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd");             // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" ||         // Added in 5.0
            Name == "pblendw" ||          // Added in 3.7
            Name == "pmaxsb" ||           // Added in 3.9
            Name == "pmaxsd" ||           // Added in 3.9
            Name == "pmaxud" ||           // Added in 3.9
            Name == "pmaxuw" ||           // Added in 3.9
            Name == "pminsb" ||           // Added in 3.9
            Name == "pminsd" ||           // Added in 3.9
            Name == "pminud" ||           // Added in 3.9
            Name == "pminuw" ||           // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq");            // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128");  // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" ||          // Added in 3.8
            Name == "vpcmov.256" ||      // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot"));  // Added in 8.0

  return (Name == "addcarry.u32" ||        // Added in 8.0
          Name == "addcarry.u64" ||        // Added in 8.0
          Name == "addcarryx.u32" ||       // Added in 8.0
          Name == "addcarryx.u64" ||       // Added in 8.0
          Name == "subborrow.u32" ||       // Added in 8.0
          Name == "subborrow.u64" ||       // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has no parameters, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits (an i32 rather than an 8-bit immediate).

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.mask.cmp.")) {
    // Added in 7.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
             .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
             .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
             .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
             .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
             .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MaskedFPCompare(F, ID, NewFn);
    return false; // No other 'x86.avx512.mask.cmp.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'.
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic fns. Return true iff so.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::thread_pointer);
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8, respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
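        // bf16 elements are 16 bits wide, so the bf16 operand vector holds
        // OperandWidth / 16 elements.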
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are no longer polymorphic
    // and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to AArch64 Neon or Arm Neon.
  }
  // Continue on to Arm or AArch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
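          // A one-character group ("1".."4") is a plain vstN, which takes a
          // pointer, N vectors and an alignment operand, so fArgs.size() - 3
          // indexes StoreInts. The "Nlane" forms carry an extra lane operand,
          // hence fArgs.size() - 5 into StoreLaneInts, which starts at
          // vst2lane.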
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

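// Map a deprecated NVVM bf16 intrinsic name to its current Intrinsic::ID, or
// return not_intrinsic if the name does not need upgrading.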
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("abs."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_abs_bf16)
        .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

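// Check whether F is a deprecated "llvm." intrinsic declaration that needs
// upgrading. If so, return true and set NewFn to the replacement declaration,
// or to null when the calls themselves must be rewritten later instead.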
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target-specific intrinsic became redundant.
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target-specific intrinsic became redundant.
1069         NewFn = Intrinsic::getOrInsertDeclaration(
1070             F->getParent(), Intrinsic::ldexp,
1071             {F->getReturnType(), F->getArg(1)->getType()});
1072         return true;
1073       }
1074       break; // No other 'amdgcn.*'
1075     }
1076 
1077     break;
1078   }
1079   case 'c': {
1080     if (F->arg_size() == 1) {
1081       Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1082                              .StartsWith("ctlz.", Intrinsic::ctlz)
1083                              .StartsWith("cttz.", Intrinsic::cttz)
1084                              .Default(Intrinsic::not_intrinsic);
1085       if (ID != Intrinsic::not_intrinsic) {
1086         rename(F);
1087         NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1088                                                   F->arg_begin()->getType());
1089         return true;
1090       }
1091     }
1092 
1093     if (F->arg_size() == 2 && Name == "coro.end") {
1094       rename(F);
1095       NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1096                                                 Intrinsic::coro_end);
1097       return true;
1098     }
1099 
1100     break;
1101   }
1102   case 'd':
1103     if (Name.consume_front("dbg.")) {
1104       // Mark debug intrinsics for upgrade to new debug format.
1105       if (CanUpgradeDebugIntrinsicsToRecords &&
1106           F->getParent()->IsNewDbgInfoFormat) {
1107         if (Name == "addr" || Name == "value" || Name == "assign" ||
1108             Name == "declare" || Name == "label") {
1109           // There's no function to replace these with.
1110           NewFn = nullptr;
1111           // But we do want these to get upgraded.
1112           return true;
1113         }
1114       }
1115       // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1116       // converted to DbgVariableRecords later.
1117       if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1118         rename(F);
1119         NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1120                                                   Intrinsic::dbg_value);
1121         return true;
1122       }
1123       break; // No other 'dbg.*'.
1124     }
1125     break;
1126   case 'e':
1127     if (Name.consume_front("experimental.vector.")) {
1128       Intrinsic::ID ID =
1129           StringSwitch<Intrinsic::ID>(Name)
1130               // Skip over extract.last.active, otherwise it will be 'upgraded'
1131               // to a regular vector extract which is a different operation.
1132               .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
1133               .StartsWith("extract.", Intrinsic::vector_extract)
1134               .StartsWith("insert.", Intrinsic::vector_insert)
1135               .StartsWith("splice.", Intrinsic::vector_splice)
1136               .StartsWith("reverse.", Intrinsic::vector_reverse)
1137               .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1138               .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1139               .Default(Intrinsic::not_intrinsic);
1140       if (ID != Intrinsic::not_intrinsic) {
1141         const auto *FT = F->getFunctionType();
1142         SmallVector<Type *, 2> Tys;
1143         if (ID == Intrinsic::vector_extract ||
1144             ID == Intrinsic::vector_interleave2)
1145           // Extracting and interleaving overload the return type.
1146           Tys.push_back(FT->getReturnType());
1147         if (ID != Intrinsic::vector_interleave2)
1148           Tys.push_back(FT->getParamType(0));
1149         if (ID == Intrinsic::vector_insert)
1150           // Inserting overloads the inserted type.
1151           Tys.push_back(FT->getParamType(1));
1152         rename(F);
1153         NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
1154         return true;
1155       }
1156 
1157       if (Name.consume_front("reduce.")) {
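             // Old reduction intrinsic names carry the operation and the
             // vector type, e.g. 'add.v4i32' once the prefixes above have been
             // consumed; pull out the operation and map it to the
             // corresponding vector_reduce intrinsic.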
1158         SmallVector<StringRef, 2> Groups;
1159         static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1160         if (R.match(Name, &Groups))
1161           ID = StringSwitch<Intrinsic::ID>(Groups[1])
1162                    .Case("add", Intrinsic::vector_reduce_add)
1163                    .Case("mul", Intrinsic::vector_reduce_mul)
1164                    .Case("and", Intrinsic::vector_reduce_and)
1165                    .Case("or", Intrinsic::vector_reduce_or)
1166                    .Case("xor", Intrinsic::vector_reduce_xor)
1167                    .Case("smax", Intrinsic::vector_reduce_smax)
1168                    .Case("smin", Intrinsic::vector_reduce_smin)
1169                    .Case("umax", Intrinsic::vector_reduce_umax)
1170                    .Case("umin", Intrinsic::vector_reduce_umin)
1171                    .Case("fmax", Intrinsic::vector_reduce_fmax)
1172                    .Case("fmin", Intrinsic::vector_reduce_fmin)
1173                    .Default(Intrinsic::not_intrinsic);
1174 
1175         bool V2 = false;
1176         if (ID == Intrinsic::not_intrinsic) {
1177           static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1178           Groups.clear();
1179           V2 = true;
1180           if (R2.match(Name, &Groups))
1181             ID = StringSwitch<Intrinsic::ID>(Groups[1])
1182                      .Case("fadd", Intrinsic::vector_reduce_fadd)
1183                      .Case("fmul", Intrinsic::vector_reduce_fmul)
1184                      .Default(Intrinsic::not_intrinsic);
1185         }
1186         if (ID != Intrinsic::not_intrinsic) {
1187           rename(F);
1188           auto Args = F->getFunctionType()->params();
1189           NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1190                                                     {Args[V2 ? 1 : 0]});
1191           return true;
1192         }
1193         break; // No other 'experimental.vector.reduce.*'.
1194       }
1195       break; // No other 'experimental.vector.*'.
1196     }
1197     if (Name.consume_front("experimental.stepvector.")) {
1198       Intrinsic::ID ID = Intrinsic::stepvector;
1199       rename(F);
1200       NewFn = Intrinsic::getOrInsertDeclaration(
1201           F->getParent(), ID, F->getFunctionType()->getReturnType());
1202       return true;
1203     }
1204     break; // No other 'e*'.
1205   case 'f':
1206     if (Name.starts_with("flt.rounds")) {
1207       rename(F);
1208       NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1209                                                 Intrinsic::get_rounding);
1210       return true;
1211     }
1212     break;
1213   case 'i':
1214     if (Name.starts_with("invariant.group.barrier")) {
1215       // Rename invariant.group.barrier to launder.invariant.group
1216       auto Args = F->getFunctionType()->params();
1217       Type* ObjectPtr[1] = {Args[0]};
1218       rename(F);
1219       NewFn = Intrinsic::getOrInsertDeclaration(
1220           F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1221       return true;
1222     }
1223     break;
1224   case 'm': {
1225   // Update the memory intrinsics (memcpy/memmove/memset) that have an
1226   // alignment parameter to embed the alignment as an attribute on the
1227   // pointer args.
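       // For illustration, the old five-operand form is roughly
       //   declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i32, i1)
       // and is redeclared without the i32 alignment operand as
       //   declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
       // with the alignment instead carried by 'align' parameter attributes.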
1228     if (unsigned ID = StringSwitch<unsigned>(Name)
1229                           .StartsWith("memcpy.", Intrinsic::memcpy)
1230                           .StartsWith("memmove.", Intrinsic::memmove)
1231                           .Default(0)) {
1232       if (F->arg_size() == 5) {
1233         rename(F);
1234         // Get the types of dest, src, and len
1235         ArrayRef<Type *> ParamTypes =
1236             F->getFunctionType()->params().slice(0, 3);
1237         NewFn =
1238             Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1239         return true;
1240       }
1241     }
1242     if (Name.starts_with("memset.") && F->arg_size() == 5) {
1243       rename(F);
1244       // Get the types of dest and len.
1245       const auto *FT = F->getFunctionType();
1246       Type *ParamTypes[2] = {
1247           FT->getParamType(0), // Dest
1248           FT->getParamType(2)  // Len
1249       };
1250       NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1251                                                 Intrinsic::memset, ParamTypes);
1252       return true;
1253     }
1254     break;
1255   }
1256   case 'n': {
1257     if (Name.consume_front("nvvm.")) {
1258       // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1259       if (F->arg_size() == 1) {
1260         Intrinsic::ID IID =
1261             StringSwitch<Intrinsic::ID>(Name)
1262                 .Cases("brev32", "brev64", Intrinsic::bitreverse)
1263                 .Case("clz.i", Intrinsic::ctlz)
1264                 .Case("popc.i", Intrinsic::ctpop)
1265                 .Default(Intrinsic::not_intrinsic);
1266         if (IID != Intrinsic::not_intrinsic) {
1267           NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1268                                                     {F->getReturnType()});
1269           return true;
1270         }
1271       }
1272 
1273       // Check for nvvm intrinsics that need a return type adjustment.
1274       if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1275         Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
1276         if (IID != Intrinsic::not_intrinsic) {
1277           NewFn = nullptr;
1278           return true;
1279         }
1280       }
1281 
1282       // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1283       // not to an intrinsic alone.  We expand them in UpgradeIntrinsicCall.
1284       //
1285       // TODO: We could add lohi.i2d.
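           // For example, nvvm.abs.{i,ll} becomes an icmp+select idiom and
           // nvvm.bitcast.* becomes a plain bitcast; see
           // upgradeNVVMIntrinsicCall below.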
1286       bool Expand = false;
1287       if (Name.consume_front("abs."))
1288         // nvvm.abs.{i,ll}
1289         Expand = Name == "i" || Name == "ll";
1290       else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
1291         Expand = true;
1292       else if (Name.consume_front("max.") || Name.consume_front("min."))
1293         // nvvm.{min,max}.{s,i,ll,us,ui,ull}
1294         Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
1295                  Name == "ui" || Name == "ull";
1296       else if (Name.consume_front("atomic.load.add."))
1297         // nvvm.atomic.load.add.{f32.p,f64.p}
1298         Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
1299       else if (Name.consume_front("bitcast."))
1300         // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1301         Expand =
1302             Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1303       else if (Name.consume_front("rotate."))
1304         // nvvm.rotate.{b32,b64,right.b64}
1305         Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1306       else if (Name.consume_front("ptr.gen.to."))
1307         // nvvm.ptr.gen.to.{local,shared,global,constant}
1308         Expand = Name.starts_with("local") || Name.starts_with("shared") ||
1309                  Name.starts_with("global") || Name.starts_with("constant");
1310       else if (Name.consume_front("ptr."))
1311         // nvvm.ptr.{local,shared,global,constant}.to.gen
1312         Expand =
1313             (Name.consume_front("local") || Name.consume_front("shared") ||
1314              Name.consume_front("global") || Name.consume_front("constant")) &&
1315             Name.starts_with(".to.gen");
1316       else if (Name.consume_front("ldg.global."))
1317         // nvvm.ldg.global.{i,p,f}
1318         Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1319                   Name.starts_with("p."));
1320       else
1321         Expand = false;
1322 
1323       if (Expand) {
1324         NewFn = nullptr;
1325         return true;
1326       }
1327       break; // No other 'nvvm.*'.
1328     }
1329     break;
1330   }
1331   case 'o':
1332   // We only need to change the name to match the mangling, including the
1333     // address space.
1334     if (Name.starts_with("objectsize.")) {
1335       Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1336       if (F->arg_size() == 2 || F->arg_size() == 3 ||
1337           F->getName() !=
1338               Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
1339         rename(F);
1340         NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1341                                                   Intrinsic::objectsize, Tys);
1342         return true;
1343       }
1344     }
1345     break;
1346 
1347   case 'p':
1348     if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1349       rename(F);
1350       NewFn = Intrinsic::getOrInsertDeclaration(
1351           F->getParent(), Intrinsic::ptr_annotation,
1352           {F->arg_begin()->getType(), F->getArg(1)->getType()});
1353       return true;
1354     }
1355     break;
1356 
1357   case 'r': {
1358     if (Name.consume_front("riscv.")) {
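           // The checks below upgrade scalar crypto declarations that still
           // use the old types: a non-i32 byte-select immediate, or i64
           // values/results left over from XLen-sized types on RV64. The
           // current declarations use i32 throughout.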
1359       Intrinsic::ID ID;
1360       ID = StringSwitch<Intrinsic::ID>(Name)
1361                .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1362                .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1363                .Case("aes32esi", Intrinsic::riscv_aes32esi)
1364                .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1365                .Default(Intrinsic::not_intrinsic);
1366       if (ID != Intrinsic::not_intrinsic) {
1367         if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1368           rename(F);
1369           NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1370           return true;
1371         }
1372         break; // No other applicable upgrades.
1373       }
1374 
1375       ID = StringSwitch<Intrinsic::ID>(Name)
1376                .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1377                .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1378                .Default(Intrinsic::not_intrinsic);
1379       if (ID != Intrinsic::not_intrinsic) {
1380         if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1381             F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1382           rename(F);
1383           NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1384           return true;
1385         }
1386         break; // No other applicable upgrades.
1387       }
1388 
1389       ID = StringSwitch<Intrinsic::ID>(Name)
1390                .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1391                .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1392                .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1393                .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1394                .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1395                .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1396                .Default(Intrinsic::not_intrinsic);
1397       if (ID != Intrinsic::not_intrinsic) {
1398         if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1399           rename(F);
1400           NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1401           return true;
1402         }
1403         break; // No other applicable upgrades.
1404       }
1405       break; // No other 'riscv.*' intrinsics
1406     }
1407   } break;
1408 
1409   case 's':
1410     if (Name == "stackprotectorcheck") {
1411       NewFn = nullptr;
1412       return true;
1413     }
1414     break;
1415 
1416   case 'v': {
1417     if (Name == "var.annotation" && F->arg_size() == 4) {
1418       rename(F);
1419       NewFn = Intrinsic::getOrInsertDeclaration(
1420           F->getParent(), Intrinsic::var_annotation,
1421           {F->arg_begin()->getType(), F->getArg(1)->getType()});
1422       return true;
1423     }
1424     break;
1425   }
1426 
1427   case 'w':
1428     if (Name.consume_front("wasm.")) {
1429       Intrinsic::ID ID =
1430           StringSwitch<Intrinsic::ID>(Name)
1431               .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1432               .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1433               .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1434               .Default(Intrinsic::not_intrinsic);
1435       if (ID != Intrinsic::not_intrinsic) {
1436         rename(F);
1437         NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1438                                                   F->getReturnType());
1439         return true;
1440       }
1441 
1442       if (Name.consume_front("dot.i8x16.i7x16.")) {
1443         ID = StringSwitch<Intrinsic::ID>(Name)
1444                  .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1445                  .Case("add.signed",
1446                        Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1447                  .Default(Intrinsic::not_intrinsic);
1448         if (ID != Intrinsic::not_intrinsic) {
1449           rename(F);
1450           NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1451           return true;
1452         }
1453         break; // No other 'wasm.dot.i8x16.i7x16.*'.
1454       }
1455       break; // No other 'wasm.*'.
1456     }
1457     break;
1458 
1459   case 'x':
1460     if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1461       return true;
1462   }
1463 
1464   auto *ST = dyn_cast<StructType>(F->getReturnType());
1465   if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1466       F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1467     // Replace return type with literal non-packed struct. Only do this for
1468     // intrinsics declared to return a struct, not for intrinsics with
1469     // overloaded return type, in which case the exact struct type will be
1470     // mangled into the name.
1471     SmallVector<Intrinsic::IITDescriptor> Desc;
1472     Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1473     if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1474       auto *FT = F->getFunctionType();
1475       auto *NewST = StructType::get(ST->getContext(), ST->elements());
1476       auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1477       std::string Name = F->getName().str();
1478       rename(F);
1479       NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1480                                Name, F->getParent());
1481 
1482       // The new function may also need remangling.
1483       if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1484         NewFn = *Result;
1485       return true;
1486     }
1487   }
1488 
1489   // Remangle our intrinsic since we may have upgraded the mangling.
1490   auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1491   if (Result != std::nullopt) {
1492     NewFn = *Result;
1493     return true;
1494   }
1495 
1496   //  This may not belong here. This function is effectively being overloaded
1497   //  to both detect an intrinsic which needs upgrading, and to provide the
1498   //  upgraded form of the intrinsic. We should perhaps have two separate
1499   //  functions for this.
1500   return false;
1501 }
1502 
1503 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1504                                     bool CanUpgradeDebugIntrinsicsToRecords) {
1505   NewFn = nullptr;
1506   bool Upgraded =
1507       upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1508   assert(F != NewFn && "Intrinsic function upgraded to the same function");
1509 
1510   // Upgrade intrinsic attributes.  This does not change the function.
1511   if (NewFn)
1512     F = NewFn;
1513   if (Intrinsic::ID id = F->getIntrinsicID())
1514     F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1515   return Upgraded;
1516 }
1517 
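     // Upgrade two-field llvm.global_ctors / llvm.global_dtors entries to the
     // current three-field form by appending a null pointer (the associated
     // global) to each entry.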
1518 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1519   if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1520                           GV->getName() == "llvm.global_dtors")) ||
1521       !GV->hasInitializer())
1522     return nullptr;
1523   ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1524   if (!ATy)
1525     return nullptr;
1526   StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1527   if (!STy || STy->getNumElements() != 2)
1528     return nullptr;
1529 
1530   LLVMContext &C = GV->getContext();
1531   IRBuilder<> IRB(C);
1532   auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1533                                IRB.getPtrTy());
1534   Constant *Init = GV->getInitializer();
1535   unsigned N = Init->getNumOperands();
1536   std::vector<Constant *> NewCtors(N);
1537   for (unsigned i = 0; i != N; ++i) {
1538     auto Ctor = cast<Constant>(Init->getOperand(i));
1539     NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1540                                       Ctor->getAggregateElement(1),
1541                                       Constant::getNullValue(IRB.getPtrTy()));
1542   }
1543   Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1544 
1545   return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1546                             NewInit, GV->getName());
1547 }
1548 
1549 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1550 // to byte shuffles.
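     // For example, on a <2 x i64> operand a pslldq by 3 becomes a <16 x i8>
     // shuffle producing three zero bytes followed by bytes 0..12 of the
     // source.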
1551 static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1552                                          unsigned Shift) {
1553   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1554   unsigned NumElts = ResultTy->getNumElements() * 8;
1555 
1556   // Bitcast from a 64-bit element type to a byte element type.
1557   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1558   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1559 
1560   // We'll be shuffling in zeroes.
1561   Value *Res = Constant::getNullValue(VecTy);
1562 
1563   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1564   // we'll just return the zero vector.
1565   if (Shift < 16) {
1566     int Idxs[64];
1567     // 256/512-bit version is split into 2/4 16-byte lanes.
1568     for (unsigned l = 0; l != NumElts; l += 16)
1569       for (unsigned i = 0; i != 16; ++i) {
1570         unsigned Idx = NumElts + i - Shift;
1571         if (Idx < NumElts)
1572           Idx -= NumElts - 16; // end of lane, switch operand.
1573         Idxs[l + i] = Idx + l;
1574       }
1575 
1576     Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1577   }
1578 
1579   // Bitcast back to a 64-bit element type.
1580   return Builder.CreateBitCast(Res, ResultTy, "cast");
1581 }
1582 
1583 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1584 // to byte shuffles.
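     // For example, on a <2 x i64> operand a psrldq by 3 becomes a <16 x i8>
     // shuffle producing bytes 3..15 of the source followed by three zero
     // bytes.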
1585 static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1586                                          unsigned Shift) {
1587   auto *ResultTy = cast<FixedVectorType>(Op->getType());
1588   unsigned NumElts = ResultTy->getNumElements() * 8;
1589 
1590   // Bitcast from a 64-bit element type to a byte element type.
1591   Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1592   Op = Builder.CreateBitCast(Op, VecTy, "cast");
1593 
1594   // We'll be shuffling in zeroes.
1595   Value *Res = Constant::getNullValue(VecTy);
1596 
1597   // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1598   // we'll just return the zero vector.
1599   if (Shift < 16) {
1600     int Idxs[64];
1601     // 256/512-bit version is split into 2/4 16-byte lanes.
1602     for (unsigned l = 0; l != NumElts; l += 16)
1603       for (unsigned i = 0; i != 16; ++i) {
1604         unsigned Idx = i + Shift;
1605         if (Idx >= 16)
1606           Idx += NumElts - 16; // end of lane, switch operand.
1607         Idxs[l + i] = Idx + l;
1608       }
1609 
1610     Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1611   }
1612 
1613   // Bitcast back to a 64-bit element type.
1614   return Builder.CreateBitCast(Res, ResultTy, "cast");
1615 }
1616 
1617 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1618                             unsigned NumElts) {
1619   assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1620   llvm::VectorType *MaskTy = FixedVectorType::get(
1621       Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1622   Mask = Builder.CreateBitCast(Mask, MaskTy);
1623 
1624   // If we have fewer than 8 elements (1, 2 or 4), then the starting mask was an
1625   // i8 and we need to extract down to the right number of elements.
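       // For example, with NumElts == 4 the i8 mask is bitcast to <8 x i1>
       // and elements <0,1,2,3> are extracted.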
1626   if (NumElts <= 4) {
1627     int Indices[4];
1628     for (unsigned i = 0; i != NumElts; ++i)
1629       Indices[i] = i;
1630     Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1631                                        "extract");
1632   }
1633 
1634   return Mask;
1635 }
1636 
1637 static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1638                             Value *Op1) {
1639   // If the mask is all ones just emit the first operation.
1640   if (const auto *C = dyn_cast<Constant>(Mask))
1641     if (C->isAllOnesValue())
1642       return Op0;
1643 
1644   Mask = getX86MaskVec(Builder, Mask,
1645                        cast<FixedVectorType>(Op0->getType())->getNumElements());
1646   return Builder.CreateSelect(Mask, Op0, Op1);
1647 }
1648 
1649 static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1650                                   Value *Op1) {
1651   // If the mask is all ones just emit the first operation.
1652   if (const auto *C = dyn_cast<Constant>(Mask))
1653     if (C->isAllOnesValue())
1654       return Op0;
1655 
1656   auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1657                                       Mask->getType()->getIntegerBitWidth());
1658   Mask = Builder.CreateBitCast(Mask, MaskTy);
1659   Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1660   return Builder.CreateSelect(Mask, Op0, Op1);
1661 }
1662 
1663 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1664 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1665 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
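     // For example, a 128-bit palignr with an immediate of 17 is handled by
     // shifting the pair {zero, Op0} by 1, while VALIGN masks the same
     // immediate down to NumElts - 1.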
1666 static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1667                                         Value *Op1, Value *Shift,
1668                                         Value *Passthru, Value *Mask,
1669                                         bool IsVALIGN) {
1670   unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1671 
1672   unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1673   assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1674   assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1675   assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1676 
1677   // Mask the immediate for VALIGN.
1678   if (IsVALIGN)
1679     ShiftVal &= (NumElts - 1);
1680 
1681   // If palignr is shifting the pair of vectors more than the size of two
1682   // lanes, emit zero.
1683   if (ShiftVal >= 32)
1684     return llvm::Constant::getNullValue(Op0->getType());
1685 
1686   // If palignr is shifting the pair of input vectors more than one lane,
1687   // but less than two lanes, convert to shifting in zeroes.
1688   if (ShiftVal > 16) {
1689     ShiftVal -= 16;
1690     Op1 = Op0;
1691     Op0 = llvm::Constant::getNullValue(Op0->getType());
1692   }
1693 
1694   int Indices[64];
1695   // 256-bit palignr operates on 128-bit lanes, so we need to handle that.
1696   for (unsigned l = 0; l < NumElts; l += 16) {
1697     for (unsigned i = 0; i != 16; ++i) {
1698       unsigned Idx = ShiftVal + i;
1699       if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1700         Idx += NumElts - 16; // End of lane, switch operand.
1701       Indices[l + i] = Idx + l;
1702     }
1703   }
1704 
1705   Value *Align = Builder.CreateShuffleVector(
1706       Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1707 
1708   return emitX86Select(Builder, Mask, Align, Passthru);
1709 }
1710 
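     // Upgrade masked VPERMT2/VPERMI2 variable permutes to the unmasked
     // vpermi2var intrinsic followed by a select. IndexForm distinguishes the
     // two operand orders of the old intrinsics; the non-index form has its
     // first two operands swapped to match vpermi2var.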
1711 static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1712                                           bool ZeroMask, bool IndexForm) {
1713   Type *Ty = CI.getType();
1714   unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1715   unsigned EltWidth = Ty->getScalarSizeInBits();
1716   bool IsFloat = Ty->isFPOrFPVectorTy();
1717   Intrinsic::ID IID;
1718   if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1719     IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1720   else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1721     IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1722   else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1723     IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1724   else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1725     IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1726   else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1727     IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1728   else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1729     IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1730   else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1731     IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1732   else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1733     IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1734   else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1735     IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1736   else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1737     IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1738   else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1739     IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1740   else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1741     IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1742   else if (VecWidth == 128 && EltWidth == 16)
1743     IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1744   else if (VecWidth == 256 && EltWidth == 16)
1745     IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1746   else if (VecWidth == 512 && EltWidth == 16)
1747     IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1748   else if (VecWidth == 128 && EltWidth == 8)
1749     IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1750   else if (VecWidth == 256 && EltWidth == 8)
1751     IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1752   else if (VecWidth == 512 && EltWidth == 8)
1753     IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1754   else
1755     llvm_unreachable("Unexpected intrinsic");
1756 
1757   Value *Args[] = {CI.getArgOperand(0), CI.getArgOperand(1),
1758                    CI.getArgOperand(2)};
1759 
1760   // If this isn't the index form, we need to swap operands 0 and 1.
1761   if (!IndexForm)
1762     std::swap(Args[0], Args[1]);
1763 
1764   Value *V = Builder.CreateIntrinsic(IID, {}, Args);
1765   Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1766                              : Builder.CreateBitCast(CI.getArgOperand(1),
1767                                                      Ty);
1768   return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1769 }
1770 
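     // Upgrade a (possibly masked) binary intrinsic to a plain two-operand
     // intrinsic call; the four-argument masked form additionally selects
     // between the result and the passthru operand.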
1771 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1772                                          Intrinsic::ID IID) {
1773   Type *Ty = CI.getType();
1774   Value *Op0 = CI.getOperand(0);
1775   Value *Op1 = CI.getOperand(1);
1776   Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
1777 
1778   if (CI.arg_size() == 4) { // For masked intrinsics.
1779     Value *VecSrc = CI.getOperand(2);
1780     Value *Mask = CI.getOperand(3);
1781     Res = emitX86Select(Builder, Mask, Res, VecSrc);
1782   }
1783   return Res;
1784 }
1785 
1786 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1787                                bool IsRotateRight) {
1788   Type *Ty = CI.getType();
1789   Value *Src = CI.getArgOperand(0);
1790   Value *Amt = CI.getArgOperand(1);
1791 
1792   // The amount may be a scalar immediate, in which case create a splat vector.
1793   // Funnel shift amounts are treated as modulo and types are all power-of-2, so
1794   // we only care about the lowest log2 bits anyway.
1795   if (Amt->getType() != Ty) {
1796     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1797     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1798     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1799   }
1800 
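       // A rotate is a funnel shift of a value with itself: rotl(x, a) is
       // fshl(x, x, a) and rotr(x, a) is fshr(x, x, a).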
1801   Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1802   Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
1803 
1804   if (CI.arg_size() == 4) { // For masked intrinsics.
1805     Value *VecSrc = CI.getOperand(2);
1806     Value *Mask = CI.getOperand(3);
1807     Res = emitX86Select(Builder, Mask, Res, VecSrc);
1808   }
1809   return Res;
1810 }
1811 
1812 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1813                               bool IsSigned) {
1814   Type *Ty = CI.getType();
1815   Value *LHS = CI.getArgOperand(0);
1816   Value *RHS = CI.getArgOperand(1);
1817 
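       // XOP vpcom/vpcomu immediate encoding: 0 = LT, 1 = LE, 2 = GT,
       // 3 = GE, 4 = EQ, 5 = NE, 6 = always false, 7 = always true.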
1818   CmpInst::Predicate Pred;
1819   switch (Imm) {
1820   case 0x0:
1821     Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1822     break;
1823   case 0x1:
1824     Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1825     break;
1826   case 0x2:
1827     Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1828     break;
1829   case 0x3:
1830     Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1831     break;
1832   case 0x4:
1833     Pred = ICmpInst::ICMP_EQ;
1834     break;
1835   case 0x5:
1836     Pred = ICmpInst::ICMP_NE;
1837     break;
1838   case 0x6:
1839     return Constant::getNullValue(Ty); // FALSE
1840   case 0x7:
1841     return Constant::getAllOnesValue(Ty); // TRUE
1842   default:
1843     llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1844   }
1845 
1846   Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1847   Value *Ext = Builder.CreateSExt(Cmp, Ty);
1848   return Ext;
1849 }
1850 
1851 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1852                                     bool IsShiftRight, bool ZeroMask) {
1853   Type *Ty = CI.getType();
1854   Value *Op0 = CI.getArgOperand(0);
1855   Value *Op1 = CI.getArgOperand(1);
1856   Value *Amt = CI.getArgOperand(2);
1857 
1858   if (IsShiftRight)
1859     std::swap(Op0, Op1);
1860 
1861   // The amount may be a scalar immediate, in which case create a splat vector.
1862   // Funnel shift amounts are treated as modulo and types are all power-of-2, so
1863   // we only care about the lowest log2 bits anyway.
1864   if (Amt->getType() != Ty) {
1865     unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1866     Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1867     Amt = Builder.CreateVectorSplat(NumElts, Amt);
1868   }
1869 
1870   Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1871   Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
1872 
1873   unsigned NumArgs = CI.arg_size();
1874   if (NumArgs >= 4) { // For masked intrinsics.
1875     Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1876                     ZeroMask     ? ConstantAggregateZero::get(CI.getType()) :
1877                                    CI.getArgOperand(0);
1878     Value *Mask = CI.getOperand(NumArgs - 1);
1879     Res = emitX86Select(Builder, Mask, Res, VecSrc);
1880   }
1881   return Res;
1882 }
1883 
1884 static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
1885                                  Value *Mask, bool Aligned) {
1886   const Align Alignment =
1887       Aligned
1888           ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1889           : Align(1);
1890 
1891   // If the mask is all ones just emit a regular store.
1892   if (const auto *C = dyn_cast<Constant>(Mask))
1893     if (C->isAllOnesValue())
1894       return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1895 
1896   // Convert the mask from an integer type to a vector of i1.
1897   unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1898   Mask = getX86MaskVec(Builder, Mask, NumElts);
1899   return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1900 }
1901 
1902 static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
1903                                 Value *Passthru, Value *Mask, bool Aligned) {
1904   Type *ValTy = Passthru->getType();
1905   const Align Alignment =
1906       Aligned
1907           ? Align(
1908                 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
1909                 8)
1910           : Align(1);
1911 
1912   // If the mask is all ones just emit a regular load.
1913   if (const auto *C = dyn_cast<Constant>(Mask))
1914     if (C->isAllOnesValue())
1915       return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1916 
1917   // Convert the mask from an integer type to a vector of i1.
1918   unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1919   Mask = getX86MaskVec(Builder, Mask, NumElts);
1920   return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1921 }
1922 
1923 static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1924   Type *Ty = CI.getType();
1925   Value *Op0 = CI.getArgOperand(0);
1926   Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
1927                                        {Op0, Builder.getInt1(false)});
1928   if (CI.arg_size() == 3)
1929     Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1930   return Res;
1931 }
1932 
1933 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1934   Type *Ty = CI.getType();
1935 
1936   // Arguments have a vXi32 type so cast to vXi64.
1937   Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1938   Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1939 
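       // Isolate the low 32 bits of each 64-bit lane. For example, a signed
       // lane holding 0x0000000080000000 becomes 0xFFFFFFFF80000000 after the
       // shl/ashr pair below.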
1940   if (IsSigned) {
1941     // Shift left then arithmetic shift right.
1942     Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1943     LHS = Builder.CreateShl(LHS, ShiftAmt);
1944     LHS = Builder.CreateAShr(LHS, ShiftAmt);
1945     RHS = Builder.CreateShl(RHS, ShiftAmt);
1946     RHS = Builder.CreateAShr(RHS, ShiftAmt);
1947   } else {
1948     // Clear the upper bits.
1949     Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1950     LHS = Builder.CreateAnd(LHS, Mask);
1951     RHS = Builder.CreateAnd(RHS, Mask);
1952   }
1953 
1954   Value *Res = Builder.CreateMul(LHS, RHS);
1955 
1956   if (CI.arg_size() == 4)
1957     Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1958 
1959   return Res;
1960 }
1961 
1962 // Apply the mask to a vector of i1s and widen the result to at least 8 bits.
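     // For example, a <4 x i1> compare result is padded with zero bits to
     // <8 x i1> before being bitcast to i8.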
1963 static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1964                                      Value *Mask) {
1965   unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1966   if (Mask) {
1967     const auto *C = dyn_cast<Constant>(Mask);
1968     if (!C || !C->isAllOnesValue())
1969       Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1970   }
1971 
1972   if (NumElts < 8) {
1973     int Indices[8];
1974     for (unsigned i = 0; i != NumElts; ++i)
1975       Indices[i] = i;
1976     for (unsigned i = NumElts; i != 8; ++i)
1977       Indices[i] = NumElts + i % NumElts;
1978     Vec = Builder.CreateShuffleVector(Vec,
1979                                       Constant::getNullValue(Vec->getType()),
1980                                       Indices);
1981   }
1982   return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1983 }
1984 
1985 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1986                                    unsigned CC, bool Signed) {
1987   Value *Op0 = CI.getArgOperand(0);
1988   unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1989 
1990   Value *Cmp;
1991   if (CC == 3) {
1992     Cmp = Constant::getNullValue(
1993         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1994   } else if (CC == 7) {
1995     Cmp = Constant::getAllOnesValue(
1996         FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1997   } else {
1998     ICmpInst::Predicate Pred;
1999     switch (CC) {
2000     default: llvm_unreachable("Unknown condition code");
2001     case 0: Pred = ICmpInst::ICMP_EQ;  break;
2002     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2003     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2004     case 4: Pred = ICmpInst::ICMP_NE;  break;
2005     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2006     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2007     }
2008     Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2009   }
2010 
2011   Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2012 
2013   return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2014 }
2015 
2016 // Replace a masked intrinsic with an older unmasked intrinsic.
2017 static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2018                                     Intrinsic::ID IID) {
2019   Value *Rep = Builder.CreateIntrinsic(
2020       IID, {}, {CI.getArgOperand(0), CI.getArgOperand(1)});
2021   return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2022 }
2023 
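     // Upgrade a masked scalar move: bit 0 of the mask selects between
     // element 0 of B and element 0 of Src, and the chosen scalar is inserted
     // into element 0 of A.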
2024 static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2025   Value* A = CI.getArgOperand(0);
2026   Value* B = CI.getArgOperand(1);
2027   Value* Src = CI.getArgOperand(2);
2028   Value* Mask = CI.getArgOperand(3);
2029 
2030   Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2031   Value* Cmp = Builder.CreateIsNotNull(AndNode);
2032   Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2033   Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2034   Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2035   return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2036 }
2037 
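     // Upgrade vpmovm2* style mask-to-vector intrinsics: sign-extend each
     // mask bit across the corresponding element of the result vector.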
2038 static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2039   Value* Op = CI.getArgOperand(0);
2040   Type* ReturnOp = CI.getType();
2041   unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2042   Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2043   return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2044 }
2045 
2046 // Replace intrinsic with unmasked version and a select.
2047 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2048                                       CallBase &CI, Value *&Rep) {
2049   Name = Name.substr(12); // Remove avx512.mask.
2050 
2051   unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2052   unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2053   Intrinsic::ID IID;
2054   if (Name.starts_with("max.p")) {
2055     if (VecWidth == 128 && EltWidth == 32)
2056       IID = Intrinsic::x86_sse_max_ps;
2057     else if (VecWidth == 128 && EltWidth == 64)
2058       IID = Intrinsic::x86_sse2_max_pd;
2059     else if (VecWidth == 256 && EltWidth == 32)
2060       IID = Intrinsic::x86_avx_max_ps_256;
2061     else if (VecWidth == 256 && EltWidth == 64)
2062       IID = Intrinsic::x86_avx_max_pd_256;
2063     else
2064       llvm_unreachable("Unexpected intrinsic");
2065   } else if (Name.starts_with("min.p")) {
2066     if (VecWidth == 128 && EltWidth == 32)
2067       IID = Intrinsic::x86_sse_min_ps;
2068     else if (VecWidth == 128 && EltWidth == 64)
2069       IID = Intrinsic::x86_sse2_min_pd;
2070     else if (VecWidth == 256 && EltWidth == 32)
2071       IID = Intrinsic::x86_avx_min_ps_256;
2072     else if (VecWidth == 256 && EltWidth == 64)
2073       IID = Intrinsic::x86_avx_min_pd_256;
2074     else
2075       llvm_unreachable("Unexpected intrinsic");
2076   } else if (Name.starts_with("pshuf.b.")) {
2077     if (VecWidth == 128)
2078       IID = Intrinsic::x86_ssse3_pshuf_b_128;
2079     else if (VecWidth == 256)
2080       IID = Intrinsic::x86_avx2_pshuf_b;
2081     else if (VecWidth == 512)
2082       IID = Intrinsic::x86_avx512_pshuf_b_512;
2083     else
2084       llvm_unreachable("Unexpected intrinsic");
2085   } else if (Name.starts_with("pmul.hr.sw.")) {
2086     if (VecWidth == 128)
2087       IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2088     else if (VecWidth == 256)
2089       IID = Intrinsic::x86_avx2_pmul_hr_sw;
2090     else if (VecWidth == 512)
2091       IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2092     else
2093       llvm_unreachable("Unexpected intrinsic");
2094   } else if (Name.starts_with("pmulh.w.")) {
2095     if (VecWidth == 128)
2096       IID = Intrinsic::x86_sse2_pmulh_w;
2097     else if (VecWidth == 256)
2098       IID = Intrinsic::x86_avx2_pmulh_w;
2099     else if (VecWidth == 512)
2100       IID = Intrinsic::x86_avx512_pmulh_w_512;
2101     else
2102       llvm_unreachable("Unexpected intrinsic");
2103   } else if (Name.starts_with("pmulhu.w.")) {
2104     if (VecWidth == 128)
2105       IID = Intrinsic::x86_sse2_pmulhu_w;
2106     else if (VecWidth == 256)
2107       IID = Intrinsic::x86_avx2_pmulhu_w;
2108     else if (VecWidth == 512)
2109       IID = Intrinsic::x86_avx512_pmulhu_w_512;
2110     else
2111       llvm_unreachable("Unexpected intrinsic");
2112   } else if (Name.starts_with("pmaddw.d.")) {
2113     if (VecWidth == 128)
2114       IID = Intrinsic::x86_sse2_pmadd_wd;
2115     else if (VecWidth == 256)
2116       IID = Intrinsic::x86_avx2_pmadd_wd;
2117     else if (VecWidth == 512)
2118       IID = Intrinsic::x86_avx512_pmaddw_d_512;
2119     else
2120       llvm_unreachable("Unexpected intrinsic");
2121   } else if (Name.starts_with("pmaddubs.w.")) {
2122     if (VecWidth == 128)
2123       IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2124     else if (VecWidth == 256)
2125       IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2126     else if (VecWidth == 512)
2127       IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2128     else
2129       llvm_unreachable("Unexpected intrinsic");
2130   } else if (Name.starts_with("packsswb.")) {
2131     if (VecWidth == 128)
2132       IID = Intrinsic::x86_sse2_packsswb_128;
2133     else if (VecWidth == 256)
2134       IID = Intrinsic::x86_avx2_packsswb;
2135     else if (VecWidth == 512)
2136       IID = Intrinsic::x86_avx512_packsswb_512;
2137     else
2138       llvm_unreachable("Unexpected intrinsic");
2139   } else if (Name.starts_with("packssdw.")) {
2140     if (VecWidth == 128)
2141       IID = Intrinsic::x86_sse2_packssdw_128;
2142     else if (VecWidth == 256)
2143       IID = Intrinsic::x86_avx2_packssdw;
2144     else if (VecWidth == 512)
2145       IID = Intrinsic::x86_avx512_packssdw_512;
2146     else
2147       llvm_unreachable("Unexpected intrinsic");
2148   } else if (Name.starts_with("packuswb.")) {
2149     if (VecWidth == 128)
2150       IID = Intrinsic::x86_sse2_packuswb_128;
2151     else if (VecWidth == 256)
2152       IID = Intrinsic::x86_avx2_packuswb;
2153     else if (VecWidth == 512)
2154       IID = Intrinsic::x86_avx512_packuswb_512;
2155     else
2156       llvm_unreachable("Unexpected intrinsic");
2157   } else if (Name.starts_with("packusdw.")) {
2158     if (VecWidth == 128)
2159       IID = Intrinsic::x86_sse41_packusdw;
2160     else if (VecWidth == 256)
2161       IID = Intrinsic::x86_avx2_packusdw;
2162     else if (VecWidth == 512)
2163       IID = Intrinsic::x86_avx512_packusdw_512;
2164     else
2165       llvm_unreachable("Unexpected intrinsic");
2166   } else if (Name.starts_with("vpermilvar.")) {
2167     if (VecWidth == 128 && EltWidth == 32)
2168       IID = Intrinsic::x86_avx_vpermilvar_ps;
2169     else if (VecWidth == 128 && EltWidth == 64)
2170       IID = Intrinsic::x86_avx_vpermilvar_pd;
2171     else if (VecWidth == 256 && EltWidth == 32)
2172       IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2173     else if (VecWidth == 256 && EltWidth == 64)
2174       IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2175     else if (VecWidth == 512 && EltWidth == 32)
2176       IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2177     else if (VecWidth == 512 && EltWidth == 64)
2178       IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2179     else
2180       llvm_unreachable("Unexpected intrinsic");
2181   } else if (Name == "cvtpd2dq.256") {
2182     IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2183   } else if (Name == "cvtpd2ps.256") {
2184     IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2185   } else if (Name == "cvttpd2dq.256") {
2186     IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2187   } else if (Name == "cvttps2dq.128") {
2188     IID = Intrinsic::x86_sse2_cvttps2dq;
2189   } else if (Name == "cvttps2dq.256") {
2190     IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2191   } else if (Name.starts_with("permvar.")) {
2192     bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2193     if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2194       IID = Intrinsic::x86_avx2_permps;
2195     else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2196       IID = Intrinsic::x86_avx2_permd;
2197     else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2198       IID = Intrinsic::x86_avx512_permvar_df_256;
2199     else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2200       IID = Intrinsic::x86_avx512_permvar_di_256;
2201     else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2202       IID = Intrinsic::x86_avx512_permvar_sf_512;
2203     else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2204       IID = Intrinsic::x86_avx512_permvar_si_512;
2205     else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2206       IID = Intrinsic::x86_avx512_permvar_df_512;
2207     else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2208       IID = Intrinsic::x86_avx512_permvar_di_512;
2209     else if (VecWidth == 128 && EltWidth == 16)
2210       IID = Intrinsic::x86_avx512_permvar_hi_128;
2211     else if (VecWidth == 256 && EltWidth == 16)
2212       IID = Intrinsic::x86_avx512_permvar_hi_256;
2213     else if (VecWidth == 512 && EltWidth == 16)
2214       IID = Intrinsic::x86_avx512_permvar_hi_512;
2215     else if (VecWidth == 128 && EltWidth == 8)
2216       IID = Intrinsic::x86_avx512_permvar_qi_128;
2217     else if (VecWidth == 256 && EltWidth == 8)
2218       IID = Intrinsic::x86_avx512_permvar_qi_256;
2219     else if (VecWidth == 512 && EltWidth == 8)
2220       IID = Intrinsic::x86_avx512_permvar_qi_512;
2221     else
2222       llvm_unreachable("Unexpected intrinsic");
2223   } else if (Name.starts_with("dbpsadbw.")) {
2224     if (VecWidth == 128)
2225       IID = Intrinsic::x86_avx512_dbpsadbw_128;
2226     else if (VecWidth == 256)
2227       IID = Intrinsic::x86_avx512_dbpsadbw_256;
2228     else if (VecWidth == 512)
2229       IID = Intrinsic::x86_avx512_dbpsadbw_512;
2230     else
2231       llvm_unreachable("Unexpected intrinsic");
2232   } else if (Name.starts_with("pmultishift.qb.")) {
2233     if (VecWidth == 128)
2234       IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2235     else if (VecWidth == 256)
2236       IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2237     else if (VecWidth == 512)
2238       IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2239     else
2240       llvm_unreachable("Unexpected intrinsic");
2241   } else if (Name.starts_with("conflict.")) {
2242     if (Name[9] == 'd' && VecWidth == 128)
2243       IID = Intrinsic::x86_avx512_conflict_d_128;
2244     else if (Name[9] == 'd' && VecWidth == 256)
2245       IID = Intrinsic::x86_avx512_conflict_d_256;
2246     else if (Name[9] == 'd' && VecWidth == 512)
2247       IID = Intrinsic::x86_avx512_conflict_d_512;
2248     else if (Name[9] == 'q' && VecWidth == 128)
2249       IID = Intrinsic::x86_avx512_conflict_q_128;
2250     else if (Name[9] == 'q' && VecWidth == 256)
2251       IID = Intrinsic::x86_avx512_conflict_q_256;
2252     else if (Name[9] == 'q' && VecWidth == 512)
2253       IID = Intrinsic::x86_avx512_conflict_q_512;
2254     else
2255       llvm_unreachable("Unexpected intrinsic");
2256   } else if (Name.starts_with("pavg.")) {
2257     if (Name[5] == 'b' && VecWidth == 128)
2258       IID = Intrinsic::x86_sse2_pavg_b;
2259     else if (Name[5] == 'b' && VecWidth == 256)
2260       IID = Intrinsic::x86_avx2_pavg_b;
2261     else if (Name[5] == 'b' && VecWidth == 512)
2262       IID = Intrinsic::x86_avx512_pavg_b_512;
2263     else if (Name[5] == 'w' && VecWidth == 128)
2264       IID = Intrinsic::x86_sse2_pavg_w;
2265     else if (Name[5] == 'w' && VecWidth == 256)
2266       IID = Intrinsic::x86_avx2_pavg_w;
2267     else if (Name[5] == 'w' && VecWidth == 512)
2268       IID = Intrinsic::x86_avx512_pavg_w_512;
2269     else
2270       llvm_unreachable("Unexpected intrinsic");
2271   } else
2272     return false;
2273 
2274   SmallVector<Value *, 4> Args(CI.args());
2275   Args.pop_back();
2276   Args.pop_back();
2277   Rep = Builder.CreateIntrinsic(IID, {}, Args);
2278   unsigned NumArgs = CI.arg_size();
2279   Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2280                       CI.getArgOperand(NumArgs - 2));
2281   return true;
2282 }
2283 
2284 /// Upgrade the comment in a call to inline asm that represents an ObjC
2285 /// retain/release marker.
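     /// For example, the '#' in "mov\tfp, fp\t\t# marker for
     /// objc_retainAutoreleaseReturnValue" is rewritten to ';'.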
2286 void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2287   size_t Pos;
2288   if (AsmStr->find("mov\tfp") == 0 &&
2289       AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2290       (Pos = AsmStr->find("# marker")) != std::string::npos) {
2291     AsmStr->replace(Pos, 1, ";");
2292   }
2293 }
2294 
2295 static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2296                                        Function *F, IRBuilder<> &Builder) {
2297   Value *Rep = nullptr;
2298 
2299   if (Name == "abs.i" || Name == "abs.ll") {
2300     Value *Arg = CI->getArgOperand(0);
2301     Value *Neg = Builder.CreateNeg(Arg, "neg");
2302     Value *Cmp = Builder.CreateICmpSGE(
2303         Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2304     Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2305   } else if (Name.starts_with("atomic.load.add.f32.p") ||
2306              Name.starts_with("atomic.load.add.f64.p")) {
2307     Value *Ptr = CI->getArgOperand(0);
2308     Value *Val = CI->getArgOperand(1);
2309     Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2310                                   AtomicOrdering::SequentiallyConsistent);
2311   } else if (Name.consume_front("max.") &&
2312              (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2313               Name == "ui" || Name == "ull")) {
2314     Value *Arg0 = CI->getArgOperand(0);
2315     Value *Arg1 = CI->getArgOperand(1);
2316     Value *Cmp = Name.starts_with("u")
2317                      ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2318                      : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2319     Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2320   } else if (Name.consume_front("min.") &&
2321              (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2322               Name == "ui" || Name == "ull")) {
2323     Value *Arg0 = CI->getArgOperand(0);
2324     Value *Arg1 = CI->getArgOperand(1);
2325     Value *Cmp = Name.starts_with("u")
2326                      ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2327                      : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2328     Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2329   } else if (Name == "clz.ll") {
2330     // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2331     Value *Arg = CI->getArgOperand(0);
2332     Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2333                                           {Arg, Builder.getFalse()},
2334                                           /*FMFSource=*/nullptr, "ctlz");
2335     Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2336   } else if (Name == "popc.ll") {
2337     // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2338     // i64.
2339     Value *Arg = CI->getArgOperand(0);
2340     Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2341                                           Arg, /*FMFSource=*/nullptr, "ctpop");
2342     Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2343   } else if (Name == "h2f") {
2344     Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2345                                   {Builder.getFloatTy()}, CI->getArgOperand(0),
2346                                   /*FMFSource=*/nullptr, "h2f");
2347   } else if (Name.consume_front("bitcast.") &&
2348              (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2349               Name == "d2ll")) {
2350     Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2351   } else if (Name == "rotate.b32") {
2352     Value *Arg = CI->getOperand(0);
2353     Value *ShiftAmt = CI->getOperand(1);
2354     Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2355                                   {Arg, Arg, ShiftAmt});
2356   } else if (Name == "rotate.b64") {
2357     Type *Int64Ty = Builder.getInt64Ty();
2358     Value *Arg = CI->getOperand(0);
2359     Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2360     Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2361                                   {Arg, Arg, ZExtShiftAmt});
2362   } else if (Name == "rotate.right.b64") {
2363     Type *Int64Ty = Builder.getInt64Ty();
2364     Value *Arg = CI->getOperand(0);
2365     Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2366     Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2367                                   {Arg, Arg, ZExtShiftAmt});
2368   } else if ((Name.consume_front("ptr.gen.to.") &&
2369               (Name.starts_with("local") || Name.starts_with("shared") ||
2370                Name.starts_with("global") || Name.starts_with("constant"))) ||
2371              (Name.consume_front("ptr.") &&
2372               (Name.consume_front("local") || Name.consume_front("shared") ||
2373                Name.consume_front("global") ||
2374                Name.consume_front("constant")) &&
2375               Name.starts_with(".to.gen"))) {
2376     Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2377   } else if (Name.consume_front("ldg.global")) {
2378     Value *Ptr = CI->getArgOperand(0);
2379     Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2380     // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2381     Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2382     Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2383     MDNode *MD = MDNode::get(Builder.getContext(), {});
2384     LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2385     return LD;
2386   } else {
2387     Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2388     if (IID != Intrinsic::not_intrinsic &&
2389         !F->getReturnType()->getScalarType()->isBFloatTy()) {
2390       rename(F);
2391       Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2392       SmallVector<Value *, 2> Args;
2393       for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2394         Value *Arg = CI->getArgOperand(I);
2395         Type *OldType = Arg->getType();
2396         Type *NewType = NewFn->getArg(I)->getType();
2397         Args.push_back(
2398             (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2399                 ? Builder.CreateBitCast(Arg, NewType)
2400                 : Arg);
2401       }
2402       Rep = Builder.CreateCall(NewFn, Args);
2403       if (F->getReturnType()->isIntegerTy())
2404         Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2405     }
2406   }
2407 
2408   return Rep;
2409 }
2410 
2411 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2412                                       IRBuilder<> &Builder) {
2413   LLVMContext &C = F->getContext();
2414   Value *Rep = nullptr;
2415 
2416   if (Name.starts_with("sse4a.movnt.")) {
2417     SmallVector<Metadata *, 1> Elts;
2418     Elts.push_back(
2419         ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2420     MDNode *Node = MDNode::get(C, Elts);
2421 
2422     Value *Arg0 = CI->getArgOperand(0);
2423     Value *Arg1 = CI->getArgOperand(1);
2424 
2425     // Nontemporal (unaligned) store of the 0th element of the float/double
2426     // vector.
2427     Value *Extract =
2428         Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2429 
2430     StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2431     SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2432   } else if (Name.starts_with("avx.movnt.") ||
2433              Name.starts_with("avx512.storent.")) {
2434     SmallVector<Metadata *, 1> Elts;
2435     Elts.push_back(
2436         ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2437     MDNode *Node = MDNode::get(C, Elts);
2438 
2439     Value *Arg0 = CI->getArgOperand(0);
2440     Value *Arg1 = CI->getArgOperand(1);
2441 
2442     StoreInst *SI = Builder.CreateAlignedStore(
2443         Arg1, Arg0,
2444         Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2445     SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2446   } else if (Name == "sse2.storel.dq") {
2447     Value *Arg0 = CI->getArgOperand(0);
2448     Value *Arg1 = CI->getArgOperand(1);
2449 
2450     auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2451     Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2452     Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2453     Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2454   } else if (Name.starts_with("sse.storeu.") ||
2455              Name.starts_with("sse2.storeu.") ||
2456              Name.starts_with("avx.storeu.")) {
2457     Value *Arg0 = CI->getArgOperand(0);
2458     Value *Arg1 = CI->getArgOperand(1);
2459     Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2460   } else if (Name == "avx512.mask.store.ss") {
2461     Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2462     upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2463                        Mask, false);
2464   } else if (Name.starts_with("avx512.mask.store")) {
2465     // "avx512.mask.storeu." or "avx512.mask.store."
2466     bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2467     upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2468                        CI->getArgOperand(2), Aligned);
2469   } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2470     // Upgrade packed integer vector compare intrinsics to compare instructions.
2471     // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2472     bool CmpEq = Name[9] == 'e';
2473     Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2474                              CI->getArgOperand(0), CI->getArgOperand(1));
2475     Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2476   } else if (Name.starts_with("avx512.broadcastm")) {
2477     Type *ExtTy = Type::getInt32Ty(C);
2478     if (CI->getOperand(0)->getType()->isIntegerTy(8))
2479       ExtTy = Type::getInt64Ty(C);
2480     unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2481                        ExtTy->getPrimitiveSizeInBits();
2482     Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2483     Rep = Builder.CreateVectorSplat(NumElts, Rep);
2484   } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2485     Value *Vec = CI->getArgOperand(0);
2486     Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2487     Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2488     Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2489   } else if (Name.starts_with("avx.sqrt.p") ||
2490              Name.starts_with("sse2.sqrt.p") ||
2491              Name.starts_with("sse.sqrt.p")) {
2492     Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2493                                   {CI->getArgOperand(0)});
2494   } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2495     if (CI->arg_size() == 4 &&
2496         (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2497          cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2498       Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2499                                           : Intrinsic::x86_avx512_sqrt_pd_512;
2500 
2501       Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2502       Rep = Builder.CreateIntrinsic(IID, {}, Args);
2503     } else {
2504       Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2505                                     {CI->getArgOperand(0)});
2506     }
2507     Rep =
2508         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2509   } else if (Name.starts_with("avx512.ptestm") ||
2510              Name.starts_with("avx512.ptestnm")) {
2511     Value *Op0 = CI->getArgOperand(0);
2512     Value *Op1 = CI->getArgOperand(1);
2513     Value *Mask = CI->getArgOperand(2);
2514     Rep = Builder.CreateAnd(Op0, Op1);
2515     llvm::Type *Ty = Op0->getType();
2516     Value *Zero = llvm::Constant::getNullValue(Ty);
2517     ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2518                                    ? ICmpInst::ICMP_NE
2519                                    : ICmpInst::ICMP_EQ;
2520     Rep = Builder.CreateICmp(Pred, Rep, Zero);
2521     Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2522   } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2523     unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2524                            ->getNumElements();
2525     Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2526     Rep =
2527         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2528   } else if (Name.starts_with("avx512.kunpck")) {
2529     unsigned NumElts = CI->getType()->getScalarSizeInBits();
2530     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2531     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2532     int Indices[64];
2533     for (unsigned i = 0; i != NumElts; ++i)
2534       Indices[i] = i;
2535 
2536     // First extract half of each vector. This gives better codegen than
2537     // doing it in a single shuffle.
2538     LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2539     RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2540     // Concat the vectors.
2541     // NOTE: Operands have to be swapped to match intrinsic definition.
2542     Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
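    // e.g. for avx512.kunpck.bw (NumElts == 16): bits [7:0] of the result come
    // from the second operand's low byte and bits [15:8] from the first's.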
2543     Rep = Builder.CreateBitCast(Rep, CI->getType());
2544   } else if (Name == "avx512.kand.w") {
2545     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2546     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2547     Rep = Builder.CreateAnd(LHS, RHS);
2548     Rep = Builder.CreateBitCast(Rep, CI->getType());
2549   } else if (Name == "avx512.kandn.w") {
2550     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2551     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2552     LHS = Builder.CreateNot(LHS);
2553     Rep = Builder.CreateAnd(LHS, RHS);
2554     Rep = Builder.CreateBitCast(Rep, CI->getType());
2555   } else if (Name == "avx512.kor.w") {
2556     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2557     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2558     Rep = Builder.CreateOr(LHS, RHS);
2559     Rep = Builder.CreateBitCast(Rep, CI->getType());
2560   } else if (Name == "avx512.kxor.w") {
2561     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2562     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2563     Rep = Builder.CreateXor(LHS, RHS);
2564     Rep = Builder.CreateBitCast(Rep, CI->getType());
2565   } else if (Name == "avx512.kxnor.w") {
2566     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2567     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2568     LHS = Builder.CreateNot(LHS);
2569     Rep = Builder.CreateXor(LHS, RHS);
2570     Rep = Builder.CreateBitCast(Rep, CI->getType());
2571   } else if (Name == "avx512.knot.w") {
2572     Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2573     Rep = Builder.CreateNot(Rep);
2574     Rep = Builder.CreateBitCast(Rep, CI->getType());
2575   } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2576     Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2577     Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2578     Rep = Builder.CreateOr(LHS, RHS);
2579     Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2580     Value *C;
2581     if (Name[14] == 'c')
2582       C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2583     else
2584       C = ConstantInt::getNullValue(Builder.getInt16Ty());
2585     Rep = Builder.CreateICmpEQ(Rep, C);
2586     Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2587   } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2588              Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2589              Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2590              Name == "sse.div.ss" || Name == "sse2.div.sd") {
2591     Type *I32Ty = Type::getInt32Ty(C);
2592     Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2593                                                ConstantInt::get(I32Ty, 0));
2594     Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2595                                                ConstantInt::get(I32Ty, 0));
2596     Value *EltOp;
2597     if (Name.contains(".add."))
2598       EltOp = Builder.CreateFAdd(Elt0, Elt1);
2599     else if (Name.contains(".sub."))
2600       EltOp = Builder.CreateFSub(Elt0, Elt1);
2601     else if (Name.contains(".mul."))
2602       EltOp = Builder.CreateFMul(Elt0, Elt1);
2603     else
2604       EltOp = Builder.CreateFDiv(Elt0, Elt1);
2605     Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2606                                       ConstantInt::get(I32Ty, 0));
2607   } else if (Name.starts_with("avx512.mask.pcmp")) {
2608     // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2609     bool CmpEq = Name[16] == 'e';
2610     Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2611   } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2612     Type *OpTy = CI->getArgOperand(0)->getType();
2613     unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2614     Intrinsic::ID IID;
2615     switch (VecWidth) {
2616     default:
2617       llvm_unreachable("Unexpected intrinsic");
2618     case 128:
2619       IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2620       break;
2621     case 256:
2622       IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2623       break;
2624     case 512:
2625       IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2626       break;
2627     }
2628 
2629     Rep = Builder.CreateIntrinsic(IID, {},
2630                                   {CI->getOperand(0), CI->getArgOperand(1)});
2631     Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2632   } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2633     Type *OpTy = CI->getArgOperand(0)->getType();
2634     unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2635     unsigned EltWidth = OpTy->getScalarSizeInBits();
2636     Intrinsic::ID IID;
2637     if (VecWidth == 128 && EltWidth == 32)
2638       IID = Intrinsic::x86_avx512_fpclass_ps_128;
2639     else if (VecWidth == 256 && EltWidth == 32)
2640       IID = Intrinsic::x86_avx512_fpclass_ps_256;
2641     else if (VecWidth == 512 && EltWidth == 32)
2642       IID = Intrinsic::x86_avx512_fpclass_ps_512;
2643     else if (VecWidth == 128 && EltWidth == 64)
2644       IID = Intrinsic::x86_avx512_fpclass_pd_128;
2645     else if (VecWidth == 256 && EltWidth == 64)
2646       IID = Intrinsic::x86_avx512_fpclass_pd_256;
2647     else if (VecWidth == 512 && EltWidth == 64)
2648       IID = Intrinsic::x86_avx512_fpclass_pd_512;
2649     else
2650       llvm_unreachable("Unexpected intrinsic");
2651 
2652     Rep = Builder.CreateIntrinsic(IID, {},
2653                                   {CI->getOperand(0), CI->getArgOperand(1)});
2654     Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2655   } else if (Name.starts_with("avx512.cmp.p")) {
2656     SmallVector<Value *, 4> Args(CI->args());
2657     Type *OpTy = Args[0]->getType();
2658     unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2659     unsigned EltWidth = OpTy->getScalarSizeInBits();
2660     Intrinsic::ID IID;
2661     if (VecWidth == 128 && EltWidth == 32)
2662       IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2663     else if (VecWidth == 256 && EltWidth == 32)
2664       IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2665     else if (VecWidth == 512 && EltWidth == 32)
2666       IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2667     else if (VecWidth == 128 && EltWidth == 64)
2668       IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2669     else if (VecWidth == 256 && EltWidth == 64)
2670       IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2671     else if (VecWidth == 512 && EltWidth == 64)
2672       IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2673     else
2674       llvm_unreachable("Unexpected intrinsic");
2675 
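    // The 512-bit mask.cmp intrinsics keep their trailing SAE operand last, so
    // the all-ones mask is swapped in before it; for 128/256 bits the mask is
    // simply appended.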
2676     Value *Mask = Constant::getAllOnesValue(CI->getType());
2677     if (VecWidth == 512)
2678       std::swap(Mask, Args.back());
2679     Args.push_back(Mask);
2680 
2681     Rep = Builder.CreateIntrinsic(IID, {}, Args);
2682   } else if (Name.starts_with("avx512.mask.cmp.")) {
2683     // Integer compare intrinsics.
2684     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2685     Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2686   } else if (Name.starts_with("avx512.mask.ucmp.")) {
2687     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2688     Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2689   } else if (Name.starts_with("avx512.cvtb2mask.") ||
2690              Name.starts_with("avx512.cvtw2mask.") ||
2691              Name.starts_with("avx512.cvtd2mask.") ||
2692              Name.starts_with("avx512.cvtq2mask.")) {
2693     Value *Op = CI->getArgOperand(0);
2694     Value *Zero = llvm::Constant::getNullValue(Op->getType());
2695     Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2696     Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2697   } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2698              Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2699              Name.starts_with("avx512.mask.pabs")) {
2700     Rep = upgradeAbs(Builder, *CI);
2701   } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2702              Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2703              Name.starts_with("avx512.mask.pmaxs")) {
2704     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2705   } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2706              Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2707              Name.starts_with("avx512.mask.pmaxu")) {
2708     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2709   } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2710              Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2711              Name.starts_with("avx512.mask.pmins")) {
2712     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2713   } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2714              Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2715              Name.starts_with("avx512.mask.pminu")) {
2716     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2717   } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2718              Name == "avx512.pmulu.dq.512" ||
2719              Name.starts_with("avx512.mask.pmulu.dq.")) {
2720     Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2721   } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2722              Name == "avx512.pmul.dq.512" ||
2723              Name.starts_with("avx512.mask.pmul.dq.")) {
2724     Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
2725   } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2726              Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2727     Rep =
2728         Builder.CreateSIToFP(CI->getArgOperand(1),
2729                              cast<VectorType>(CI->getType())->getElementType());
2730     Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2731   } else if (Name == "avx512.cvtusi2sd") {
2732     Rep =
2733         Builder.CreateUIToFP(CI->getArgOperand(1),
2734                              cast<VectorType>(CI->getType())->getElementType());
2735     Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2736   } else if (Name == "sse2.cvtss2sd") {
2737     Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2738     Rep = Builder.CreateFPExt(
2739         Rep, cast<VectorType>(CI->getType())->getElementType());
2740     Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2741   } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2742              Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2743              Name.starts_with("avx512.mask.cvtdq2pd.") ||
2744              Name.starts_with("avx512.mask.cvtudq2pd.") ||
2745              Name.starts_with("avx512.mask.cvtdq2ps.") ||
2746              Name.starts_with("avx512.mask.cvtudq2ps.") ||
2747              Name.starts_with("avx512.mask.cvtqq2pd.") ||
2748              Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2749              Name == "avx512.mask.cvtqq2ps.256" ||
2750              Name == "avx512.mask.cvtqq2ps.512" ||
2751              Name == "avx512.mask.cvtuqq2ps.256" ||
2752              Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
2753              Name == "avx.cvt.ps2.pd.256" ||
2754              Name == "avx512.mask.cvtps2pd.128" ||
2755              Name == "avx512.mask.cvtps2pd.256") {
2756     auto *DstTy = cast<FixedVectorType>(CI->getType());
2757     Rep = CI->getArgOperand(0);
2758     auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2759 
2760     unsigned NumDstElts = DstTy->getNumElements();
2761     if (NumDstElts < SrcTy->getNumElements()) {
2762       assert(NumDstElts == 2 && "Unexpected vector size");
2763       Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2764     }
2765 
2766     bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2767     bool IsUnsigned = Name.contains("cvtu");
2768     if (IsPS2PD)
2769       Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2770     else if (CI->arg_size() == 4 &&
2771              (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2772               cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2773       Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2774                                      : Intrinsic::x86_avx512_sitofp_round;
2775       Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
2776                                     {Rep, CI->getArgOperand(3)});
2777     } else {
2778       Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2779                        : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2780     }
2781 
2782     if (CI->arg_size() >= 3)
2783       Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2784                           CI->getArgOperand(1));
2785   } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2786              Name.starts_with("vcvtph2ps.")) {
2787     auto *DstTy = cast<FixedVectorType>(CI->getType());
2788     Rep = CI->getArgOperand(0);
2789     auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2790     unsigned NumDstElts = DstTy->getNumElements();
2791     if (NumDstElts != SrcTy->getNumElements()) {
2792       assert(NumDstElts == 4 && "Unexpected vector size");
2793       Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2794     }
2795     Rep = Builder.CreateBitCast(
2796         Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2797     Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2798     if (CI->arg_size() >= 3)
2799       Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2800                           CI->getArgOperand(1));
2801   } else if (Name.starts_with("avx512.mask.load")) {
2802     // "avx512.mask.loadu." or "avx512.mask.load."
2803     bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2804     Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2805                             CI->getArgOperand(2), Aligned);
2806   } else if (Name.starts_with("avx512.mask.expand.load.")) {
2807     auto *ResultTy = cast<FixedVectorType>(CI->getType());
2808     Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2809                                    ResultTy->getNumElements());
2810 
2811     Rep = Builder.CreateIntrinsic(
2812         Intrinsic::masked_expandload, ResultTy,
2813         {CI->getOperand(0), MaskVec, CI->getOperand(1)});
2814   } else if (Name.starts_with("avx512.mask.compress.store.")) {
2815     auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2816     Value *MaskVec =
2817         getX86MaskVec(Builder, CI->getArgOperand(2),
2818                       cast<FixedVectorType>(ResultTy)->getNumElements());
2819 
2820     Rep = Builder.CreateIntrinsic(
2821         Intrinsic::masked_compressstore, ResultTy,
2822         {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
2823   } else if (Name.starts_with("avx512.mask.compress.") ||
2824              Name.starts_with("avx512.mask.expand.")) {
2825     auto *ResultTy = cast<FixedVectorType>(CI->getType());
2826 
2827     Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2828                                    ResultTy->getNumElements());
2829 
2830     bool IsCompress = Name[12] == 'c';
2831     Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2832                                    : Intrinsic::x86_avx512_mask_expand;
2833     Rep = Builder.CreateIntrinsic(
2834         IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
2835   } else if (Name.starts_with("xop.vpcom")) {
2836     bool IsSigned;
2837     if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
2838         Name.ends_with("uq"))
2839       IsSigned = false;
2840     else if (Name.ends_with("b") || Name.ends_with("w") ||
2841              Name.ends_with("d") || Name.ends_with("q"))
2842       IsSigned = true;
2843     else
2844       llvm_unreachable("Unknown suffix");
2845 
2846     unsigned Imm;
2847     if (CI->arg_size() == 3) {
2848       Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2849     } else {
2850       Name = Name.substr(9); // strip off "xop.vpcom"
2851       if (Name.starts_with("lt"))
2852         Imm = 0;
2853       else if (Name.starts_with("le"))
2854         Imm = 1;
2855       else if (Name.starts_with("gt"))
2856         Imm = 2;
2857       else if (Name.starts_with("ge"))
2858         Imm = 3;
2859       else if (Name.starts_with("eq"))
2860         Imm = 4;
2861       else if (Name.starts_with("ne"))
2862         Imm = 5;
2863       else if (Name.starts_with("false"))
2864         Imm = 6;
2865       else if (Name.starts_with("true"))
2866         Imm = 7;
2867       else
2868         llvm_unreachable("Unknown condition");
2869     }
2870 
2871     Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2872   } else if (Name.starts_with("xop.vpcmov")) {
2873     Value *Sel = CI->getArgOperand(2);
2874     Value *NotSel = Builder.CreateNot(Sel);
2875     Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2876     Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2877     Rep = Builder.CreateOr(Sel0, Sel1);
2878   } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
2879              Name.starts_with("avx512.mask.prol")) {
2880     Rep = upgradeX86Rotate(Builder, *CI, false);
2881   } else if (Name.starts_with("avx512.pror") ||
2882              Name.starts_with("avx512.mask.pror")) {
2883     Rep = upgradeX86Rotate(Builder, *CI, true);
2884   } else if (Name.starts_with("avx512.vpshld.") ||
2885              Name.starts_with("avx512.mask.vpshld") ||
2886              Name.starts_with("avx512.maskz.vpshld")) {
2887     bool ZeroMask = Name[11] == 'z';
2888     Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2889   } else if (Name.starts_with("avx512.vpshrd.") ||
2890              Name.starts_with("avx512.mask.vpshrd") ||
2891              Name.starts_with("avx512.maskz.vpshrd")) {
2892     bool ZeroMask = Name[11] == 'z';
2893     Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2894   } else if (Name == "sse42.crc32.64.8") {
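    // The CRC of an 8-bit value depends only on the low 32 bits of the
    // accumulator, so the 64-bit form can be emulated with crc32.32.8 plus a
    // zero extension.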
2895     Value *Trunc0 =
2896         Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2897     Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8, {},
2898                                   {Trunc0, CI->getArgOperand(1)});
2899     Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2900   } else if (Name.starts_with("avx.vbroadcast.s") ||
2901              Name.starts_with("avx512.vbroadcast.s")) {
2902     // Replace broadcasts with a series of insertelements.
2903     auto *VecTy = cast<FixedVectorType>(CI->getType());
2904     Type *EltTy = VecTy->getElementType();
2905     unsigned EltNum = VecTy->getNumElements();
2906     Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
2907     Type *I32Ty = Type::getInt32Ty(C);
2908     Rep = PoisonValue::get(VecTy);
2909     for (unsigned I = 0; I < EltNum; ++I)
2910       Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
2911   } else if (Name.starts_with("sse41.pmovsx") ||
2912              Name.starts_with("sse41.pmovzx") ||
2913              Name.starts_with("avx2.pmovsx") ||
2914              Name.starts_with("avx2.pmovzx") ||
2915              Name.starts_with("avx512.mask.pmovsx") ||
2916              Name.starts_with("avx512.mask.pmovzx")) {
2917     auto *DstTy = cast<FixedVectorType>(CI->getType());
2918     unsigned NumDstElts = DstTy->getNumElements();
2919 
2920     // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
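    // e.g. sse41.pmovsxbw takes the low 8 bytes of the <16 x i8> source and
    // sign extends them to <8 x i16>.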
2921     SmallVector<int, 8> ShuffleMask(NumDstElts);
2922     for (unsigned i = 0; i != NumDstElts; ++i)
2923       ShuffleMask[i] = i;
2924 
2925     Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2926 
2927     bool DoSext = Name.contains("pmovsx");
2928     Rep =
2929         DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
2930     // If there are 3 arguments, it's a masked intrinsic so we need a select.
2931     if (CI->arg_size() == 3)
2932       Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2933                           CI->getArgOperand(1));
2934   } else if (Name == "avx512.mask.pmov.qd.256" ||
2935              Name == "avx512.mask.pmov.qd.512" ||
2936              Name == "avx512.mask.pmov.wb.256" ||
2937              Name == "avx512.mask.pmov.wb.512") {
2938     Type *Ty = CI->getArgOperand(1)->getType();
2939     Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2940     Rep =
2941         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2942   } else if (Name.starts_with("avx.vbroadcastf128") ||
2943              Name == "avx2.vbroadcasti128") {
2944     // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2945     Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2946     unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2947     auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2948     Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
2949     if (NumSrcElts == 2)
2950       Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2951     else
2952       Rep = Builder.CreateShuffleVector(Load,
2953                                         ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2954   } else if (Name.starts_with("avx512.mask.shuf.i") ||
2955              Name.starts_with("avx512.mask.shuf.f")) {
2956     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2957     Type *VT = CI->getType();
2958     unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2959     unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2960     unsigned ControlBitsMask = NumLanes - 1;
2961     unsigned NumControlBits = NumLanes / 2;
2962     SmallVector<int, 8> ShuffleMask;
2963 
2964     for (unsigned l = 0; l != NumLanes; ++l) {
2965       unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2966       // We actually need the other source.
2967       if (l >= NumLanes / 2)
2968         LaneMask += NumLanes;
2969       for (unsigned i = 0; i != NumElementsInLane; ++i)
2970         ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2971     }
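    // e.g. shuf.i32x4 with Imm == 0xE4 selects lanes 0 and 1 from the first
    // source and lanes 2 and 3 from the second.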
2972     Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2973                                       CI->getArgOperand(1), ShuffleMask);
2974     Rep =
2975         emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
2976   } else if (Name.starts_with("avx512.mask.broadcastf") ||
2977              Name.starts_with("avx512.mask.broadcasti")) {
2978     unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2979                               ->getNumElements();
2980     unsigned NumDstElts =
2981         cast<FixedVectorType>(CI->getType())->getNumElements();
2982 
2983     SmallVector<int, 8> ShuffleMask(NumDstElts);
2984     for (unsigned i = 0; i != NumDstElts; ++i)
2985       ShuffleMask[i] = i % NumSrcElts;
2986 
2987     Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2988                                       CI->getArgOperand(0), ShuffleMask);
2989     Rep =
2990         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2991   } else if (Name.starts_with("avx2.pbroadcast") ||
2992              Name.starts_with("avx2.vbroadcast") ||
2993              Name.starts_with("avx512.pbroadcast") ||
2994              Name.starts_with("avx512.mask.broadcast.s")) {
2995     // Replace vp?broadcasts with a vector shuffle.
2996     Value *Op = CI->getArgOperand(0);
2997     ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2998     Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2999     SmallVector<int, 8> M;
3000     ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3001     Rep = Builder.CreateShuffleVector(Op, M);
3002 
3003     if (CI->arg_size() == 3)
3004       Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3005                           CI->getArgOperand(1));
3006   } else if (Name.starts_with("sse2.padds.") ||
3007              Name.starts_with("avx2.padds.") ||
3008              Name.starts_with("avx512.padds.") ||
3009              Name.starts_with("avx512.mask.padds.")) {
3010     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3011   } else if (Name.starts_with("sse2.psubs.") ||
3012              Name.starts_with("avx2.psubs.") ||
3013              Name.starts_with("avx512.psubs.") ||
3014              Name.starts_with("avx512.mask.psubs.")) {
3015     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3016   } else if (Name.starts_with("sse2.paddus.") ||
3017              Name.starts_with("avx2.paddus.") ||
3018              Name.starts_with("avx512.mask.paddus.")) {
3019     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3020   } else if (Name.starts_with("sse2.psubus.") ||
3021              Name.starts_with("avx2.psubus.") ||
3022              Name.starts_with("avx512.mask.psubus.")) {
3023     Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3024   } else if (Name.starts_with("avx512.mask.palignr.")) {
3025     Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3026                                     CI->getArgOperand(1), CI->getArgOperand(2),
3027                                     CI->getArgOperand(3), CI->getArgOperand(4),
3028                                     false);
3029   } else if (Name.starts_with("avx512.mask.valign.")) {
3030     Rep = upgradeX86ALIGNIntrinsics(
3031         Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3032         CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3033   } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3034     // 128/256-bit shift left specified in bits.
3035     unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3036     Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3037                                      Shift / 8); // Shift is in bits.
3038   } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3039     // 128/256-bit shift right specified in bits.
3040     unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3041     Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3042                                      Shift / 8); // Shift is in bits.
3043   } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3044              Name == "avx512.psll.dq.512") {
3045     // 128/256/512-bit shift left specified in bytes.
3046     unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3047     Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3048   } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3049              Name == "avx512.psrl.dq.512") {
3050     // 128/256/512-bit shift right specified in bytes.
3051     unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3052     Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3053   } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3054              Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3055              Name.starts_with("avx2.pblendd.")) {
3056     Value *Op0 = CI->getArgOperand(0);
3057     Value *Op1 = CI->getArgOperand(1);
3058     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3059     auto *VecTy = cast<FixedVectorType>(CI->getType());
3060     unsigned NumElts = VecTy->getNumElements();
3061 
3062     SmallVector<int, 16> Idxs(NumElts);
3063     for (unsigned i = 0; i != NumElts; ++i)
3064       Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
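    // e.g. sse41.pblendw with Imm == 0xF0 yields mask <0, 1, 2, 3, 12, 13, 14,
    // 15>: the low four elements from Op0, the high four from Op1.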
3065 
3066     Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3067   } else if (Name.starts_with("avx.vinsertf128.") ||
3068              Name == "avx2.vinserti128" ||
3069              Name.starts_with("avx512.mask.insert")) {
3070     Value *Op0 = CI->getArgOperand(0);
3071     Value *Op1 = CI->getArgOperand(1);
3072     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3073     unsigned DstNumElts =
3074         cast<FixedVectorType>(CI->getType())->getNumElements();
3075     unsigned SrcNumElts =
3076         cast<FixedVectorType>(Op1->getType())->getNumElements();
3077     unsigned Scale = DstNumElts / SrcNumElts;
3078 
3079     // Mask off the high bits of the immediate value; hardware ignores those.
3080     Imm = Imm % Scale;
3081 
3082     // Extend the second operand into a vector the size of the destination.
3083     SmallVector<int, 8> Idxs(DstNumElts);
3084     for (unsigned i = 0; i != SrcNumElts; ++i)
3085       Idxs[i] = i;
3086     for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3087       Idxs[i] = SrcNumElts;
3088     Rep = Builder.CreateShuffleVector(Op1, Idxs);
3089 
3090     // Insert the second operand into the first operand.
3091 
3092     // Note that there is no guarantee that instruction lowering will actually
3093     // produce a vinsertf128 instruction for the created shuffles. In
3094     // particular, the 0 immediate case involves no lane changes, so it can
3095     // be handled as a blend.
3096 
3097     // Example of shuffle mask for 32-bit elements:
3098     // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
3099     // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >
3100 
3101     // First fill with identity mask.
3102     for (unsigned i = 0; i != DstNumElts; ++i)
3103       Idxs[i] = i;
3104     // Then replace the elements where we need to insert.
3105     for (unsigned i = 0; i != SrcNumElts; ++i)
3106       Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3107     Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3108 
3109     // If the intrinsic has a mask operand, handle that.
3110     if (CI->arg_size() == 5)
3111       Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3112                           CI->getArgOperand(3));
3113   } else if (Name.starts_with("avx.vextractf128.") ||
3114              Name == "avx2.vextracti128" ||
3115              Name.starts_with("avx512.mask.vextract")) {
3116     Value *Op0 = CI->getArgOperand(0);
3117     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3118     unsigned DstNumElts =
3119         cast<FixedVectorType>(CI->getType())->getNumElements();
3120     unsigned SrcNumElts =
3121         cast<FixedVectorType>(Op0->getType())->getNumElements();
3122     unsigned Scale = SrcNumElts / DstNumElts;
3123 
3124     // Mask off the high bits of the immediate value; hardware ignores those.
3125     Imm = Imm % Scale;
3126 
3127     // Get indexes for the subvector of the input vector.
3128     SmallVector<int, 8> Idxs(DstNumElts);
3129     for (unsigned i = 0; i != DstNumElts; ++i) {
3130       Idxs[i] = i + (Imm * DstNumElts);
3131     }
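    // e.g. extracting <4 x float> from <8 x float> with Imm == 1 gives
    // mask <4, 5, 6, 7>.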
3132     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3133 
3134     // If the intrinsic has a mask operand, handle that.
3135     if (CI->arg_size() == 4)
3136       Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3137                           CI->getArgOperand(2));
3138   } else if (Name.starts_with("avx512.mask.perm.df.") ||
3139              Name.starts_with("avx512.mask.perm.di.")) {
3140     Value *Op0 = CI->getArgOperand(0);
3141     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3142     auto *VecTy = cast<FixedVectorType>(CI->getType());
3143     unsigned NumElts = VecTy->getNumElements();
3144 
3145     SmallVector<int, 8> Idxs(NumElts);
3146     for (unsigned i = 0; i != NumElts; ++i)
3147       Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
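    // e.g. Imm == 0x1B (0b00011011) reverses each group of four elements:
    // mask <3, 2, 1, 0, 7, 6, 5, 4>.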
3148 
3149     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3150 
3151     if (CI->arg_size() == 4)
3152       Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3153                           CI->getArgOperand(2));
3154   } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3155     // The immediate permute control byte looks like this:
3156     //    [1:0] - select 128 bits from sources for low half of destination
3157     //    [2]   - ignore
3158     //    [3]   - zero low half of destination
3159     //    [5:4] - select 128 bits from sources for high half of destination
3160     //    [6]   - ignore
3161     //    [7]   - zero high half of destination
3162 
3163     uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3164 
3165     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3166     unsigned HalfSize = NumElts / 2;
3167     SmallVector<int, 8> ShuffleMask(NumElts);
3168 
3169     // Determine which operand(s) are actually in use for this instruction.
3170     Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3171     Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3172 
3173     // If needed, replace operands based on zero mask.
3174     V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3175     V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3176 
3177     // Permute low half of result.
3178     unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3179     for (unsigned i = 0; i < HalfSize; ++i)
3180       ShuffleMask[i] = StartIndex + i;
3181 
3182     // Permute high half of result.
3183     StartIndex = (Imm & 0x10) ? HalfSize : 0;
3184     for (unsigned i = 0; i < HalfSize; ++i)
3185       ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
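    // e.g. for two <8 x float> sources, Imm == 0x31 produces
    // mask <4, 5, 6, 7, 12, 13, 14, 15>: the high lane of each source.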
3186 
3187     Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3188 
3189   } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3190              Name.starts_with("avx512.mask.vpermil.p") ||
3191              Name.starts_with("avx512.mask.pshuf.d.")) {
3192     Value *Op0 = CI->getArgOperand(0);
3193     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3194     auto *VecTy = cast<FixedVectorType>(CI->getType());
3195     unsigned NumElts = VecTy->getNumElements();
3196     // Calculate the size of each index in the immediate.
3197     unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3198     unsigned IdxMask = ((1 << IdxSize) - 1);
3199 
3200     SmallVector<int, 8> Idxs(NumElts);
3201     // Look up the bits for this element, wrapping around the immediate every
3202     // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3203     // to offset by the first index of each group.
3204     for (unsigned i = 0; i != NumElts; ++i)
3205       Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3206 
3207     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3208 
3209     if (CI->arg_size() == 4)
3210       Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3211                           CI->getArgOperand(2));
3212   } else if (Name == "sse2.pshufl.w" ||
3213              Name.starts_with("avx512.mask.pshufl.w.")) {
3214     Value *Op0 = CI->getArgOperand(0);
3215     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3216     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3217 
3218     SmallVector<int, 16> Idxs(NumElts);
3219     for (unsigned l = 0; l != NumElts; l += 8) {
3220       for (unsigned i = 0; i != 4; ++i)
3221         Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3222       for (unsigned i = 4; i != 8; ++i)
3223         Idxs[i + l] = i + l;
3224     }
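    // e.g. sse2.pshufl.w with Imm == 0xB1 swaps adjacent pairs in the low half
    // of each lane: mask <1, 0, 3, 2, 4, 5, 6, 7>.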
3225 
3226     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3227 
3228     if (CI->arg_size() == 4)
3229       Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3230                           CI->getArgOperand(2));
3231   } else if (Name == "sse2.pshufh.w" ||
3232              Name.starts_with("avx512.mask.pshufh.w.")) {
3233     Value *Op0 = CI->getArgOperand(0);
3234     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3235     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3236 
3237     SmallVector<int, 16> Idxs(NumElts);
3238     for (unsigned l = 0; l != NumElts; l += 8) {
3239       for (unsigned i = 0; i != 4; ++i)
3240         Idxs[i + l] = i + l;
3241       for (unsigned i = 0; i != 4; ++i)
3242         Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3243     }
3244 
3245     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3246 
3247     if (CI->arg_size() == 4)
3248       Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3249                           CI->getArgOperand(2));
3250   } else if (Name.starts_with("avx512.mask.shuf.p")) {
3251     Value *Op0 = CI->getArgOperand(0);
3252     Value *Op1 = CI->getArgOperand(1);
3253     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3254     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3255 
3256     unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3257     unsigned HalfLaneElts = NumLaneElts / 2;
3258 
3259     SmallVector<int, 16> Idxs(NumElts);
3260     for (unsigned i = 0; i != NumElts; ++i) {
3261       // Base index is the starting element of the lane.
3262       Idxs[i] = i - (i % NumLaneElts);
3263       // If we are halfway through the lane, switch to the other source.
3264       if ((i % NumLaneElts) >= HalfLaneElts)
3265         Idxs[i] += NumElts;
3266       // Now select the specific element by adding HalfLaneElts bits from
3267       // the immediate, wrapping around the immediate every 8 bits.
3268       Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3269     }
3270 
3271     Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3272 
3273     Rep =
3274         emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3275   } else if (Name.starts_with("avx512.mask.movddup") ||
3276              Name.starts_with("avx512.mask.movshdup") ||
3277              Name.starts_with("avx512.mask.movsldup")) {
3278     Value *Op0 = CI->getArgOperand(0);
3279     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3280     unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3281 
3282     unsigned Offset = 0;
3283     if (Name.starts_with("avx512.mask.movshdup."))
3284       Offset = 1;
3285 
3286     SmallVector<int, 16> Idxs(NumElts);
3287     for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3288       for (unsigned i = 0; i != NumLaneElts; i += 2) {
3289         Idxs[i + l + 0] = i + l + Offset;
3290         Idxs[i + l + 1] = i + l + Offset;
3291       }
3292 
3293     Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3294 
3295     Rep =
3296         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3297   } else if (Name.starts_with("avx512.mask.punpckl") ||
3298              Name.starts_with("avx512.mask.unpckl.")) {
3299     Value *Op0 = CI->getArgOperand(0);
3300     Value *Op1 = CI->getArgOperand(1);
3301     int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3302     int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3303 
3304     SmallVector<int, 64> Idxs(NumElts);
3305     for (int l = 0; l != NumElts; l += NumLaneElts)
3306       for (int i = 0; i != NumLaneElts; ++i)
3307         Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
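    // e.g. for a <4 x i32> unpack-low, the mask is <0, 4, 1, 5>.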
3308 
3309     Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3310 
3311     Rep =
3312         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3313   } else if (Name.starts_with("avx512.mask.punpckh") ||
3314              Name.starts_with("avx512.mask.unpckh.")) {
3315     Value *Op0 = CI->getArgOperand(0);
3316     Value *Op1 = CI->getArgOperand(1);
3317     int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3318     int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3319 
3320     SmallVector<int, 64> Idxs(NumElts);
3321     for (int l = 0; l != NumElts; l += NumLaneElts)
3322       for (int i = 0; i != NumLaneElts; ++i)
3323         Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
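    // e.g. for a <4 x i32> unpack-high, the mask is <2, 6, 3, 7>.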
3324 
3325     Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3326 
3327     Rep =
3328         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3329   } else if (Name.starts_with("avx512.mask.and.") ||
3330              Name.starts_with("avx512.mask.pand.")) {
3331     VectorType *FTy = cast<VectorType>(CI->getType());
3332     VectorType *ITy = VectorType::getInteger(FTy);
3333     Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3334                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3335     Rep = Builder.CreateBitCast(Rep, FTy);
3336     Rep =
3337         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3338   } else if (Name.starts_with("avx512.mask.andn.") ||
3339              Name.starts_with("avx512.mask.pandn.")) {
3340     VectorType *FTy = cast<VectorType>(CI->getType());
3341     VectorType *ITy = VectorType::getInteger(FTy);
3342     Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3343     Rep = Builder.CreateAnd(Rep,
3344                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3345     Rep = Builder.CreateBitCast(Rep, FTy);
3346     Rep =
3347         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3348   } else if (Name.starts_with("avx512.mask.or.") ||
3349              Name.starts_with("avx512.mask.por.")) {
3350     VectorType *FTy = cast<VectorType>(CI->getType());
3351     VectorType *ITy = VectorType::getInteger(FTy);
3352     Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3353                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3354     Rep = Builder.CreateBitCast(Rep, FTy);
3355     Rep =
3356         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3357   } else if (Name.starts_with("avx512.mask.xor.") ||
3358              Name.starts_with("avx512.mask.pxor.")) {
3359     VectorType *FTy = cast<VectorType>(CI->getType());
3360     VectorType *ITy = VectorType::getInteger(FTy);
3361     Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3362                             Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3363     Rep = Builder.CreateBitCast(Rep, FTy);
3364     Rep =
3365         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3366   } else if (Name.starts_with("avx512.mask.padd.")) {
3367     Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3368     Rep =
3369         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3370   } else if (Name.starts_with("avx512.mask.psub.")) {
3371     Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3372     Rep =
3373         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3374   } else if (Name.starts_with("avx512.mask.pmull.")) {
3375     Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3376     Rep =
3377         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3378   } else if (Name.starts_with("avx512.mask.add.p")) {
3379     if (Name.ends_with(".512")) {
3380       Intrinsic::ID IID;
3381       if (Name[17] == 's')
3382         IID = Intrinsic::x86_avx512_add_ps_512;
3383       else
3384         IID = Intrinsic::x86_avx512_add_pd_512;
3385 
3386       Rep = Builder.CreateIntrinsic(
3387           IID, {},
3388           {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3389     } else {
3390       Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3391     }
3392     Rep =
3393         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3394   } else if (Name.starts_with("avx512.mask.div.p")) {
3395     if (Name.ends_with(".512")) {
3396       Intrinsic::ID IID;
3397       if (Name[17] == 's')
3398         IID = Intrinsic::x86_avx512_div_ps_512;
3399       else
3400         IID = Intrinsic::x86_avx512_div_pd_512;
3401 
3402       Rep = Builder.CreateIntrinsic(
3403           IID, {},
3404           {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3405     } else {
3406       Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3407     }
3408     Rep =
3409         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3410   } else if (Name.starts_with("avx512.mask.mul.p")) {
3411     if (Name.ends_with(".512")) {
3412       Intrinsic::ID IID;
3413       if (Name[17] == 's')
3414         IID = Intrinsic::x86_avx512_mul_ps_512;
3415       else
3416         IID = Intrinsic::x86_avx512_mul_pd_512;
3417 
3418       Rep = Builder.CreateIntrinsic(
3419           IID, {},
3420           {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3421     } else {
3422       Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3423     }
3424     Rep =
3425         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3426   } else if (Name.starts_with("avx512.mask.sub.p")) {
3427     if (Name.ends_with(".512")) {
3428       Intrinsic::ID IID;
3429       if (Name[17] == 's')
3430         IID = Intrinsic::x86_avx512_sub_ps_512;
3431       else
3432         IID = Intrinsic::x86_avx512_sub_pd_512;
3433 
3434       Rep = Builder.CreateIntrinsic(
3435           IID, {},
3436           {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3437     } else {
3438       Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3439     }
3440     Rep =
3441         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3442   } else if ((Name.starts_with("avx512.mask.max.p") ||
3443               Name.starts_with("avx512.mask.min.p")) &&
3444              Name.drop_front(18) == ".512") {
3445     bool IsDouble = Name[17] == 'd';
3446     bool IsMin = Name[13] == 'i';
3447     static const Intrinsic::ID MinMaxTbl[2][2] = {
3448         {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3449         {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3450     Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3451 
3452     Rep = Builder.CreateIntrinsic(
3453         IID, {},
3454         {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3455     Rep =
3456         emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3457   } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3458     Rep =
3459         Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3460                                 {CI->getArgOperand(0), Builder.getInt1(false)});
3461     Rep =
3462         emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3463   } else if (Name.starts_with("avx512.mask.psll")) {
3464     bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3465     bool IsVariable = Name[16] == 'v';
3466     char Size = Name[16] == '.'   ? Name[17]
3467                 : Name[17] == '.' ? Name[18]
3468                 : Name[18] == '.' ? Name[19]
3469                                   : Name[20];
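    // Name decoding examples:
    //   "avx512.mask.psll.d.128"  -> Name[16] == '.', Size == 'd'
    //   "avx512.mask.psll.di.512" -> Name[18] == 'i' (immediate form)
    //   "avx512.mask.psllv.d.512" -> Name[16] == 'v' (variable form)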
3470 
3471     Intrinsic::ID IID;
3472     if (IsVariable && Name[17] != '.') {
3473       if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3474         IID = Intrinsic::x86_avx2_psllv_q;
3475       else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3476         IID = Intrinsic::x86_avx2_psllv_q_256;
3477       else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3478         IID = Intrinsic::x86_avx2_psllv_d;
3479       else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3480         IID = Intrinsic::x86_avx2_psllv_d_256;
3481       else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3482         IID = Intrinsic::x86_avx512_psllv_w_128;
3483       else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3484         IID = Intrinsic::x86_avx512_psllv_w_256;
3485       else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3486         IID = Intrinsic::x86_avx512_psllv_w_512;
3487       else
3488         llvm_unreachable("Unexpected size");
3489     } else if (Name.ends_with(".128")) {
3490       if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3491         IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3492                           : Intrinsic::x86_sse2_psll_d;
3493       else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3494         IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3495                           : Intrinsic::x86_sse2_psll_q;
3496       else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3497         IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3498                           : Intrinsic::x86_sse2_psll_w;
3499       else
3500         llvm_unreachable("Unexpected size");
3501     } else if (Name.ends_with(".256")) {
3502       if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3503         IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3504                           : Intrinsic::x86_avx2_psll_d;
3505       else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3506         IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3507                           : Intrinsic::x86_avx2_psll_q;
3508       else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3509         IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3510                           : Intrinsic::x86_avx2_psll_w;
3511       else
3512         llvm_unreachable("Unexpected size");
3513     } else {
3514       if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3515         IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_d_512
3516               : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3517                            : Intrinsic::x86_avx512_psll_d_512;
3518       else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3519         IID = IsImmediate  ? Intrinsic::x86_avx512_pslli_q_512
3520               : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3521                            : Intrinsic::x86_avx512_psll_q_512;
3522       else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3523         IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3524                           : Intrinsic::x86_avx512_psll_w_512;
3525       else
3526         llvm_unreachable("Unexpected size");
3527     }
3528 
3529     Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3530   } else if (Name.starts_with("avx512.mask.psrl")) {
3531     bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3532     bool IsVariable = Name[16] == 'v';
3533     char Size = Name[16] == '.'   ? Name[17]
3534                 : Name[17] == '.' ? Name[18]
3535                 : Name[18] == '.' ? Name[19]
3536                                   : Name[20];
3537 
3538     Intrinsic::ID IID;
3539     if (IsVariable && Name[17] != '.') {
3540       if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3541         IID = Intrinsic::x86_avx2_psrlv_q;
3542       else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3543         IID = Intrinsic::x86_avx2_psrlv_q_256;
3544       else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3545         IID = Intrinsic::x86_avx2_psrlv_d;
3546       else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3547         IID = Intrinsic::x86_avx2_psrlv_d_256;
3548       else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3549         IID = Intrinsic::x86_avx512_psrlv_w_128;
3550       else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3551         IID = Intrinsic::x86_avx512_psrlv_w_256;
3552       else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3553         IID = Intrinsic::x86_avx512_psrlv_w_512;
3554       else
3555         llvm_unreachable("Unexpected size");
3556     } else if (Name.ends_with(".128")) {
3557       if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3558         IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3559                           : Intrinsic::x86_sse2_psrl_d;
3560       else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3561         IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3562                           : Intrinsic::x86_sse2_psrl_q;
3563       else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3564         IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3565                           : Intrinsic::x86_sse2_psrl_w;
3566       else
3567         llvm_unreachable("Unexpected size");
3568     } else if (Name.ends_with(".256")) {
3569       if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3570         IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3571                           : Intrinsic::x86_avx2_psrl_d;
3572       else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3573         IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3574                           : Intrinsic::x86_avx2_psrl_q;
3575       else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3576         IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3577                           : Intrinsic::x86_avx2_psrl_w;
3578       else
3579         llvm_unreachable("Unexpected size");
3580     } else {
3581       if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3582         IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_d_512
3583               : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3584                            : Intrinsic::x86_avx512_psrl_d_512;
3585       else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3586         IID = IsImmediate  ? Intrinsic::x86_avx512_psrli_q_512
3587               : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3588                            : Intrinsic::x86_avx512_psrl_q_512;
3589       else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3590         IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3591                           : Intrinsic::x86_avx512_psrl_w_512;
3592       else
3593         llvm_unreachable("Unexpected size");
3594     }
3595 
3596     Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3597   } else if (Name.starts_with("avx512.mask.psra")) {
3598     bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3599     bool IsVariable = Name[16] == 'v';
3600     char Size = Name[16] == '.'   ? Name[17]
3601                 : Name[17] == '.' ? Name[18]
3602                 : Name[18] == '.' ? Name[19]
3603                                   : Name[20];
3604 
3605     Intrinsic::ID IID;
3606     if (IsVariable && Name[17] != '.') {
3607       if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3608         IID = Intrinsic::x86_avx2_psrav_d;
3609       else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3610         IID = Intrinsic::x86_avx2_psrav_d_256;
3611       else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3612         IID = Intrinsic::x86_avx512_psrav_w_128;
3613       else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3614         IID = Intrinsic::x86_avx512_psrav_w_256;
3615       else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3616         IID = Intrinsic::x86_avx512_psrav_w_512;
3617       else
3618         llvm_unreachable("Unexpected size");
3619     } else if (Name.ends_with(".128")) {
3620       if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3621         IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3622                           : Intrinsic::x86_sse2_psra_d;
3623       else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3624         IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_128
3625               : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3626                            : Intrinsic::x86_avx512_psra_q_128;
3627       else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3628         IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3629                           : Intrinsic::x86_sse2_psra_w;
3630       else
3631         llvm_unreachable("Unexpected size");
3632     } else if (Name.ends_with(".256")) {
3633       if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3634         IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3635                           : Intrinsic::x86_avx2_psra_d;
3636       else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3637         IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_256
3638               : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3639                            : Intrinsic::x86_avx512_psra_q_256;
3640       else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3641         IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3642                           : Intrinsic::x86_avx2_psra_w;
3643       else
3644         llvm_unreachable("Unexpected size");
3645     } else {
3646       if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3647         IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_d_512
3648               : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3649                            : Intrinsic::x86_avx512_psra_d_512;
3650       else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3651         IID = IsImmediate  ? Intrinsic::x86_avx512_psrai_q_512
3652               : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3653                            : Intrinsic::x86_avx512_psra_q_512;
3654       else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3655         IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3656                           : Intrinsic::x86_avx512_psra_w_512;
3657       else
3658         llvm_unreachable("Unexpected size");
3659     }
3660 
3661     Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3662   } else if (Name.starts_with("avx512.mask.move.s")) {
3663     Rep = upgradeMaskedMove(Builder, *CI);
3664   } else if (Name.starts_with("avx512.cvtmask2")) {
3665     Rep = upgradeMaskToInt(Builder, *CI);
3666   } else if (Name.ends_with(".movntdqa")) {
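         // Non-temporal loads (e.g. sse41.movntdqa) upgrade to a plain aligned
         // load tagged with !nontemporal metadata, roughly:
         //   %r = load <2 x i64>, ptr %p, align 16, !nontemporal !{i32 1}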
3667     MDNode *Node = MDNode::get(
3668         C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3669 
3670     LoadInst *LI = Builder.CreateAlignedLoad(
3671         CI->getType(), CI->getArgOperand(0),
3672         Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3673     LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3674     Rep = LI;
3675   } else if (Name.starts_with("fma.vfmadd.") ||
3676              Name.starts_with("fma.vfmsub.") ||
3677              Name.starts_with("fma.vfnmadd.") ||
3678              Name.starts_with("fma.vfnmsub.")) {
3679     bool NegMul = Name[6] == 'n';
3680     bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3681     bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3682 
3683     Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3684                     CI->getArgOperand(2)};
3685 
3686     if (IsScalar) {
3687       Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3688       Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3689       Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3690     }
3691 
3692     if (NegMul && !IsScalar)
3693       Ops[0] = Builder.CreateFNeg(Ops[0]);
3694     if (NegMul && IsScalar)
3695       Ops[1] = Builder.CreateFNeg(Ops[1]);
3696     if (NegAcc)
3697       Ops[2] = Builder.CreateFNeg(Ops[2]);
3698 
3699     Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3700 
3701     if (IsScalar)
3702       Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3703   } else if (Name.starts_with("fma4.vfmadd.s")) {
3704     Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3705                     CI->getArgOperand(2)};
3706 
3707     Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3708     Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3709     Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3710 
3711     Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3712 
3713     Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3714                                       Rep, (uint64_t)0);
3715   } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3716              Name.starts_with("avx512.maskz.vfmadd.s") ||
3717              Name.starts_with("avx512.mask3.vfmadd.s") ||
3718              Name.starts_with("avx512.mask3.vfmsub.s") ||
3719              Name.starts_with("avx512.mask3.vfnmsub.s")) {
3720     bool IsMask3 = Name[11] == '3';
3721     bool IsMaskZ = Name[11] == 'z';
3722     // Drop the "avx512.mask." to make it easier.
3723     Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3724     bool NegMul = Name[2] == 'n';
3725     bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3726 
3727     Value *A = CI->getArgOperand(0);
3728     Value *B = CI->getArgOperand(1);
3729     Value *C = CI->getArgOperand(2);
3730 
3731     if (NegMul && (IsMask3 || IsMaskZ))
3732       A = Builder.CreateFNeg(A);
3733     if (NegMul && !(IsMask3 || IsMaskZ))
3734       B = Builder.CreateFNeg(B);
3735     if (NegAcc)
3736       C = Builder.CreateFNeg(C);
3737 
3738     A = Builder.CreateExtractElement(A, (uint64_t)0);
3739     B = Builder.CreateExtractElement(B, (uint64_t)0);
3740     C = Builder.CreateExtractElement(C, (uint64_t)0);
3741 
3742     if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3743         cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3744       Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
3745 
3746       Intrinsic::ID IID;
3747       if (Name.back() == 'd')
3748         IID = Intrinsic::x86_avx512_vfmadd_f64;
3749       else
3750         IID = Intrinsic::x86_avx512_vfmadd_f32;
3751       Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3752     } else {
3753       Rep = Builder.CreateIntrinsic(Intrinsic::fma, A->getType(), {A, B, C});
3754     }
3755 
3756     Value *PassThru = IsMaskZ   ? Constant::getNullValue(Rep->getType())
3757                       : IsMask3 ? C
3758                                 : A;
3759 
3760     // For Mask3 with NegAcc, we need to create a new extractelement that
3761     // avoids the negation above.
3762     if (NegAcc && IsMask3)
3763       PassThru =
3764           Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
3765 
3766     Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3767     Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
3768                                       (uint64_t)0);
3769   } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
3770              Name.starts_with("avx512.mask.vfnmadd.p") ||
3771              Name.starts_with("avx512.mask.vfnmsub.p") ||
3772              Name.starts_with("avx512.mask3.vfmadd.p") ||
3773              Name.starts_with("avx512.mask3.vfmsub.p") ||
3774              Name.starts_with("avx512.mask3.vfnmsub.p") ||
3775              Name.starts_with("avx512.maskz.vfmadd.p")) {
3776     bool IsMask3 = Name[11] == '3';
3777     bool IsMaskZ = Name[11] == 'z';
3778     // Drop the "avx512.mask." to make it easier.
3779     Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3780     bool NegMul = Name[2] == 'n';
3781     bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3782 
3783     Value *A = CI->getArgOperand(0);
3784     Value *B = CI->getArgOperand(1);
3785     Value *C = CI->getArgOperand(2);
3786 
3787     if (NegMul && (IsMask3 || IsMaskZ))
3788       A = Builder.CreateFNeg(A);
3789     if (NegMul && !(IsMask3 || IsMaskZ))
3790       B = Builder.CreateFNeg(B);
3791     if (NegAcc)
3792       C = Builder.CreateFNeg(C);
3793 
3794     if (CI->arg_size() == 5 &&
3795         (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3796          cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3797       Intrinsic::ID IID;
3798       // Check the character before the ".512" suffix in the name.
3799       if (Name[Name.size() - 5] == 's')
3800         IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3801       else
3802         IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3803 
3804       Rep = Builder.CreateIntrinsic(IID, {}, {A, B, C, CI->getArgOperand(4)});
3805     } else {
3806       Rep = Builder.CreateIntrinsic(Intrinsic::fma, A->getType(), {A, B, C});
3807     }
3808 
3809     Value *PassThru = IsMaskZ   ? llvm::Constant::getNullValue(CI->getType())
3810                       : IsMask3 ? CI->getArgOperand(2)
3811                                 : CI->getArgOperand(0);
3812 
3813     Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3814   } else if (Name.starts_with("fma.vfmsubadd.p")) {
3815     unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3816     unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3817     Intrinsic::ID IID;
3818     if (VecWidth == 128 && EltWidth == 32)
3819       IID = Intrinsic::x86_fma_vfmaddsub_ps;
3820     else if (VecWidth == 256 && EltWidth == 32)
3821       IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3822     else if (VecWidth == 128 && EltWidth == 64)
3823       IID = Intrinsic::x86_fma_vfmaddsub_pd;
3824     else if (VecWidth == 256 && EltWidth == 64)
3825       IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3826     else
3827       llvm_unreachable("Unexpected intrinsic");
3828 
3829     Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3830                     CI->getArgOperand(2)};
3831     Ops[2] = Builder.CreateFNeg(Ops[2]);
3832     Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3833   } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3834              Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3835              Name.starts_with("avx512.maskz.vfmaddsub.p") ||
3836              Name.starts_with("avx512.mask3.vfmsubadd.p")) {
3837     bool IsMask3 = Name[11] == '3';
3838     bool IsMaskZ = Name[11] == 'z';
3839     // Drop the "avx512.mask." to make it easier.
3840     Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3841     bool IsSubAdd = Name[3] == 's';
3842     if (CI->arg_size() == 5) {
3843       Intrinsic::ID IID;
3844       // Check the character before the ".512" suffix in the name.
3845       if (Name[Name.size() - 5] == 's')
3846         IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3847       else
3848         IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3849 
3850       Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3851                       CI->getArgOperand(2), CI->getArgOperand(4)};
3852       if (IsSubAdd)
3853         Ops[2] = Builder.CreateFNeg(Ops[2]);
3854 
3855       Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3856     } else {
3857       int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3858 
3859       Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3860                       CI->getArgOperand(2)};
3861 
3862       Function *FMA = Intrinsic::getOrInsertDeclaration(
3863           CI->getModule(), Intrinsic::fma, Ops[0]->getType());
3864       Value *Odd = Builder.CreateCall(FMA, Ops);
3865       Ops[2] = Builder.CreateFNeg(Ops[2]);
3866       Value *Even = Builder.CreateCall(FMA, Ops);
3867 
3868       if (IsSubAdd)
3869         std::swap(Even, Odd);
3870 
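           // Interleave the two FMA results: shuffle index i (even lanes) picks
           // Even's lane i, index NumElts + i (odd lanes) picks Odd's lane i.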
3871       SmallVector<int, 32> Idxs(NumElts);
3872       for (int i = 0; i != NumElts; ++i)
3873         Idxs[i] = i + (i % 2) * NumElts;
3874 
3875       Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3876     }
3877 
3878     Value *PassThru = IsMaskZ   ? llvm::Constant::getNullValue(CI->getType())
3879                       : IsMask3 ? CI->getArgOperand(2)
3880                                 : CI->getArgOperand(0);
3881 
3882     Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3883   } else if (Name.starts_with("avx512.mask.pternlog.") ||
3884              Name.starts_with("avx512.maskz.pternlog.")) {
3885     bool ZeroMask = Name[11] == 'z';
3886     unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3887     unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3888     Intrinsic::ID IID;
3889     if (VecWidth == 128 && EltWidth == 32)
3890       IID = Intrinsic::x86_avx512_pternlog_d_128;
3891     else if (VecWidth == 256 && EltWidth == 32)
3892       IID = Intrinsic::x86_avx512_pternlog_d_256;
3893     else if (VecWidth == 512 && EltWidth == 32)
3894       IID = Intrinsic::x86_avx512_pternlog_d_512;
3895     else if (VecWidth == 128 && EltWidth == 64)
3896       IID = Intrinsic::x86_avx512_pternlog_q_128;
3897     else if (VecWidth == 256 && EltWidth == 64)
3898       IID = Intrinsic::x86_avx512_pternlog_q_256;
3899     else if (VecWidth == 512 && EltWidth == 64)
3900       IID = Intrinsic::x86_avx512_pternlog_q_512;
3901     else
3902       llvm_unreachable("Unexpected intrinsic");
3903 
3904     Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3905                      CI->getArgOperand(2), CI->getArgOperand(3)};
3906     Rep = Builder.CreateIntrinsic(IID, {}, Args);
3907     Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3908                                : CI->getArgOperand(0);
3909     Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3910   } else if (Name.starts_with("avx512.mask.vpmadd52") ||
3911              Name.starts_with("avx512.maskz.vpmadd52")) {
3912     bool ZeroMask = Name[11] == 'z';
3913     bool High = Name[20] == 'h' || Name[21] == 'h';
3914     unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3915     Intrinsic::ID IID;
3916     if (VecWidth == 128 && !High)
3917       IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3918     else if (VecWidth == 256 && !High)
3919       IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3920     else if (VecWidth == 512 && !High)
3921       IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3922     else if (VecWidth == 128 && High)
3923       IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3924     else if (VecWidth == 256 && High)
3925       IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3926     else if (VecWidth == 512 && High)
3927       IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3928     else
3929       llvm_unreachable("Unexpected intrinsic");
3930 
3931     Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3932                      CI->getArgOperand(2)};
3933     Rep = Builder.CreateIntrinsic(IID, {}, Args);
3934     Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3935                                : CI->getArgOperand(0);
3936     Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3937   } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
3938              Name.starts_with("avx512.mask.vpermt2var.") ||
3939              Name.starts_with("avx512.maskz.vpermt2var.")) {
3940     bool ZeroMask = Name[11] == 'z';
3941     bool IndexForm = Name[17] == 'i';
3942     Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3943   } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
3944              Name.starts_with("avx512.maskz.vpdpbusd.") ||
3945              Name.starts_with("avx512.mask.vpdpbusds.") ||
3946              Name.starts_with("avx512.maskz.vpdpbusds.")) {
3947     bool ZeroMask = Name[11] == 'z';
3948     bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3949     unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3950     Intrinsic::ID IID;
3951     if (VecWidth == 128 && !IsSaturating)
3952       IID = Intrinsic::x86_avx512_vpdpbusd_128;
3953     else if (VecWidth == 256 && !IsSaturating)
3954       IID = Intrinsic::x86_avx512_vpdpbusd_256;
3955     else if (VecWidth == 512 && !IsSaturating)
3956       IID = Intrinsic::x86_avx512_vpdpbusd_512;
3957     else if (VecWidth == 128 && IsSaturating)
3958       IID = Intrinsic::x86_avx512_vpdpbusds_128;
3959     else if (VecWidth == 256 && IsSaturating)
3960       IID = Intrinsic::x86_avx512_vpdpbusds_256;
3961     else if (VecWidth == 512 && IsSaturating)
3962       IID = Intrinsic::x86_avx512_vpdpbusds_512;
3963     else
3964       llvm_unreachable("Unexpected intrinsic");
3965 
3966     Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3967                      CI->getArgOperand(2)};
3968     Rep = Builder.CreateIntrinsic(IID, {}, Args);
3969     Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3970                                : CI->getArgOperand(0);
3971     Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3972   } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
3973              Name.starts_with("avx512.maskz.vpdpwssd.") ||
3974              Name.starts_with("avx512.mask.vpdpwssds.") ||
3975              Name.starts_with("avx512.maskz.vpdpwssds.")) {
3976     bool ZeroMask = Name[11] == 'z';
3977     bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3978     unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3979     Intrinsic::ID IID;
3980     if (VecWidth == 128 && !IsSaturating)
3981       IID = Intrinsic::x86_avx512_vpdpwssd_128;
3982     else if (VecWidth == 256 && !IsSaturating)
3983       IID = Intrinsic::x86_avx512_vpdpwssd_256;
3984     else if (VecWidth == 512 && !IsSaturating)
3985       IID = Intrinsic::x86_avx512_vpdpwssd_512;
3986     else if (VecWidth == 128 && IsSaturating)
3987       IID = Intrinsic::x86_avx512_vpdpwssds_128;
3988     else if (VecWidth == 256 && IsSaturating)
3989       IID = Intrinsic::x86_avx512_vpdpwssds_256;
3990     else if (VecWidth == 512 && IsSaturating)
3991       IID = Intrinsic::x86_avx512_vpdpwssds_512;
3992     else
3993       llvm_unreachable("Unexpected intrinsic");
3994 
3995     Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3996                      CI->getArgOperand(2)};
3997     Rep = Builder.CreateIntrinsic(IID, {}, Args);
3998     Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3999                                : CI->getArgOperand(0);
4000     Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4001   } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4002              Name == "addcarry.u32" || Name == "addcarry.u64" ||
4003              Name == "subborrow.u32" || Name == "subborrow.u64") {
4004     Intrinsic::ID IID;
4005     if (Name[0] == 'a' && Name.back() == '2')
4006       IID = Intrinsic::x86_addcarry_32;
4007     else if (Name[0] == 'a' && Name.back() == '4')
4008       IID = Intrinsic::x86_addcarry_64;
4009     else if (Name[0] == 's' && Name.back() == '2')
4010       IID = Intrinsic::x86_subborrow_32;
4011     else if (Name[0] == 's' && Name.back() == '4')
4012       IID = Intrinsic::x86_subborrow_64;
4013     else
4014       llvm_unreachable("Unexpected intrinsic");
4015 
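         // Illustratively, "addcarry.u32(c, x, y, p)" becomes roughly
         //   %r = call { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %x, i32 %y)
         // followed by a store of the i32 sum to %p; the i8 carry flag replaces
         // the original result.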
4016     // Make a call with 3 operands.
4017     Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4018                      CI->getArgOperand(2)};
4019     Value *NewCall = Builder.CreateIntrinsic(IID, {}, Args);
4020 
4021     // Extract the second result and store it.
4022     Value *Data = Builder.CreateExtractValue(NewCall, 1);
4023     Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4024     // Replace the original call result with the first result of the new call.
4025     Value *CF = Builder.CreateExtractValue(NewCall, 0);
4026 
4027     CI->replaceAllUsesWith(CF);
4028     Rep = nullptr;
4029   } else if (Name.starts_with("avx512.mask.") &&
4030              upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4031     // Rep will be updated by the call in the condition.
4032   }
4033 
4034   return Rep;
4035 }
4036 
4037 static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4038                                           Function *F, IRBuilder<> &Builder) {
4039   if (Name.starts_with("neon.bfcvt")) {
4040     if (Name.starts_with("neon.bfcvtn2")) {
4041       SmallVector<int, 32> LoMask(4);
4042       std::iota(LoMask.begin(), LoMask.end(), 0);
4043       SmallVector<int, 32> ConcatMask(8);
4044       std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4045       Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4046       Value *Trunc =
4047           Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4048       return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4049     } else if (Name.starts_with("neon.bfcvtn")) {
4050       SmallVector<int, 32> ConcatMask(8);
4051       std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4052       Type *V4BF16 =
4053           FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4054       Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4056       return Builder.CreateShuffleVector(
4057           Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4058     } else {
4059       return Builder.CreateFPTrunc(CI->getOperand(0),
4060                                    Type::getBFloatTy(F->getContext()));
4061     }
4062   } else if (Name.starts_with("sve.fcvt")) {
4063     Intrinsic::ID NewID =
4064         StringSwitch<Intrinsic::ID>(Name)
4065             .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4066             .Case("sve.fcvtnt.bf16f32",
4067                   Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4068             .Default(Intrinsic::not_intrinsic);
4069     if (NewID == Intrinsic::not_intrinsic)
4070       llvm_unreachable("Unhandled Intrinsic!");
4071 
4072     SmallVector<Value *, 3> Args(CI->args());
4073 
4074     // The original intrinsics incorrectly used a predicate based on the
4075     // smallest element type rather than the largest.
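         // Reinterpret the <vscale x 8 x i1> operand as <vscale x 4 x i1> by
         // round-tripping through svbool, roughly:
         //   %b = ...convert.to.svbool.nxv8i1(%pg)
         //   %p = ...convert.from.svbool.nxv4i1(%b)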
4076     Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4077     Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4078 
4079     if (Args[1]->getType() != BadPredTy)
4080       llvm_unreachable("Unexpected predicate type!");
4081 
4082     Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4083                                       BadPredTy, Args[1]);
4084     Args[1] = Builder.CreateIntrinsic(
4085         Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4086 
4087     return Builder.CreateIntrinsic(NewID, {}, Args, /*FMFSource=*/nullptr,
4088                                    CI->getName());
4089   }
4090 
4091   llvm_unreachable("Unhandled Intrinsic!");
4092 }
4093 
4094 static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4095                                       IRBuilder<> &Builder) {
4096   if (Name == "mve.vctp64.old") {
4097     // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4098     // correct type.
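         // The new vctp64 yields <2 x i1>; casting that to an integer mask and
         // back out as <4 x i1> preserves the type the old intrinsic's users
         // expect.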
4099     Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4100                                           CI->getArgOperand(0),
4101                                           /*FMFSource=*/nullptr, CI->getName());
4102     Value *C1 = Builder.CreateIntrinsic(
4103         Intrinsic::arm_mve_pred_v2i,
4104         {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4105     return Builder.CreateIntrinsic(
4106         Intrinsic::arm_mve_pred_i2v,
4107         {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4108   } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4109              Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4110              Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4111              Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4112              Name ==
4113                  "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4114              Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4115              Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4116              Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4117              Name ==
4118                  "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4119              Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4120              Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4121              Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4122              Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4123              Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4124              Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4125              Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4126     std::vector<Type *> Tys;
4127     unsigned ID = CI->getIntrinsicID();
4128     Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4129     switch (ID) {
4130     case Intrinsic::arm_mve_mull_int_predicated:
4131     case Intrinsic::arm_mve_vqdmull_predicated:
4132     case Intrinsic::arm_mve_vldr_gather_base_predicated:
4133       Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4134       break;
4135     case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4136     case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4137     case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4138       Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4139              V2I1Ty};
4140       break;
4141     case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4142       Tys = {CI->getType(), CI->getOperand(0)->getType(),
4143              CI->getOperand(1)->getType(), V2I1Ty};
4144       break;
4145     case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4146       Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4147              CI->getOperand(2)->getType(), V2I1Ty};
4148       break;
4149     case Intrinsic::arm_cde_vcx1q_predicated:
4150     case Intrinsic::arm_cde_vcx1qa_predicated:
4151     case Intrinsic::arm_cde_vcx2q_predicated:
4152     case Intrinsic::arm_cde_vcx2qa_predicated:
4153     case Intrinsic::arm_cde_vcx3q_predicated:
4154     case Intrinsic::arm_cde_vcx3qa_predicated:
4155       Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4156       break;
4157     default:
4158       llvm_unreachable("Unhandled Intrinsic!");
4159     }
4160 
4161     std::vector<Value *> Ops;
4162     for (Value *Op : CI->args()) {
4163       Type *Ty = Op->getType();
4164       if (Ty->getScalarSizeInBits() == 1) {
4165         Value *C1 = Builder.CreateIntrinsic(
4166             Intrinsic::arm_mve_pred_v2i,
4167             {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4168         Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4169       }
4170       Ops.push_back(Op);
4171     }
4172 
4173     return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4174                                    CI->getName());
4175   }
4176   llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4177 }
4178 
4179 // These are expected to have the arguments:
4180 // atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4181 //
4182 // Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4183 //
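     // As an illustration, "ds.fadd(p, v, 0, 0, false)" becomes roughly
     //   %r = atomicrmw fadd ptr addrspace(3) %p, float %v
     //        syncscope("agent") seq_cst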
4184 static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4185                                          Function *F, IRBuilder<> &Builder) {
4186   AtomicRMWInst::BinOp RMWOp =
4187       StringSwitch<AtomicRMWInst::BinOp>(Name)
4188           .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4189           .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4190           .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4191           .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4192           .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4193           .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4194           .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4195           .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4196           .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4197           .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4198           .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);
4199 
4200   unsigned NumOperands = CI->getNumOperands();
4201   if (NumOperands < 3) // Malformed bitcode.
4202     return nullptr;
4203 
4204   Value *Ptr = CI->getArgOperand(0);
4205   PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4206   if (!PtrTy) // Malformed.
4207     return nullptr;
4208 
4209   Value *Val = CI->getArgOperand(1);
4210   if (Val->getType() != CI->getType()) // Malformed.
4211     return nullptr;
4212 
4213   ConstantInt *OrderArg = nullptr;
4214   bool IsVolatile = false;
4215 
4216   // These should have 5 arguments (plus the callee). A separate version of the
4217   // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4218   if (NumOperands > 3)
4219     OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4220 
4221   // Ignore scope argument at 3
4222 
4223   if (NumOperands > 5) {
4224     ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4225     IsVolatile = !VolatileArg || !VolatileArg->isZero();
4226   }
4227 
4228   AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4229   if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4230     Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4231   if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4232     Order = AtomicOrdering::SequentiallyConsistent;
4233 
4234   LLVMContext &Ctx = F->getContext();
4235 
4236   // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4237   Type *RetTy = CI->getType();
4238   if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4239     if (VT->getElementType()->isIntegerTy(16)) {
4240       VectorType *AsBF16 =
4241           VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4242       Val = Builder.CreateBitCast(Val, AsBF16);
4243     }
4244   }
4245 
4246   // The scope argument never really worked correctly. Use agent as the most
4247   // conservative option which should still always produce the instruction.
4248   SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4249   AtomicRMWInst *RMW =
4250       Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4251 
4252   unsigned AddrSpace = PtrTy->getAddressSpace();
4253   if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4254     MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4255     RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4256     if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4257       RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4258   }
4259 
4260   if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4261     MDBuilder MDB(F->getContext());
4262     MDNode *RangeNotPrivate =
4263         MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
4264                         APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4265     RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4266   }
4267 
4268   if (IsVolatile)
4269     RMW->setVolatile(true);
4270 
4271   return Builder.CreateBitCast(RMW, RetTy);
4272 }
4273 
4274 /// Helper to unwrap intrinsic call MetadataAsValue operands.
4275 template <typename MDType>
4276 static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) {
4277   if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4278     return dyn_cast<MDType>(MAV->getMetadata());
4279   return nullptr;
4280 }
4281 
4282 /// Convert debug intrinsic calls to non-instruction debug records.
4283 /// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4284 /// \p CI - The debug intrinsic call.
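     /// For example, "call void @llvm.dbg.value(metadata i32 %x, metadata !10,
     /// metadata !DIExpression())" becomes, roughly, the record
     /// "#dbg_value(i32 %x, !10, !DIExpression(), <debug location>)".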
4285 static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4286   DbgRecord *DR = nullptr;
4287   if (Name == "label") {
4288     DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, 0), CI->getDebugLoc());
4289   } else if (Name == "assign") {
4290     DR = new DbgVariableRecord(
4291         unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4292         unwrapMAVOp<DIExpression>(CI, 2), unwrapMAVOp<DIAssignID>(CI, 3),
4293         unwrapMAVOp<Metadata>(CI, 4), unwrapMAVOp<DIExpression>(CI, 5),
4294         CI->getDebugLoc());
4295   } else if (Name == "declare") {
4296     DR = new DbgVariableRecord(
4297         unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4298         unwrapMAVOp<DIExpression>(CI, 2), CI->getDebugLoc(),
4299         DbgVariableRecord::LocationType::Declare);
4300   } else if (Name == "addr") {
4301     // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4302     DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, 2);
4303     Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4304     DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, 0),
4305                                unwrapMAVOp<DILocalVariable>(CI, 1), Expr,
4306                                CI->getDebugLoc());
4307   } else if (Name == "value") {
4308     // An old version of dbg.value had an extra offset argument.
4309     unsigned VarOp = 1;
4310     unsigned ExprOp = 2;
4311     if (CI->arg_size() == 4) {
4312       auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4313       // Nonzero offset dbg.values get dropped without a replacement.
4314       if (!Offset || !Offset->isZeroValue())
4315         return;
4316       VarOp = 2;
4317       ExprOp = 3;
4318     }
4319     DR = new DbgVariableRecord(
4320         unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, VarOp),
4321         unwrapMAVOp<DIExpression>(CI, ExprOp), CI->getDebugLoc());
4322   }
4323   assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4324   CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4325 }
4326 
4327 /// Upgrade a call to an old intrinsic. All argument and return casting must be
4328 /// provided to seamlessly integrate with existing context.
4329 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4330   // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4331   // checks the callee's function type matches. It's likely we need to handle
4332   // type changes here.
4333   Function *F = dyn_cast<Function>(CI->getCalledOperand());
4334   if (!F)
4335     return;
4336 
4337   LLVMContext &C = CI->getContext();
4338   IRBuilder<> Builder(C);
4339   Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4340 
4341   if (!NewFn) {
4342     bool FallthroughToDefaultUpgrade = false;
4343     // Get the Function's name.
4344     StringRef Name = F->getName();
4345 
4346     assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4347     Name = Name.substr(5);
4348 
4349     bool IsX86 = Name.consume_front("x86.");
4350     bool IsNVVM = Name.consume_front("nvvm.");
4351     bool IsAArch64 = Name.consume_front("aarch64.");
4352     bool IsARM = Name.consume_front("arm.");
4353     bool IsAMDGCN = Name.consume_front("amdgcn.");
4354     bool IsDbg = Name.consume_front("dbg.");
4355     Value *Rep = nullptr;
4356 
4357     if (!IsX86 && Name == "stackprotectorcheck") {
4358       Rep = nullptr;
4359     } else if (IsNVVM) {
4360       Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4361     } else if (IsX86) {
4362       Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4363     } else if (IsAArch64) {
4364       Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4365     } else if (IsARM) {
4366       Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4367     } else if (IsAMDGCN) {
4368       Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4369     } else if (IsDbg) {
4370       // We might have decided we don't want the new format after all between
4371       // first requesting the upgrade and now; skip the conversion if that is
4372       // the case, and check here to see if the intrinsic needs to be upgraded
4373       // normally.
4374       if (!CI->getModule()->IsNewDbgInfoFormat) {
4375         bool NeedsUpgrade =
4376             upgradeIntrinsicFunction1(CI->getCalledFunction(), NewFn, false);
4377         if (!NeedsUpgrade)
4378           return;
4379         FallthroughToDefaultUpgrade = true;
4380       } else {
4381         upgradeDbgIntrinsicToDbgRecord(Name, CI);
4382       }
4383     } else {
4384       llvm_unreachable("Unknown function for CallBase upgrade.");
4385     }
4386 
4387     if (!FallthroughToDefaultUpgrade) {
4388       if (Rep)
4389         CI->replaceAllUsesWith(Rep);
4390       CI->eraseFromParent();
4391       return;
4392     }
4393   }
4394 
4395   const auto &DefaultCase = [&]() -> void {
4396     if (CI->getFunctionType() == NewFn->getFunctionType()) {
4397       // Handle generic mangling change.
4398       assert(
4399           (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4400           "Unknown function for CallBase upgrade and isn't just a name change");
4401       CI->setCalledFunction(NewFn);
4402       return;
4403     }
4404 
4405     // This must be an upgrade from a named to a literal struct.
4406     if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4407       assert(OldST != NewFn->getReturnType() &&
4408              "Return type must have changed");
4409       assert(OldST->getNumElements() ==
4410                  cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4411              "Must have same number of elements");
4412 
4413       SmallVector<Value *> Args(CI->args());
4414       CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4415       NewCI->setAttributes(CI->getAttributes());
4416       Value *Res = PoisonValue::get(OldST);
4417       for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4418         Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4419         Res = Builder.CreateInsertValue(Res, Elem, Idx);
4420       }
4421       CI->replaceAllUsesWith(Res);
4422       CI->eraseFromParent();
4423       return;
4424     }
4425 
4426     // We're probably about to produce something invalid. Let the verifier catch
4427     // it instead of dying here.
4428     CI->setCalledOperand(
4429         ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4430     return;
4431   };
4432   CallInst *NewCall = nullptr;
4433   switch (NewFn->getIntrinsicID()) {
4434   default: {
4435     DefaultCase();
4436     return;
4437   }
4438   case Intrinsic::arm_neon_vst1:
4439   case Intrinsic::arm_neon_vst2:
4440   case Intrinsic::arm_neon_vst3:
4441   case Intrinsic::arm_neon_vst4:
4442   case Intrinsic::arm_neon_vst2lane:
4443   case Intrinsic::arm_neon_vst3lane:
4444   case Intrinsic::arm_neon_vst4lane: {
4445     SmallVector<Value *, 4> Args(CI->args());
4446     NewCall = Builder.CreateCall(NewFn, Args);
4447     break;
4448   }
4449   case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4450   case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4451   case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4452     LLVMContext &Ctx = F->getParent()->getContext();
4453     SmallVector<Value *, 4> Args(CI->args());
4454     Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4455                                cast<ConstantInt>(Args[3])->getZExtValue());
4456     NewCall = Builder.CreateCall(NewFn, Args);
4457     break;
4458   }
4459   case Intrinsic::aarch64_sve_ld3_sret:
4460   case Intrinsic::aarch64_sve_ld4_sret:
4461   case Intrinsic::aarch64_sve_ld2_sret: {
4462     StringRef Name = F->getName();
4463     Name = Name.substr(5);
4464     unsigned N = StringSwitch<unsigned>(Name)
4465                      .StartsWith("aarch64.sve.ld2", 2)
4466                      .StartsWith("aarch64.sve.ld3", 3)
4467                      .StartsWith("aarch64.sve.ld4", 4)
4468                      .Default(0);
4469     auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4470     unsigned MinElts = RetTy->getMinNumElements() / N;
4471     SmallVector<Value *, 2> Args(CI->args());
4472     Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4473     Value *Ret = llvm::PoisonValue::get(RetTy);
4474     for (unsigned I = 0; I < N; I++) {
4475       Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4476       Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4477       Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
4478     }
4479     NewCall = dyn_cast<CallInst>(Ret);
4480     break;
4481   }
4482 
4483   case Intrinsic::coro_end: {
4484     SmallVector<Value *, 3> Args(CI->args());
4485     Args.push_back(ConstantTokenNone::get(CI->getContext()));
4486     NewCall = Builder.CreateCall(NewFn, Args);
4487     break;
4488   }
4489 
4490   case Intrinsic::vector_extract: {
4491     StringRef Name = F->getName();
4492     Name = Name.substr(5); // Strip llvm.
4493     if (!Name.starts_with("aarch64.sve.tuple.get")) {
4494       DefaultCase();
4495       return;
4496     }
4497     auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4498     unsigned MinElts = RetTy->getMinNumElements();
4499     unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4500     Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4501     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4502     break;
4503   }
4504 
4505   case Intrinsic::vector_insert: {
4506     StringRef Name = F->getName();
4507     Name = Name.substr(5);
4508     if (!Name.starts_with("aarch64.sve.tuple")) {
4509       DefaultCase();
4510       return;
4511     }
4512     if (Name.starts_with("aarch64.sve.tuple.set")) {
4513       unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4514       auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4515       Value *NewIdx =
4516           ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4517       NewCall = Builder.CreateCall(
4518           NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4519       break;
4520     }
4521     if (Name.starts_with("aarch64.sve.tuple.create")) {
4522       unsigned N = StringSwitch<unsigned>(Name)
4523                        .StartsWith("aarch64.sve.tuple.create2", 2)
4524                        .StartsWith("aarch64.sve.tuple.create3", 3)
4525                        .StartsWith("aarch64.sve.tuple.create4", 4)
4526                        .Default(0);
4527       assert(N > 1 && "Create is expected to be between 2-4");
4528       auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4529       Value *Ret = llvm::PoisonValue::get(RetTy);
4530       unsigned MinElts = RetTy->getMinNumElements() / N;
4531       for (unsigned I = 0; I < N; I++) {
4532         Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4533         Value *V = CI->getArgOperand(I);
4534         Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4535       }
4536       NewCall = dyn_cast<CallInst>(Ret);
4537     }
4538     break;
4539   }
4540 
4541   case Intrinsic::arm_neon_bfdot:
4542   case Intrinsic::arm_neon_bfmmla:
4543   case Intrinsic::arm_neon_bfmlalb:
4544   case Intrinsic::arm_neon_bfmlalt:
4545   case Intrinsic::aarch64_neon_bfdot:
4546   case Intrinsic::aarch64_neon_bfmmla:
4547   case Intrinsic::aarch64_neon_bfmlalb:
4548   case Intrinsic::aarch64_neon_bfmlalt: {
4549     SmallVector<Value *, 3> Args;
4550     assert(CI->arg_size() == 3 &&
4551            "Mismatch between function args and call args");
4552     size_t OperandWidth =
4553         CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4554     assert((OperandWidth == 64 || OperandWidth == 128) &&
4555            "Unexpected operand width");
4556     Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4557     auto Iter = CI->args().begin();
4558     Args.push_back(*Iter++);
4559     Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4560     Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4561     NewCall = Builder.CreateCall(NewFn, Args);
4562     break;
4563   }
4564 
4565   case Intrinsic::bitreverse:
4566     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4567     break;
4568 
4569   case Intrinsic::ctlz:
4570   case Intrinsic::cttz:
4571     assert(CI->arg_size() == 1 &&
4572            "Mismatch between function args and call args");
4573     NewCall =
4574         Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4575     break;
4576 
4577   case Intrinsic::objectsize: {
4578     Value *NullIsUnknownSize =
4579         CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4580     Value *Dynamic =
4581         CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4582     NewCall = Builder.CreateCall(
4583         NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                     NullIsUnknownSize, Dynamic});
4584     break;
4585   }
4586 
4587   case Intrinsic::ctpop:
4588     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4589     break;
4590 
4591   case Intrinsic::convert_from_fp16:
4592     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4593     break;
4594 
4595   case Intrinsic::dbg_value: {
4596     StringRef Name = F->getName();
4597     Name = Name.substr(5); // Strip llvm.
4598     // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4599     if (Name.starts_with("dbg.addr")) {
4600       DIExpression *Expr = cast<DIExpression>(
4601           cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4602       Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4603       NewCall =
4604           Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4605                                      MetadataAsValue::get(C, Expr)});
4606       break;
4607     }
4608 
4609     // Upgrade from the old version that had an extra offset argument.
4610     assert(CI->arg_size() == 4);
4611     // Drop nonzero offsets instead of attempting to upgrade them.
4612     if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4613       if (Offset->isZeroValue()) {
4614         NewCall = Builder.CreateCall(
4615             NewFn,
4616             {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4617         break;
4618       }
4619     CI->eraseFromParent();
4620     return;
4621   }
4622 
4623   case Intrinsic::ptr_annotation:
4624     // Upgrade from versions that lacked the annotation attribute argument.
4625     if (CI->arg_size() != 4) {
4626       DefaultCase();
4627       return;
4628     }
4629 
4630     // Create a new call with an added null annotation attribute argument.
4631     NewCall =
4632         Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4633                                    CI->getArgOperand(2), CI->getArgOperand(3),
4634                                    Constant::getNullValue(Builder.getPtrTy())});
4635     NewCall->takeName(CI);
4636     CI->replaceAllUsesWith(NewCall);
4637     CI->eraseFromParent();
4638     return;
4639 
4640   case Intrinsic::var_annotation:
4641     // Upgrade from versions that lacked the annotation attribute argument.
4642     if (CI->arg_size() != 4) {
4643       DefaultCase();
4644       return;
4645     }
4646     // Create a new call with an added null annotation attribute argument.
4647     NewCall =
4648         Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4649                                    CI->getArgOperand(2), CI->getArgOperand(3),
4650                                    Constant::getNullValue(Builder.getPtrTy())});
4651     NewCall->takeName(CI);
4652     CI->replaceAllUsesWith(NewCall);
4653     CI->eraseFromParent();
4654     return;
4655 
4656   case Intrinsic::riscv_aes32dsi:
4657   case Intrinsic::riscv_aes32dsmi:
4658   case Intrinsic::riscv_aes32esi:
4659   case Intrinsic::riscv_aes32esmi:
4660   case Intrinsic::riscv_sm4ks:
4661   case Intrinsic::riscv_sm4ed: {
4662     // The last argument to these intrinsics used to be i8 and changed to i32.
4663     // The type overload for sm4ks and sm4ed was removed.
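         // On RV64 the data arguments are truncated to i32 and the result is
         // sign-extended back to i64; the immediate is rebuilt as an i32
         // constant.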
4664     Value *Arg2 = CI->getArgOperand(2);
4665     if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4666       return;
4667 
4668     Value *Arg0 = CI->getArgOperand(0);
4669     Value *Arg1 = CI->getArgOperand(1);
4670     if (CI->getType()->isIntegerTy(64)) {
4671       Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4672       Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4673     }
4674 
4675     Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4676                             cast<ConstantInt>(Arg2)->getZExtValue());
4677 
4678     NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4679     Value *Res = NewCall;
4680     if (Res->getType() != CI->getType())
4681       Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4682     NewCall->takeName(CI);
4683     CI->replaceAllUsesWith(Res);
4684     CI->eraseFromParent();
4685     return;
4686   }
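  // A sketch of the RV64 path above (hypothetical IR): an old call such as
  //   %r = call i64 @llvm.riscv.sm4ks.i64(i64 %a, i64 %b, i8 2)
  // has its operands truncated, calls the unsuffixed i32 form
  //   %t = call i32 @llvm.riscv.sm4ks(i32 %a32, i32 %b32, i32 2)
  // and sign-extends %t back to i64 to replace %r.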
4687   case Intrinsic::riscv_sha256sig0:
4688   case Intrinsic::riscv_sha256sig1:
4689   case Intrinsic::riscv_sha256sum0:
4690   case Intrinsic::riscv_sha256sum1:
4691   case Intrinsic::riscv_sm3p0:
4692   case Intrinsic::riscv_sm3p1: {
4693     // These intrinsics used to be overloaded on XLen (i32 on RV32, i64 on
4694     // RV64); the overload was removed, so truncate to i32 and sign-extend.
4695     if (!CI->getType()->isIntegerTy(64))
4696       return;
4697 
4698     Value *Arg =
4699         Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4700 
4701     NewCall = Builder.CreateCall(NewFn, Arg);
4702     Value *Res =
4703         Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4704     NewCall->takeName(CI);
4705     CI->replaceAllUsesWith(Res);
4706     CI->eraseFromParent();
4707     return;
4708   }
4709 
4710   case Intrinsic::x86_xop_vfrcz_ss:
4711   case Intrinsic::x86_xop_vfrcz_sd:
4712     NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4713     break;
4714 
4715   case Intrinsic::x86_xop_vpermil2pd:
4716   case Intrinsic::x86_xop_vpermil2ps:
4717   case Intrinsic::x86_xop_vpermil2pd_256:
4718   case Intrinsic::x86_xop_vpermil2ps_256: {
4719     SmallVector<Value *, 4> Args(CI->args());
4720     VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4721     VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4722     Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4723     NewCall = Builder.CreateCall(NewFn, Args);
4724     break;
4725   }
4726 
4727   case Intrinsic::x86_sse41_ptestc:
4728   case Intrinsic::x86_sse41_ptestz:
4729   case Intrinsic::x86_sse41_ptestnzc: {
4730     // The arguments for these intrinsics used to be v4f32 and changed to
4731     // v2i64. This is purely a no-op, since those are bitwise intrinsics, so
4732     // the only thing required is a bitcast for both arguments.
4733     // First, check that the arguments have the old type.
4734     Value *Arg0 = CI->getArgOperand(0);
4735     if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4736       return;
4737 
4738     // Old intrinsic, add bitcasts
4739     Value *Arg1 = CI->getArgOperand(1);
4740 
4741     auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4742 
4743     Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4744     Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4745 
4746     NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4747     break;
4748   }
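  // An illustrative example (hypothetical IR): a call to the old declaration
  //   i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %b)
  // is rewritten to pass both operands through bitcasts:
  //   i32 @llvm.x86.sse41.ptestz(<2 x i64> %a64, <2 x i64> %b64)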
4749 
4750   case Intrinsic::x86_rdtscp: {
4751     // This used to take one argument; the argument-less form has already
4752     // been upgraded, so there is nothing to do.
4753     if (CI->arg_size() == 0)
4754       return;
4755 
4756     NewCall = Builder.CreateCall(NewFn);
4757     // Extract the second result and store it.
4758     Value *Data = Builder.CreateExtractValue(NewCall, 1);
4759     Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
4760     // Replace the original call result with the first result of the new call.
4761     Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4762 
4763     NewCall->takeName(CI);
4764     CI->replaceAllUsesWith(TSC);
4765     CI->eraseFromParent();
4766     return;
4767   }
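  // A sketch of the transformation above (hypothetical IR): the old form
  //   %tsc = call i64 @llvm.x86.rdtscp(ptr %aux)
  // becomes a call to the argument-less form returning { i64, i32 }; the i32
  // element is stored through %aux and the i64 element replaces %tsc.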
4768 
4769   case Intrinsic::x86_sse41_insertps:
4770   case Intrinsic::x86_sse41_dppd:
4771   case Intrinsic::x86_sse41_dpps:
4772   case Intrinsic::x86_sse41_mpsadbw:
4773   case Intrinsic::x86_avx_dp_ps_256:
4774   case Intrinsic::x86_avx2_mpsadbw: {
4775     // Need to truncate the last argument from i32 to i8 -- this argument models
4776     // an inherently 8-bit immediate operand to these x86 instructions.
4777     SmallVector<Value *, 4> Args(CI->args());
4778 
4779     // Replace the last argument with a trunc.
4780     Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4781     NewCall = Builder.CreateCall(NewFn, Args);
4782     break;
4783   }
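  // For example (hypothetical IR), an immediate passed as the i32 in
  //   call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a, <16 x i8> %b, i32 7)
  // is truncated so that the last operand becomes "i8 7".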
4784 
4785   case Intrinsic::x86_avx512_mask_cmp_pd_128:
4786   case Intrinsic::x86_avx512_mask_cmp_pd_256:
4787   case Intrinsic::x86_avx512_mask_cmp_pd_512:
4788   case Intrinsic::x86_avx512_mask_cmp_ps_128:
4789   case Intrinsic::x86_avx512_mask_cmp_ps_256:
4790   case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4791     SmallVector<Value *, 4> Args(CI->args());
4792     unsigned NumElts =
4793         cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4794     Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4795 
4796     NewCall = Builder.CreateCall(NewFn, Args);
4797     Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4798 
4799     NewCall->takeName(CI);
4800     CI->replaceAllUsesWith(Res);
4801     CI->eraseFromParent();
4802     return;
4803   }
4804 
4805   case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4806   case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4807   case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4808   case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4809   case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4810   case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
4811     SmallVector<Value *, 4> Args(CI->args());
4812     unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4813     if (NewFn->getIntrinsicID() ==
4814         Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4815       Args[1] = Builder.CreateBitCast(
4816           Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4817 
4818     NewCall = Builder.CreateCall(NewFn, Args);
4819     Value *Res = Builder.CreateBitCast(
4820         NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4821 
4822     NewCall->takeName(CI);
4823     CI->replaceAllUsesWith(Res);
4824     CI->eraseFromParent();
4825     return;
4826   }
4827   case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4828   case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4829   case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
4830     SmallVector<Value *, 4> Args(CI->args());
4831     unsigned NumElts =
4832         cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4833     Args[1] = Builder.CreateBitCast(
4834         Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4835     Args[2] = Builder.CreateBitCast(
4836         Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4837 
4838     NewCall = Builder.CreateCall(NewFn, Args);
4839     break;
4840   }
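  // An illustrative note for the 128-bit variant (hypothetical IR): the two
  // <4 x i32> inputs are bitcast to <8 x bfloat> before calling the bfloat
  // overload; the result type is unchanged.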
4841 
4842   case Intrinsic::thread_pointer: {
4843     NewCall = Builder.CreateCall(NewFn, {});
4844     break;
4845   }
4846 
4847   case Intrinsic::memcpy:
4848   case Intrinsic::memmove:
4849   case Intrinsic::memset: {
4850     // We have to make sure that the call signature is what we're expecting.
4851     // We only want to change the old signatures by removing the alignment arg:
4852     //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i32, i1)
4853     //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i1)
4854     //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4855     //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
4856     // Note: i8*'s in the above can be any pointer type
4857     if (CI->arg_size() != 5) {
4858       DefaultCase();
4859       return;
4860     }
4861     // Remove alignment argument (3), and add alignment attributes to the
4862     // dest/src pointers.
4863     Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4864                       CI->getArgOperand(2), CI->getArgOperand(4)};
4865     NewCall = Builder.CreateCall(NewFn, Args);
4866     AttributeList OldAttrs = CI->getAttributes();
4867     AttributeList NewAttrs = AttributeList::get(
4868         C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4869         {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4870          OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4871     NewCall->setAttributes(NewAttrs);
4872     auto *MemCI = cast<MemIntrinsic>(NewCall);
4873     // All mem intrinsics support dest alignment.
4874     const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4875     MemCI->setDestAlignment(Align->getMaybeAlignValue());
4876     // Memcpy/Memmove also support source alignment.
4877     if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4878       MTI->setSourceAlignment(Align->getMaybeAlignValue());
4879     break;
4880   }
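  // A sketch (hypothetical IR): a five-argument call such as
  //   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i32 8, i1 false)
  // becomes the four-argument form with the alignment moved to attributes:
  //   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %d, ptr align 8 %s,
  //                                    i64 %n, i1 false)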
4881   }
4882   assert(NewCall && "Should have either set this variable or returned through "
4883                     "the default case");
4884   NewCall->takeName(CI);
4885   CI->replaceAllUsesWith(NewCall);
4886   CI->eraseFromParent();
4887 }
4888 
4889 void llvm::UpgradeCallsToIntrinsic(Function *F) {
4890   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4891 
4892   // Check if this function should be upgraded and get the replacement function
4893   // if there is one.
4894   Function *NewFn;
4895   if (UpgradeIntrinsicFunction(F, NewFn)) {
4896     // Replace all users of the old function with the new function or new
4897     // instructions. This is not a range loop because the call is deleted.
4898     for (User *U : make_early_inc_range(F->users()))
4899       if (CallBase *CB = dyn_cast<CallBase>(U))
4900         UpgradeIntrinsicCall(CB, NewFn);
4901 
4902     // Remove old function, no longer used, from the module.
4903     F->eraseFromParent();
4904   }
4905 }
4906 
4907 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4908   const unsigned NumOperands = MD.getNumOperands();
4909   if (NumOperands == 0)
4910     return &MD; // Invalid, punt to a verifier error.
4911 
4912   // Check if the tag uses struct-path aware TBAA format.
4913   if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
4914     return &MD;
4915 
4916   auto &Context = MD.getContext();
4917   if (NumOperands == 3) {
4918     Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4919     MDNode *ScalarType = MDNode::get(Context, Elts);
4920     // Create an MDNode <ScalarType, ScalarType, offset 0, const>
4921     Metadata *Elts2[] = {ScalarType, ScalarType,
4922                          ConstantAsMetadata::get(
4923                              Constant::getNullValue(Type::getInt64Ty(Context))),
4924                          MD.getOperand(2)};
4925     return MDNode::get(Context, Elts2);
4926   }
4927   // Create an MDNode <MD, MD, offset 0>
4928   Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4929                                     Type::getInt64Ty(Context)))};
4930   return MDNode::get(Context, Elts);
4931 }
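// For example (hypothetical metadata), a two-operand scalar TBAA tag
//   !0 = !{!"int", !1}
// is wrapped into the struct-path aware form
//   !{!0, !0, i64 0}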
4932 
4933 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4934                                       Instruction *&Temp) {
4935   if (Opc != Instruction::BitCast)
4936     return nullptr;
4937 
4938   Temp = nullptr;
4939   Type *SrcTy = V->getType();
4940   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4941       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4942     LLVMContext &Context = V->getContext();
4943 
4944     // We have no information about the target data layout, so we assume
4945     // that the maximum pointer size is 64 bits.
4946     Type *MidTy = Type::getInt64Ty(Context);
4947     Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4948 
4949     return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4950   }
4951 
4952   return nullptr;
4953 }
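// A sketch (hypothetical IR): a bitcast between address spaces such as
//   %q = bitcast ptr addrspace(1) %p to ptr addrspace(2)
// is expanded into the pair
//   %t = ptrtoint ptr addrspace(1) %p to i64
//   %q = inttoptr i64 %t to ptr addrspace(2)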
4954 
4955 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4956   if (Opc != Instruction::BitCast)
4957     return nullptr;
4958 
4959   Type *SrcTy = C->getType();
4960   if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4961       SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4962     LLVMContext &Context = C->getContext();
4963 
4964     // We have no information about the target data layout, so we assume
4965     // that the maximum pointer size is 64 bits.
4966     Type *MidTy = Type::getInt64Ty(Context);
4967 
4968     return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4969                                      DestTy);
4970   }
4971 
4972   return nullptr;
4973 }
4974 
4975 /// Check the debug info version number; if it is outdated, drop the debug
4976 /// info. Return true if the module is modified.
4977 bool llvm::UpgradeDebugInfo(Module &M) {
4978   if (DisableAutoUpgradeDebugInfo)
4979     return false;
4980 
4981   // We need to get metadata before the module is verified (i.e., getModuleFlag
4982   // makes assumptions that we haven't verified yet). Carefully extract the flag
4983   // from the metadata.
4984   unsigned Version = 0;
4985   if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
4986     auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
4987       if (Flag->getNumOperands() < 3)
4988         return false;
4989       if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
4990         return K->getString() == "Debug Info Version";
4991       return false;
4992     });
4993     if (OpIt != ModFlags->op_end()) {
4994       const MDOperand &ValOp = (*OpIt)->getOperand(2);
4995       if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
4996         Version = CI->getZExtValue();
4997     }
4998   }
4999 
5000   if (Version == DEBUG_METADATA_VERSION) {
5001     bool BrokenDebugInfo = false;
5002     if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5003       report_fatal_error("Broken module found, compilation aborted!");
5004     if (!BrokenDebugInfo)
5005       // Everything is ok.
5006       return false;
5007     else {
5008       // Diagnose malformed debug info.
5009       DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5010       M.getContext().diagnose(Diag);
5011     }
5012   }
5013   bool Modified = StripDebugInfo(M);
5014   if (Modified && Version != DEBUG_METADATA_VERSION) {
5015     // Diagnose a version mismatch.
5016     DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5017     M.getContext().diagnose(DiagVersion);
5018   }
5019   return Modified;
5020 }
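// The flag inspected above looks like this in IR (a hypothetical module):
//   !llvm.module.flags = !{!0}
//   !0 = !{i32 2, !"Debug Info Version", i32 3}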
5021 
5022 /// This checks for the ObjC retain/release marker, which should be upgraded.
5023 /// It returns true if the module is modified.
5024 static bool upgradeRetainReleaseMarker(Module &M) {
5025   bool Changed = false;
5026   const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5027   NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5028   if (ModRetainReleaseMarker) {
5029     MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5030     if (Op) {
5031       MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5032       if (ID) {
5033         SmallVector<StringRef, 4> ValueComp;
5034         ID->getString().split(ValueComp, "#");
5035         if (ValueComp.size() == 2) {
5036           std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5037           ID = MDString::get(M.getContext(), NewValue);
5038         }
5039         M.addModuleFlag(Module::Error, MarkerKey, ID);
5040         M.eraseNamedMetadata(ModRetainReleaseMarker);
5041         Changed = true;
5042       }
5043     }
5044   }
5045   return Changed;
5046 }
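// For example (a hypothetical marker string), a value such as
//   "mov fp, fp#marker for objc_retainAutoreleasedReturnValue"
// is split on '#', rejoined with ';' as the separator, and reattached as a
// module flag.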
5047 
5048 void llvm::UpgradeARCRuntime(Module &M) {
5049   // This lambda converts calls to ARC runtime functions into calls to the
5050   // corresponding intrinsics.
5051   auto UpgradeToIntrinsic = [&](const char *OldFunc,
5052                                 llvm::Intrinsic::ID IntrinsicFunc) {
5053     Function *Fn = M.getFunction(OldFunc);
5054 
5055     if (!Fn)
5056       return;
5057 
5058     Function *NewFn =
5059         llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5060 
5061     for (User *U : make_early_inc_range(Fn->users())) {
5062       CallInst *CI = dyn_cast<CallInst>(U);
5063       if (!CI || CI->getCalledFunction() != Fn)
5064         continue;
5065 
5066       IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5067       FunctionType *NewFuncTy = NewFn->getFunctionType();
5068       SmallVector<Value *, 2> Args;
5069 
5070       // Don't upgrade the intrinsic if it's not valid to bitcast the return
5071       // value to the return type of the old function.
5072       if (NewFuncTy->getReturnType() != CI->getType() &&
5073           !CastInst::castIsValid(Instruction::BitCast, CI,
5074                                  NewFuncTy->getReturnType()))
5075         continue;
5076 
5077       bool InvalidCast = false;
5078 
5079       for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5080         Value *Arg = CI->getArgOperand(I);
5081 
5082         // Bitcast argument to the parameter type of the new function if it's
5083         // not a variadic argument.
5084         if (I < NewFuncTy->getNumParams()) {
5085           // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5086           // to the parameter type of the new function.
5087           if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5088                                      NewFuncTy->getParamType(I))) {
5089             InvalidCast = true;
5090             break;
5091           }
5092           Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5093         }
5094         Args.push_back(Arg);
5095       }
5096 
5097       if (InvalidCast)
5098         continue;
5099 
5100       // Create a call instruction that calls the new function.
5101       CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5102       NewCall->setTailCallKind(CI->getTailCallKind());
5103       NewCall->takeName(CI);
5104 
5105       // Bitcast the return value back to the type of the old call.
5106       Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5107 
5108       if (!CI->use_empty())
5109         CI->replaceAllUsesWith(NewRetVal);
5110       CI->eraseFromParent();
5111     }
5112 
5113     if (Fn->use_empty())
5114       Fn->eraseFromParent();
5115   };
5116 
5117   // Unconditionally convert a call to "clang.arc.use" to a call to
5118   // "llvm.objc.clang.arc.use".
5119   UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5120 
5121   // Upgrade the retain/release marker. If there is no need to upgrade the
5122   // marker, either the module is already new enough to contain the new
5123   // intrinsics or it is not ARC, so there is no need to upgrade runtime calls.
5124   if (!upgradeRetainReleaseMarker(M))
5125     return;
5126 
5127   std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5128       {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5129       {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5130       {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5131       {"objc_autoreleaseReturnValue",
5132        llvm::Intrinsic::objc_autoreleaseReturnValue},
5133       {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5134       {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5135       {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5136       {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5137       {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5138       {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5139       {"objc_release", llvm::Intrinsic::objc_release},
5140       {"objc_retain", llvm::Intrinsic::objc_retain},
5141       {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5142       {"objc_retainAutoreleaseReturnValue",
5143        llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5144       {"objc_retainAutoreleasedReturnValue",
5145        llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5146       {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5147       {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5148       {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5149       {"objc_unsafeClaimAutoreleasedReturnValue",
5150        llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5151       {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5152       {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5153       {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5154       {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5155       {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5156       {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5157       {"objc_arc_annotation_topdown_bbstart",
5158        llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5159       {"objc_arc_annotation_topdown_bbend",
5160        llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5161       {"objc_arc_annotation_bottomup_bbstart",
5162        llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5163       {"objc_arc_annotation_bottomup_bbend",
5164        llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5165 
5166   for (auto &I : RuntimeFuncs)
5167     UpgradeToIntrinsic(I.first, I.second);
5168 }
5169 
5170 bool llvm::UpgradeModuleFlags(Module &M) {
5171   NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5172   if (!ModFlags)
5173     return false;
5174 
5175   bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5176   bool HasSwiftVersionFlag = false;
5177   uint8_t SwiftMajorVersion = 0, SwiftMinorVersion = 0;
5178   uint32_t SwiftABIVersion = 0;
5179   auto Int8Ty = Type::getInt8Ty(M.getContext());
5180   auto Int32Ty = Type::getInt32Ty(M.getContext());
5181 
5182   for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5183     MDNode *Op = ModFlags->getOperand(I);
5184     if (Op->getNumOperands() != 3)
5185       continue;
5186     MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5187     if (!ID)
5188       continue;
5189     auto SetBehavior = [&](Module::ModFlagBehavior B) {
5190       Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5191                               Type::getInt32Ty(M.getContext()), B)),
5192                           MDString::get(M.getContext(), ID->getString()),
5193                           Op->getOperand(2)};
5194       ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5195       Changed = true;
5196     };
5197 
5198     if (ID->getString() == "Objective-C Image Info Version")
5199       HasObjCFlag = true;
5200     if (ID->getString() == "Objective-C Class Properties")
5201       HasClassProperties = true;
5202     // Upgrade PIC from Error/Max to Min.
5203     if (ID->getString() == "PIC Level") {
5204       if (auto *Behavior =
5205               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5206         uint64_t V = Behavior->getLimitedValue();
5207         if (V == Module::Error || V == Module::Max)
5208           SetBehavior(Module::Min);
5209       }
5210     }
5211     // Upgrade "PIE Level" from Error to Max.
5212     if (ID->getString() == "PIE Level")
5213       if (auto *Behavior =
5214               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5215         if (Behavior->getLimitedValue() == Module::Error)
5216           SetBehavior(Module::Max);
5217 
5218     // Upgrade branch protection and return address signing module flags. The
5219     // module flag behavior for these flags was Error and is now Min.
5220     if (ID->getString() == "branch-target-enforcement" ||
5221         ID->getString().starts_with("sign-return-address")) {
5222       if (auto *Behavior =
5223               mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5224         if (Behavior->getLimitedValue() == Module::Error) {
5225           Type *Int32Ty = Type::getInt32Ty(M.getContext());
5226           Metadata *Ops[3] = {
5227               ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5228               Op->getOperand(1), Op->getOperand(2)};
5229           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5230           Changed = true;
5231         }
5232       }
5233     }
5234 
5235     // Upgrade the Objective-C Image Info Section. Remove the whitespace in
5236     // the section name so that llvm-lto will not complain about mismatching
5237     // module flags that are functionally the same.
5238     if (ID->getString() == "Objective-C Image Info Section") {
5239       if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5240         SmallVector<StringRef, 4> ValueComp;
5241         Value->getString().split(ValueComp, " ");
5242         if (ValueComp.size() != 1) {
5243           std::string NewValue;
5244           for (auto &S : ValueComp)
5245             NewValue += S.str();
5246           Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5247                               MDString::get(M.getContext(), NewValue)};
5248           ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5249           Changed = true;
5250         }
5251       }
5252     }
5253 
5254     // The IR upgrader turns the i32 "Objective-C Garbage Collection" flag into
5255     // an i8 value. If the higher bits are set, it also adds Swift module flags.
5256     if (ID->getString() == "Objective-C Garbage Collection") {
5257       auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5258       if (Md) {
5259         assert(Md->getValue() && "Expected non-empty metadata");
5260         auto Type = Md->getValue()->getType();
5261         if (Type == Int8Ty)
5262           continue;
5263         unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5264         if ((Val & 0xff) != Val) {
5265           HasSwiftVersionFlag = true;
5266           SwiftABIVersion = (Val & 0xff00) >> 8;
5267           SwiftMajorVersion = (Val & 0xff000000) >> 24;
5268           SwiftMinorVersion = (Val & 0xff0000) >> 16;
5269         }
5270         Metadata *Ops[3] = {
5271           ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
5272           Op->getOperand(1),
5273           ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5274         ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5275         Changed = true;
5276       }
5277     }
5278 
5279     if (ID->getString() == "amdgpu_code_object_version") {
5280       Metadata *Ops[3] = {
5281           Op->getOperand(0),
5282           MDString::get(M.getContext(), "amdhsa_code_object_version"),
5283           Op->getOperand(2)};
5284       ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5285       Changed = true;
5286     }
5287   }
5288 
5289   // "Objective-C Class Properties" is recently added for Objective-C. We
5290   // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5291   // flag of value 0, so we can correclty downgrade this flag when trying to
5292   // link an ObjC bitcode without this module flag with an ObjC bitcode with
5293   // this module flag.
5294   if (HasObjCFlag && !HasClassProperties) {
5295     M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5296                     (uint32_t)0);
5297     Changed = true;
5298   }
5299 
5300   if (HasSwiftVersionFlag) {
5301     M.addModuleFlag(Module::Error, "Swift ABI Version",
5302                     SwiftABIVersion);
5303     M.addModuleFlag(Module::Error, "Swift Major Version",
5304                     ConstantInt::get(Int8Ty, SwiftMajorVersion));
5305     M.addModuleFlag(Module::Error, "Swift Minor Version",
5306                     ConstantInt::get(Int8Ty, SwiftMinorVersion));
5307     Changed = true;
5308   }
5309 
5310   return Changed;
5311 }
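// A worked example of the Swift decoding above (hypothetical flag value): an
// "Objective-C Garbage Collection" payload of 0x02010600 yields Swift Major
// Version 2 (bits 31:24), Swift Minor Version 1 (bits 23:16), Swift ABI
// Version 6 (bits 15:8), and an i8 GC value of 0 (bits 7:0).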
5312 
5313 void llvm::UpgradeSectionAttributes(Module &M) {
5314   auto TrimSpaces = [](StringRef Section) -> std::string {
5315     SmallVector<StringRef, 5> Components;
5316     Section.split(Components, ',');
5317 
5318     SmallString<32> Buffer;
5319     raw_svector_ostream OS(Buffer);
5320 
5321     for (auto Component : Components)
5322       OS << ',' << Component.trim();
5323 
5324     return std::string(OS.str().substr(1));
5325   };
5326 
5327   for (auto &GV : M.globals()) {
5328     if (!GV.hasSection())
5329       continue;
5330 
5331     StringRef Section = GV.getSection();
5332 
5333     if (!Section.starts_with("__DATA, __objc_catlist"))
5334       continue;
5335 
5336     // __DATA, __objc_catlist, regular, no_dead_strip
5337     // __DATA,__objc_catlist,regular,no_dead_strip
5338     GV.setSection(TrimSpaces(Section));
5339   }
5340 }
5341 
5342 namespace {
5343 // Prior to LLVM 10.0, the strictfp attribute could be used on individual
5344 // callsites within a function that did not also have the strictfp attribute.
5345 // Since 10.0, if strict FP semantics are needed within a function, the
5346 // function must have the strictfp attribute and all calls within the function
5347 // must also have the strictfp attribute. This latter restriction is
5348 // necessary to prevent unwanted libcall simplification when a function is
5349 // being cloned (such as for inlining).
5350 //
5351 // The "dangling" strictfp attribute usage was only used to prevent constant
5352 // folding and other libcall simplification. The nobuiltin attribute on the
5353 // callsite has the same effect.
5354 struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5355   StrictFPUpgradeVisitor() = default;
5356 
5357   void visitCallBase(CallBase &Call) {
5358     if (!Call.isStrictFP())
5359       return;
5360     if (isa<ConstrainedFPIntrinsic>(&Call))
5361       return;
5362     // If we get here, the caller doesn't have the strictfp attribute
5363     // but this callsite does. Replace the strictfp attribute with nobuiltin.
5364     Call.removeFnAttr(Attribute::StrictFP);
5365     Call.addFnAttr(Attribute::NoBuiltin);
5366   }
5367 };
5368 
5369 /// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
5370 struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
5371     : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
5372   AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
5373 
5374   void visitAtomicRMWInst(AtomicRMWInst &RMW) {
5375     if (!RMW.isFloatingPointOperation())
5376       return;
5377 
5378     MDNode *Empty = MDNode::get(RMW.getContext(), {});
5379     RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
5380     RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
5381     RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
5382   }
5383 };
5384 } // namespace
5385 
5386 void llvm::UpgradeFunctionAttributes(Function &F) {
5387   // If a function definition doesn't have the strictfp attribute,
5388   // convert any callsite strictfp attributes to nobuiltin.
5389   if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5390     StrictFPUpgradeVisitor SFPV;
5391     SFPV.visit(F);
5392   }
5393 
5394   // Remove all incompatible attributes from the function.
5395   F.removeRetAttrs(AttributeFuncs::typeIncompatible(
5396       F.getReturnType(), F.getAttributes().getRetAttrs()));
5397   for (auto &Arg : F.args())
5398     Arg.removeAttrs(
5399         AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
5400 
5401   // Older versions of LLVM treated an "implicit-section-name" attribute
5402   // similarly to directly setting the section on a Function.
5403   if (Attribute A = F.getFnAttribute("implicit-section-name");
5404       A.isValid() && A.isStringAttribute()) {
5405     F.setSection(A.getValueAsString());
5406     F.removeFnAttr("implicit-section-name");
5407   }
5408 
5409   if (!F.empty()) {
5410     // For some reason this is called twice, and the first time is before any
5411     // instructions are loaded into the body, hence the !F.empty() guard above.
5412 
5413     if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
5414         A.isValid()) {
5415 
5416       if (A.getValueAsBool()) {
5417         AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
5418         Visitor.visit(F);
5419       }
5420 
5421       // We will leave behind dead attribute uses on external declarations, but
5422       // clang never added these to declarations anyway.
5423       F.removeFnAttr("amdgpu-unsafe-fp-atomics");
5424     }
5425   }
5426 }
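// For example (hypothetical IR), inside a function that lacks the strictfp
// attribute, a callsite such as
//   call double @sin(double %x) #0    ; attributes #0 = { strictfp }
// has strictfp replaced with nobuiltin by StrictFPUpgradeVisitor.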
5427 
5428 static bool isOldLoopArgument(Metadata *MD) {
5429   auto *T = dyn_cast_or_null<MDTuple>(MD);
5430   if (!T)
5431     return false;
5432   if (T->getNumOperands() < 1)
5433     return false;
5434   auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5435   if (!S)
5436     return false;
5437   return S->getString().starts_with("llvm.vectorizer.");
5438 }
5439 
5440 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5441   StringRef OldPrefix = "llvm.vectorizer.";
5442   assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5443 
5444   if (OldTag == "llvm.vectorizer.unroll")
5445     return MDString::get(C, "llvm.loop.interleave.count");
5446 
5447   return MDString::get(
5448       C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5449              .str());
5450 }
5451 
5452 static Metadata *upgradeLoopArgument(Metadata *MD) {
5453   auto *T = dyn_cast_or_null<MDTuple>(MD);
5454   if (!T)
5455     return MD;
5456   if (T->getNumOperands() < 1)
5457     return MD;
5458   auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5459   if (!OldTag)
5460     return MD;
5461   if (!OldTag->getString().starts_with("llvm.vectorizer."))
5462     return MD;
5463 
5464   // This has an old tag.  Upgrade it.
5465   SmallVector<Metadata *, 8> Ops;
5466   Ops.reserve(T->getNumOperands());
5467   Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5468   for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5469     Ops.push_back(T->getOperand(I));
5470 
5471   return MDTuple::get(T->getContext(), Ops);
5472 }
5473 
5474 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
5475   auto *T = dyn_cast<MDTuple>(&N);
5476   if (!T)
5477     return &N;
5478 
5479   if (none_of(T->operands(), isOldLoopArgument))
5480     return &N;
5481 
5482   SmallVector<Metadata *, 8> Ops;
5483   Ops.reserve(T->getNumOperands());
5484   for (Metadata *MD : T->operands())
5485     Ops.push_back(upgradeLoopArgument(MD));
5486 
5487   return MDTuple::get(T->getContext(), Ops);
5488 }
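// For example (hypothetical metadata), the old loop annotation
//   !0 = !{!"llvm.vectorizer.width", i32 4}
// becomes
//   !0 = !{!"llvm.loop.vectorize.width", i32 4}
// while "llvm.vectorizer.unroll" maps to "llvm.loop.interleave.count".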
5489 
5490 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
5491   Triple T(TT);
5492   // The only data layout upgrade needed for pre-GCN, SPIR, or SPIRV is setting
5493   // the address space of globals to 1. This does not apply to SPIRV Logical.
5494   if (((T.isAMDGPU() && !T.isAMDGCN()) ||
5495        (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
5496       !DL.contains("-G") && !DL.starts_with("G")) {
5497     return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5498   }
5499 
5500   if (T.isLoongArch64() || T.isRISCV64()) {
5501     // Make i32 a native type for 64-bit LoongArch and RISC-V.
5502     auto I = DL.find("-n64-");
5503     if (I != StringRef::npos)
5504       return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
5505     return DL.str();
5506   }
5507 
5508   std::string Res = DL.str();
5509   // AMDGCN data layout upgrades.
5510   if (T.isAMDGCN()) {
5511     // Define address spaces for constants.
5512     if (!DL.contains("-G") && !DL.starts_with("G"))
5513       Res.append(Res.empty() ? "G1" : "-G1");
5514 
5515     // Add missing non-integral declarations.
5516     // This goes before adding new address spaces to prevent incoherent string
5517     // values.
5518     if (!DL.contains("-ni") && !DL.starts_with("ni"))
5519       Res.append("-ni:7:8:9");
5520     // Update ni:7 to ni:7:8:9.
5521     if (DL.ends_with("ni:7"))
5522       Res.append(":8:9");
5523     if (DL.ends_with("ni:7:8"))
5524       Res.append(":9");
5525 
5526     // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
5527     // resources). An empty data layout has already been upgraded to G1 by now.
5528     if (!DL.contains("-p7") && !DL.starts_with("p7"))
5529       Res.append("-p7:160:256:256:32");
5530     if (!DL.contains("-p8") && !DL.starts_with("p8"))
5531       Res.append("-p8:128:128");
5532     if (!DL.contains("-p9") && !DL.starts_with("p9"))
5533       Res.append("-p9:192:256:256:32");
5534 
5535     return Res;
5536   }
5537 
5538   auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
5539     // If the datalayout matches the expected format, add pointer size address
5540     // spaces to the datalayout.
5541     StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
5542     if (!DL.contains(AddrSpaces)) {
5543       SmallVector<StringRef, 4> Groups;
5544       Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
5545       if (R.match(Res, &Groups))
5546         Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5547     }
5548   };
5549 
5550   // AArch64 data layout upgrades.
5551   if (T.isAArch64()) {
5552     // Add "-Fn32"
5553     if (!DL.empty() && !DL.contains("-Fn32"))
5554       Res.append("-Fn32");
5555     AddPtr32Ptr64AddrSpaces();
5556     return Res;
5557   }
5558 
5559   if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
5560       T.isWasm()) {
5561     // MIPS64 with the o32 ABI did not add "-i128:128".
5562     // Insert "-i128:128" after "-i64:64" if it is not already present.
5563     std::string I64 = "-i64:64";
5564     std::string I128 = "-i128:128";
5565     if (!StringRef(Res).contains(I128)) {
5566       size_t Pos = Res.find(I64);
5567       if (Pos != std::string::npos)
5568         Res.insert(Pos + I64.size(), I128);
5569     }
5570     return Res;
5571   }
5572 
5573   if (!T.isX86())
5574     return Res;
5575 
5576   AddPtr32Ptr64AddrSpaces();
5577 
5578   // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5579   // for i128 operations prior to this being reflected in the data layout, and
5580   // clang mostly produced LLVM IR that already aligned i128 to 16 byte
5581   // boundaries, so although this is a breaking change, the upgrade is expected
5582   // to fix more IR than it breaks.
5583   // Intel MCU is an exception and uses 4-byte-alignment.
5584   if (!T.isOSIAMCU()) {
5585     std::string I128 = "-i128:128";
5586     if (StringRef Ref = Res; !Ref.contains(I128)) {
5587       SmallVector<StringRef, 4> Groups;
5588       Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5589       if (R.match(Res, &Groups))
5590         Res = (Groups[1] + I128 + Groups[3]).str();
5591     }
5592   }
5593 
5594   // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5595   // Raising the alignment is safe because Clang did not produce f80 values in
5596   // the MSVC environment before this upgrade was added.
5597   if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5598     StringRef Ref = Res;
5599     auto I = Ref.find("-f80:32-");
5600     if (I != StringRef::npos)
5601       Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
5602   }
5603 
5604   return Res;
5605 }
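// A worked example (hypothetical layout strings): on x86-64,
//   "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
// gains the pointer address spaces and i128 alignment, becoming
//   "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"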
5606 
5607 void llvm::UpgradeAttributes(AttrBuilder &B) {
5608   StringRef FramePointer;
5609   Attribute A = B.getAttribute("no-frame-pointer-elim");
5610   if (A.isValid()) {
5611     // The value can be "true" or "false".
5612     FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5613     B.removeAttribute("no-frame-pointer-elim");
5614   }
5615   if (B.contains("no-frame-pointer-elim-non-leaf")) {
5616     // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5617     if (FramePointer != "all")
5618       FramePointer = "non-leaf";
5619     B.removeAttribute("no-frame-pointer-elim-non-leaf");
5620   }
5621   if (!FramePointer.empty())
5622     B.addAttribute("frame-pointer", FramePointer);
5623 
5624   A = B.getAttribute("null-pointer-is-valid");
5625   if (A.isValid()) {
5626     // The value can be "true" or "false".
5627     bool NullPointerIsValid = A.getValueAsString() == "true";
5628     B.removeAttribute("null-pointer-is-valid");
5629     if (NullPointerIsValid)
5630       B.addAttribute(Attribute::NullPointerIsValid);
5631   }
5632 }
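// For example (hypothetical attributes): "no-frame-pointer-elim"="true" becomes
// "frame-pointer"="all", and "null-pointer-is-valid"="true" becomes the enum
// attribute null_pointer_is_valid.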
5633 
5634 void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5635   // clang.arc.attachedcall bundles are now required to have an operand.
5636   // If they don't, it's okay to drop them entirely: when there is an operand,
5637   // the "attachedcall" is meaningful and required, but without an operand,
5638   // it's just a marker NOP.  Dropping it merely prevents an optimization.
5639   erase_if(Bundles, [&](OperandBundleDef &OBD) {
5640     return OBD.getTag() == "clang.arc.attachedcall" &&
5641            OBD.inputs().empty();
5642   });
5643 }
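// For example (hypothetical IR), the operand-less bundle in
//   call void @foo() [ "clang.arc.attachedcall"() ]
// is dropped entirely, while a bundle that carries an operand is kept.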
5644